/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tajo.plan.function.stream; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.datum.AnyDatum; import org.apache.tajo.datum.Datum; import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.plan.function.FunctionContext; import org.apache.tajo.plan.function.PythonAggFunctionInvoke.PythonAggFunctionContext; import org.apache.tajo.storage.Tuple; import java.io.IOException; import java.io.OutputStream; public class CSVLineSerializer extends TextLineSerializer { private FieldSerializerDeserializer serde; private byte[] nullChars; private byte[] delimiter; public final static String PARAM_DELIM = "|\t_"; public CSVLineSerializer(TableMeta meta) { super(meta); } @Override public void init() { nullChars = TextLineSerDe.getNullCharsAsBytes(meta); delimiter = "|,_".getBytes(); serde = new TextFieldSerializerDeserializer(meta); } @Override public int serialize(OutputStream out, Tuple input, Schema schema) throws IOException { int writtenBytes = 0; for (int i = 0; i < input.size(); i++) { Datum datum = input.asDatum(i); String typeStr; if (datum.type().isAny()) { typeStr = getTypeString(((AnyDatum)datum).getActual()); } else { typeStr = getTypeString(datum); } out.write(typeStr.getBytes()); out.write(PARAM_DELIM.getBytes()); writtenBytes += serde.serialize(out, datum, schema.getColumn(i).getDataType(), nullChars); if (input.size() - 1 > i) { out.write(delimiter); writtenBytes += delimiter.length; } } return writtenBytes; } @Override public int serializeContext(OutputStream out, FunctionContext context) throws IOException { int writtenBytes = 0; PythonAggFunctionContext pythonContext = (PythonAggFunctionContext) context; if (pythonContext.getJsonData() == null) { byte[] bytes = "-".getBytes(); out.write(bytes); writtenBytes += bytes.length; } else { byte[] bytes = pythonContext.getJsonData().getBytes(); out.write(bytes); writtenBytes += bytes.length; } return writtenBytes; } @Override public void release() { } public static String getTypeString(Datum val) { switch (val.kind()) { case NULL_TYPE: return "-"; case BOOLEAN: return "B"; case INT1: case INT2: case INT4: return "I"; case INT8: return "L"; case FLOAT4: return "F"; case FLOAT8: return "D"; case NUMERIC: return "E"; case CHAR: case TEXT: return "C"; case DATE: case TIME: case TIMESTAMP: return "T"; case BLOB: return "A"; default: throw new TajoRuntimeException(new UnsupportedException("data type '" + val.type() + "'")); } } }