/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.plan.function.stream;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufProcessor;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.datum.Datum;
import org.apache.tajo.plan.function.FunctionContext;
import org.apache.tajo.plan.function.PythonAggFunctionInvoke.PythonAggFunctionContext;
import org.apache.tajo.storage.Tuple;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Line deserializer for delimiter-separated text.
 *
 * <p>A line can either be split into fields, with the projected fields converted to
 * {@link Datum}s and stored in a {@link Tuple}, or be handed over as a whole string to a
 * {@link PythonAggFunctionContext}.
 *
 * <p>{@link #init()} must be called before deserializing into a tuple, because it creates the
 * field splitter, the field deserializer and the null-marker buffer used there.
 * {@link #release()} gives the null-marker buffer back.
 */
public class CSVLineDeserializer extends TextLineDeserializer {
  /** Byte processor used to locate the next field delimiter in a line. */
  private ByteBufProcessor processor;
  /** Converts the bytes of a single field into a {@link Datum}. */
  private FieldSerializerDeserializer fieldSerDer;
  /** Null-marker bytes obtained from the table meta; released in {@link #release()}. */
  private ByteBuf nullChars;
  /** Delimiter length minus one; subtracted from each delimited field's length. */
  private int delimiterCompensation;

  /**
   * Creates a deserializer; all arguments are passed through to the superclass.
   *
   * @param schema              schema whose columns provide the data type of each field
   * @param meta                table meta from which the delimiter and null marker are read
   * @param targetColumnIndexes indexes of the columns to materialize
   */
  public CSVLineDeserializer(Schema schema, TableMeta meta, int[] targetColumnIndexes) {
    super(schema, meta, targetColumnIndexes);
  }

  /**
   * Prepares the delimiter processor, the null-marker buffer and the field deserializer from
   * the table meta. A null-marker buffer left over from an earlier call is released first, so
   * calling this method repeatedly does not leak the previous buffer.
   */
  @Override
  public void init() {
    byte[] delimiter = CSVLineSerDe.getFieldDelimiter(meta);
    // Only the first delimiter byte is searched for; the remaining delimiter bytes are
    // accounted for through delimiterCompensation when a field's length is computed.
    this.processor = new FieldSplitProcessor(delimiter[0]);
    this.delimiterCompensation = delimiter.length - 1;

    if (nullChars != null) {
      nullChars.release();
    }
    nullChars = TextLineSerDe.getNullChars(meta);

    fieldSerDer = new TextFieldSerializerDeserializer(meta);
  }

  /**
   * Splits one line into fields and stores the projected ones in {@code output}.
   *
   * <p>Does nothing when {@code lineBuf} is {@code null} or when there are no target columns.
   * Fields are visited in order; scanning stops as soon as every target column has been
   * stored or no further delimiter is found. Target columns beyond the last field of the line
   * are not written to {@code output}.
   *
   * <p>The reader and writer indexes of {@code lineBuf} are modified while fields are read.
   *
   * @param lineBuf buffer holding exactly one line
   * @param output  tuple receiving each projected field at its column index
   * @throws IOException          declared for the field deserializer
   * @throws TextLineParsingError declared for the field deserializer
   */
  @Override
  public void deserialize(final ByteBuf lineBuf, Tuple output) throws IOException, TextLineParsingError {
    int[] projection = targetColumnIndexes;
    if (lineBuf == null || targetColumnIndexes == null || targetColumnIndexes.length == 0) {
      return;
    }

    // NOTE(review): offsets below are absolute buffer indexes starting at 0 while the row
    // length comes from readableBytes(); this presumably expects readerIndex == 0 - confirm.
    final int rowLength = lineBuf.readableBytes();
    int start = 0, fieldLength = 0, end = 0;

    // Projection: currentTarget walks the target columns, currentIndex walks the line's fields.
    int currentTarget = 0;
    int currentIndex = 0;

    while (end != -1) {
      end = lineBuf.forEachByte(start, rowLength - start, processor);

      if (end < 0) {
        // No further delimiter: the last field runs to the end of the row.
        fieldLength = rowLength - start;
      } else {
        fieldLength = end - start - delimiterCompensation;
      }

      if (projection.length > currentTarget && currentIndex == projection[currentTarget]) {
        // Restrict the readable window of the buffer to just this field before decoding it.
        lineBuf.setIndex(start, start + fieldLength);
        Datum datum = fieldSerDer.deserialize(lineBuf, schema.getColumn(currentIndex).getDataType(), nullChars);
        output.put(currentIndex, datum);
        currentTarget++;
      }

      if (projection.length == currentTarget) {
        // Every requested column has been produced; the rest of the line is not needed.
        break;
      }

      start = end + 1;
      currentIndex++;
    }
  }

  /**
   * Reads all readable bytes of the line and stores them as a string in the given context via
   * {@link PythonAggFunctionContext#setJsonData}.
   *
   * <p>Does nothing when {@code lineBuf} is {@code null}. The bytes are consumed from
   * {@code lineBuf}.
   *
   * @param lineBuf buffer holding one line
   * @param context function context; must be a {@link PythonAggFunctionContext}
   * @throws ClassCastException if {@code context} is of any other type
   */
  @Override
  public void deserialize(final ByteBuf lineBuf, FunctionContext context) throws IOException, TextLineParsingError {
    PythonAggFunctionContext pythonContext = (PythonAggFunctionContext) context;
    if (lineBuf == null) {
      return;
    }

    byte[] bytes = new byte[lineBuf.readableBytes()];
    lineBuf.readBytes(bytes);
    // Decode with an explicit charset rather than the JVM's platform default, so the same
    // bytes yield the same string on every host.
    // NOTE(review): assumes the producer of this line writes UTF-8 - confirm.
    pythonContext.setJsonData(new String(bytes, StandardCharsets.UTF_8));
  }

  /**
   * Releases the null-marker buffer, if one is held, and clears the reference so that a
   * repeated call is a no-op.
   */
  @Override
  public void release() {
    if (nullChars != null) {
      nullChars.release();
      nullChars = null;
    }
  }
}