/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.parser.text; import com.streamsets.pipeline.api.Field; import com.streamsets.pipeline.api.Record; import com.streamsets.pipeline.api.Stage; import com.streamsets.pipeline.api.impl.Utils; import com.streamsets.pipeline.lib.io.AbstractOverrunDelimitedReader; import com.streamsets.pipeline.lib.io.OverrunCustomDelimiterReader; import com.streamsets.pipeline.lib.io.OverrunLineReader; import com.streamsets.pipeline.api.ext.io.OverrunReader; import com.streamsets.pipeline.lib.parser.AbstractDataParser; import com.streamsets.pipeline.lib.parser.DataParserException; import org.apache.commons.io.IOUtils; import org.apache.commons.pool2.impl.GenericObjectPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.HashMap; import java.util.Map; public class TextCharDataParser extends AbstractDataParser { private static final Logger LOG = LoggerFactory.getLogger(TextCharDataParser.class); private final Stage.Context context; private final String readerId; private final boolean collapseAllLines; private final AbstractOverrunDelimitedReader reader; private final int maxObjectLen; private final String fieldTextName; private final String fieldTruncatedName; private final StringBuilder recordIdSb; private final int recordIdOffset; private final GenericObjectPool<StringBuilder> stringBuilderPool; private final StringBuilder stringBuilder; private boolean eof; public TextCharDataParser( Stage.Context context, String readerId, boolean collapseAllLines, boolean useCustomDelimiter, String customDelimiter, boolean includeCustomDelimiterInText, OverrunReader reader, long readerOffset, int maxObjectLen, String fieldTextName, String fieldTruncatedName, GenericObjectPool<StringBuilder> stringBuilderPool ) throws IOException { this.context = context; this.readerId = readerId; this.collapseAllLines = collapseAllLines; this.reader = (!collapseAllLines && useCustomDelimiter)? new OverrunCustomDelimiterReader(reader, maxObjectLen, customDelimiter, includeCustomDelimiterInText): new OverrunLineReader(reader, maxObjectLen); this.maxObjectLen = maxObjectLen; this.fieldTextName = fieldTextName; this.fieldTruncatedName = fieldTruncatedName; reader.setEnabled(false); IOUtils.skipFully(reader, readerOffset); reader.setEnabled(true); this.stringBuilderPool = stringBuilderPool; try { this.stringBuilder = stringBuilderPool.borrowObject(); LOG.debug("Borrowed string builder from pool. Num Active {}, Num Idle {}", this.stringBuilderPool.getNumActive(), this.stringBuilderPool.getNumIdle()); } catch (Exception e) { throw new IOException(Utils.format("Error borrowing string builder object from pool : {}", e.toString()), e); } recordIdSb = new StringBuilder(readerId.length() + 15); recordIdSb.append(readerId).append("::"); recordIdOffset = recordIdSb.length(); } private boolean isOverMaxObjectLen(int len) { return maxObjectLen > -1 && len > maxObjectLen; } private boolean isTruncated(int len) { return isOverMaxObjectLen(len) || truncated; } @Override public Record parse() throws IOException, DataParserException { Record record; if (collapseAllLines) { record = parseAll(); } else { record = parseLine(); } return record; } public Record parseAll() throws IOException, DataParserException { Record record = null; reader.resetCount(); long offset = reader.getPos(); stringBuilder.setLength(0); while (reader.readLine(stringBuilder) > -1) { stringBuilder.append('\n'); } if (stringBuilder.length() > 0) { record = context.createRecord(readerId + "::" + offset); Map<String, Field> map = new HashMap<>(); map.put(fieldTextName, Field.create(stringBuilder.toString())); if (isTruncated(stringBuilder.length())) { map.put(fieldTruncatedName, Field.create(true)); } record.set(Field.create(map)); } eof = true; return record; } public Record parseLine() throws IOException, DataParserException { reader.resetCount(); long offset = reader.getPos(); stringBuilder.setLength(0); int read = reader.readLine(stringBuilder); Record record = null; if (read > -1) { recordIdSb.setLength(recordIdOffset); recordIdSb.append(offset); record = context.createRecord(recordIdSb.toString()); Map<String, Field> map = new HashMap<>(); map.put(fieldTextName, Field.create(stringBuilder.toString())); if (isTruncated(read)) { map.put(fieldTruncatedName, Field.create(true)); } record.set(Field.create(map)); } else { eof = true; } return record; } @Override public String getOffset() { return (eof) ? String.valueOf(-1) : String.valueOf(reader.getPos()); } @Override public void close() throws IOException { stringBuilderPool.returnObject(this.stringBuilder); LOG.debug("Returned string builder to pool. Num Active {}, Num Idle {}", this.stringBuilderPool.getNumActive(), this.stringBuilderPool.getNumIdle()); reader.close(); } }