/**
 * Copyright 2015 StreamSets Inc.
 *
 * Licensed under the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.lib.parser;

import com.streamsets.pipeline.api.FileRef;
import com.streamsets.pipeline.api.ext.io.OverrunReader;
import com.streamsets.pipeline.api.impl.Utils;
import com.streamsets.pipeline.lib.data.DataFactory;
import com.streamsets.pipeline.api.ext.io.OverrunReader;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;

/**
 * Base class for factories that create {@link DataParser} instances.
 *
 * <p>The convenience overloads accept byte arrays, strings, readers and files and funnel
 * into the two abstract methods that subclasses implement: one for {@link InputStream}
 * sources and one for {@link Reader} sources.
 */
public abstract class DataParserFactory extends DataFactory {

  /** Value handed to the string builder pool factory when the configured max record length is not positive. */
  private static final int DEFAULT_MAX_RECORD_LENGTH = 1024;

  /**
   * Creates the factory and verifies, via {@code Utils.checkState}, that the max record
   * length in the given settings is not zero.
   *
   * @param settings factory settings, forwarded to the superclass constructor
   */
  protected DataParserFactory(Settings settings) {
    super(settings);
    Utils.checkState(settings.getMaxRecordLen() != 0, "maxDataLen has not been set");
  }

  /**
   * Creates a parser over a slice of a byte array. The slice is wrapped in a
   * {@link ByteArrayInputStream} and parsing starts at offset {@code "0"}.
   *
   * @param id identifier passed through to the created parser
   * @param data array holding the bytes to parse
   * @param offset index of the first byte to read from {@code data}
   * @param len maximum number of bytes to read from {@code data}
   * @return the parser produced by {@link #getParser(String, InputStream, String)}
   * @throws DataParserException if the stream based overload fails
   */
  public DataParser getParser(String id, byte[] data, int offset, int len) throws DataParserException {
    return getParser(id, new ByteArrayInputStream(data, offset, len), "0");
  }

  /**
   * Creates a parser over an entire byte array.
   *
   * @param id identifier passed through to the created parser
   * @param data bytes to parse
   * @return the parser produced for the whole array
   * @throws DataParserException if the delegated overload fails
   */
  public DataParser getParser(String id, byte[] data) throws DataParserException {
    return getParser(id, data, 0, data.length);
  }

  /**
   * Creates a parser over the characters of a string, wrapped in a {@link StringReader}.
   *
   * @param id identifier passed through to the created parser
   * @param data text to parse
   * @return the parser produced by {@link #getParser(String, Reader)}
   * @throws DataParserException if the delegated overload fails
   */
  public DataParser getParser(String id, String data) throws DataParserException {
    return getParser(id, new StringReader(data));
  }

  /**
   * Creates a parser over a reader, using {@code 0} as the offset.
   *
   * @param id identifier passed through to the created parser
   * @param reader character source to parse
   * @return the parser produced by {@link #getParser(String, Reader, long)}
   * @throws DataParserException if the delegated overload fails
   */
  public DataParser getParser(String id, Reader reader) throws DataParserException {
    return getParser(id, reader, 0);
  }

  /**
   * Creates a parser over a file. The file's simple name is used as the parser id.
   *
   * <p>If the parser cannot be created, the file stream opened here is closed before the
   * failure propagates, so a failed call does not leave an open file handle behind.
   *
   * @param file file to open for reading
   * @param fileOffset offset passed through to {@link #getParser(String, InputStream, String)}
   * @return the parser produced for the opened file stream
   * @throws DataParserException with {@code Errors.DATA_PARSER_00} if the file cannot be
   *     opened, or whatever the stream based overload throws
   */
  public DataParser getParser(File file, String fileOffset) throws DataParserException {
    final FileInputStream fileStream;
    try {
      fileStream = new FileInputStream(file);
    } catch (FileNotFoundException e) {
      throw new DataParserException(Errors.DATA_PARSER_00, file.getAbsolutePath(), e.toString(), e);
    }

    // A flag plus finally covers every way the delegate can fail without having to
    // enumerate exception types here.
    boolean parserCreated = false;
    try {
      DataParser parser = getParser(file.getName(), fileStream, fileOffset);
      parserCreated = true;
      return parser;
    } finally {
      if (!parserCreated) {
        closeAfterFailure(fileStream);
      }
    }
  }

  /**
   * Closes a stream while a failure is already propagating. A secondary close error is
   * deliberately dropped so it cannot replace the original exception.
   */
  private static void closeAfterFailure(InputStream stream) {
    try {
      stream.close();
    } catch (IOException ignored) {
      // The failure that triggered this cleanup is the one the caller needs to see.
    }
  }

  /**
   * Creates a parser that reads from an input stream.
   *
   * @param id identifier for the created parser
   * @param is byte source to parse
   * @param offset starting offset, expressed as a string
   * @return the created parser
   * @throws DataParserException if the parser cannot be created
   */
  public abstract DataParser getParser(String id, InputStream is, String offset) throws DataParserException;

  /**
   * Creates a parser that reads from a reader.
   *
   * @param id identifier for the created parser
   * @param reader character source to parse
   * @param offset starting offset
   * @return the created parser
   * @throws DataParserException if the parser cannot be created
   */
  public abstract DataParser getParser(String id, Reader reader, long offset) throws DataParserException;

  /**
   * Creates a parser from a {@link FileRef}. This base implementation does not support the
   * operation and always throws; subclasses may override it.
   *
   * @param id identifier for the created parser
   * @param metadata metadata accompanying the file reference
   * @param fileRef reference to the file to parse
   * @return never returns normally in this base implementation
   * @throws DataParserException declared for overriding implementations
   * @throws UnsupportedOperationException always, in this base implementation
   */
  public DataParser getParser(
      String id,
      Map<String, Object> metadata,
      FileRef fileRef
  ) throws DataParserException {
    throw new UnsupportedOperationException();
  }

  /**
   * Wraps an input stream in a buffered, charset-decoding {@link OverrunReader}. The charset,
   * overrun limit and control-character removal flag are read from {@code getSettings()}.
   *
   * @param is byte source to decode
   * @return an overrun reader on top of the decoded, buffered stream
   */
  protected OverrunReader createReader(InputStream is) {
    Reader bufferedReader = new BufferedReader(new InputStreamReader(is, getSettings().getCharset()));
    // NOTE(review): the meaning of the literal 'false' argument is defined by OverrunReader
    // and is not visible from this file.
    return new OverrunReader(
        bufferedReader,
        getSettings().getOverRunLimit(),
        false,
        getSettings().getRemoveCtrlChars()
    );
  }

  /**
   * Wraps a reader in an {@link OverrunReader}, buffering it first unless it already is a
   * {@link BufferedReader}. The overrun limit and control-character removal flag are read
   * from {@code getSettings()}.
   *
   * @param reader character source to wrap
   * @return an overrun reader on top of the buffered reader
   */
  protected OverrunReader createReader(Reader reader) {
    return new OverrunReader(
        bufferReader(reader),
        getSettings().getOverRunLimit(),
        false,
        getSettings().getRemoveCtrlChars()
    );
  }

  /** Returns the reader unchanged when it is a {@link BufferedReader}, otherwise wraps it in one. */
  private Reader bufferReader(Reader reader) {
    if (reader instanceof BufferedReader) {
      return reader;
    }
    return new BufferedReader(reader);
  }

  /**
   * Builds a non-blocking pool of {@link StringBuilder} instances.
   *
   * <p>Max total, min idle and max idle are all set to the configured pool size, and the
   * pool is configured not to block when exhausted. The pool factory receives the configured
   * max record length when it is positive, otherwise {@link #DEFAULT_MAX_RECORD_LENGTH}.
   *
   * @param settings not read by this implementation; all values come from {@code getSettings()}
   * @return a newly created pool
   */
  protected GenericObjectPool<StringBuilder> getStringBuilderPool(Settings settings) {
    int maxRecordLen = getSettings().getMaxRecordLen();
    int poolSize = getSettings().getStringBuilderPoolSize();

    GenericObjectPoolConfig stringBuilderPoolConfig = new GenericObjectPoolConfig();
    stringBuilderPoolConfig.setMaxTotal(poolSize);
    stringBuilderPoolConfig.setMinIdle(poolSize);
    stringBuilderPoolConfig.setMaxIdle(poolSize);
    stringBuilderPoolConfig.setBlockWhenExhausted(false);

    return new GenericObjectPool<>(
        new StringBuilderPoolFactory(maxRecordLen > 0 ? maxRecordLen : DEFAULT_MAX_RECORD_LENGTH),
        stringBuilderPoolConfig
    );
  }
}