/**
* Copyright 2016 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.io;
import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.io.Reader;
public class OverrunCustomDelimiterReader extends AbstractOverrunDelimitedReader {
private final String customDelimiter;
private final boolean includeDelimiterInTheText;
//For Testing purposes
@VisibleForTesting
OverrunCustomDelimiterReader(Reader reader, int maxLine, int bufferSize, String customDelimiter, boolean includeDelimiterInTheText) {
super(reader, maxLine, bufferSize);
this.customDelimiter = customDelimiter;
this.includeDelimiterInTheText = includeDelimiterInTheText;
}
public OverrunCustomDelimiterReader(Reader reader, int maxLine, String customDelimiter, boolean includeDelimiterInTheText) {
super(reader, maxLine, 8192 * 2);
this.customDelimiter = customDelimiter;
this.includeDelimiterInTheText = includeDelimiterInTheText;
}
@Override
public int readLine(StringBuilder s) throws IOException {
int initialLen = s.length(), overrun = 0, delimiterIndexToBeMatched = 0, startChar;
for (; ; ) {
if (nextChar >= nChars) {
fill();
}
if (nextChar >= nChars) { /* EOF */
int read = s.length() - initialLen;
if (read > 0) {
return read + overrun;
} else {
return -1;
}
}
boolean eol = false;
char c = 0;
int searchIdx = nextChar, nextPossibleMatch = -1;
while (searchIdx < nChars) {
c = cb[searchIdx];
//Optimization for starting the search again.
//In stream if we see a match for first character in delimiter, we can start the search from there,
//if the current search for delimiter is not successful
if (nextPossibleMatch == -1 && searchIdx != 0 && c == customDelimiter.charAt(0)) {
nextPossibleMatch = searchIdx;
}
if (c == customDelimiter.charAt(delimiterIndexToBeMatched)) {
delimiterIndexToBeMatched++;
} else if (delimiterIndexToBeMatched > 0) {
delimiterIndexToBeMatched = 0;
if (nextPossibleMatch > 0) {
searchIdx = nextPossibleMatch;
}
nextPossibleMatch = -1;
}
searchIdx++;
if (delimiterIndexToBeMatched == customDelimiter.length()) {
eol = true;
delimiterIndexToBeMatched = 0;
nextPossibleMatch = -1;
break;
}
}
startChar = nextChar;
nextChar = searchIdx;
if (eol) {
//Strip off the delimiter if needed.
overrun += copyToBuffer(s, initialLen, startChar, searchIdx);
if (!includeDelimiterInTheText) {
s.setLength(s.length() - customDelimiter.length());
}
return s.length() - initialLen + overrun;
}
overrun += copyToBuffer(s, initialLen, startChar, searchIdx);
}
}
}