/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.io;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
 * A <code>LiveFileChunk</code> is a data segment of a {@link LiveFile} that is guaranteed to be comprised of
 * full text lines.
 * <p>
 * A chunk is created either from a raw byte buffer (the lines are then computed from the buffer) or from an
 * already computed list of {@link FileLine}s. In the latter case there is no backing buffer:
 * {@link #getBuffer()} returns <code>null</code>, {@link #getOffset()} and {@link #getLength()} return
 * <code>-1</code>, and {@link #getReader()} is not available.
 */
public class LiveFileChunk {
  private final String tag;
  private final LiveFile file;
  private final byte[] data;
  private final Charset charset;
  private final long dataOffset;
  private final int length;
  private final boolean truncated;
  private final List<FileLine> lines;

  /**
   * Creates a chunk using existing {@link FileLine}s instead of a raw buffer to create them.
   * The chunk has no backing buffer; its offset and length are set to <code>-1</code>.
   *
   * @param tag the tag associated with the file.
   * @param file the file the lines were read from.
   * @param charset the chunk charset.
   * @param lines the lines of the chunk, stored by reference.
   * @param truncated whether the chunk has been truncated.
   */
  LiveFileChunk(String tag, LiveFile file, Charset charset, List<FileLine> lines, boolean truncated) {
    this.tag = tag;
    this.file = file;
    this.charset = charset;
    this.data = null;
    this.dataOffset = -1;
    this.length = -1;
    this.truncated = truncated;
    this.lines = lines;
  }

  /**
   * Creates a chunk using a raw buffer and creates the {@link FileLine}s from it.
   *
   * @param tag the tag associated with the file.
   * @param file the file the buffer was read from.
   * @param charset the chunk charset.
   * @param data the buffer holding the chunk data, stored by reference (not copied).
   * @param dataOffset the byte offset of the chunk in the file.
   * @param length the number of valid bytes in <code>data</code>, counted from index 0.
   * @param truncated whether the chunk has been truncated.
   */
  LiveFileChunk(String tag, LiveFile file, Charset charset, byte[] data, long dataOffset, int length,
      boolean truncated) {
    this.tag = tag;
    this.file = file;
    this.data = data;
    this.charset = charset;
    this.dataOffset = dataOffset;
    this.length = length;
    this.truncated = truncated;
    // createLines() hands 'this' to every FileLine it builds, so it must run only after all other
    // fields have been assigned; otherwise a FileLine could observe a half-initialized chunk.
    this.lines = createLines();
  }

  /**
   * Returns the tag associated with file where the chunk was read from.
   *
   * @return the tag associated with file where the chunk was read from.
   */
  public String getTag() {
    return tag;
  }

  /**
   * Returns the file the chunk was read from.
   *
   * @return the file the chunk was read from.
   */
  public LiveFile getFile() {
    return file;
  }

  /**
   * Returns the chunk charset.
   *
   * @return the chunk charset.
   */
  public Charset getCharset() {
    return charset;
  }

  /**
   * Returns the chunk buffer. It is a reference to the internal array, do not modify.
   *
   * @return the chunk buffer, or <code>null</code> if the chunk was created from existing
   * {@link FileLine}s.
   */
  public byte[] getBuffer() {
    return data;
  }

  /**
   * Returns a {@link Reader} to the data in the chunk.
   * <p>
   * The {@link Reader} decodes the first {@link #getLength()} bytes of the chunk buffer using the chunk
   * charset (see {@link #getCharset()}).
   *
   * @return a {@link Reader} to the data in the chunk.
   * @throws IllegalStateException if the chunk was created from existing {@link FileLine}s and therefore
   * has no backing buffer.
   */
  public Reader getReader() {
    if (data == null) {
      // Without this check ByteArrayInputStream would fail with an unexplained NullPointerException.
      throw new IllegalStateException("Chunk has no backing buffer, a reader is not available");
    }
    return new InputStreamReader(new ByteArrayInputStream(data, 0, length), charset);
  }

  /**
   * Returns the byte offset of the chunk in the {@link LiveFile}.
   *
   * @return the byte offset of the chunk in the {@link LiveFile}, or <code>-1</code> if the chunk was
   * created from existing {@link FileLine}s.
   */
  public long getOffset() {
    return dataOffset;
  }

  /**
   * Returns the byte length of the data in the chunk.
   *
   * @return the byte length of the data in the chunk, or <code>-1</code> if the chunk was created from
   * existing {@link FileLine}s.
   */
  public int getLength() {
    return length;
  }

  /**
   * Returns if the chunk has been truncated. This happens if the last line of the data chunk exceeds the maximum
   * length specified in the {@link SingleLineLiveFileReader}.
   *
   * @return <code>true</code> if the chunk has been truncated, <code>false</code> if not.
   */
  public boolean isTruncated() {
    return truncated;
  }

  /**
   * Returns a list with the {@link FileLine} in the chunk. Using <code>FileLine</code>s gives access to the
   * byte offset of each line (which is important when using multi-byte character encodings).
   *
   * @return a list with the {@link FileLine} in the chunk. The list is returned by reference.
   */
  public List<FileLine> getLines() {
    return lines;
  }

  /**
   * Splits the first <code>length</code> bytes of the buffer into {@link FileLine}s.
   * <p>
   * A line ends at a <code>\n</code> byte or at a <code>\r\n</code> byte pair, and the terminator is part
   * of the line. A <code>\r</code> not immediately followed by <code>\n</code> does not end a line. Any
   * bytes left after the last terminator form a final, unterminated line.
   *
   * @return the lines found in the buffer, in buffer order.
   */
  private List<FileLine> createLines() {
    List<FileLine> result = new ArrayList<>();
    int start = 0;
    for (int i = 0; i < length; i++) {
      if (data[i] == '\n') {
        result.add(new FileLine(this, start, i + 1 - start));
        start = i + 1;
      } else if (data[i] == '\r') {
        if (i + 1 < length && data[i + 1] == '\n') {
          result.add(new FileLine(this, start, i + 2 - start));
          start = i + 2;
          // skip the '\n' of the "\r\n" pair, it has already been consumed as part of this line
          i++;
        }
      }
    }
    if (start < length) {
      // trailing data without an end-of-line terminator
      result.add(new FileLine(this, start, length - start));
    }
    return result;
  }
}