/* ************************************************************************
#
# DivConq
#
# http://divconq.com/
#
# Copyright:
# Copyright 2014 eTimeline, LLC. All rights reserved.
#
# License:
# See the license.txt file in the project's top-level directory for details.
#
# Authors:
# * Andy White
#
************************************************************************ */
package divconq.ctp.stream;
import java.util.ArrayList;
import java.util.List;
import io.netty.buffer.ByteBuf;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarConstants;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import divconq.ctp.f.FileDescriptor;
import divconq.lang.op.OperationContext;
import divconq.script.StackEntry;
import divconq.xml.XElement;
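/**
 * Streaming untar: parses a tar archive as its bytes arrive from upstream and
 * forwards each entry's content downstream, tagged with a FileDescriptor built
 * from the entry header.
 *
 * Tar layout recap: an archive is a sequence of 512 byte records. Each entry is
 * one header record followed by ceil(size / 512) content records, the last one
 * zero-padded; two all-zero records terminate the archive.
 */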
public class UntarStream extends BaseStream implements IStreamSource {
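// parser states, in the order a well-formed entry moves through them:
// RECORD collects a full 512 byte header record, XTRAS rejects unsupported
// extension entries (GNU long name/link, pax, sparse), PREP computes the
// content and padding lengths, CONTENT forwards entry data, SKIP consumes
// the zero padding that rounds the entry up to a whole record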
protected enum UntarState {
RECORD,
XTRAS,
PREP,
CONTENT,
SKIP
}
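// one tar record; header bytes accumulate here when a record arrives split
// across incoming buffers, partialLength tracks how much is filled so far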
protected byte[] header_buffer = new byte[TarConstants.DEFAULT_RCDSIZE];
protected int partialLength = 0;
protected TarArchiveEntry currEntry = null;
protected ZipEncoding encoding = null;
protected long remainContent = 0;
protected long remainSkip = 0;
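// parallel output queues: outlist.get(i) describes outbuf.get(i); filled by
// nextMessage() and drained downstream at the end of handle() and in read()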
protected List<FileDescriptor> outlist = new ArrayList<>();
protected List<ByteBuf> outbuf = new ArrayList<>();
protected UntarState tstate = UntarState.RECORD;
public UntarStream() {
// tar header text fields (entry names, link names) are decoded as UTF-8
this.encoding = ZipEncodingHelper.getZipEncoding("UTF-8");
}
@Override
public void init(StackEntry stack, XElement el) {
}
@Override
public void close() {
this.currEntry = null;
// not truly thread safe; consider guarding if close() can race with handle()
for (ByteBuf bb : this.outbuf)
bb.release();
this.outlist.clear();
this.outbuf.clear();
super.close();
}
// take care not to return without first releasing the incoming buffer
@Override
public ReturnOption handle(FileDescriptor file, ByteBuf data) {
if (file == FileDescriptor.FINAL)
return this.downstream.handle(file, data);
ByteBuf in = data;
if (in != null) {
while (in.isReadable()) {
switch (this.tstate) {
case RECORD:
// starting a new record - a header is one full record and may arrive split across several incoming buffers
if (in.readableBytes() < TarConstants.DEFAULT_RCDSIZE - this.partialLength) {
// not enough for a complete header yet - stash what we have and wait for more
int offset = this.partialLength;
int avail = in.readableBytes();
this.partialLength += avail;
in.readBytes(this.header_buffer, offset, avail);
continue;
}
in.readBytes(this.header_buffer, this.partialLength, TarConstants.DEFAULT_RCDSIZE - this.partialLength);
this.partialLength = 0;
boolean hasHitEOF = this.isEOFRecord(this.header_buffer);
// an all-zero record signals the end of the archive (two appear in a row); the source will send FINAL anyway, so just reset and keep scanning
if (hasHitEOF) {
this.currEntry = null;
continue;
}
try {
this.currEntry = new TarArchiveEntry(this.header_buffer, this.encoding);
}
catch (Exception x) {
OperationContext.get().getTaskRun().kill("Error detected parsing the header: " + x);
in.release();
return ReturnOption.DONE;
}
this.tstate = UntarState.XTRAS;
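// fall through - the same buffer may already hold the bytes for the next state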
case XTRAS:
if (!in.isReadable())
continue;
// TODO support long names and such - see org.apache.commons.compress.archivers.tar.TarArchiveInputStream
if (this.currEntry.isGNULongLinkEntry()) {
/*
byte[] longLinkData = getLongNameData();
if (longLinkData == null) {
// Bugzilla: 40334
// Malformed tar file - long link entry name not followed by
// entry
return null;
}
currEntry.setLinkName(encoding.decode(longLinkData));
*/
OperationContext.get().getTaskRun().kill("long link currently not supported");
in.release();
return ReturnOption.DONE;
}
if (this.currEntry.isGNULongNameEntry()) {
/*
byte[] longNameData = getLongNameData();
if (longNameData == null) {
// Bugzilla: 40334
// Malformed tar file - long entry name not followed by
// entry
return null;
}
currEntry.setName(encoding.decode(longNameData));
*/
OperationContext.get().getTaskRun().kill("long name currently not supported");
in.release();
return ReturnOption.DONE;
}
if (this.currEntry.isPaxHeader()) {
// Process Pax headers
/*
paxHeaders();
*/
OperationContext.get().getTaskRun().kill("pax currently not supported");
in.release();
return ReturnOption.DONE;
}
if (this.currEntry.isGNUSparse()) {
// Process sparse files
/*
readGNUSparse();
*/
OperationContext.get().getTaskRun().kill("sparse currently not supported");
in.release();
return ReturnOption.DONE;
}
this.tstate = UntarState.PREP;
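// fall through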
case PREP:
if (!in.isReadable())
continue;
// TODO remove
System.out.println("name: " + this.currEntry.getName());
System.out.println("size: " + this.currEntry.getSize());
System.out.println("modified: " + this.currEntry.getModTime());
// the header's size field tells us how much content follows; pax size
// overrides are not supported here (pax entries are rejected above)
long entrySize = this.currEntry.getSize();
this.remainContent = entrySize;
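// content occupies ceil(entrySize / 512) records; e.g. a 600 byte entry fills
// two records (1024 bytes) leaving 424 padding bytes to skip, while a 512
// byte entry fills exactly one record and needs no padding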
long numRecords = (entrySize + this.header_buffer.length - 1) / this.header_buffer.length;
this.remainSkip = (numRecords * this.header_buffer.length) - entrySize;
// grab as much as we can from the current buffer
int readSize = (int) Math.min(this.remainContent, in.readableBytes());
this.remainContent -= readSize;
// handle empty files too
if ((readSize > 0) || (this.remainContent == 0)) {
System.out.println("reading content: " + readSize);
ByteBuf out = in.copy(in.readerIndex(), readSize);
int skipSize = (int) Math.min(this.remainSkip, in.readableBytes() - readSize);
this.remainSkip -= skipSize;
in.skipBytes(readSize + skipSize);
this.nextMessage(out);
}
this.tstate = UntarState.CONTENT;
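// fall through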
case CONTENT:
if (!in.isReadable())
continue;
// check if there is still content left in the entry we were last reading from
if (this.remainContent > 0) {
readSize = (int) Math.min(this.remainContent, in.readableBytes());
this.remainContent -= readSize;
//System.out.println("reading content: " + readSize);
//ByteBuf out = Hub.instance.getBufferAllocator().heapBuffer((int) readSize);
ByteBuf out = in.copy(in.readerIndex(), readSize);
int skipSize = (int) Math.min(this.remainSkip, in.readableBytes() - readSize);
this.remainSkip -= skipSize;
//System.out.println("skipping content: " + skipSize);
in.skipBytes(readSize + skipSize);
this.nextMessage(out);
}
if (this.remainContent > 0)
continue;
this.currEntry = null;
this.tstate = UntarState.SKIP;
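// fall through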
case SKIP:
if (!in.isReadable())
continue;
// check if there is still padding left in the entry we were last reading from
if (this.remainSkip > 0) {
int skipSize = (int) Math.min(this.remainSkip, in.readableBytes());
this.remainSkip -= skipSize;
//System.out.println("skipping content: " + skipSize);
in.skipBytes((int) skipSize);
}
if (this.remainSkip > 0)
continue;
this.tstate = UntarState.RECORD;
}
}
in.release();
}
// write all messages in the queue
while (this.outlist.size() > 0) {
ReturnOption ret = this.downstream.handle(this.outlist.remove(0), this.outbuf.remove(0));
if (ret != ReturnOption.CONTINUE)
return ret;
}
return ReturnOption.CONTINUE;
}
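// queue one output message for the chunk of entry content in out; an entry
// larger than a single incoming buffer produces several messages, with eof
// set only on the one that completes the entry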
public void nextMessage(ByteBuf out) {
// create the output message
FileDescriptor blk = new FileDescriptor();
blk.setPath("/" + this.currEntry.getName());
blk.setSize(this.currEntry.getRealSize());
blk.setModTime(this.currEntry.getModTime().getTime());
blk.setEof(this.remainContent == 0);
this.outlist.add(blk);
this.outbuf.add(out);
}
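// a record of all zero bytes marks the end of the archive (two appear in a row)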
protected boolean isEOFRecord(byte[] record) {
return record == null || ArchiveUtils.isArrayZero(record, TarConstants.DEFAULT_RCDSIZE);
}
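// drain any messages queued from a previous handle() call before asking
// upstream for more data; stop early if downstream pushes back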
@Override
public void read() {
// write all messages in the queue
while (this.outlist.size() > 0) {
ReturnOption ret = this.downstream.handle(this.outlist.remove(0), this.outbuf.remove(0));
if (ret != ReturnOption.CONTINUE)
return;
}
this.upstream.read();
}
}