/* ************************************************************************ # # DivConq # # http://divconq.com/ # # Copyright: # Copyright 2014 eTimeline, LLC. All rights reserved. # # License: # See the license.txt file in the project's top-level directory for details. # # Authors: # * Andy White # ************************************************************************ */ package divconq.ctp.stream; import java.util.ArrayList; import java.util.List; import io.netty.buffer.ByteBuf; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarConstants; import org.apache.commons.compress.archivers.zip.ZipEncoding; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.utils.ArchiveUtils; import divconq.ctp.f.FileDescriptor; import divconq.lang.op.OperationContext; import divconq.script.StackEntry; import divconq.xml.XElement; public class UntarStream extends BaseStream implements IStreamSource { protected enum UntarState { RECORD, XTRAS, PREP, CONTENT, SKIP } protected byte[] header_buffer = new byte[TarConstants.DEFAULT_RCDSIZE]; protected int partialLength = 0; protected TarArchiveEntry currEntry = null; protected ZipEncoding encoding = null; protected long remainContent = 0; protected long remainSkip = 0; protected List<FileDescriptor> outlist = new ArrayList<>(); protected List<ByteBuf> outbuf = new ArrayList<>(); protected UntarState tstate = UntarState.RECORD; public UntarStream() { this.encoding = ZipEncodingHelper.getZipEncoding("UTF-8"); } @Override public void init(StackEntry stack, XElement el) { } @Override public void close() { this.currEntry = null; // not truly thread safe, consider for (ByteBuf bb : this.outbuf) bb.release(); this.outlist.clear(); this.outbuf.clear(); super.close(); } // make sure we don't return without first releasing the file reference content @Override public ReturnOption handle(FileDescriptor file, ByteBuf data) { if (file == FileDescriptor.FINAL) return this.downstream.handle(file, data); ByteBuf in = data; if (in != null) { while (in.isReadable()) { switch (this.tstate) { case RECORD: // starting a new record if (in.readableBytes() < TarConstants.DEFAULT_RCDSIZE - this.partialLength) { int offset = this.partialLength; this.partialLength += in.readableBytes(); in.readBytes(this.header_buffer, offset, in.readableBytes()); continue; } in.readBytes(this.header_buffer, this.partialLength, TarConstants.DEFAULT_RCDSIZE - this.partialLength); this.partialLength = 0; //in.readBytes(this.header_buffer, 0, this.header_buffer.length); boolean hasHitEOF = this.isEOFRecord(this.header_buffer); // if we hit this twice in a row we are at the end - however, source will send FINAL anyway so we don't really care if (hasHitEOF) { this.currEntry = null; continue; } try { this.currEntry = new TarArchiveEntry(this.header_buffer, this.encoding); } catch (Exception x) { OperationContext.get().getTaskRun().kill("Error detected parsing the header: " + x); in.release(); return ReturnOption.DONE; } this.tstate = UntarState.XTRAS; case XTRAS: if (!in.isReadable()) continue; // TODO support long names and such - see org.apache.commons.compress.archivers.tar.TarArchiveInputStream if (this.currEntry.isGNULongLinkEntry()) { /* byte[] longLinkData = getLongNameData(); if (longLinkData == null) { // Bugzilla: 40334 // Malformed tar file - long link entry name not followed by // entry return null; } currEntry.setLinkName(encoding.decode(longLinkData)); */ OperationContext.get().getTaskRun().kill("long link currently not supported"); in.release(); return ReturnOption.DONE; } if (this.currEntry.isGNULongNameEntry()) { /* byte[] longNameData = getLongNameData(); if (longNameData == null) { // Bugzilla: 40334 // Malformed tar file - long entry name not followed by // entry return null; } currEntry.setName(encoding.decode(longNameData)); */ OperationContext.get().getTaskRun().kill("long name currently not supported"); in.release(); return ReturnOption.DONE; } if (this.currEntry.isPaxHeader()) { // Process Pax headers /* paxHeaders(); */ OperationContext.get().getTaskRun().kill("pax currently not supported"); in.release(); return ReturnOption.DONE; } if (this.currEntry.isGNUSparse()) { // Process sparse files /* readGNUSparse(); */ OperationContext.get().getTaskRun().kill("sparse currently not supported"); in.release(); return ReturnOption.DONE; } this.tstate = UntarState.PREP; case PREP: if (!in.isReadable()) continue; // TODO remove System.out.println("name: " + this.currEntry.getName()); System.out.println("size: " + this.currEntry.getSize()); System.out.println("modified: " + this.currEntry.getModTime()); // If the size of the next element in the archive has changed // due to a new size being reported in the posix header // information, we update entrySize here so that it contains // the correct value. long entrySize = this.currEntry.getSize(); this.remainContent = entrySize; long numRecords = (entrySize / this.header_buffer.length) + 1; this.remainSkip = (numRecords * this.header_buffer.length) - entrySize; // grab as much as we can from the current buffer int readSize = (int) Math.min(this.remainContent, in.readableBytes()); this.remainContent -= readSize; // handle empty files too if ((readSize > 0) || (this.remainContent == 0)) { System.out.println("reading content: " + readSize); ByteBuf out = in.copy(in.readerIndex(), readSize); int skipSize = (int) Math.min(this.remainSkip, in.readableBytes() - readSize); this.remainSkip -= skipSize; in.skipBytes(readSize + skipSize); this.nextMessage(out); } this.tstate = UntarState.CONTENT; case CONTENT: if (!in.isReadable()) continue; // check if there is still content left in the entry we were last reading from if (this.remainContent > 0) { readSize = (int) Math.min(this.remainContent, in.readableBytes()); this.remainContent -= readSize; //System.out.println("reading content: " + readSize); //ByteBuf out = Hub.instance.getBufferAllocator().heapBuffer((int) readSize); ByteBuf out = in.copy(in.readerIndex(), readSize); int skipSize = (int) Math.min(this.remainSkip, in.readableBytes() - readSize); this.remainSkip -= skipSize; //System.out.println("skipping content: " + skipSize); in.skipBytes(readSize + skipSize); this.nextMessage(out); } if (this.remainContent > 0) continue; this.currEntry = null; this.tstate = UntarState.SKIP; case SKIP: if (!in.isReadable()) continue; // check if there is still padding left in the entry we were last reading from if (this.remainSkip > 0) { int skipSize = (int) Math.min(this.remainSkip, in.readableBytes()); this.remainSkip -= skipSize; //System.out.println("skipping content: " + skipSize); in.skipBytes((int) skipSize); } if (this.remainSkip > 0) continue; this.tstate = UntarState.RECORD; } } in.release(); } // write all messages in the queue while (this.outlist.size() > 0) { ReturnOption ret = this.downstream.handle(this.outlist.remove(0), this.outbuf.remove(0)); if (ret != ReturnOption.CONTINUE) return ret; } return ReturnOption.CONTINUE; } public void nextMessage(ByteBuf out) { // create the output message FileDescriptor blk = new FileDescriptor(); blk.setPath("/" + this.currEntry.getName()); blk.setSize(this.currEntry.getRealSize()); blk.setModTime(this.currEntry.getModTime().getTime()); blk.setEof(this.remainContent == 0); this.outlist.add(blk); this.outbuf.add(out); } protected boolean isEOFRecord(byte[] record) { return record == null || ArchiveUtils.isArrayZero(record, TarConstants.DEFAULT_RCDSIZE); } @Override public void read() { // write all messages in the queue while (this.outlist.size() > 0) { ReturnOption ret = this.downstream.handle(this.outlist.remove(0), this.outbuf.remove(0)); if (ret != ReturnOption.CONTINUE) return; } this.upstream.read(); } }