package org.juxtasoftware.resource;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.juxtasoftware.dao.AlignmentDao;
import org.juxtasoftware.dao.CacheDao;
import org.juxtasoftware.dao.ComparisonSetDao;
import org.juxtasoftware.dao.WitnessDao;
import org.juxtasoftware.model.Alignment;
import org.juxtasoftware.model.Alignment.AlignedAnnotation;
import org.juxtasoftware.model.AlignmentConstraint;
import org.juxtasoftware.model.ComparisonSet;
import org.juxtasoftware.model.QNameFilter;
import org.juxtasoftware.model.Witness;
import org.juxtasoftware.util.BackgroundTask;
import org.juxtasoftware.util.BackgroundTaskCanceledException;
import org.juxtasoftware.util.BackgroundTaskStatus;
import org.juxtasoftware.util.QNameFilters;
import org.juxtasoftware.util.TaskManager;
import org.restlet.data.Encoding;
import org.restlet.data.MediaType;
import org.restlet.data.Status;
import org.restlet.engine.application.EncodeRepresentation;
import org.restlet.representation.FileRepresentation;
import org.restlet.representation.ReaderRepresentation;
import org.restlet.representation.Representation;
import org.restlet.resource.Get;
import org.restlet.resource.ResourceException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.BeanDefinition;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;
import eu.interedition.text.Range;
/**
* Resource used to export sets in various formats.
*
* @author lfoster
*
*/
@Service
@Scope(BeanDefinition.SCOPE_PROTOTYPE)
public class Exporter extends BaseResource {
@Autowired private ComparisonSetDao setDao;
@Autowired private WitnessDao witnessDao;
@Autowired private QNameFilters qnameFilters;
@Autowired private AlignmentDao alignmentDao;
@Autowired private CacheDao cacheDao;
@Autowired private TaskManager taskManager;
private boolean asynchronous;
private ComparisonSet set;
private Witness base;
@Override
protected void doInit() throws ResourceException {
super.doInit();
Long setId = getIdFromAttributes("id");
if ( setId == null ) {
return;
}
this.set = this.setDao.find(setId);
if ( validateModel(this.set) == false ) {
return;
}
if (getQuery().getValuesMap().containsKey("mode") ) {
String mode = getQuery().getValuesMap().get("mode").toLowerCase();
if ( mode.equals("teips") == false ) {
setStatus(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported export mode specified");
}
} else {
setStatus(Status.CLIENT_ERROR_BAD_REQUEST, "Missing required mode parameter");
}
if (getQuery().getValuesMap().containsKey("base") ) {
String idStr = getQuery().getValuesMap().get("base");
Long id = null;
try {
id = Long.parseLong(idStr);
} catch (NumberFormatException e) {
setStatus(Status.CLIENT_ERROR_BAD_REQUEST, "Invalid base identifer specified");
}
this.base = this.witnessDao.find(id);
if ( validateModel(this.base) == false ) {
return;
}
} else {
setStatus(Status.CLIENT_ERROR_BAD_REQUEST, "Missing required base parameter");
}
this.asynchronous = (getQuery().getValuesMap().containsKey("sync") == false);
}
@Get
public Representation exportSet() throws IOException {
if ( this.asynchronous ) {
// is set able to be exported?
if ( this.set.getStatus().equals(ComparisonSet.Status.COLLATED) == false ) {
setStatus(Status.CLIENT_ERROR_PRECONDITION_FAILED);
return toTextRepresentation("Cannot export set that is not collated");
}
if ( this.cacheDao.exportExists(this.set.getId(), this.base.getId())) {
Reader rdr = this.cacheDao.getExport(this.set.getId(), this.base.getId());
if ( rdr != null ) {
Representation rep = new ReaderRepresentation( rdr, MediaType.TEXT_XML);
if ( isZipSupported() ) {
return new EncodeRepresentation(Encoding.GZIP, rep);
} else {
return rep;
}
} else {
LOG.warn("Unable to retrieved cached data for "+set+". Clearing bad data");
this.cacheDao.deleteAll(set.getId());
}
}
final String taskId = generateTaskId(set.getId(), base.getId() );
if ( this.taskManager.exists(taskId) == false ) {
ExportTask task = new ExportTask(taskId);
this.taskManager.submit(task);
}
return toTextRepresentation("EXPORTING "+taskId );
} else {
return syncExport();
}
}
private Representation syncExport() throws IOException {
File out = doExport();
FileRepresentation rep = new FileRepresentation( out, MediaType.TEXT_XML);
rep.setAutoDeleting(true);
if ( isZipSupported() ) {
return new EncodeRepresentation(Encoding.GZIP, rep);
} else {
return rep;
}
}
private void asyncEport() throws IOException {
File out = doExport();
FileReader r = new FileReader( out );
this.cacheDao.cacheExport(this.set.getId(), this.base.getId(), r);
IOUtils.closeQuietly(r);
out.delete();
}
private File doExport() throws IOException {
// get the TEI PS template and sub in the name
String template = IOUtils.toString(ClassLoader.getSystemResourceAsStream("templates/xml/teips.xml"));
template = template.replace("$TITLE", this.set.getName());
// add listWit
List<Witness> witnesses = this.setDao.getWitnesses(this.set);
final String listWit = generateListWitContent(witnesses);
template = template.replace("$LISTWIT", listWit);
// generate the main body: text interwoven with app tags
File appFile = generateApparatus(witnesses);
// assemble everything in a temp file
FileInputStream fis = new FileInputStream(appFile);
File out = File.createTempFile("psfinal", "dat");
out.deleteOnExit();
FileOutputStream fos = new FileOutputStream(out);
OutputStream bout = new BufferedOutputStream(fos);
OutputStreamWriter ow = new OutputStreamWriter(bout, "UTF-8");
int pos = template.indexOf("$BODY");
ow.write(template.substring(0, pos));
IOUtils.copy(fis, ow);
ow.write(template.substring(pos+5));
IOUtils.closeQuietly(ow);
appFile.delete();
return out;
}
private String generateTaskId( final Long setId, final Long baseId) {
final int prime = 31;
int result = 1;
result = prime * result + setId.hashCode();
result = prime * result + baseId.hashCode();
return "export-"+result;
}
private File generateApparatus(List<Witness> witnesses) throws IOException {
// Algo: stream text from the pase witness until a diff is found
// at that point, inject an <app>. Each witness content will be
// added in <rdg> tags.
OutputStreamWriter ow = null;
Reader witReader = null;
File out = null;
try {
// setup readers/writers for the data
out = File.createTempFile("ps_app", "dat");
out.deleteOnExit();
FileOutputStream fos = new FileOutputStream(out);
OutputStream bout = new BufferedOutputStream(fos);
ow = new OutputStreamWriter(bout, "UTF-8");
witReader = this.witnessDao.getContentStream(this.base);
ow.write("<p>");
// get a batch of alignments to work with.
QNameFilter changesFilter = this.qnameFilters.getDifferencesFilter();
AlignmentConstraint constraint = new AlignmentConstraint(set, this.base.getId());
constraint.setFilter(changesFilter);
List<Alignment> alignments = this.alignmentDao.list(constraint);
List<AppData> appData = generateAppData(alignments);
Iterator<AppData> itr = appData.iterator();
// set the current align to first in the available list
AppData currApp = null;
if ( itr.hasNext() ) {
currApp = itr.next();
}
long pos = 0;
boolean appJustClosed = false;
int lastWritten = -1;
while ( true ) {
int data = witReader.read();
if ( data == -1 ) {
break;
}
// new lines in base turn into TEI linebreaks
if ( data == '\n' ) {
ow.write("<lb/>");
}
if ( currApp != null && pos == currApp.getBaseRange().getStart() ) {
boolean firstPass = true;
while ( true ) {
// write the initial APP, RDG tags
// NOTE: Always ensure that there is a space before the App tag
// this will prevent words from running together
if ( appJustClosed == false && lastWritten != -1 && Character.isWhitespace(lastWritten) == false ) {
ow.write(" \n<app>\n");
} else {
ow.write("\n<app>\n");
}
ow.write( " "+generateRdgTag(witnesses, currApp) );
// write the character that triggered this first
// Note that this only applies on the first time thru this
// loop. Additional entries will not have data pre-seeded
// with the initial rdg character.
StringBuilder baseRdg = new StringBuilder();
if ( firstPass == true ) {
firstPass = false;
baseRdg.append((char)data);
pos++;
}
// write the rest of the rdg content
while ( pos < currApp.getBaseRange().getEnd() ) {
data = witReader.read();
if ( data == -1 ) {
ow.close();
throw new IOException("invalid aligment: past end of document");
} else {
if ( data == '\n') {
baseRdg.append("|*LB*|");
} else {
baseRdg.append((char)data);
}
pos++;
}
}
// end the rdg tag
ow.write( StringEscapeUtils.escapeXml(baseRdg.toString()).replaceAll("\\|\\*LB\\*\\|", "<lb/>"));
ow.write("</rdg>\n");
// write witnesses
for ( Entry<Long, Range> entry : currApp.getWitnessData().entrySet()) {
final String rdg = String.format(" <rdg wit=\"#wit-%d\">", entry.getKey());
ow.write(rdg);
ow.write( getWitnessFragment(entry.getKey(), entry.getValue() ) );
ow.write("</rdg>\n");
}
// NOTE: Added space after the APP tag to be sure words cannot run together
// Also flag that this is the end of an app tag so we ca properly
// detect wether ot not to write the next character
ow.write("</app> ");
appJustClosed = true;
// move on to the next annotation
currApp = null;
if ( itr.hasNext() ) {
currApp = itr.next();
if ( currApp.getBaseRange().getStart() > pos ) {
break;
}
} else {
break;
}
}
} else {
if ( appJustClosed && Character.isWhitespace(data)) {
pos++;
} else {
ow.write( StringEscapeUtils.escapeXml( ""+(char)data) );
lastWritten = data;
pos++;
}
appJustClosed = false;
}
}
ow.write("</p>");
} finally {
IOUtils.closeQuietly(ow);
IOUtils.closeQuietly(witReader);
}
return out;
}
private String generateRdgTag(List<Witness> witnesses, AppData currApp) {
// any wit ids that are NOT present in the app data are the
// same as the base text. be sure to add them to the rdg below
List<Long> ids = new ArrayList<Long>();
for ( Witness w : witnesses ) {
if ( currApp.getWitnessData().containsKey(w.getId()) == false ) {
ids.add(w.getId());
}
}
StringBuilder sb = new StringBuilder();
sb.append("<rdg wit=\"");
int cnt=0;
for (Long id : ids) {
if ( cnt > 0 ) {
sb.append(" ");
}
sb.append("#wit-").append(id);
cnt++;
}
sb.append("\">");
return sb.toString();
}
/**
* Extract the text fragment for a witness. NOTE: the isAddedContent flag is necessary
* to ensure that the proper trailing non-token text gets addded to the <rdg> tag. Without it
* the pieced together witness would run together without spacing/punctuation for base GAPS
* @param witId
* @param range
* @param isAddedContent
* @return
* @throws IOException
*/
private String getWitnessFragment(Long witId, Range range ) throws IOException {
Witness w = this.witnessDao.find(witId);
Reader r = this.witnessDao.getContentStream(w);
StringBuilder buff = new StringBuilder();
long pos= 0;
while ( true) {
int data = r.read();
if ( data == -1 ) {
return buff.toString();
}
if ( pos >= range.getStart() && pos < range.getEnd()) {
if ( data == '\n') {
buff.append("|*LB*|");
} else {
buff.append((char)data);
}
}
pos++;
if ( pos == range.getEnd() ) {
return StringEscapeUtils.escapeXml(buff.toString()).replaceAll("\\|\\*LB\\*\\|", "<lb/>");
}
}
}
private List<AppData> generateAppData( List<Alignment> alignments ) {
Collections.sort(alignments, new Comparator<Alignment>() {
@Override
public int compare(Alignment a, Alignment b) {
// NOTE: There is a bug in interedition Range. It will
// order range [0,1] before [0,0] when sorting ascending.
// So.. do NOT use its compareTo. Roll own.
Range r1 = a.getWitnessAnnotation(base.getId()).getRange();
Range r2 = b.getWitnessAnnotation(base.getId()).getRange();
if ( r1.getStart() < r2.getStart() ) {
return -1;
} else if ( r1.getStart() > r2.getStart() ) {
return 1;
} else {
if ( r1.getEnd() < r2.getEnd() ) {
return -1;
} else if ( r1.getEnd() > r2.getEnd() ) {
return 1;
}
}
return 0;
}
});
List<AppData> data = new ArrayList<Exporter.AppData>();
Map<Range, AppData> changeMap = new HashMap<Range, AppData>();
Iterator<Alignment> itr = alignments.iterator();
while ( itr.hasNext() ) {
Alignment align = itr.next();
itr.remove();
// get base and add it to list of found ranges or get
// pre-existing data for that range
AlignedAnnotation baseAnno = align.getWitnessAnnotation(this.base.getId());
Range baseRange = baseAnno.getRange();
AppData appData = changeMap.get(baseRange);
if ( appData == null ) {
appData= new AppData( this.base.getId(), baseRange, align.getGroup() );
changeMap.put(baseRange, appData);
data.add(appData);
}
// add witness data to the app info
for (AlignedAnnotation a : align.getAnnotations()) {
if (a.getWitnessId().equals(base.getId()) == false) {
Range r = a.getRange();
appData.addWitness(a.getWitnessId(), r);
break;
}
}
}
// take a pass thru the data and merge items with same group id
Iterator<AppData> appItr = data.iterator();
AppData prior = null;
while ( appItr.hasNext() ) {
AppData curr = appItr.next();
if (prior != null) {
if ( prior.canMerge( curr )) {
prior.merge(curr);
appItr.remove();
} else {
prior = curr;
}
} else {
prior = curr;
}
}
return data;
}
private String generateListWitContent(List<Witness> witnesses) throws IOException {
StringBuilder listWit = new StringBuilder();
for (Witness w : witnesses ) {
if ( listWit.length() > 0 ) {
listWit.append("\n ");
}
String frag = IOUtils.toString(ClassLoader.getSystemResourceAsStream("templates/xml/listwit_frag.xml"));
frag = frag.replace("$NAME", w.getName());
frag = frag.replace("$ID", "wit-"+w.getId().toString());
listWit.append(frag);
}
return listWit.toString();
}
private static class AppData {
private Long baseId;
private int groupId;
private Range baseRange;
private Map<Long, Range> witnessRanges = new HashMap<Long, Range>();
public AppData( Long baseId, Range r, int groupId) {
this.baseId = baseId;
this.baseRange = new Range(r);
this.groupId = groupId;
}
public void addWitness( Long id, Range r) {
Range orig = this.witnessRanges.get(id);
if ( orig == null ) {
this.witnessRanges.put(id, new Range(r));
} else {
this.witnessRanges.put(id, new Range(
Math.min( orig.getStart(), r.getStart() ),
Math.max( orig.getEnd(), r.getEnd() )
));
}
}
public Map<Long, Range> getWitnessData() {
return this.witnessRanges;
}
public boolean canMerge( AppData other) {
return this.groupId == other.groupId &&
this.baseId.equals(other.getBaseId()) &&
hasMatchingWitnesses(other);
}
private boolean hasMatchingWitnesses(AppData other) {
if (this.witnessRanges.size() != other.witnessRanges.size() ) {
return false;
}
for ( Long witId : this.witnessRanges.keySet() ) {
if ( other.witnessRanges.containsKey(witId) == false ) {
return false;
}
}
return true;
}
public void merge(AppData other) {
this.baseRange = new Range(
Math.min( this.baseRange.getStart(), other.getBaseRange().getStart() ),
Math.max( this.baseRange.getEnd(), other.getBaseRange().getEnd() )
);
for (Entry<Long, Range> entry : other.witnessRanges.entrySet() ) {
Range oldRange = this.witnessRanges.get(entry.getKey());
if (oldRange == null ) {
this.witnessRanges.put(entry.getKey(), entry.getValue());
} else {
Range newRange = new Range(
Math.min( oldRange.getStart(), entry.getValue().getStart() ),
Math.max( oldRange.getEnd(), entry.getValue().getEnd() )
);
this.witnessRanges.put( entry.getKey(), newRange );
}
}
}
public Long getBaseId() {
return this.baseId;
}
public Range getBaseRange() {
return this.baseRange;
}
}
/**
* Task to asynchronously render the visualization
*/
private class ExportTask implements BackgroundTask {
private final String name;
private BackgroundTaskStatus status;
private Date startDate;
private Date endDate;
public ExportTask(final String name) {
this.name = name;
this.status = new BackgroundTaskStatus( this.name );
this.startDate = new Date();
}
@Override
public Type getType() {
return BackgroundTask.Type.VISUALIZE;
}
@Override
public void run() {
try {
LOG.info("Begin task "+this.name);
this.status.begin();
Exporter.this.asyncEport();
LOG.info("Task "+this.name+" COMPLETE");
this.endDate = new Date();
this.status.finish();
} catch (IOException e) {
LOG.error(this.name+" task failed", e.toString());
this.status.fail(e.toString());
this.endDate = new Date();
} catch ( BackgroundTaskCanceledException e) {
LOG.info( this.name+" task was canceled");
this.endDate = new Date();
} catch (Exception e) {
LOG.error(this.name+" task failed", e);
this.status.fail(e.toString());
this.endDate = new Date();
}
}
@Override
public void cancel() {
this.status.cancel();
}
@Override
public BackgroundTaskStatus.Status getStatus() {
return this.status.getStatus();
}
@Override
public String getName() {
return this.name;
}
@Override
public Date getEndTime() {
return this.endDate;
}
@Override
public Date getStartTime() {
return this.startDate;
}
@Override
public String getMessage() {
return this.status.getNote();
}
}
}