/*******************************************************************************
* Trombone is a flexible text processing and analysis library used
* primarily by Voyant Tools (voyant-tools.org).
*
* Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
*
* This file is part of Trombone.
*
* Trombone is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Trombone is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Trombone. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package org.voyanttools.trombone.model;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Locale;
import org.voyanttools.trombone.input.source.Source;
import org.voyanttools.trombone.util.FlexibleParameters;
/**
* This encapsulates various types of metadata about content, including {@link Source},
* location, last modified timestamp and {@link DocumentFormat}. All modifications
* to the metadata should be done by the explicit getters and setters.
*
* @author Stéfan Sinclair
*/
//@XStreamConverter(MetadataConverter.class)
public class DocumentMetadata implements Comparable<DocumentMetadata> {

    /**
     * Kind of relationship between a metadata object and its parent. The value
     * is stored (lower-cased) under the {@code parentType} parameter.
     */
    public enum ParentType {
        EXTRACTION, EXPANSION, MIGRATION, UNKNOWN
    }

    /**
     * Index assigned through {@link #setIndex(int)}. The field is declared
     * transient; the same value is also written to the {@code index} parameter.
     */
    private transient int index = 0;

    /** Backing store for every metadata value handled by this class. */
    private FlexibleParameters parameters;

    /** Orders by author (ascending); equal authors fall back to {@link #compareTo}. */
    static Comparator<DocumentMetadata> AuthorAscendingComparator = new Comparator<DocumentMetadata>() {
        @Override
        public int compare(DocumentMetadata o1, DocumentMetadata o2) {
            return compareByField(o1.getAuthor(), o2.getAuthor(), o1, o2, false);
        }
    };

    /** Orders by author (descending); equal authors fall back to {@link #compareTo}. */
    static Comparator<DocumentMetadata> AuthorDescendingComparator = new Comparator<DocumentMetadata>() {
        @Override
        public int compare(DocumentMetadata o1, DocumentMetadata o2) {
            return compareByField(o1.getAuthor(), o2.getAuthor(), o1, o2, true);
        }
    };

    /** Orders by title (ascending); equal titles fall back to {@link #compareTo}. */
    static Comparator<DocumentMetadata> TitleAscendingComparator = new Comparator<DocumentMetadata>() {
        @Override
        public int compare(DocumentMetadata o1, DocumentMetadata o2) {
            return compareByField(o1.getTitle(), o2.getTitle(), o1, o2, false);
        }
    };

    /** Orders by title (descending); equal titles fall back to {@link #compareTo}. */
    static Comparator<DocumentMetadata> TitleDescendingComparator = new Comparator<DocumentMetadata>() {
        @Override
        public int compare(DocumentMetadata o1, DocumentMetadata o2) {
            return compareByField(o1.getTitle(), o2.getTitle(), o1, o2, true);
        }
    };

    /** Orders by publication date string (ascending); equal dates fall back to {@link #compareTo}. */
    static Comparator<DocumentMetadata> PubDateAscendingComparator = new Comparator<DocumentMetadata>() {
        @Override
        public int compare(DocumentMetadata o1, DocumentMetadata o2) {
            return compareByField(o1.getPubDate(), o2.getPubDate(), o1, o2, false);
        }
    };

    /** Orders by publication date string (descending); equal dates fall back to {@link #compareTo}. */
    static Comparator<DocumentMetadata> PubDateDescendingComparator = new Comparator<DocumentMetadata>() {
        @Override
        public int compare(DocumentMetadata o1, DocumentMetadata o2) {
            return compareByField(o1.getPubDate(), o2.getPubDate(), o1, o2, true);
        }
    };

    /**
     * Shared implementation of the static comparators: compares two field
     * values lexicographically, in the requested direction, and defers to
     * {@link #compareTo(DocumentMetadata)} when the values are equal.
     *
     * @param first the field value taken from {@code o1}
     * @param second the field value taken from {@code o2}
     * @param o1 the first metadata object
     * @param o2 the second metadata object
     * @param descending {@code true} to reverse the order of the field comparison
     * @return a negative, zero or positive number as defined by {@link Comparator#compare}
     */
    private static int compareByField(String first, String second, DocumentMetadata o1,
            DocumentMetadata o2, boolean descending) {
        if (first.equals(second)) {
            return o1.compareTo(o2);
        }
        // swap the operands rather than negate the result to reverse the order
        return descending ? second.compareTo(first) : first.compareTo(second);
    }

    /**
     * Creates metadata backed by a new, empty set of parameters.
     */
    public DocumentMetadata() {
        parameters = new FlexibleParameters();
    }

    /**
     * Creates metadata backed by the given parameters. The instance is used
     * directly (it is not copied).
     *
     * @param parameters the parameters holding the metadata values
     */
    public DocumentMetadata(FlexibleParameters parameters) {
        this.parameters = parameters;
    }

    /**
     * Creates a new child Metadata object with this object as its parent, including provided parent ID.
     * As a side effect the given id is stored as this object's {@code id}
     * (unless it is {@code null}). Every parameter of this object is carried
     * over to the child under a {@code parent_}-prefixed key.
     *
     * @param id the id to assign to this (parent) metadata
     * @param parentType the relationship between the child and this object
     * @return a new child Metadata object
     */
    public DocumentMetadata asParent(String id, ParentType parentType) {
        setProperty("id", id);
        FlexibleParameters childParameters = new FlexibleParameters();
        for (String key : parameters.getKeys()) {
            childParameters.setParameter("parent_" + key, parameters.getParameterValues(key));
        }
        childParameters.setParameter("parentType", parentType.name().toLowerCase(Locale.ROOT));
        return new DocumentMetadata(childParameters);
    }

    /**
     * Copies every parameter of the given parent into this object under a
     * {@code parent_}-prefixed key and records the parent type.
     *
     * @param parentMetadata the metadata of the parent
     * @param parentType the relationship between this object and the parent
     */
    public void setParent(DocumentMetadata parentMetadata, ParentType parentType) {
        FlexibleParameters parentParameters = parentMetadata.getFlexibleParameters();
        for (String key : parentParameters.getKeys()) {
            parameters.setParameter("parent_" + key, parentParameters.getParameterValues(key));
        }
        parameters.setParameter("parentType", parentType.name().toLowerCase(Locale.ROOT));
    }

    /**
     * Get the {@link ParentType} ({@link ParentType#UNKNOWN} if none was recorded).
     *
     * @return the parent type
     */
    public ParentType getParentType() {
        // Locale.ROOT keeps the enum lookup independent of the default locale
        return ParentType.valueOf(getProperty("parentType", "unknown").toUpperCase(Locale.ROOT));
    }

    /**
     * Get the id of the parent, or an empty string if there is none.
     *
     * @return the value of the {@code parent_id} parameter
     */
    public String getParentId() {
        return getProperty("parent_id", "");
    }

    /**
     * Compares by title, then author, then publication date; if all three are
     * equal the objects' hash codes decide the order.
     *
     * @param o the metadata to compare to this one
     * @return a negative, zero or positive number as defined by {@link Comparable#compareTo}
     */
    @Override
    public int compareTo(DocumentMetadata o) {
        // don't use static comparators since we may get bounced back here
        int byTitle = getTitle().compareTo(o.getTitle());
        if (byTitle != 0) {
            return byTitle;
        }
        int byAuthor = getAuthor().compareTo(o.getAuthor());
        if (byAuthor != 0) {
            return byAuthor;
        }
        int byPubDate = getPubDate().compareTo(o.getPubDate());
        if (byPubDate != 0) {
            return byPubDate;
        }
        return Integer.compare(hashCode(), o.hashCode()); // give up
    }

    /**
     * Determines if a value is stored under the given key.
     *
     * @param string the parameter key to look up
     * @return whether the underlying parameters contain the key
     */
    public boolean containsKey(String string) {
        return parameters.containsKey(string);
    }

    /**
     * Determines if this metadata is the same as the specified metadata by
     * comparing the underlying parameters.
     *
     * NOTE(review): this is an overload taking {@link DocumentMetadata}; it does
     * not override {@link Object#equals(Object)}, so collections will not use it.
     *
     * @param metadata the metadata to compare to this one (may be {@code null})
     * @return whether or not they are the same
     */
    public boolean equals(DocumentMetadata metadata) {
        return metadata != null && parameters.equals(metadata.parameters);
    }

    /**
     * Get the author, or an empty string if not set.
     *
     * @return the value of the {@code author} parameter
     */
    public String getAuthor() {
        return getProperty("author", "");
    }

    /**
     * Get the default {@link DocumentFormat} of the metadata (or
     * {@link DocumentFormat#UNKNOWN} if unknown). This differs from the
     * {@link #getDocumentFormat()} in that it's a back-up format, for instance
     * the one provided by a web server (even if a document can override it).
     *
     * @return the default {@link DocumentFormat} of the metadata (or
     *         {@link DocumentFormat#UNKNOWN} if unknown)
     */
    public DocumentFormat getDefaultFormat() {
        String format = getProperty("defaultFormat");
        if (format == null || format.isEmpty()) {
            return DocumentFormat.UNKNOWN;
        }
        return DocumentFormat.valueOf(format.toUpperCase(Locale.ROOT));
    }

    /**
     * Get the {@link DocumentFormat} of the metadata (or
     * {@link DocumentFormat#UNKNOWN} if unknown). If this hasn't been set
     * explicitly (using {@link #setDocumentFormat}) then an attempt is made to
     * guess at the format using other heuristics (especially file names and
     * URIs where applicable). When nothing can be guessed the result of
     * {@link #getDefaultFormat()} is returned.
     *
     * @return the {@link DocumentFormat} of the metadata (or
     *         {@link DocumentFormat#UNKNOWN} if unknown)
     * @throws IOException is thrown when there's a problem determining format
     */
    public DocumentFormat getDocumentFormat() throws IOException {
        // an explicitly set format always wins
        String format = getProperty("format");
        if (format != null && !format.isEmpty()) {
            return DocumentFormat.valueOf(format.toUpperCase(Locale.ROOT));
        }

        Source source = getSource();
        String location = getLocation();

        if (source == Source.FILE) {
            // without a location there is no file name to inspect
            if (location != null) {
                return DocumentFormat.fromFile(new File(location));
            }
        }
        else if (source == Source.URI) {
            if (location != null) {
                URI uri;
                try {
                    uri = new URI(location);
                }
                catch (URISyntaxException e) {
                    throw new IOException("Unable to get URI: " + location, e);
                }
                // first try to guess from file name; the path is null for opaque URIs
                String path = uri.getPath();
                if (path != null) {
                    DocumentFormat documentFormat = DocumentFormat.fromFile(new File(path));
                    if (documentFormat != DocumentFormat.UNKNOWN) {
                        return documentFormat;
                    }
                }
            }
        }
        else if (source == Source.STREAM) {
            if (location != null && !location.isEmpty()) {
                return DocumentFormat.fromFilename(location);
            }
        }
        return getDefaultFormat();
    }

    /**
     * Get the parameters backing this metadata. The live instance is returned,
     * not a copy.
     *
     * @return the underlying {@link FlexibleParameters}
     */
    public FlexibleParameters getFlexibleParameters() {
        return parameters;
    }

    /**
     * Get the index last given to {@link #setIndex(int)} on this instance
     * (0 if it was never called). The {@code index} parameter is not consulted.
     *
     * @return the index
     */
    public int getIndex() {
        return index;
    }

    /**
     * Get the keywords, or an empty string if not set.
     *
     * @return the value of the {@code keywords} parameter
     */
    public String getKeywords() {
        return getProperty("keywords", "");
    }

    /**
     * Get the language code, or an empty string if not set.
     *
     * @return the value of the {@code language} parameter
     */
    public String getLanguageCode() {
        return getProperty("language", "");
    }

    /**
     * Get the last token position index for the given token type (0 if not set).
     *
     * @param tokenType the token type
     * @return the stored last token position index
     */
    public int getLastTokenPositionIndex(TokenType tokenType) {
        return Integer.parseInt(getProperty("lastTokenPositionIndex-" + tokenType.name(), "0"));
    }

    /**
     * Get the location of the source. This is a String representation that will
     * depend on the {@link Source} but may include a file name, a URI, "memory"
     * (for a String or transient InputStream).
     *
     * @return the location of the source
     */
    public String getLocation() {
        return getProperty("location");
    }

    /**
     * Get the last modified timestamp (milliseconds since January 1, 1970 GMT)
     * or 0 if unknown.
     *
     * @return modified timestamp (milliseconds since January 1, 1970 GMT) or 0
     *         if unknown
     */
    public long getModified() {
        return Long.parseLong(getProperty("modified", "0"));
    }

    /**
     * Reads a single value from the underlying parameters.
     *
     * @param key the parameter key
     * @return the value as returned by the underlying parameters
     */
    private String getProperty(String key) {
        return parameters.getParameterValue(key);
    }

    /**
     * Reads a single value from the underlying parameters, supplying a default.
     *
     * @param key the parameter key
     * @param defaultValue the default handed to the underlying parameters
     * @return the value as returned by the underlying parameters
     */
    private String getProperty(String key, String defaultValue) {
        return parameters.getParameterValue(key, defaultValue);
    }

    /**
     * Get the publication date, or an empty string if not set.
     *
     * @return the value of the {@code pubDate} parameter
     */
    public String getPubDate() {
        return getProperty("pubDate", "");
    }

    /**
     * Get the {@link Source} ({@link Source#UNKNOWN} if unknown)
     *
     * @return the {@link Source} ({@link Source#UNKNOWN} if unknown)
     */
    public Source getSource() {
        String source = getProperty("source");
        if (source == null || source.isEmpty()) {
            return Source.UNKNOWN;
        }
        return Source.valueOf(source.toUpperCase(Locale.ROOT));
    }

    /**
     * Get the title, or an empty string if not set.
     *
     * @return the value of the {@code title} parameter
     */
    public String getTitle() {
        return getProperty("title", "");
    }

    /**
     * Get the number of tokens for the given token type (0 if not set).
     *
     * @param tokenType the token type
     * @return the stored tokens count
     */
    public int getTokensCount(TokenType tokenType) {
        // use name() so the key always matches the one written by setTokensCount
        return Integer.parseInt(getProperty("tokensCount-" + tokenType.name(), "0"));
    }

    /**
     * Get the number of types for the given token type (0 if not set).
     *
     * @param tokenType the token type
     * @return the stored types count
     */
    public int getTypesCount(TokenType tokenType) {
        // use name() so the key always matches the one written by setTypesCount
        return Integer.parseInt(getProperty("typesCount-" + tokenType.name(), "0"));
    }

    /**
     * Get the stored types count mean for the given token type (0 if not set).
     *
     * @param tokenType the token type
     * @return the stored mean
     */
    public float getTypesCountMean(TokenType tokenType) {
        return Float.parseFloat(getProperty("typesCountMean-" + tokenType.name(), "0"));
    }

    /**
     * Get the stored types count standard deviation for the given token type
     * (0 if not set).
     *
     * @param tokenType the token type
     * @return the stored standard deviation
     */
    public float getTypesCountStdDev(TokenType tokenType) {
        return Float.parseFloat(getProperty("typesCountStdDev-" + tokenType.name(), "0"));
    }

    /**
     * Set a single author (ignored if {@code null}).
     *
     * @param value the author
     */
    public void setAuthor(String value) {
        setProperty("author", value);
    }

    /**
     * Set multiple authors.
     *
     * @param values the authors
     */
    public void setAuthors(String[] values) {
        setProperty("author", values);
    }

    /**
     * Set the default (back-up) {@link DocumentFormat} of the metadata, as
     * returned by {@link #getDefaultFormat()}.
     *
     * @param format
     *            the default {@link DocumentFormat} of the metadata
     */
    public void setDefaultFormat(DocumentFormat format) {
        setProperty("defaultFormat", format.name().toLowerCase(Locale.ROOT));
    }

    /**
     * Set the {@link DocumentFormat} of the metadata explicitly. Once set,
     * {@link #getDocumentFormat()} returns this value instead of guessing from
     * the source and location.
     *
     * @param documentFormat the {@link DocumentFormat} of the metadata
     */
    public void setDocumentFormat(DocumentFormat documentFormat) {
        setProperty("format", documentFormat.name().toLowerCase(Locale.ROOT));
    }

    /**
     * Set an extra value, stored under {@code extra.} followed by the key
     * (ignored if the value is {@code null}).
     *
     * @param key the name of the extra value
     * @param value the value
     */
    public void setExtra(String key, String value) {
        setProperty("extra." + key, value);
    }

    /**
     * Get an extra value stored under {@code extra.} followed by the key.
     *
     * @param key the name of the extra value
     * @return the value as returned by the underlying parameters
     */
    public String getExtra(String key) {
        return getProperty("extra." + key);
    }

    /**
     * Set multiple extra values, stored under {@code extra.} followed by the key.
     *
     * @param key the name of the extra values
     * @param values the values
     */
    public void setExtras(String key, String[] values) {
        setProperty("extra." + key, values);
    }

    /**
     * Set the index, both as the {@code index} parameter and as the value
     * returned by {@link #getIndex()}.
     *
     * @param index the index
     */
    public void setIndex(int index) {
        setProperty("index", String.valueOf(index));
        this.index = index;
    }

    /**
     * Set the keywords as a single string (ignored if {@code null}).
     *
     * @param value the keywords
     */
    public void setKeywords(String value) {
        setProperty("keywords", value);
    }

    /**
     * Set the language code (ignored if {@code null}).
     *
     * @param lang the language code
     */
    public void setLanguageCode(String lang) {
        setProperty("language", lang);
    }

    /**
     * Set the last token offset for the given token type. The value is stored
     * under {@code lastTokenStartOffset-} followed by the token type name.
     *
     * @param tokenType the token type
     * @param lastOffset the offset to store
     */
    public void setLastTokenOffsetIndex(TokenType tokenType, int lastOffset) {
        setProperty("lastTokenStartOffset-" + tokenType.name(), String.valueOf(lastOffset));
    }

    /**
     * Set the last token position index for the given token type.
     *
     * @param tokenType the token type
     * @param lastPosition the position to store
     */
    public void setLastTokenPositionIndex(TokenType tokenType, int lastPosition) {
        setProperty("lastTokenPositionIndex-" + tokenType.name(), String.valueOf(lastPosition));
    }

    /**
     * Set the location of the source. This is a String representation that will
     * depend on the {@link Source} but may include a file name, a URI, "memory"
     * (for a String or transient InputStream).
     *
     * @param location
     *            the location of the source
     */
    public void setLocation(String location) {
        setProperty("location", location);
    }

    /**
     * Set the last modified timestamp (milliseconds since January 1, 1970 GMT)
     *
     * @param modified
     *            timestamp (milliseconds since January 1, 1970 GMT)
     */
    public void setModified(long modified) {
        setProperty("modified", String.valueOf(modified));
    }

    /**
     * Stores a single trimmed value; a {@code null} value is silently ignored.
     *
     * @param key the parameter key
     * @param value the value to trim and store
     */
    private void setProperty(String key, String value) {
        if (value == null) {
            return;
        }
        parameters.setParameter(key, value.trim());
    }

    /**
     * Stores multiple trimmed values. The caller's array is left untouched;
     * a {@code null} array is ignored (like a {@code null} single value) and
     * {@code null} elements are skipped.
     *
     * @param key the parameter key
     * @param values the values to trim and store
     */
    private void setProperty(String key, String[] values) {
        if (values == null) {
            return;
        }
        String[] trimmed = new String[values.length];
        int count = 0;
        for (String value : values) {
            if (value != null) {
                trimmed[count++] = value.trim();
            }
        }
        // shrink only when null elements were dropped
        parameters.setParameter(key, count == trimmed.length ? trimmed : Arrays.copyOf(trimmed, count));
    }

    /**
     * Set a single publication date (ignored if {@code null}).
     *
     * @param value the publication date
     */
    public void setPubDate(String value) {
        setProperty("pubDate", value);
    }

    /**
     * Set multiple publication dates.
     *
     * @param values the publication dates
     */
    public void setPubDates(String[] values) {
        setProperty("pubDate", values);
    }

    /**
     * Set multiple publishers.
     *
     * @param values the publishers
     */
    public void setPublishers(String[] values) {
        setProperty("publisher", values);
    }

    /**
     * Set multiple publication places.
     *
     * @param values the publication places
     */
    public void setPubPlaces(String[] values) {
        setProperty("pubPlace", values);
    }

    /**
     * Set multiple keywords.
     *
     * NOTE(review): these are stored under {@code keyword}, whereas
     * {@link #setKeywords(String)} and {@link #getKeywords()} use
     * {@code keywords} - confirm whether the different key is intentional.
     *
     * @param keywords the keywords
     */
    public void setKeywords(String[] keywords) {
        setProperty("keyword", keywords);
    }

    /**
     * Set multiple collections.
     *
     * @param collections the collections
     */
    public void setCollections(String[] collections) {
        setProperty("collection", collections);
    }

    /**
     * Set the {@link Source}.
     *
     * @param source
     *            the {@link Source}
     */
    public void setSource(Source source) {
        setProperty("source", source.name().toLowerCase(Locale.ROOT));
    }

    /**
     * Set a single title (ignored if {@code null}).
     *
     * @param value the title
     */
    public void setTitle(String value) {
        setProperty("title", value);
    }

    /**
     * Set multiple titles.
     *
     * @param values the titles
     */
    public void setTitles(String[] values) {
        setProperty("title", values);
    }

    /**
     * Set the number of tokens for the given token type.
     *
     * @param tokenType the token type
     * @param total the tokens count
     */
    public void setTokensCount(TokenType tokenType, int total) {
        setProperty("tokensCount-" + tokenType.name(), String.valueOf(total));
    }

    /**
     * Set the number of types for the given token type.
     *
     * @param tokenType the token type
     * @param totalTypes the types count
     */
    public void setTypesCount(TokenType tokenType, int totalTypes) {
        setProperty("typesCount-" + tokenType.name(), String.valueOf(totalTypes));
    }

    /**
     * Set the types count mean for the given token type.
     *
     * @param tokenType the token type
     * @param mean the mean to store
     */
    public void setTypesCountMean(TokenType tokenType, float mean) {
        setProperty("typesCountMean-" + tokenType.name(), String.valueOf(mean));
    }

    /**
     * Set the types count standard deviation for the given token type.
     *
     * @param tokenType the token type
     * @param mean the standard deviation to store (the parameter name is historical)
     */
    public void setTypesCountStdDev(TokenType tokenType, float mean) {
        setProperty("typesCountStdDev-" + tokenType.name(), String.valueOf(mean));
    }

    /**
     * Set the number of sentences.
     *
     * @param count the sentences count
     */
    public void setSentencesCount(int count) {
        setProperty("sentencesCount", String.valueOf(count));
    }

    /**
     * Get the number of sentences (0 if not set).
     *
     * @return the stored sentences count
     */
    public int getSentencesCount() {
        return Integer.parseInt(getProperty("sentencesCount", "0"));
    }

    /**
     * Describes this metadata as the source name followed by the location.
     *
     * @return a string of the form {@code SOURCE: location}
     */
    @Override
    public String toString() {
        return getSource().name() + ": " + getLocation();
    }

    /**
     * Store the given parameters, rendered as a query string, under the
     * {@code queryParameters} parameter.
     *
     * @param storedparams the parameters to store
     * @throws UnsupportedEncodingException propagated from building the query string
     */
    public void setQueryParameters(FlexibleParameters storedparams) throws UnsupportedEncodingException {
        setProperty("queryParameters", storedparams.getAsQueryString());
    }
}