/*******************************************************************************
* Trombone is a flexible text processing and analysis library used
* primarily by Voyant Tools (voyant-tools.org).
*
* Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
*
* This file is part of Trombone.
*
* Trombone is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Trombone is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Trombone. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package org.voyanttools.trombone.model;
import java.io.Serializable;
import java.text.Normalizer;
import java.util.Comparator;
import org.voyanttools.trombone.util.FlexibleParameters;
import com.thoughtworks.xstream.annotations.XStreamOmitField;
/**
* @author sgs
*
*/
public class DocumentCollocate implements Serializable {

    /** Document index, as passed to the constructor ({@code corpusDocumentIndex}). */
    private int docIndex;

    /** The keyword for which {@link #term} is a collocate. */
    private String keyword;

    /** Raw frequency of the keyword, as passed to the constructor. */
    private int keywordContextRawFrequency;

    /** The collocate term. */
    private String term;

    // Not read or written anywhere in this class; retained so that the
    // serialized form of the class stays unchanged.
    @XStreamOmitField
    private String normalizedString = null;

    /** Lazily computed NFD-normalized form of {@link #term}; see {@link #getNormalizedTerm()}. */
    @XStreamOmitField
    private String normalizedTerm;

    /** Raw frequency of the term in the keyword's context. */
    private int termContextRawFrequency;

    /** {@link #termContextRawFrequency} divided by the total number of context tokens. */
    private float termContextRelativeFrequency;

    /** Raw frequency of the term in the document. */
    private int termDocumentRawFrequency;

    /** {@link #termDocumentRawFrequency} divided by the total number of document tokens. */
    private float termDocumentRelativeFrequency;

    /** Context relative frequency minus document relative frequency. */
    private float termContextDocumentRelativeFrequencyDifference;

    /**
     * Sort orders supported by {@link DocumentCollocate#getComparator(Sort)}.
     * Each constant names a sort key followed by its direction.
     */
    public enum Sort {
        termAsc,
        termDesc,
        relDesc,
        relAsc,
        rawDesc,
        rawAsc,
        docRelDesc,
        docRelAsc,
        docRawDesc,
        docRawAsc,
        contextDocRelDiffDesc,
        contextDocRelDiffAsc;

        /**
         * Resolves a sort order from request parameters. A {@code sort}
         * parameter takes precedence; otherwise {@code sortBy} (combined with
         * the optional {@code sortDirection}) is used; otherwise the default
         * {@link #contextDocRelDiffDesc} is returned.
         *
         * @param parameters the parameters to inspect
         * @return the matching sort order, never {@code null}
         */
        public static Sort valueOfForgivingly(FlexibleParameters parameters) {
            if (parameters.containsKey("sort")) {
                return valueOfForgivingly(parameters.getParameterValue("sort"));
            }
            if (parameters.containsKey("sortBy")) {
                return valueOfForgivingly(parameters.getParameterValue("sortBy"), parameters.getParameterValue("sortDirection"));
            }
            return valueOfForgivingly(""); // use default
        }

        /**
         * Resolves a sort order from a sort key and a direction.
         *
         * @param sortBy the sort key (e.g. {@code "term"}, {@code "rel"}); when
         *        {@code null} the default sort order is returned
         * @param sortDirection a direction; anything starting with "asc"
         *        (case-insensitively) means ascending, anything else
         *        (including {@code null}) means descending
         * @return the matching sort order, or the default when nothing matches
         */
        public static Sort valueOfForgivingly(String sortBy, String sortDirection) {
            if (sortBy == null) {
                return valueOfForgivingly(""); // direction doesn't matter if no sortBy is provided
            }
            // regionMatches(ignoreCase=true, ...) is locale-independent, unlike toLowerCase()
            boolean ascending = sortDirection != null && sortDirection.regionMatches(true, 0, "asc", 0, 3);
            return valueOfForgivingly(sortBy + (ascending ? "Asc" : "Desc"));
        }

        /**
         * Resolves a sort order by name, ignoring case.
         *
         * @param string the name of a constant of this enum; may be {@code null}
         * @return the matching constant, or {@link #contextDocRelDiffDesc} when
         *         {@code string} is {@code null} or matches no constant
         */
        public static Sort valueOfForgivingly(String string) {
            if (string != null) {
                for (Sort t : values()) {
                    // equalsIgnoreCase does not depend on the default locale
                    if (t.name().equalsIgnoreCase(string)) {
                        return t;
                    }
                }
            }
            return contextDocRelDiffDesc;
        }
    }

    /**
     * Creates a collocate record and derives its relative frequencies.
     *
     * @param corpusDocumentIndex index of the document
     * @param keyword the keyword whose context was examined
     * @param term the collocate term
     * @param keywordContextRawFrequency raw frequency of the keyword
     * @param termContextRawFrequency raw frequency of the term in the context
     * @param termDocumentRawFrequency raw frequency of the term in the document
     * @param contextTotalTokens total tokens in the context (denominator of the
     *        context relative frequency)
     * @param documentTotalTokens total tokens in the document (denominator of
     *        the document relative frequency)
     */
    public DocumentCollocate(int corpusDocumentIndex, String keyword, String term,
            int keywordContextRawFrequency, int termContextRawFrequency, int termDocumentRawFrequency,
            int contextTotalTokens, int documentTotalTokens) {
        this.docIndex = corpusDocumentIndex;
        this.keyword = keyword;
        this.keywordContextRawFrequency = keywordContextRawFrequency;
        this.term = term;
        this.termContextRawFrequency = termContextRawFrequency;
        this.termContextRelativeFrequency = relativeFrequency(termContextRawFrequency, contextTotalTokens);
        this.termDocumentRawFrequency = termDocumentRawFrequency;
        this.termDocumentRelativeFrequency = relativeFrequency(termDocumentRawFrequency, documentTotalTokens);
        this.termContextDocumentRelativeFrequencyDifference = termContextRelativeFrequency - termDocumentRelativeFrequency;
    }

    /**
     * Divides a raw frequency by a token total using floating-point
     * arithmetic. Dividing the two ints directly would truncate the quotient
     * before it is widened to float.
     *
     * @return the relative frequency, or 0 when {@code totalTokens} is 0
     */
    private static float relativeFrequency(int rawFrequency, int totalTokens) {
        return totalTokens == 0 ? 0f : (float) rawFrequency / totalTokens;
    }

    /** Returns the NFD-normalized term, computing and caching it on first use. */
    private String getNormalizedTerm() {
        if (normalizedTerm == null) {
            normalizedTerm = Normalizer.normalize(term, Normalizer.Form.NFD);
        }
        return normalizedTerm;
    }

    /** Orders two collocates by their normalized terms, ascending; used as the common tie-breaker. */
    private static int compareTerms(DocumentCollocate first, DocumentCollocate second) {
        return first.getNormalizedTerm().compareTo(second.getNormalizedTerm());
    }

    /**
     * Returns the comparator implementing the given sort order.
     *
     * @param sort the requested sort order
     * @return the corresponding comparator; the descending
     *         context/document relative-frequency-difference comparator is
     *         the fallback
     */
    public static Comparator<DocumentCollocate> getComparator(Sort sort) {
        switch (sort) {
            case termAsc: return TermAscendingComparator;
            case termDesc: return TermDescendingComparator;
            case relDesc: return ContextRelativeFrequencyDescendingComparator;
            case relAsc: return ContextRelativeFrequencyAscendingComparator;
            case rawDesc: return ContextRawFrequencyDescendingComparator;
            case rawAsc: return ContextRawFrequencyAscendingComparator;
            case docRelDesc: return DocumentRelativeFrequencyDescendingComparator;
            case docRelAsc: return DocumentRelativeFrequencyAscendingComparator;
            case docRawDesc: return DocumentRawFrequencyDescendingComparator;
            case docRawAsc: return DocumentRawFrequencyAscendingComparator;
            case contextDocRelDiffAsc: return ContextDocumentRelativeDifferenceAscendingComparator;
            case contextDocRelDiffDesc:
            default:
                return ContextDocumentRelativeDifferenceDescendingComparator;
        }
    }

    /** Smallest context/document relative-frequency difference first; ties by term. */
    private static final Comparator<DocumentCollocate> ContextDocumentRelativeDifferenceAscendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byDifference = Float.compare(documentCollocate1.termContextDocumentRelativeFrequencyDifference,
                    documentCollocate2.termContextDocumentRelativeFrequencyDifference);
            return byDifference != 0 ? byDifference : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Largest context/document relative-frequency difference first; ties by term. */
    private static final Comparator<DocumentCollocate> ContextDocumentRelativeDifferenceDescendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byDifference = Float.compare(documentCollocate2.termContextDocumentRelativeFrequencyDifference,
                    documentCollocate1.termContextDocumentRelativeFrequencyDifference);
            return byDifference != 0 ? byDifference : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Highest context raw frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> ContextRawFrequencyDescendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Integer.compare(documentCollocate2.termContextRawFrequency, documentCollocate1.termContextRawFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Lowest context raw frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> ContextRawFrequencyAscendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Integer.compare(documentCollocate1.termContextRawFrequency, documentCollocate2.termContextRawFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Terms in ascending order; equal terms by ascending relative-frequency difference. */
    private static final Comparator<DocumentCollocate> TermAscendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byTerm = compareTerms(documentCollocate1, documentCollocate2);
            if (byTerm != 0) {
                return byTerm;
            }
            // Float.compare yields 0 for equal values, as the Comparator contract requires
            return Float.compare(documentCollocate1.termContextDocumentRelativeFrequencyDifference,
                    documentCollocate2.termContextDocumentRelativeFrequencyDifference);
        }
    };

    /** Terms in descending order; equal terms by ascending relative-frequency difference. */
    private static final Comparator<DocumentCollocate> TermDescendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byTerm = compareTerms(documentCollocate2, documentCollocate1);
            if (byTerm != 0) {
                return byTerm;
            }
            // Float.compare yields 0 for equal values, as the Comparator contract requires
            return Float.compare(documentCollocate1.termContextDocumentRelativeFrequencyDifference,
                    documentCollocate2.termContextDocumentRelativeFrequencyDifference);
        }
    };

    /** Highest context relative frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> ContextRelativeFrequencyDescendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Float.compare(documentCollocate2.termContextRelativeFrequency, documentCollocate1.termContextRelativeFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Lowest context relative frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> ContextRelativeFrequencyAscendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Float.compare(documentCollocate1.termContextRelativeFrequency, documentCollocate2.termContextRelativeFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Highest document relative frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> DocumentRelativeFrequencyDescendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Float.compare(documentCollocate2.termDocumentRelativeFrequency, documentCollocate1.termDocumentRelativeFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Lowest document relative frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> DocumentRelativeFrequencyAscendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Float.compare(documentCollocate1.termDocumentRelativeFrequency, documentCollocate2.termDocumentRelativeFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Highest document raw frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> DocumentRawFrequencyDescendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Integer.compare(documentCollocate2.termDocumentRawFrequency, documentCollocate1.termDocumentRawFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Lowest document raw frequency first; ties by term. */
    private static final Comparator<DocumentCollocate> DocumentRawFrequencyAscendingComparator = new Comparator<DocumentCollocate>() {
        @Override
        public int compare(DocumentCollocate documentCollocate1, DocumentCollocate documentCollocate2) {
            int byFrequency = Integer.compare(documentCollocate1.termDocumentRawFrequency, documentCollocate2.termDocumentRawFrequency);
            return byFrequency != 0 ? byFrequency : compareTerms(documentCollocate1, documentCollocate2);
        }
    };

    /** Returns a one-line summary: keyword, term, raw and relative frequencies, and their difference. */
    @Override
    public String toString() {
        return "("+keyword+") "+term+": "+termContextRawFrequency+" ("+termContextRelativeFrequency+") / "+termDocumentRawFrequency+" ("+termDocumentRelativeFrequency+"); difference: "+termContextDocumentRelativeFrequencyDifference;
    }

    /** @return the collocate term */
    public String getTerm() {
        return term;
    }

    /** @return the keyword this term collocates with */
    public String getKeyword() {
        return keyword;
    }

    /** @return the raw frequency of the keyword */
    public int getKeywordContextRawFrequency() {
        return keywordContextRawFrequency;
    }

    /** @return the raw frequency of the term in the keyword's context */
    public int getContextRawFrequency() {
        return termContextRawFrequency;
    }

    /** @return the relative frequency of the term in the keyword's context */
    public float getTermContextRelativeFrequency() {
        return termContextRelativeFrequency;
    }

    /** @return the raw frequency of the term in the document */
    public int getTermDocumentRawFrequency() {
        return termDocumentRawFrequency;
    }

    /** @return the relative frequency of the term in the document */
    public float getTermDocumentRelativeFrequency() {
        return termDocumentRelativeFrequency;
    }

    /** @return the index of the document */
    public int getDocIndex() {
        return docIndex;
    }
}