package org.voyanttools.trombone.model;
import java.text.Normalizer;
import java.util.Comparator;
import org.voyanttools.trombone.util.FlexibleParameters;
import com.thoughtworks.xstream.annotations.XStreamAlias;
/**
 * A keyword term and a context term, each carrying a raw frequency count.
 *
 * <p>Instances have a natural order (see {@link #compareTo(CorpusCollocate)}) and a set of
 * ready-made comparators selectable through {@link Sort} and {@link #getComparator(Sort)}.
 * {@code equals}/{@code hashCode} are not overridden, so two distinct instances are never
 * {@code equal}; the natural order uses {@code hashCode()} as its last tie-breaker.
 */
@XStreamAlias("collocate")
public class CorpusCollocate implements Comparable<CorpusCollocate> {

    /** The keyword term. */
    private String term;

    /** Raw frequency of the keyword term. */
    private int rawFreq;

    /** The context term paired with the keyword. */
    private String contextTerm;

    /** Raw frequency of the context term. */
    private int contextTermRawFreq;

    /** Lazily computed NFD form of {@link #contextTerm}; not serialized. */
    private transient String normalizedContextTerm = null;

    /** Lazily computed NFD form of {@link #term}; not serialized. */
    private transient String normalizedKeyword = null;

    /**
     * The supported sort orders: a sort key (keyword raw frequency, keyword term, context term,
     * context term raw frequency) combined with a direction (ASC or DESC).
     */
    public enum Sort {
        RAWFREQASC, RAWFREQDESC, TERMASC, TERMDESC, CONTEXTTERMASC, CONTEXTTERMDESC, CONTEXTTERMRAWFREQASC, CONTEXTTERMRAWFREQDESC;

        /**
         * Derives a sort order from the {@code sort} and {@code dir} parameters without ever
         * failing on unknown values.
         *
         * <p>The upper-cased {@code sort} value is matched by prefix against
         * {@code CONTEXTTERMRAWFREQ}, {@code CONTEXTTERM} and {@code TERM}; anything else
         * (including a missing value) selects {@code RAWFREQ}. The direction is {@code ASC}
         * only when the upper-cased {@code dir} value ends with {@code ASC}; otherwise it is
         * {@code DESC}.
         *
         * @param parameters the parameters to read {@code sort} and {@code dir} from
         * @return the matching sort order, {@link #RAWFREQDESC} when nothing is recognized
         */
        public static Sort getForgivingly(FlexibleParameters parameters) {
            String sort = parameters.getParameterValue("sort", "").toUpperCase();
            String sortPrefix;
            // most specific prefix first: CONTEXTTERMRAWFREQ also starts with CONTEXTTERM
            if (sort.startsWith("CONTEXTTERMRAWFREQ")) {
                sortPrefix = "CONTEXTTERMRAWFREQ";
            } else if (sort.startsWith("CONTEXTTERM")) {
                sortPrefix = "CONTEXTTERM";
            } else if (sort.startsWith("TERM")) {
                sortPrefix = "TERM";
            } else {
                sortPrefix = "RAWFREQ"; // default
            }
            String dir = parameters.getParameterValue("dir", "").toUpperCase();
            String dirSuffix = dir.endsWith("ASC") ? "ASC" : "DESC";
            return valueOf(sortPrefix + dirSuffix);
        }
    }

    /**
     * Creates a collocate.
     *
     * @param keyword the keyword term
     * @param keywordRawFrequency raw frequency of the keyword term
     * @param contextTerm the context term
     * @param contextTermRawFrequency raw frequency of the context term
     */
    public CorpusCollocate(String keyword, int keywordRawFrequency, String contextTerm, int contextTermRawFrequency) {
        this.term = keyword;
        this.rawFreq = keywordRawFrequency;
        this.contextTerm = contextTerm;
        this.contextTermRawFreq = contextTermRawFrequency;
    }

    /** Returns the NFD-normalized context term, computing and caching it on first use. */
    private String getNormalizedContextTerm() {
        if (normalizedContextTerm == null) {
            normalizedContextTerm = Normalizer.normalize(contextTerm, Normalizer.Form.NFD);
        }
        return normalizedContextTerm;
    }

    /**
     * @return the context term
     */
    public String getContextTerm() {
        return contextTerm;
    }

    /**
     * @return the raw frequency of the context term
     */
    public int getContextTermRawFrequency() {
        return contextTermRawFreq;
    }

    /** Returns the NFD-normalized keyword term, computing and caching it on first use. */
    private String getNormalizedKeyword() {
        if (normalizedKeyword == null) {
            normalizedKeyword = Normalizer.normalize(term, Normalizer.Form.NFD);
        }
        return normalizedKeyword;
    }

    /**
     * Compares the keyword terms of two collocates by their NFD-normalized form.
     *
     * @return 0 when the terms are equal strings, otherwise the normalized comparison of
     *         {@code first} against {@code second} (which may still be 0 when the two terms
     *         differ only in Unicode normalization form)
     */
    private static int compareKeywords(CorpusCollocate first, CorpusCollocate second) {
        if (first.term.equals(second.term)) {
            return 0; // skip normalization when the raw strings already match
        }
        return first.getNormalizedKeyword().compareTo(second.getNormalizedKeyword());
    }

    /**
     * Compares the context terms of two collocates by their NFD-normalized form.
     *
     * @return 0 when the context terms are equal strings, otherwise the normalized comparison
     *         of {@code first} against {@code second} (which may still be 0 when the two terms
     *         differ only in Unicode normalization form)
     */
    private static int compareContextTerms(CorpusCollocate first, CorpusCollocate second) {
        if (first.contextTerm.equals(second.contextTerm)) {
            return 0; // skip normalization when the raw strings already match
        }
        return first.getNormalizedContextTerm().compareTo(second.getNormalizedContextTerm());
    }

    /**
     * Returns the comparator implementing the given sort order.
     *
     * @param sort the requested sort order
     * @return the matching comparator; {@link Sort#RAWFREQDESC} is handled by the default branch
     */
    public static Comparator<CorpusCollocate> getComparator(Sort sort) {
        switch (sort) {
        case RAWFREQASC:
            return RawFrequencyAscendingComparator;
        case TERMASC:
            return TermAscendingComparator;
        case TERMDESC:
            return TermDescendingComparator;
        case CONTEXTTERMASC:
            return ContextTermAscendingComparator;
        case CONTEXTTERMDESC:
            return ContextTermDescendingComparator;
        case CONTEXTTERMRAWFREQASC:
            return ContextTermRawFrequencyAscendingComparator;
        case CONTEXTTERMRAWFREQDESC:
            return ContextTermRawFrequencyDescendingComparator;
        default: // rawFrequencyDesc
            return RawFrequencyDescendingComparator;
        }
    }

    /** Natural order: ascending keyword raw frequency (see {@link #compareTo(CorpusCollocate)}). */
    private static final Comparator<CorpusCollocate> RawFrequencyAscendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            return corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /**
     * Reversed natural order: descending keyword raw frequency, then ascending keyword,
     * descending context term raw frequency, ascending context term.
     */
    private static final Comparator<CorpusCollocate> RawFrequencyDescendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            return corpusCollocate2.compareTo(corpusCollocate1);
        }
    };

    /** Ascending context term raw frequency; ties fall back to the natural order. */
    private static final Comparator<CorpusCollocate> ContextTermRawFrequencyAscendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            int byFrequency = Integer.compare(corpusCollocate1.contextTermRawFreq, corpusCollocate2.contextTermRawFreq);
            return byFrequency != 0 ? byFrequency : corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /** Descending context term raw frequency; ties fall back to the natural order. */
    private static final Comparator<CorpusCollocate> ContextTermRawFrequencyDescendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            int byFrequency = Integer.compare(corpusCollocate2.contextTermRawFreq, corpusCollocate1.contextTermRawFreq);
            return byFrequency != 0 ? byFrequency : corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /** Ascending normalized context term; ties fall back to the natural order. */
    private static final Comparator<CorpusCollocate> ContextTermAscendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            int byContextTerm = compareContextTerms(corpusCollocate1, corpusCollocate2);
            // a 0 here must not be returned directly: the terms may differ only in normalization form
            return byContextTerm != 0 ? byContextTerm : corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /** Descending normalized context term; ties fall back to the natural order. */
    private static final Comparator<CorpusCollocate> ContextTermDescendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            int byContextTerm = compareContextTerms(corpusCollocate2, corpusCollocate1);
            return byContextTerm != 0 ? byContextTerm : corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /** Ascending normalized keyword; ties fall back to the natural order. */
    private static final Comparator<CorpusCollocate> TermAscendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            int byKeyword = compareKeywords(corpusCollocate1, corpusCollocate2);
            // a 0 here must not be returned directly: the terms may differ only in normalization form
            return byKeyword != 0 ? byKeyword : corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /** Descending normalized keyword; ties fall back to the natural order. */
    private static final Comparator<CorpusCollocate> TermDescendingComparator = new Comparator<CorpusCollocate>() {
        @Override
        public int compare(CorpusCollocate corpusCollocate1, CorpusCollocate corpusCollocate2) {
            int byKeyword = compareKeywords(corpusCollocate2, corpusCollocate1);
            return byKeyword != 0 ? byKeyword : corpusCollocate1.compareTo(corpusCollocate2);
        }
    };

    /**
     * Natural order, which is the ascending raw-frequency order used by
     * {@link Sort#RAWFREQASC}. Keys, in priority:
     * <ol>
     * <li>keyword raw frequency, ascending;</li>
     * <li>normalized keyword, descending;</li>
     * <li>context term raw frequency, ascending;</li>
     * <li>normalized context term, descending;</li>
     * <li>{@code hashCode()}, ascending.</li>
     * </ol>
     * The tie-breakers are deliberately written "backwards" so that the reversed order
     * ({@link Sort#RAWFREQDESC}) reads: descending keyword frequency, ascending keyword,
     * descending context term frequency, ascending context term.
     *
     * <p>A key whose comparison yields 0 (including terms that differ only in Unicode
     * normalization form) never ends the comparison early; the next key is consulted instead.
     *
     * @param o the collocate to compare against
     * @return a negative, zero or positive value as this collocate sorts before, with or
     *         after {@code o}
     */
    @Override
    public int compareTo(CorpusCollocate o) {
        // first by keyword raw frequency
        int result = Integer.compare(rawFreq, o.rawFreq);
        if (result != 0) {
            return result;
        }
        // next by keyword term (ascending once reversed by the descending comparator)
        result = compareKeywords(o, this);
        if (result != 0) {
            return result;
        }
        // next by context term frequency (descending once reversed)
        result = Integer.compare(contextTermRawFreq, o.contextTermRawFreq);
        if (result != 0) {
            return result;
        }
        // next by context term (ascending once reversed)
        result = compareContextTerms(o, this);
        if (result != 0) {
            return result;
        }
        // next by hashcode
        return Integer.compare(hashCode(), o.hashCode());
    }

    /**
     * @return a human-readable summary listing the context term and keyword with their
     *         raw frequencies
     */
    @Override
    public String toString() {
        return new StringBuilder("{corpus collocate - context: ").append(contextTerm).append(" (").append(contextTermRawFreq).append("); keyword: ").append(term).append(" (").append(rawFreq).append(")}").toString();
    }
}