/**
*
*/
package org.voyanttools.trombone.model;
import java.text.Normalizer;
import java.util.Comparator;
import org.voyanttools.trombone.util.FlexibleParameters;
import com.thoughtworks.xstream.annotations.XStreamAlias;
import com.thoughtworks.xstream.annotations.XStreamOmitField;
/**
 * An n-gram term together with its summed raw frequency, a length value and
 * the array of raw frequencies the sum was computed from.
 *
 * <p>Instances are ordered through the comparators returned by
 * {@link #getComparator(Sort)}.</p>
 *
 * @author sgs
 */
@XStreamAlias("ngram")
public class CorpusNgram {

    /**
     * The supported orderings: a sort key (raw frequency, term or length)
     * combined with a direction (ascending or descending).
     */
    public enum Sort {
        RAWFREQASC, RAWFREQDESC, TERMASC, TERMDESC, LENGTHASC, LENGTHDESC;

        /**
         * Resolves a {@link Sort} from the {@code sort} and {@code dir}
         * parameters without ever failing on unknown values.
         *
         * <p>The upper-cased {@code sort} value selects the key by prefix
         * ({@code TERM} or {@code LENGTH}); anything else falls back to
         * {@code RAWFREQ}. The direction comes only from {@code dir}: it is
         * ascending when the upper-cased value ends with {@code ASC} and
         * descending otherwise. A direction suffix inside {@code sort} itself
         * is not consulted.</p>
         *
         * @param parameters the parameters holding the optional {@code sort}
         *                   and {@code dir} values
         * @return the matching sort constant, {@link #RAWFREQDESC} when
         *         neither parameter is recognized
         */
        public static Sort getForgivingly(FlexibleParameters parameters) {
            String sort = parameters.getParameterValue("sort", "").toUpperCase();
            String sortPrefix = "RAWFREQ"; // default
            if (sort.startsWith("TERM")) {
                sortPrefix = "TERM";
            } else if (sort.startsWith("LENGTH")) {
                sortPrefix = "LENGTH";
            }
            String dir = parameters.getParameterValue("dir", "").toUpperCase();
            String dirSuffix = "DESC"; // default
            if (dir.endsWith("ASC")) {
                dirSuffix = "ASC";
            }
            return valueOf(sortPrefix + dirSuffix);
        }
    }

    private String term;

    /** Sum of all values in {@link #distributions}. */
    private int rawFreq;

    private int length;

    /** The raw frequencies passed to the constructor (stored by reference, not copied). */
    private int[] distributions;

    /** Lazily computed NFD form of {@link #term}; never serialized. */
    @XStreamOmitField
    private transient String normalizedString = null;

    /**
     * Creates an n-gram whose raw frequency is the sum of {@code rawFreqs}.
     *
     * @param term     the n-gram text
     * @param length   the length value recorded for this n-gram
     * @param rawFreqs the individual raw frequencies; kept as the
     *                 distribution and summed into the total raw frequency
     */
    public CorpusNgram(String term, int length, int[] rawFreqs) {
        this.term = term;
        this.length = length;
        this.distributions = rawFreqs;
        this.rawFreq = 0;
        for (int i : rawFreqs) {
            this.rawFreq += i;
        }
    }

    /**
     * Returns the term in Unicode NFD form, computing and caching it on
     * first use.
     *
     * @return the NFD-normalized term
     */
    private String getNormalizedTerm() {
        if (normalizedString == null) {
            normalizedString = Normalizer.normalize(term, Normalizer.Form.NFD);
        }
        return normalizedString;
    }

    /**
     * Returns the comparator that corresponds to the given sort constant.
     *
     * @param sort the requested ordering
     * @return the matching comparator; {@link Sort#RAWFREQDESC} is handled
     *         by the default branch
     */
    public static Comparator<CorpusNgram> getComparator(Sort sort) {
        switch (sort) {
        case RAWFREQASC:
            return RawFrequencyAscendingComparator;
        case TERMASC:
            return TermAscendingComparator;
        case TERMDESC:
            return TermDescendingComparator;
        case LENGTHASC:
            return LengthAscendingComparator;
        case LENGTHDESC:
            return LengthDescendingComparator;
        default: // rawFrequencyDesc
            return RawFrequencyDescendingComparator;
        }
    }

    /**
     * Compares the normalized terms with the arguments swapped (second
     * against first); equal terms are ordered by increasing raw frequency.
     *
     * NOTE(review): with a plain sort this yields reverse lexical order even
     * though the name says "ascending", whereas the frequency and length
     * comparators sort in the direction their names state. Left unchanged
     * because callers may rely on it -- confirm the intended direction.
     */
    private static final Comparator<CorpusNgram> TermAscendingComparator = new Comparator<CorpusNgram>() {
        @Override
        public int compare(CorpusNgram term1, CorpusNgram term2) {
            int i = term2.getNormalizedTerm().compareTo(term1.getNormalizedTerm());
            if (i == 0) {
                // Integer.compare cannot overflow the way subtraction can
                return Integer.compare(term1.rawFreq, term2.rawFreq);
            }
            return i;
        }
    };

    /**
     * Compares the normalized terms in natural order (first against second);
     * equal terms are ordered by increasing raw frequency.
     *
     * NOTE(review): with a plain sort this yields lexical order even though
     * the name says "descending" -- confirm the intended direction.
     */
    private static final Comparator<CorpusNgram> TermDescendingComparator = new Comparator<CorpusNgram>() {
        @Override
        public int compare(CorpusNgram term1, CorpusNgram term2) {
            int i = term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm());
            if (i == 0) {
                return Integer.compare(term1.rawFreq, term2.rawFreq);
            }
            return i;
        }
    };

    /** Highest raw frequency first; ties are ordered by normalized term. */
    private static final Comparator<CorpusNgram> RawFrequencyDescendingComparator = new Comparator<CorpusNgram>() {
        @Override
        public int compare(CorpusNgram term1, CorpusNgram term2) {
            if (term1.rawFreq == term2.rawFreq) {
                return term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm());
            }
            return Integer.compare(term2.rawFreq, term1.rawFreq);
        }
    };

    /** Lowest raw frequency first; ties are ordered by reversed normalized term. */
    private static final Comparator<CorpusNgram> RawFrequencyAscendingComparator = new Comparator<CorpusNgram>() {
        @Override
        public int compare(CorpusNgram term1, CorpusNgram term2) {
            if (term1.rawFreq == term2.rawFreq) {
                return term2.getNormalizedTerm().compareTo(term1.getNormalizedTerm());
            }
            return Integer.compare(term1.rawFreq, term2.rawFreq);
        }
    };

    /** Greatest length first; ties are ordered by normalized term. */
    private static final Comparator<CorpusNgram> LengthDescendingComparator = new Comparator<CorpusNgram>() {
        @Override
        public int compare(CorpusNgram term1, CorpusNgram term2) {
            if (term1.length == term2.length) {
                return term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm());
            }
            return Integer.compare(term2.length, term1.length);
        }
    };

    /** Smallest length first; ties are ordered by reversed normalized term. */
    private static final Comparator<CorpusNgram> LengthAscendingComparator = new Comparator<CorpusNgram>() {
        @Override
        public int compare(CorpusNgram term1, CorpusNgram term2) {
            if (term1.length == term2.length) {
                return term2.getNormalizedTerm().compareTo(term1.getNormalizedTerm());
            }
            return Integer.compare(term1.length, term2.length);
        }
    };

    /**
     * Returns a short debugging representation of the form
     * <code>{term: rawFreq (length)}</code>.
     */
    @Override
    public String toString() {
        return "{" + term + ": " + rawFreq + " (" + length + ")}";
    }
}