/** * */ package org.voyanttools.trombone.model; import java.io.Serializable; import java.text.Normalizer; import java.util.Comparator; import org.voyanttools.trombone.util.FlexibleParameters; import com.thoughtworks.xstream.annotations.XStreamAlias; import com.thoughtworks.xstream.annotations.XStreamOmitField; /** * @author sgs * */ @XStreamAlias("entity") public class CorpusEntity implements Serializable, Cloneable { public enum Sort { RAWFREQASC, RAWFREQDESC, TERMASC, TERMDESC, INDOCUMENTSCOUNTASC, INDOCUMENTSCOUNTDESC; public static Sort getForgivingly(FlexibleParameters parameters) { String sort = parameters.getParameterValue("sort", "").toUpperCase(); String sortPrefix = "RAWFREQ"; // default if (sort.startsWith("TERM")) {sortPrefix = "TERM";} else if (sort.startsWith("INDOCUMENTSCOUNT")) {sortPrefix = "INDOCUMENTSCOUNT";} String dir = parameters.getParameterValue("dir", "").toUpperCase(); String dirSuffix = "DESC"; if (dir.endsWith("ASC")) {dirSuffix="ASC";} return valueOf(sortPrefix+dirSuffix); } } private String term; private EntityType type; private int rawFreq; private int[] rawFreqs; private int inDocumentsCount; @XStreamOmitField private transient String normalizedString = null; /** * @param inDocumentsCount * */ public CorpusEntity(String term, EntityType type, int rawFreq, int inDocumentsCount, int[] rawFreqs) { this.term = term; this.type = type; this.rawFreq = rawFreq; this.inDocumentsCount = inDocumentsCount; this.rawFreqs = rawFreqs; } public String getTerm() { return term; } public EntityType getType() { return type; } public CorpusEntity clone() { return new CorpusEntity(term, type, rawFreq, inDocumentsCount, rawFreqs); } private String getNormalizedTerm() { if (normalizedString==null) {normalizedString = Normalizer.normalize(term, Normalizer.Form.NFD);} return normalizedString; } public static Comparator<CorpusEntity> getComparator(Sort sort) { switch (sort) { case RAWFREQASC: return RawFrequencyAscendingComparator; case TERMASC: return TermAscendingComparator; case TERMDESC: return TermDescendingComparator; case INDOCUMENTSCOUNTASC: return InDocumentsCountAscendingComparator; case INDOCUMENTSCOUNTDESC: return InDocumentsCountDescendingComparator; default: // rawFrequencyDesc return RawFrequencyDescendingComparator; } } private static Comparator<CorpusEntity> TermAscendingComparator = new Comparator<CorpusEntity>() { @Override public int compare(CorpusEntity term1, CorpusEntity term2) { int i = term2.getNormalizedTerm().compareTo(term1.getNormalizedTerm()); if (i==0) { return term1.rawFreq - term2.rawFreq; } return i; } }; private static Comparator<CorpusEntity> TermDescendingComparator = new Comparator<CorpusEntity>() { @Override public int compare(CorpusEntity term1, CorpusEntity term2) { int i = term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm()); if (i==0) { return term1.rawFreq - term2.rawFreq; } return i; } }; private static Comparator<CorpusEntity> RawFrequencyDescendingComparator = new Comparator<CorpusEntity>() { @Override public int compare(CorpusEntity term1, CorpusEntity term2) { if (term1.rawFreq==term2.rawFreq) { return term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm()); } else { return term2.rawFreq - term1.rawFreq; } } }; private static Comparator<CorpusEntity> RawFrequencyAscendingComparator = new Comparator<CorpusEntity>() { @Override public int compare(CorpusEntity term1, CorpusEntity term2) { if (term1.rawFreq==term2.rawFreq) { return term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm()); } else { return term1.rawFreq - term2.rawFreq; } } }; private static Comparator<CorpusEntity> InDocumentsCountAscendingComparator = new Comparator<CorpusEntity>() { @Override public int compare(CorpusEntity term1, CorpusEntity term2) { if (term1.inDocumentsCount==term2.inDocumentsCount) { return term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm()); } else { return term1.inDocumentsCount - term2.inDocumentsCount; } } }; private static Comparator<CorpusEntity> InDocumentsCountDescendingComparator = new Comparator<CorpusEntity>() { @Override public int compare(CorpusEntity term1, CorpusEntity term2) { if (term1.inDocumentsCount==term2.inDocumentsCount) { return term1.getNormalizedTerm().compareTo(term2.getNormalizedTerm()); } else { return term2.inDocumentsCount - term1.inDocumentsCount; } } }; public int getRawFreq() { return rawFreq; } }