/*******************************************************************************
* Trombone is a flexible text processing and analysis library used
* primarily by Voyant Tools (voyant-tools.org).
*
* Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
*
* This file is part of Trombone.
*
* Trombone is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Trombone is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Trombone. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package org.voyanttools.trombone.tool.analysis;
import java.text.Normalizer;
import java.util.Comparator;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
/**
 * Raw frequencies of a single term distributed over a fixed number of bins, together with
 * lazily computed descriptive statistics (mean, skewness, kurtosis, standard deviation) of
 * the per-bin values.
 *
 * <p>The natural ordering compares the lower-cased, NFD-normalized form of the term. This
 * class does not override {@code equals}, so the natural ordering is not consistent with
 * equals: two instances whose normalized terms match compare as {@code 0}.
 *
 * <p>The statistics cache is filled without any synchronization.
 *
 * @author sgs
 */
public class DistributedTermFrequencies implements Comparable<DistributedTermFrequencies> {

    /** The term exactly as supplied to the constructor. */
    private final String string;

    /** Frequency stored for each bin. */
    private final int[] freqs;

    /** Running sum of every frequency passed in through the constructor or {@link #add}. */
    private int totalRawFrequency;

    /** Lower-cased, NFD-normalized form of the term, used as the sort key. */
    private final String normalizedString; // for better sorting

    /** Cached statistics over {@link #freqs}; {@code null} until needed or after a change. */
    private DescriptiveStatistics stats = null;

    /**
     * Creates an instance with {@code bins} bins, all initially zero.
     *
     * @param string the term
     * @param bins the number of bins
     * @throws NegativeArraySizeException if {@code bins} is negative
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public DistributedTermFrequencies(String string, int bins) {
        this(string, new int[bins]);
    }

    /**
     * Creates an instance from existing per-bin frequencies. The array is copied, so later
     * changes to the caller's array do not affect this instance.
     *
     * @param string the term
     * @param freqs the frequency of the term in each bin
     * @throws NullPointerException if {@code string} or {@code freqs} is {@code null}
     */
    public DistributedTermFrequencies(String string, int[] freqs) {
        this.string = string;
        // Copy and sum directly rather than going through the overridable add().
        this.freqs = freqs.clone();
        int total = 0;
        for (int freq : this.freqs) {
            total += freq;
        }
        this.totalRawFrequency = total;
        // Locale.ROOT keeps the sort key independent of the JVM's default locale.
        this.normalizedString = Normalizer.normalize(
                string.toLowerCase(java.util.Locale.ROOT), Normalizer.Form.NFD);
    }

    /**
     * Stores {@code freq} as the value of {@code bin}, replacing any previous value, and adds
     * {@code freq} to the total raw frequency. Any cached statistics are discarded so that
     * they are recomputed from the updated bins.
     *
     * <p>NOTE(review): the bin is overwritten while the total is accumulated, so calling this
     * more than once for the same bin leaves the total different from the sum of the bins.
     * Confirm which of the two behaviours callers expect.
     *
     * @param bin the index of the bin
     * @param freq the frequency to store
     * @throws ArrayIndexOutOfBoundsException if {@code bin} is not a valid bin index
     */
    public void add(int bin, int freq) {
        freqs[bin] = freq;
        this.totalRawFrequency += freq;
        this.stats = null; // cached statistics no longer reflect the bins
    }

    /**
     * Orders instances by their lower-cased, NFD-normalized term.
     */
    @Override
    public int compareTo(DistributedTermFrequencies other) {
        return this.normalizedString.compareTo(other.normalizedString);
    }

    /**
     * @return the term exactly as supplied to the constructor
     */
    public String getString() {
        return this.string;
    }

    /**
     * @return the running total of the frequencies recorded so far
     */
    public int getRawFrequency() {
        return this.totalRawFrequency;
    }

    /**
     * @return the mean of the per-bin frequencies
     */
    public double getMean() {
        return getStats().getMean();
    }

    /**
     * @return the skewness of the per-bin frequencies
     */
    public double getSkewness() {
        return getStats().getSkewness();
    }

    /**
     * @return the kurtosis of the per-bin frequencies
     */
    public double getKurtosis() {
        return getStats().getKurtosis();
    }

    /**
     * @return the standard deviation of the per-bin frequencies
     */
    public double getStandardDeviation() {
        return getStats().getStandardDeviation();
    }

    /**
     * Returns the term, its total raw frequency and the descriptive statistics of its bins.
     * The statistics are computed on demand so the output does not depend on whether one of
     * the statistic getters has been called beforehand.
     */
    @Override
    public String toString() {
        return string + " (" + totalRawFrequency + "): " + getStats();
    }

    /**
     * Returns the statistics over the current bins, computing and caching them if needed.
     */
    private DescriptiveStatistics getStats() {
        if (stats == null) {
            // The int constructor takes a window size; fall back to the unbounded window
            // when there are no bins so that a zero window size is never requested.
            DescriptiveStatistics built = freqs.length > 0
                    ? new DescriptiveStatistics(freqs.length)
                    : new DescriptiveStatistics();
            for (int freq : freqs) {
                built.addValue(freq);
            }
            stats = built;
        }
        return stats;
    }

    /**
     * Compares two statistic values so that larger values sort first, NaN values sort last,
     * and ties (including two NaN values) fall back to the natural ordering of the terms.
     *
     * <p>NaN is handled explicitly because plain {@code >} / {@code ==} comparisons give
     * contradictory answers when one side is NaN, which breaks the {@link Comparator}
     * contract.
     */
    private static int compareDescending(double a, double b,
            DistributedTermFrequencies first, DistributedTermFrequencies second) {
        boolean aIsNaN = Double.isNaN(a);
        boolean bIsNaN = Double.isNaN(b);
        if (aIsNaN || bIsNaN) {
            if (aIsNaN && bIsNaN) {
                return first.compareTo(second);
            }
            return aIsNaN ? 1 : -1;
        }
        if (a == b) {
            return first.compareTo(second);
        }
        return a > b ? -1 : 1;
    }

    /**
     * Orders by total raw frequency, highest first, breaking ties with the natural ordering.
     */
    public static class DistributedTermFrequenciesTotalFrequencyComparator implements Comparator<DistributedTermFrequencies> {
        @Override
        public int compare(DistributedTermFrequencies arg0,
                DistributedTermFrequencies arg1) {
            if (arg0.totalRawFrequency == arg1.totalRawFrequency) {
                return arg0.compareTo(arg1);
            }
            return arg0.totalRawFrequency > arg1.totalRawFrequency ? -1 : 1;
        }
    }

    /**
     * Orders by one of the descriptive statistics, selected at construction time, by
     * delegating to the comparator dedicated to that statistic.
     */
    public static class DistributedTermFrequenciesDescriptiveStatsComparator implements Comparator<DistributedTermFrequencies> {

        /** The statistics that can be used as a sort key. */
        public enum STATS {MEAN, SKEWNESS, KURTOSIS, STANDARDDEVIATION}

        private final Comparator<DistributedTermFrequencies> statsComparator;

        /**
         * @param stat the statistic to sort by
         * @throws NullPointerException if {@code stat} is {@code null}
         * @throws IllegalArgumentException if {@code stat} has no associated comparator
         */
        public DistributedTermFrequenciesDescriptiveStatsComparator(STATS stat) {
            switch (stat) {
                case MEAN:
                    statsComparator = new DistributedTermFrequenciesMeanComparator();
                    break;
                case SKEWNESS:
                    statsComparator = new DistributedTermFrequenciesSkewnessComparator();
                    break;
                case KURTOSIS:
                    statsComparator = new DistributedTermFrequenciesKurtosisComparator();
                    break;
                case STANDARDDEVIATION:
                    statsComparator = new DistributedTermFrequenciesStandardDeviationComparator();
                    break;
                default:
                    // Fail fast instead of leaving the delegate null.
                    throw new IllegalArgumentException("Unsupported statistic: " + stat);
            }
        }

        @Override
        public int compare(DistributedTermFrequencies arg0,
                DistributedTermFrequencies arg1) {
            return this.statsComparator.compare(arg0, arg1);
        }
    }

    /**
     * Orders by mean, highest first; NaN last; ties broken by the natural ordering.
     */
    public static class DistributedTermFrequenciesMeanComparator implements Comparator<DistributedTermFrequencies> {
        @Override
        public int compare(DistributedTermFrequencies arg0,
                DistributedTermFrequencies arg1) {
            return compareDescending(arg0.getMean(), arg1.getMean(), arg0, arg1);
        }
    }

    /**
     * Orders by kurtosis, highest first; NaN last; ties broken by the natural ordering.
     */
    public static class DistributedTermFrequenciesKurtosisComparator implements Comparator<DistributedTermFrequencies> {
        @Override
        public int compare(DistributedTermFrequencies arg0,
                DistributedTermFrequencies arg1) {
            return compareDescending(arg0.getKurtosis(), arg1.getKurtosis(), arg0, arg1);
        }
    }

    /**
     * Orders by skewness, highest first; NaN last; ties broken by the natural ordering.
     */
    public static class DistributedTermFrequenciesSkewnessComparator implements Comparator<DistributedTermFrequencies> {
        @Override
        public int compare(DistributedTermFrequencies arg0,
                DistributedTermFrequencies arg1) {
            return compareDescending(arg0.getSkewness(), arg1.getSkewness(), arg0, arg1);
        }
    }

    /**
     * Orders by standard deviation, highest first; NaN last; ties broken by the natural
     * ordering.
     */
    public static class DistributedTermFrequenciesStandardDeviationComparator implements Comparator<DistributedTermFrequencies> {
        @Override
        public int compare(DistributedTermFrequencies arg0,
                DistributedTermFrequencies arg1) {
            return compareDescending(arg0.getStandardDeviation(), arg1.getStandardDeviation(),
                    arg0, arg1);
        }
    }
}