/*
* FrequencyCounter.java
*
* Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard
*
* This file is part of BEAST.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* BEAST is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
package dr.stats;
import java.util.*;
/**
 * Counts how many times each distinct value occurs in a list and exposes
 * summary statistics (total, minimum and maximum count) over those counts.
 * <p>
 * Counts are held in a {@link LinkedHashMap}, so iteration follows the order in
 * which values were first seen, or descending count order once
 * {@link #sortCounterByCounts()} has been applied.
 *
 * @param <T> type of the values being counted
 * @author Walter Xie
 */
public class FrequencyCounter<T> {

    // Intended upper bound on the number of distinct values; only referenced by
    // the size check that is currently disabled in the constructor.
    private static final int MAX_COUNTER_SIZE = 100;

    protected Map<T, Integer> frequencyCounter;
    protected boolean sortedByCounts = false;
    protected int total;
    protected int min;
    protected int max;

    /**
     * Builds the counter from the given values and caches the total, minimum
     * and maximum counts.
     *
     * @param values         the observations to count
     * @param sortedByCounts if <code>true</code> and at least one value was given,
     *                       the counter is reordered by descending count
     */
    public FrequencyCounter(List<T> values, boolean sortedByCounts) {
        // LinkedHashMap keeps a predictable iteration order, see
        // http://stackoverflow.com/questions/12998568/hashmap-vs-linkedhashmap-performance-in-iteration-over-values
        frequencyCounter = new LinkedHashMap<T, Integer>();
        for (T value : values) {
            // counts are never stored as null, so null here means "not seen yet"
            Integer previous = frequencyCounter.get(value);
            frequencyCounter.put(value, previous == null ? 1 : previous + 1);
        }

        // limit the counter size, avoid expensive computation (currently disabled)
        // if (frequencyCounter.size() > MAX_COUNTER_SIZE)
        //     throw new IllegalArgumentException("Fail to create frequency counter: " +
        //             "number of unique values must <=" + MAX_COUNTER_SIZE + " !");

        if (sortedByCounts && frequencyCounter.size() > 0) {
            sortCounterByCounts();
        }

        // store {total min max} counts
        total = calculateTotalCount();
        int[] minMax = calculateMinMaxCount();
        min = minMax[0];
        max = minMax[1];
    }

    /**
     * @return the underlying value-to-count map (the live map, not a copy)
     */
    public Map<T, Integer> getFrequencyCounter() {
        return frequencyCounter;
    }

    /**
     * @return the number of distinct values in the counter
     */
    public int getCounterSize() {
        return frequencyCounter.size();
    }

    /**
     * sort counter by counts (largest count first) to calculate correct credibility set
     */
    public void sortCounterByCounts() {
        frequencyCounter = Utils.sortByValue(frequencyCounter);
        sortedByCounts = true;
    }

    /**
     * @return <code>true</code> once {@link #sortCounterByCounts()} has been applied
     */
    public boolean isSortedByCounts() {
        return sortedByCounts;
    }

    /**
     * the unique values in a frequency counter,
     * which are also the keys of the map
     *
     * @param sort if <code>true</code>, return a new {@link TreeSet} holding the keys
     *             in their natural order (the keys must then be mutually comparable);
     *             otherwise return the map's own key set, in the map's iteration order
     * @return the set of distinct values
     */
    public Set<T> uniqueValues(boolean sort) {
        if (sort) {
            return new TreeSet<T>(frequencyCounter.keySet());
        }
        // this is a view backed by the map, not a copy
        return frequencyCounter.keySet();
    }

    /**
     * default to sort unique values (keys).
     *
     * @return the distinct values in their natural order
     */
    public Set<T> uniqueValues() {
        return uniqueValues(true);
    }

    /**
     * @return the sorted unique values formatted as <code>{a, b, c}</code>
     */
    public String uniqueValuesToString() {
        return Utils.setToString(uniqueValues());
    }

    /**
     * sort the key of frequency counter, and return the index of a given key,
     * if not exist, return -1.
     *
     * @param key the value to look up
     * @return zero-based position of <code>key</code> among the sorted unique values,
     *         or -1 if the counter does not contain it
     */
    public int getKeyIndex(T key) {
        int index = 0;
        for (T value : uniqueValues()) {
            if (value.equals(key)) {
                return index;
            }
            index++;
        }
        // not found: must not leak the position of the last element visited
        return -1;
    }

    /**
     * get the count from counter given a key
     *
     * @param key the value to look up
     * @return how many times <code>key</code> was counted, 0 if it was never seen
     */
    public int getCount(T key) {
        Integer count = frequencyCounter.get(key);
        // an absent key maps to null; unboxing it directly would throw
        return count == null ? 0 : count;
    }

    /**
     * the total counts of this frequency counter
     *
     * @return the sum of all counts currently held in the map
     */
    public int calculateTotalCount() {
        int sum = 0;
        for (Integer count : frequencyCounter.values()) {
            sum += count;
        }
        return sum;
    }

    /**
     * @return the cached total count, recomputed first if the cached value is not positive
     */
    public int getTotalCount() {
        if (total <= 0) {
            total = calculateTotalCount();
        }
        return total;
    }

    /**
     * @param key the value to look up
     * @return <code>count / total_count</code> for the key;
     *         <code>NaN</code> when the counter is empty (0 / 0)
     */
    public double getProbability(T key) {
        return (double) getCount(key) / (double) getTotalCount();
    }

    /**
     * rescale the frequency to make maximum count equal to 1.
     * <code>count / max_count</code>.
     *
     * @param key the value to look up
     * @return the key's count divided by the largest count in the counter
     */
    public double getFreqScaledMaxTo1(T key) {
        return (double) getCount(key) / (double) getMaxCount();
    }

    /**
     * the min and max count in the frequency counter
     *
     * @return <code>int[]</code>, 1st is min, 2nd is max. For an empty counter the
     *         untouched sentinels <code>{Integer.MAX_VALUE, Integer.MIN_VALUE}</code>
     *         are returned.
     */
    public int[] calculateMinMaxCount() {
        int smallest = Integer.MAX_VALUE;
        int largest = Integer.MIN_VALUE;
        for (Integer count : frequencyCounter.values()) {
            if (smallest > count) {
                smallest = count;
            }
            if (largest < count) {
                largest = count;
            }
        }
        return new int[]{smallest, largest};
    }

    /**
     * @return the cached maximum count; if the cached value is not positive,
     *         both cached extremes are recomputed first
     */
    public int getMaxCount() {
        if (max <= 0) {
            int[] minMax = calculateMinMaxCount();
            min = minMax[0];
            max = minMax[1];
        }
        return max;
    }

    /**
     * mode calculated from this frequency counter
     *
     * @return a new {@link Mode} constructed from this counter
     */
    public Mode<T> getModeStats() {
        return new Mode<T>(this);
    }

    /**
     * Include credible and incredible set calculated from this frequency counter
     *
     * @param probability passed unchanged to the {@link CredibleSetAnalysis} constructor
     * @return a new {@link CredibleSetAnalysis} constructed from this counter
     */
    public CredibleSetAnalysis<T> getCredibleSetAnalysis(double probability) {
        return new CredibleSetAnalysis<T>(this, probability);
    }

    /**
     * Static helpers for formatting sets and ordering maps by value.
     */
    public static class Utils {

        /**
         * Formats a set as <code>{a, b, c}</code>, using the set's iteration order
         * and each element's string representation.
         *
         * @param aSet the set to format
         * @return the formatted string, <code>{}</code> for an empty set
         */
        public static <T> String setToString(Set<T> aSet) {
            StringBuilder line = new StringBuilder("{");
            String separator = "";
            for (T value : aSet) {
                line.append(separator).append(value);
                separator = ", ";
            }
            return line.append('}').toString();
        }

        /**
         * Returns a new map holding the same entries ordered by descending value.
         * The sort is stable, so entries with equal values keep the relative order
         * they had in the input map. The input map is not modified.
         *
         * @param map the map to reorder
         * @return a new {@link LinkedHashMap} iterating from largest to smallest value
         */
        // http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java
        public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map) {
            List<Map.Entry<K, V>> entries = new ArrayList<Map.Entry<K, V>>(map.entrySet());
            Collections.sort(entries, new Comparator<Map.Entry<K, V>>() {
                @Override
                public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
                    // swap the operands to reverse the order; negating the result
                    // would overflow if compareTo ever returned Integer.MIN_VALUE
                    return o2.getValue().compareTo(o1.getValue());
                }
            });

            Map<K, V> result = new LinkedHashMap<K, V>();
            for (Map.Entry<K, V> entry : entries) {
                result.put(entry.getKey(), entry.getValue());
            }
            return result;
        }
    }
}