package org.jactr.tools.itr.fit; import java.util.ArrayList; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.math.stat.regression.SimpleRegression; public class FitStatistics { /** * Logger definition */ static private transient Log LOGGER = LogFactory.getLog(FitStatistics.class); private double _rmse; private double _rSquare; private long _n; private double _chiSquare; public FitStatistics(Set<String> comparisonPoints, Map<String, Double> model, Map<String, Double> data, boolean allowMissing) { ArrayList<double[]> values = new ArrayList<double[]>( comparisonPoints.size()); for (String label : comparisonPoints) { Double modelValue = model.get(label); Double dataValue = data.get(label); if (modelValue == null || Double.isNaN(modelValue) || dataValue == null || Double.isNaN(dataValue)) if (!allowMissing) throw new IllegalArgumentException( "Both model and data must contain values for " + label); else continue; values.add(new double[] { modelValue, dataValue }); } compute(values.toArray()); } /** * @param array * of pairs of points [[modelData, observedData]....] */ public FitStatistics(double[][] comparisonPoints) { int i = 0; for (double[] pair : comparisonPoints) { if (pair.length != 2) throw new IllegalArgumentException("Comparisonpoints[" + i + "] should be 2 long, not " + pair.length); i++; } compute(comparisonPoints); } private void compute(Object[] values) { SimpleRegression regression = new SimpleRegression(); double sse = 0; double chiSquare = 0; long n = 0; for (Object val : values) { double[] value = (double[]) val; regression.addData(value[0], value[1]); double squareDiff = Math.pow(value[0] - value[1], 2); double chiPartial = squareDiff / value[1]; chiSquare += chiPartial; n++; sse += squareDiff; } /* * cant use regression.getMeanSquaredError() as that is in comparison to the * regression line */ _rmse = Math.sqrt(sse / n); _rSquare = regression.getRSquare(); _n = regression.getN(); _chiSquare = chiSquare; } public long getN() { return _n; } public double getRMSE() { return _rmse; } public double getRSquared() { return _rSquare; } public double getChiSquare() { return _chiSquare; } /** * compute bayesian information criterion using sample size, chiSquare, and * number of free parameters * * @see http://en.wikipedia.org/wiki/Bayesian_information_criterion * @return */ public double computeBIC(int freeParameters) { return getChiSquare() + freeParameters * Math.log(getN()); } /** * compute Akaike information criterion using chiSq and free parameters * * @param freeParameters * @see http://en.wikipedia.org/wiki/Akaike_information_criterion * @return */ public double computeAIC(int freeParameters) { return getChiSquare() + 2 * freeParameters; } /** * compute the corrected AIC, taking into account N. This is best for small N * or large # freeParameters. * * @param freeParameters * @see http://en.wikipedia.org/wiki/Akaike_information_criterion * @return */ public double computeAICc(int freeParameters) { double numerator = 2 * freeParameters * (freeParameters + 1); double denom = getN() - freeParameters - 1; if (denom < 0) { if (LOGGER.isWarnEnabled()) LOGGER.warn(String.format( "Free Parameters (%d) greater than data to fit (%d)", freeParameters, getN())); return Double.NaN; } return computeAIC(freeParameters) + numerator / denom; } }