/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.igss.utility;
import com.rapidminer.operator.learner.igss.hypothesis.Hypothesis;
/**
 * Abstract superclass for all utility functions.
 *
 * <p>Stores the label priors and the threshold {@code large}, and provides the shared
 * M-value search, the dispatch between the small-sample bound and the normal approximation,
 * and a rational approximation of the inverse standard normal distribution. Subclasses supply
 * the actual bounds via {@link #confSmallM(double, double)} and the two {@code conf} methods.
 *
 * @author Dirk Dach
 */
public abstract class AbstractUtility implements Utility{

    /** Step width of the coarse search phase in {@link #calculateM(double, double)}. */
    private static final double COARSE_STEP = 10000;

    /** Numerator coefficients of the central-region rational approximation of the inverse normal. */
    private static final double[] INV_NORMAL_A = {
        -3.969683028665376e+01,  2.209460984245205e+02,
        -2.759285104469687e+02,  1.383577518672690e+02,
        -3.066479806614716e+01,  2.506628277459239e+00};

    /** Denominator coefficients of the central-region rational approximation of the inverse normal. */
    private static final double[] INV_NORMAL_B = {
        -5.447609879822406e+01,  1.615858368580409e+02,
        -1.556989798598866e+02,  6.680131188771972e+01,
        -1.328068155288572e+01};

    /** Numerator coefficients of the tail-region rational approximation of the inverse normal. */
    private static final double[] INV_NORMAL_C = {
        -7.784894002430293e-03, -3.223964580411365e-01,
        -2.400758277161838e+00, -2.549732539343734e+00,
         4.374664141464968e+00,  2.938163982698783e+00};

    /** Denominator coefficients of the tail-region rational approximation of the inverse normal. */
    private static final double[] INV_NORMAL_D = {
         7.784695709041462e-03,  3.224671290700398e-01,
         2.445134137142996e+00,  3.754408661907416e+00};

    /** Probabilities below this break-point are handled by the lower-tail approximation. */
    private static final double INV_NORMAL_P_LOW = 0.02425;

    /** Probabilities above this break-point are handled by the upper-tail approximation. */
    private static final double INV_NORMAL_P_HIGH = 1 - INV_NORMAL_P_LOW;

    /** The prior probability of the two classes of the label. */
    protected double[] priors;

    /** The number of covered examples before normal approximation is used. */
    protected int large;

    /**
     * Constructor for all utilities.
     *
     * @param priors the prior probabilities of the label classes; the array is copied completely,
     *               so later changes to the caller's array do not affect this object
     * @param large  the threshold below which the small-sample bound is used instead of the
     *               normal approximation
     * @throws NullPointerException if {@code priors} is {@code null}
     */
    public AbstractUtility (double[] priors, int large) {
        // Copy every element: the previous fixed-length copy of 2 failed for shorter arrays
        // and silently left trailing entries at 0.0 for longer ones.
        this.priors = priors.clone();
        this.large = large;
    }

    /**
     * Calculates the M-value needed for the GSS algorithm.
     *
     * <p>Starting at 1, the search first advances in steps of 10000 until
     * {@code conf(i, delta) <= epsilon / 2}, steps back once, and then advances in steps of 1
     * until the condition holds again. The loops have no other exit, so the method only returns
     * if {@code conf} eventually drops to {@code epsilon / 2} or below.
     *
     * @param delta   passed through unchanged to {@link #conf(double, double)}
     * @param epsilon the normal approximation has to be at most half of this value
     * @return the first value reached by the search for which
     *         {@code conf(value, delta) <= epsilon / 2}
     */
    public double calculateM (double delta, double epsilon) {
        final double threshold = epsilon / 2.0d;
        double i = 1;
        // Performance: start with a coarse step width.
        while (conf(i, delta) > threshold) {
            i = i + COARSE_STEP;
        }
        if (i > 1) { // The coarse step has been executed at least once, so undo the last one.
            i = i - COARSE_STEP;
        }
        // Fine search with step width 1 from the last position that was still too large.
        while (conf(i, delta) > threshold) {
            i++;
        }
        return Math.ceil(i);
    }

    /**
     * Calculates the unspecific confidence interval.
     * Uses Chernoff bounds if the number of random experiments is too small and the normal
     * approximation otherwise. Considers the number of examples as the number of random
     * experiments. This is problematic for g*(p-p0) hypotheses that only cover a small amount of
     * examples; no normal approximation should be used in this case.
     *
     * @param totalWeight compared against {@code large} and passed on to the selected bound
     * @param delta       passed through unchanged to the selected bound
     * @return {@code confSmallM(totalWeight, delta)} if {@code totalWeight < large},
     *         {@code conf(totalWeight, delta)} otherwise
     */
    public double confidenceIntervall (double totalWeight, double delta) {
        if (totalWeight < large) {
            return confSmallM(totalWeight, delta);
        }
        return conf(totalWeight, delta);
    }

    /**
     * Calculates the confidence interval for a specific hypothesis.
     * Uses Chernoff bounds if the number of random experiments is too small and the normal
     * approximation otherwise. This method is adapted for g*(p-p0) utility types: every example
     * for which the rule is applicable is one random experiment. Should be overridden by
     * subclasses if they make a different random experiment.
     *
     * <p>NOTE(review): the small-sample branch is selected by the covered weight of the
     * hypothesis but is evaluated with {@code totalWeight}; confirm that this is intended.
     *
     * @param totalWeight         passed on to the selected bound
     * @param totalPositiveWeight only used by the normal approximation
     * @param hypo                the hypothesis whose covered weight is compared against {@code large}
     * @param delta               passed through unchanged to the selected bound
     * @return {@code confSmallM(totalWeight, delta)} if the covered weight of {@code hypo} is
     *         below {@code large}, {@code conf(totalWeight, totalPositiveWeight, hypo, delta)}
     *         otherwise
     */
    public double confidenceIntervall (double totalWeight, double totalPositiveWeight, Hypothesis hypo, double delta) {
        if (hypo.getCoveredWeight() < large) {
            return confSmallM(totalWeight, delta);
        }
        return conf(totalWeight, totalPositiveWeight, hypo, delta);
    }

    /** Calculates the confidence interval for small numbers of examples. */
    public abstract double confSmallM (double totalWeight, double delta);

    /** Calculates the normal approximation of the confidence interval. */
    public abstract double conf(double totalWeight, double delta);

    /** Calculates the normal approximation of the confidence interval for a specific hypothesis. */
    public abstract double conf(double totalWeight, double totalPositiveWeight, Hypothesis hypo, double delta);

    /**
     * Calculates the inverse of the standard normal distribution,
     * e.g. {@code inverseNormal(0.95)} is approximately 1.64.
     *
     * <p>Uses three rational approximations: one for {@code p < 0.02425}, the mirrored one for
     * {@code p > 1 - 0.02425}, and one for the central region in between.
     *
     * @param p the probability
     * @return the approximated quantile; {@code Double.NEGATIVE_INFINITY} for {@code p == 0},
     *         {@code Double.POSITIVE_INFINITY} for {@code p == 1}, and {@code Double.NaN} if
     *         {@code p} is NaN or lies outside of [0, 1]
     */
    public double inverseNormal(double p) {
        final double[] a = INV_NORMAL_A;
        final double[] b = INV_NORMAL_B;
        final double[] c = INV_NORMAL_C;
        final double[] d = INV_NORMAL_D;

        if (Double.isNaN(p) || p < 0.0d || p > 1.0d) {
            return Double.NaN;
        }
        // At the exact boundaries the tail formulas would evaluate to infinity / infinity = NaN,
        // so return the limits directly.
        if (p == 0.0d) {
            return Double.NEGATIVE_INFINITY;
        }
        if (p == 1.0d) {
            return Double.POSITIVE_INFINITY;
        }

        // Rational approximation for lower region:
        if (p < INV_NORMAL_P_LOW) {
            double q = Math.sqrt(-2 * Math.log(p));
            return (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
                    ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
        }

        // Rational approximation for upper region:
        if (INV_NORMAL_P_HIGH < p) {
            double q = Math.sqrt(-2 * Math.log(1 - p));
            return -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
                    ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
        }

        // Rational approximation for central region:
        double q = p - 0.5;
        double r = q * q;
        return (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /
                (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1);
    }
}