package org.baderlab.csplugins.enrichmentmap.task;
import cern.jet.stat.Gamma;
public class Hypergeometric {
private Hypergeometric() {
}
/**
* Calculate the p-Value of the Hypergeometric Distribution
* <p>
*
* from: http://en.wikipedia.org/wiki/Hypergeometric_distribution
* <p>
*
* P(X=k) = {m over k} * { (N-m) over (n-k) } / {N over n}
*
* @param N size of the population (Universe of genes)
* @param n size of the sample (signature geneset)
* @param m successes in population (enrichment geneset)
* @param k successes in sample (intersection of both genesets)
*
* @return the p-Value of the Hypergeometric Distribution for P(X=k)
*/
public static double hyperGeomPvalue(final int N, final int n, final int m, final int k)
throws ArithmeticException {
//calculating in logarithmic scale as we are dealing with large numbers.
double log_p = binomialLog(m, k) + binomialLog(N - m, n - k) - binomialLog(N, n);
return Math.exp(log_p);
}
/**
* Calculate sum over distinct p-Values of the Hypergeometric Distribution
* <p>
*
* for P(X ≥ k) : Probability to get k or more successes in the sample
* with a size of n<br>
* P(X > k) : Probability to get more that k successes in the sample with
* a size of n<br>
* P(X ≤ k) : Probability to get k or less successes in the sample with a
* size of n<br>
* P(X < k) : Probability to get less than k successes in the sample with
* a size of n
* <p>
*
* @param N size of the population (Universe of genes)
* @param n size of the sample (signature geneset)
* @param m successes in population (enrichment geneset)
* @param k successes in sample (intersection of both genesets)
* @param mode = 0 : P(X ≥ k) (default)<br>
* mode = 1 : P(X > k) (behavior of R with "lower.tail=FALSE")
* <br>
* mode = 2 : P(X ≤ k) (behavior of R with "lower.tail=TRUE")
* <br>
* mode = 3 : P(X < k)<br>
*
* @return the p-Value of the Hypergeometric Distribution for P(X>=k)
*/
public static double hyperGeomPvalueSum(int N, int n, int m, int k, int mode) throws ArithmeticException {
// the number of successes in the sample (k) cannot be larger than the sample (n) or the number of total successes (m)
double sum = 0.0;
int kMax;
switch(mode) {
case 0:
kMax = Math.min(n, m);
for(int k_prime = k; k_prime <= kMax; k_prime++) {
sum += hyperGeomPvalue(N, n, m, k_prime);
}
break;
case 1:
kMax = Math.min(n, m);
for(int k_prime = k + 1; k_prime <= kMax; k_prime++) {
sum += hyperGeomPvalue(N, n, m, k_prime);
}
break;
case 2:
for(int k_prime = k; k_prime >= 0; k_prime--) {
sum += hyperGeomPvalue(N, n, m, k_prime);
}
break;
case 3:
for(int k_prime = k - 1; k_prime >= 0; k_prime--) {
sum += hyperGeomPvalue(N, n, m, k_prime);
}
break;
}
return sum;
}
/**
* Equivalent to hyperGeomPvalueSum(N, n, m, k, 0)
*/
public static double hyperGeomPvalueSum(int N, int n, int m, int k) throws ArithmeticException {
return hyperGeomPvalueSum(N, n, m, k, 0);
}
/**
* Calculate the log of Binomial coefficient "n over k" aka "n choose k"
*
* adapted from
* http://code.google.com/p/beast-mcmc/source/browse/trunk/src/dr/math/Binomial.java?spec=svn1660&r=1660
*
* @version Id: Binomial.java,v 1.11 2005/05/24 20:26:00 rambaut Exp
* Licensed under "LGPL 2.1 or later"
*
* original by:
* @author Andrew Rambaut
* @author Alexei Drummond
* @author Korbinian Strimmer
*
* adapted for using cern.jet.stat:
* @author Oliver Stueker
*
* @param n
* @param k
* @return the binomial coefficient "n over k"
*/
public static double binomialLog(final int n, final int k) throws ArithmeticException {
return Gamma.logGamma(n + 1.0) - Gamma.logGamma(k + 1.0) - Gamma.logGamma(n - k + 1.0);
}
}