package org.wikibrain.utils;
import gnu.trove.list.TDoubleList;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TDoubleArrayList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.set.hash.TIntHashSet;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.MathArrays;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
public class WbMathUtils {
/**
* @param nums Any descendant of Number (Integer, Short, Double, Float, etc)
* @return The mean of the number, or Double.NaN if the list is empty.
*/
public static double mean(Collection<? extends Number> nums) {
if (nums == null || nums.isEmpty()) {
return Double.NaN;
}
double sum = 0.0;
for (Number n : nums) {
sum += n.doubleValue();
}
return sum / nums.size();
}
/**
* Returns true if the number is not NaN or infinite.
*/
public static boolean isReal(double d) {
return !Double.isNaN(d) && !Double.isInfinite(d);
}
/**
* Returns a value that approaches yInf from y0 as xDelta increases.
* The values for the function range from (0, y0) to (inf, yInf).
* For each xHalfLife, the function moves 50% closer to yInf.
*
* @param x
* @param xHalfLife
* @param y0
* @param yInf
* @return
*/
public static double toAsymptote(double x, double xHalfLife, double y0, double yInf) {
assert(x > 0);
double hl = x / xHalfLife;
return y0 + (1.0 - FastMath.exp(-hl)) * (yInf - y0);
}
/**
* Smooths the function specified by the X, Y pairs.
* The result will be a new {X, Y} containing numPoints.
* The new X values will be sampled uniformly from the original X (i.e. every k'th point)
* The new Y values will be calculated using a #robustMean around each X point with window windowSize.
* @param X
* @param Y
* @param windowSize
* @param numPoints
* @return
*/
public static double[][] smooth(double X[], double Y[], int windowSize, int numPoints) {
TDoubleArrayList smoothedX = new TDoubleArrayList();
TDoubleArrayList smoothedY= new TDoubleArrayList();
for (int i = windowSize / 2; i < X.length - windowSize / 2; i += X.length / (numPoints + 1)) {
double subYs[] = Arrays.copyOfRange(Y, i - windowSize / 2, i + windowSize);
smoothedX.add(X[i]);
smoothedY.add(robustMean(subYs)); // median
}
// replace duplicate X points with their mean
for (int i = 0; i < smoothedX.size();) {
double x = smoothedX.get(i);
int span = 0;
while (i + span < smoothedX.size() && smoothedX.get(i + span) == x) {
span++;
}
if (span > 1) {
double mean = smoothedY.subList(i, i + span).sum() / span;
for (int j = i; j < i + span; j++) {
smoothedY.set(j, mean);
}
}
i += span;
}
return new double[][] { smoothedX.toArray(), smoothedY.toArray()};
}
/**
* Calculates the weighted mean, where the weight is based on the rank difference
* from the median and normally distributed. The std-dev for the normal distribution
* is set to X.length / 4, so about two-thirds of the weight is contained within
* the middle half of all points. The std-dev is bottom capped at 3.
*
* @param X
* @return
*/
public static double robustMean(double[] X) {
X = ArrayUtils.clone(X);
Arrays.sort(X);
NormalDistribution dist = new NormalDistribution(
X.length / 2, // heaviest weight at midpoint
FastMath.max(3, X.length / 4)); // 66% of the weight within 3 pts on either side
double sum = 0.0;
double weight = 0.0;
for (int i = 0; i < X.length; i++) {
double d = dist.density(i);
weight += d;
sum += X[i] * d;
}
return sum / weight;
}
/**
* Make an array monotonically increasing by epsilon.
* The algorithm is simple and iterative:
* For each point x_k:
* x_k = max(x_k, x_{k-1} + epsilon)
* This function is destructive.
*
* @param X Input data.
* @param epsilon The minimum increase allowed between consecutive points.
*/
public static void makeMonotonicIncreasing(double [] X, double epsilon) {
for (int i = 1; i < X.length; i++) {
X[i] = FastMath.max(X[i], X[i-1] + epsilon);
}
MathArrays.checkOrder(X, MathArrays.OrderDirection.INCREASING, true);
}
public static void makeMonotonicIncreasing(TDoubleList X, double epsilon) {
double X2[] = X.toArray();
makeMonotonicIncreasing(X2, epsilon);
X.set(0, X2);
}
/**
* Given two parallel arrays of doubles representing x,y pairs
* remove all pairs that are NaN or Infinite, and return the result.
*/
public static double[][] removeNotNumberPoints(double X[], double Y[]) {
TDoubleArrayList prunedX = new TDoubleArrayList();
TDoubleArrayList prunedY = new TDoubleArrayList();
for (int i = 0; i < X.length; i++) {
double x = X[i];
double y = Y[i];
if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y)) {
// skip
} else {
prunedX.add(x);
prunedY.add(y);
}
}
return new double[][] { prunedX.toArray(), prunedY.toArray() };
}
public static TDoubleList[] removeNotNumberPoints(TDoubleList X, TDoubleList Y) {
double pruned[][] = removeNotNumberPoints(X.toArray(), Y.toArray());
return new TDoubleList[] {
new TDoubleArrayList(pruned[0]),
new TDoubleArrayList(pruned[1])
};
}
/**
* Find colinear columns in a matrix.
*
* @param X A rectangular matrix in row-major format.
* @param epsilon the singularity threshold delta
*
* @return A 2-D array from columns to a list of all colinear
* columns. Each column will appear at most once in the second
* level of the array. So if columns 1, 4, 6, and 7 are colinear.
* The entry for column 1 will contain [4, 6, 7] and the entries
* for 4, 6, and 7 will be empty.
*
*/
public static int[][] findColinearColumns(double X[][], double epsilon) {
if (X.length <= 1) {
return new int[0][0];
}
int numCols = X[0].length;
if (numCols <= 1) {
return new int[0][0];
}
int [][] colinear = new int[numCols][];
TIntHashSet alreadyColinear = new TIntHashSet();
// skip last column, it can't be colinear with anybody!
for (int col1 = 0; col1 < numCols-1; col1++) {
if (alreadyColinear.contains(col1)) {
colinear[col1] = new int[0];
continue;
}
TIntList matches = new TIntArrayList();
for (int col2 = col1+1; col2 < numCols; col2++) {
boolean isColinear = true;
double k = X[0][col1] / X[0][col2];
for (double row[] : X) {
if (row.length != numCols) { throw new IllegalArgumentException(); }
if (FastMath.abs(row[col1] - k * row[col2]) > epsilon) {
isColinear = false;
break;
}
}
if (isColinear) {
matches.add(col2);
alreadyColinear.add(col2);
}
}
colinear[col1] = matches.toArray();
}
colinear[colinear.length - 1] = new int[0];
return colinear;
}
public static int[][] findColinearColumns(double X[][]) {
return findColinearColumns(X, 0.000001);
}
public static String toString(int X[][]) {
StringBuffer res = new StringBuffer("{");
for (int row[] : X) {
if (row != X[0]) { res.append(", "); }
res.append(Arrays.toString(row));
}
res.append("}");
return res.toString();
}
public static double dot(float[] v1, float[] v2) {
if (v1.length != v2.length) {
throw new IllegalArgumentException();
}
double dot = 0.0;
for (int i = 0; i < v1.length; i++) {
dot += v1[i] * v2[i];
}
return dot;
}
public static double dot(double[] v1, float[] v2) {
if (v1.length != v2.length) {
throw new IllegalArgumentException();
}
double dot = 0.0;
for (int i = 0; i < v1.length; i++) {
dot += v1[i] * v2[i];
}
return dot;
}
public static double dot(double[] v1, double[] v2) {
if (v1.length != v2.length) {
throw new IllegalArgumentException();
}
double dot = 0.0;
for (int i = 0; i < v1.length; i++) {
dot += v1[i] * v2[i];
}
return dot;
}
public static double distance(double[] v1, double[] v2) {
if (v1.length != v2.length) {
throw new IllegalArgumentException();
}
double l2 = 0.0;
for (int i = 0; i < v1.length; i++) {
l2 += (v1[i] - v2[i]) * (v1[i] - v2[i]);
}
return FastMath.sqrt(l2);
}
/**
* v1 <- alpha * v2 + v3
* @param v1
* @param v2
* @param alpha
*/
public static void add(double alpha, double[] v1, double[] v2, double v3[]) {
for (int i = 0; i < v1.length; i++) {
v1[i] = alpha * v2[i] + v3[i];
}
}
/**
* v1 <- alpha * v2 + v3
* @param v1
* @param v2
* @param alpha
*/
public static void add(double alpha, double[] v1, double[] v2, float v3[]) {
for (int i = 0; i < v1.length; i++) {
v1[i] = alpha * v2[i] + v3[i];
}
}
/**
* v1 <- alpha * v2 + v3
* @param v1
* @param v2
* @param alpha
*/
public static void add(double alpha, double[] v1, float[] v2, float v3[]) {
for (int i = 0; i < v1.length; i++) {
v1[i] = alpha * v2[i] + v3[i];
}
}
/**
* v1 <- alpha * v2 + v3
* @param v1
* @param v2
* @param alpha
*/
public static void add(float alpha, float[] v1, float[] v2, float v3[]) {
for (int i = 0; i < v1.length; i++) {
v1[i] = alpha * v2[i] + v3[i];
}
}
/**
* v1 += v2
* @param v1
* @param v2
*/
public static void increment(double v1[], double[] v2) {
add(1.0, v1, v1, v2);
}
/**
* v1 += v2
* @param v1
* @param v2
*/
public static void increment(double v1[], float[] v2) {
add(1.0, v1, v1, v2);
}
/**
* v1 += v2
* @param v1
* @param v2
*/
public static void increment(float v1[], float[] v2) {
add(1.0f, v1, v1, v2);
}
public static void normalize(float[] vector) {
double sum = 0.0;
for (float x : vector) {
sum += x * x;
}
if (sum == 0) {
return;
}
sum = FastMath.sqrt(sum);
for (int i = 0; i < vector.length; i++) {
vector[i] /= sum;
}
}
public static float[] double2Float(double []v) {
float f[] = new float[v.length];
for (int i = 0; i < v.length; i++) {
f[i] = (float) v[i];
}
return f;
}
public static void normalize(double[] vector) {
double sum = 0.0;
for (double x : vector) {
sum += x * x;
}
if (sum == 0) {
return;
}
sum = FastMath.sqrt(sum);
for (int i = 0; i < vector.length; i++) {
vector[i] /= sum;
}
}
public static float[][] transpose(float [][]M) {
if (M.length == 0) {
return new float[0][0];
}
float T[][] = new float[M[0].length][M.length];
for (int i = 0; i < M.length; i++) {
for (int j = 0; j < M[0].length; j++) {
T[j][i] = M[i][j];
}
}
return T;
}
public static void zero(float [][] M) {
for (int i = 0; i < M.length; i++) {
Arrays.fill(M[i], 0);
}
}
public static void zero(double [][] M) {
for (int i = 0; i < M.length; i++) {
Arrays.fill(M[i], 0);
}
}
}