package org.wikibrain.sr.normalize;
import com.typesafe.config.Config;
import gnu.trove.list.array.TDoubleArrayList;
import org.apache.commons.math3.analysis.interpolation.LinearInterpolator;
import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction;
import org.apache.commons.math3.distribution.BetaDistribution;
import org.apache.commons.math3.util.FastMath;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.utils.WbMathUtils;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Map;
/**
 * Normalizer that maps a raw score onto its (interpolated) rank within the observed sample.
 *
 * <p>Despite the name, results are fractions, not percentages: the i-th of n sample values
 * (0-based) maps to {@code (i + 1) / (n + 1)}, and values between two sample points are
 * linearly interpolated. Scores outside the sampled range are handed to
 * {@link WbMathUtils#toAsymptote}.
 *
 * <p>The interpolator is transient; it is rebuilt from the serialized sample on
 * deserialization.
 */
public class PercentileNormalizer extends BaseNormalizer {
    /** Relative size of the per-index offset used to separate duplicate sample values. */
    private static final double FUDGE_FACTOR = 10E-9;

    /** Piecewise-linear map from sample value to rank fraction; null until built. */
    protected transient PolynomialSplineFunction interpolator;

    /**
     * If the power variable has been set (i.e. is positive), the normalized value is raised
     * to this power. For results in [0,1] and a power above 1 this lowers every score,
     * which has the effect of making things "less related" overall.
     */
    protected double power = 0.0;

    /** Clears inherited state and discards the interpolator. */
    @Override
    public void reset() {
        super.reset();
        interpolator = null;
    }

    /** Finalizes the inherited sample, then builds the interpolator from it. */
    @Override
    public void observationsFinished() {
        super.observationsFinished();
        makeInterpolater();
    }

    /**
     * Builds the interpolator from {@code sample}.
     *
     * <p>Knot i is placed at {@code sample.get(i)} plus a small offset proportional to i, and
     * is assigned the value {@code (i + 1) / (n + 1)}. The offset exists only because
     * {@link LinearInterpolator} requires strictly increasing x values, which duplicate
     * sample values would violate.
     *
     * <p>NOTE(review): assumes {@code sample} is already sorted ascending here (presumably by
     * the superclass) - confirm in BaseNormalizer.
     *
     * @throws RuntimeException from {@link LinearInterpolator} if the sample holds fewer
     *         than two points
     */
    protected void makeInterpolater() {
        int n = sample.size();
        double[] xs = new double[n];
        double[] ys = new double[n];

        // The offset must grow with i, so scale by |max|: a negative max would otherwise
        // produce a shrinking offset and a zero max no offset at all.
        double scale = Math.abs(max);
        if (scale == 0.0 && n > 0) {
            scale = Math.abs(sample.get(0));
        }
        if (scale == 0.0) {
            scale = 1.0;   // every magnitude available is zero; any positive step will do
        }
        double step = scale * FUDGE_FACTOR;

        double previous = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < n; i++) {
            double x = sample.get(i) + step * i;
            if (x <= previous) {
                // The offset was lost to rounding (sample magnitude far above the scale);
                // move to the next representable double to stay strictly increasing.
                x = Math.nextUp(previous);
            }
            xs[i] = x;
            ys[i] = (i + 1.0) / (n + 1);
            previous = x;
        }
        interpolator = new LinearInterpolator().interpolate(xs, ys);
    }

    /** Default serialization; the transient interpolator is intentionally not written. */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
    }

    /**
     * Default deserialization followed by a rebuild of the transient interpolator.
     * The rebuild is skipped when there are too few sample points to interpolate, so that
     * a normalizer serialized before training can still be read back.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        if (sample != null && sample.size() >= 2) {
            makeInterpolater();
        }
    }

    /**
     * Normalizes a raw score.
     *
     * <p>Scores within the sampled range are interpolated. Scores below the smallest or
     * above the largest sample value are passed to {@link WbMathUtils#toAsymptote} with a
     * half life of a quarter of the sampled range (presumably approaching 0.0 and 1.0
     * respectively - see that helper for the exact curve). If {@code power} is positive,
     * the result is finally raised to that power.
     *
     * @param x raw score
     * @return normalized score
     * @throws IllegalStateException if no observations are available, or if the score lies
     *         within the sampled range but the interpolator has not been built
     */
    @Override
    public double normalize(double x) {
        int n = sample.size();
        if (n == 0) {
            throw new IllegalStateException(
                    "PercentileNormalizer has no observations; it must be trained before use");
        }
        double sMin = sample.get(0);
        double sMax = sample.get(n - 1);
        double halfLife = (sMax - sMin) / 4.0;
        double yDelta = 1.0 / (n + 1);   // rank fraction of the lowest sample point

        double y;
        if (x < sMin) {
            y = WbMathUtils.toAsymptote(sMin - x, halfLife, yDelta, 0.0);
        } else if (x > sMax) {
            y = WbMathUtils.toAsymptote(x - sMax, halfLife, 1.0 - yDelta, 1.0);
        } else {
            if (interpolator == null) {
                throw new IllegalStateException(
                        "PercentileNormalizer interpolator has not been built; "
                        + "call observationsFinished() first");
            }
            y = interpolator.value(x);
        }
        return (power > 0.0) ? FastMath.pow(y, power) : y;
    }

    /**
     * Describes the sample by listing its values at 0%, 5%, ..., 100% of the sample size.
     *
     * @return a single-line, human-readable summary
     */
    @Override
    public String dump() {
        StringBuilder buff = new StringBuilder("percentile normalizer: ");
        int n = sample.size();
        if (n == 0) {
            // Nothing to index into; avoid sample.get(-1).
            return buff.append("(no observations)").toString();
        }
        for (int i = 0; i <= 20; i++) {
            int p = i * 100 / 20;
            // Clamp so that 100% refers to the last element rather than one past it.
            int index = Math.min(p * n / 100, n - 1);
            buff.append(p).append("%: ").append(sample.get(index)).append(", ");
        }
        return buff.toString();
    }

    /**
     * Sets the exponent applied to normalized values; non-positive values disable it.
     *
     * @param power exponent
     */
    public void setPower(double power) {
        this.power = power;
    }

    /**
     * Configuration provider that builds a {@link PercentileNormalizer} for configurations
     * whose {@code type} is {@code "percentile"}.
     */
    public static class Provider extends org.wikibrain.conf.Provider<PercentileNormalizer> {
        public Provider(Configurator configurator, Configuration config) throws ConfigurationException {
            super(configurator, config);
        }

        @Override
        public Class getType() {
            return Normalizer.class;
        }

        @Override
        public String getPath() {
            return "sr.normalizer";
        }

        @Override
        public Scope getScope() {
            return Scope.INSTANCE;
        }

        /**
         * Creates a new normalizer, applying the optional {@code power} setting.
         *
         * @return the normalizer, or null if the configured type is not {@code "percentile"}
         */
        @Override
        public PercentileNormalizer get(String name, Config config, Map<String, String> runtimeParams) throws ConfigurationException {
            if (!config.getString("type").equals("percentile")) {
                return null;   // some other provider handles this type
            }
            PercentileNormalizer n = new PercentileNormalizer();
            if (config.hasPath("power")) {
                n.setPower(config.getDouble("power"));
            }
            return n;
        }
    }
}