package org.wikibrain.sr.ensemble;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Shilad Sen
*/
public class Interpolator implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(Interpolator.class);
private final int numMetrics;
private int[] missingRanks;
private double[] missingScores;
public Interpolator(int numMetrics) {
this.numMetrics = numMetrics;
missingRanks = new int[numMetrics];
missingScores = new double[numMetrics];
Arrays.fill(missingRanks, 1000);
Arrays.fill(missingScores, 0.0);
}
/**
* calculate interpolated values for missing ranks and scores
* @param examples
*/
public void trainSimilarity(List<EnsembleSim> examples) {
for (int i = 0; i < numMetrics; i++) {
int numMissingScores = 0;
double sumMissingScores = 0.0;
for (EnsembleSim es : examples) {
if (es != null) {
double v = es.getScores().get(i);
if (Double.isNaN(v) || Double.isInfinite(v)) {
sumMissingScores += es.getKnownSim().similarity;
numMissingScores++;
}
}
}
missingScores[i] = (numMissingScores > 0) ? (sumMissingScores / numMissingScores) : 0.0;
LOG.info("for metric " + i + ", " +
" estimated missing score " + missingScores[i]);
}
}
/**
* TODO: train similarity should use mean, not min.
* calculate interpolated values for missing ranks and scores
* @param examples
*/
public void trainMostSimilar(List<EnsembleSim> examples) {
for (int i = 0; i < numMetrics; i++) {
int maxMissingRanks = -1;
double maxScore = -1;
double minScore = 100;
for (EnsembleSim es : examples) {
if (es != null && es.getScores() != null) {
double v = es.getScores().get(i);
if (!Double.isNaN(v) && !Double.isInfinite(v)) {
maxScore = Math.max(maxScore, v);
minScore = Math.min(minScore, v);
}
maxMissingRanks = Math.max(maxMissingRanks, es.getRanks().get(i));
}
}
missingRanks[i] = Math.max(100, maxMissingRanks * 5 / 4);
missingScores[i] = minScore;
LOG.info("for metric " + i + ", " +
" estimated missing rank " + missingRanks[i] +
" and missing score " + missingScores[i]);
}
}
public EnsembleSim interpolate(EnsembleSim example) {
EnsembleSim result = new EnsembleSim(example.knownSim);
for (int i = 0; i < numMetrics; i++) {
double v = example.getScores().get(i);
int r = example.getRanks().get(i);
if (Double.isNaN(v) || Double.isInfinite(v)) {
v = missingScores[i];
}
if (r < 0) {
r = missingRanks[i];
}
result.add(v, r);
}
return result;
}
public double interpolateScore(int metricIndex, double score) {
if (Double.isNaN(score) || Double.isInfinite(score)) {
return missingScores[metricIndex];
} else {
return score;
}
}
public int interpolateRank(int metricIndex, int rank) {
if (rank < 0) {
return missingRanks[metricIndex];
} else {
return rank;
}
}
public double getInterpolatedScore(int metricIndex) {
return missingScores[metricIndex];
}
public int getInterpolatedRank(int metricIndex) {
return missingRanks[metricIndex];
}
}