package gov.nih.ncgc.bard.capextract;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* @author Rajarshi Guha
*/
public class ResultStatistics {
protected Logger log;
public ResultStatistics() {
log = LoggerFactory.getLogger(this.getClass());
}
public void generateStatistics(Long bardExptId) throws SQLException {
Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
// first pull result types for this experiment
PreparedStatement pst = conn.prepareStatement("select distinct display_name from exploded_results where bard_expt_id = ?");
pst.setLong(1, bardExptId);
List<String> resultTypes = new ArrayList<String>();
ResultSet rs = pst.executeQuery();
while (rs.next()) resultTypes.add(rs.getString(1));
rs.close();
pst.close();
// for each result type, delete pre-existing histogram, make new histogram and insert it
for (String resultType : resultTypes) {
pst = conn.prepareStatement("delete from exploded_statistics where bard_expt_id = ? and display_name = ?");
pst.setLong(1, bardExptId);
pst.setString(2, resultType);
pst.executeUpdate();
pst.close();
pst = conn.prepareStatement("select value from exploded_results where bard_expt_id = ? and display_name = ?");
pst.setLong(1, bardExptId);
pst.setString(2, resultType);
rs = pst.executeQuery();
List<Float> values = new ArrayList<Float>();
while (rs.next()) values.add(rs.getFloat(1));
rs.close();
pst.close();
Float[] stats = calculateStatistics(values);
log.info("Got stats for " + bardExptId + "/" + resultType);
pst = conn.prepareStatement("insert into exploded_statistics (bard_expt_id, display_name, n, minval, maxval, mean, sd, q1, q2, q3) " +
" values (?,?,?,?,?,?,?,?,?,?)");
pst.setLong(1, bardExptId);
pst.setString(2, resultType);
for (int i = 0; i < stats.length; i++) {
if (stats[i] == null
|| stats[i].equals(Float.NaN)
|| stats[i].equals(Float.NEGATIVE_INFINITY)
|| stats[i].equals(Float.POSITIVE_INFINITY)) pst.setNull(3+i, Types.FLOAT);
else pst.setFloat(3+i, stats[i]);
}
pst.executeUpdate();
pst.close();
}
conn.commit();
conn.close();
}
Float median(List<Float> values) {
if (values.size() == 0) return null;
int middle = values.size() / 2;
if (values.size() % 2 == 1) return values.get(middle);
return (values.get(middle - 1) + values.get(middle)) / 2.0f;
}
private Float[] calculateStatistics(List<Float> values) {
if (values.size() == 0) return new Float[]{null, null, null, null, null, null, null, null};
Float minval, maxval, mean, sd, q1, q2, q3;
sd = CAPUtil.sd(values);
mean = 0f;
maxval = Float.MIN_VALUE;
minval = Float.MAX_VALUE;
for (Float v : values) {
if (v > maxval) maxval = v;
if (v < minval) minval = v;
mean += v;
}
mean /= (float) values.size();
// get quantiles
Collections.sort(values);
q2 = median(values);
List<Float> half = new ArrayList<Float>();
for (Float v : values) if (v <= q2) half.add(v);
q1 = median(half);
half = new ArrayList<Float>();
for (Float v : values) if (v >= q2) half.add(v);
if (half.size() == 0) q3 = null;
else q3 = median(half);
return new Float[]{(float) values.size(), minval, maxval, mean, sd, q1, q2, q3};
}
public static void main(String[] args) throws SQLException {
ResultStatistics rstats = new ResultStatistics();
Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
PreparedStatement pst = conn.prepareStatement("select distinct bard_expt_id from exploded_results");
ResultSet rs = pst.executeQuery();
while (rs.next()) rstats.generateStatistics(rs.getLong(1));
conn.close();
}
}