package org.wikibrain.spatial.cookbook;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Point;
import org.geotools.referencing.GeodeticCalculator;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.dao.UniversalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.Title;
import org.wikibrain.core.model.UniversalPage;
import org.wikibrain.spatial.dao.SpatialDataDao;
import org.wikibrain.sr.SRMetric;
import org.wikibrain.sr.SRResult;
import org.wikibrain.utils.ParallelForEach;
import org.wikibrain.utils.Procedure;
import org.wikibrain.utils.WpIOUtils;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Samples random pairs of geotagged concepts and writes, for each pair, the
 * spatial distance between them alongside one semantic-relatedness (SR) score
 * per configured language. The result is a tab-separated file that can be used
 * to examine how relatedness varies with distance (cf. Tobler's law).
 *
 * <p>Typical usage: construct with an {@link Env}, call
 * {@link #retrieveLocations()} once, then call {@link #evaluate(File, int)}.
 */
public class SimpleToblersLawEvaluator {
    private static final Logger LOG = LoggerFactory.getLogger(SimpleToblersLawEvaluator.class);

    /** Number of concept pairs sampled when run from {@link #main(String[])}. */
    private static final int NUM_SAMPLES = 1000000;

    private final Random random = new Random();

    private final SpatialDataDao sdDao;
    private final LocalPageDao lpDao;
    private final UniversalPageDao upDao;

    /** Languages of the environment, in the order their SR columns are written. */
    private final List<Language> langs;

    /** One SR metric per language, looked up under the name "ensemble". */
    private final Map<Language, SRMetric> metrics;

    /** Concepts that have a geometry and an article in every configured language. */
    private final List<UniversalPage> concepts = new ArrayList<UniversalPage>();

    /** Centroid of each concept's geometry. */
    private final Map<UniversalPage, Point> locations = new HashMap<UniversalPage, Point>();

    private final Env env;

    /** Writer for the current evaluation; assigned by {@link #evaluate(File, int)}. */
    private BufferedWriter output;

    /**
     * Looks up the data access objects and one "ensemble" SR metric per
     * language from the environment's configurator.
     *
     * @param env environment supplying the languages and the configurator
     * @throws ConfigurationException if a required component cannot be obtained
     */
    public SimpleToblersLawEvaluator(Env env) throws ConfigurationException {
        this.env = env;
        this.langs = new ArrayList<Language>(env.getLanguages().getLanguages());

        // Get data access objects
        Configurator c = env.getConfigurator();
        this.sdDao = c.get(SpatialDataDao.class);
        this.lpDao = c.get(LocalPageDao.class);
        this.upDao = c.get(UniversalPageDao.class);

        // Build SR metrics, one per language
        this.metrics = new HashMap<Language, SRMetric>();
        for (Language lang : langs) {
            SRMetric m = c.get(SRMetric.class, "ensemble", "language", lang.getLangCode());
            metrics.put(lang, m);
        }
    }

    /**
     * Loads every geometry in the "wikidata" layer with reference system
     * "earth" and keeps those whose concept exists in all configured
     * languages, recording the centroid of each kept geometry.
     *
     * @throws DaoException if a lookup against the underlying DAOs fails
     */
    public void retrieveLocations() throws DaoException {
        // Get all known concept geometries
        Map<Integer, Geometry> geometries = sdDao.getAllGeometriesInLayer("wikidata", "earth");
        LOG.info("Get {} geometries, now building id-name mapping", geometries.size());

        // Build up list of concepts in all languages
        for (Map.Entry<Integer, Geometry> entry : geometries.entrySet()) {
            UniversalPage concept = upDao.getById(entry.getKey());
            if (concept != null && concept.hasAllLanguages(env.getLanguages())) {
                concepts.add(concept);
                locations.put(concept, entry.getValue().getCentroid());
                if (concepts.size() % 1000 == 0) {
                    LOG.info("Loaded {} geometries with articles in {}...", concepts.size(), env.getLanguages());
                }
            }
        }
        LOG.info("Found {} geometries with articles in {}", concepts.size(), env.getLanguages());
    }

    /**
     * Writes a header line followed by one row per sampled concept pair to
     * {@code outputPath}. Samples are evaluated through {@link ParallelForEach}.
     * The writer is closed even if writing or sampling fails.
     *
     * @param outputPath file to write the tab-separated results to
     * @param numSamples number of concept pairs to sample
     * @throws IOException if the file cannot be opened, written, or closed
     * @throws IllegalStateException if no concepts have been loaded, since
     *         there would be nothing to sample from
     */
    public void evaluate(File outputPath, int numSamples) throws IOException {
        if (concepts.isEmpty()) {
            // Fail before creating the file: every sample would otherwise fail in nextInt(0).
            throw new IllegalStateException(
                    "No concepts with locations are loaded; call retrieveLocations() before evaluate()");
        }
        this.output = WpIOUtils.openWriter(outputPath);
        try {
            writeHeader();
            ParallelForEach.range(0, numSamples, new Procedure<Integer>() {
                @Override
                public void call(Integer i) throws Exception {
                    evaluateOneSample();
                }
            });
        } finally {
            this.output.close();
        }
    }

    /**
     * Picks two concepts uniformly at random (with replacement, so both may be
     * the same concept), computes their SR score in every language, and writes
     * the resulting row.
     *
     * @throws DaoException if computing similarity fails in the data layer
     * @throws WikiBrainException if title resolution or similarity fails
     * @throws IOException if the row cannot be written
     */
    public void evaluateOneSample() throws DaoException, WikiBrainException, IOException {
        UniversalPage c1 = concepts.get(random.nextInt(concepts.size()));
        UniversalPage c2 = concepts.get(random.nextInt(concepts.size()));

        // Results are collected in the same language order used by writeHeader().
        List<SRResult> results = new ArrayList<SRResult>();
        for (Language lang : langs) {
            SRMetric sr = metrics.get(lang);
            results.add(sr.similarity(c1.getLocalId(lang), c2.getLocalId(lang), false));
        }
        writeRow(c1, c2, results);
    }

    /**
     * Writes the column names as a single tab-separated line, terminated by a
     * newline so that the first data row starts on its own line.
     */
    private void writeHeader() throws IOException {
        StringBuilder header = new StringBuilder();
        header.append("ITEM_NAME_1");
        header.append("\tITEM_ID_1");
        header.append("\tITEM_NAME_2");
        header.append("\tITEM_ID_2");
        header.append("\tSPATIAL_DISTANCE");
        for (Language lang : langs) {
            header.append("\t").append(lang.getLangCode()).append("_SR");
        }
        header.append("\n");
        output.write(header.toString());
    }

    /**
     * Writes one row: titles and universal ids of both concepts, the
     * orthodromic distance between their centroids divided by 1000, and one SR
     * score per language.
     */
    private void writeRow(UniversalPage c1, UniversalPage c2, List<SRResult> results) throws WikiBrainException, IOException {
        // The stored locations are already centroids, so they are used directly.
        Point p1 = locations.get(c1);
        Point p2 = locations.get(c2);

        // A fresh calculator per row: it holds start/destination state and rows
        // may be produced from several threads at once.
        GeodeticCalculator geoCalc = new GeodeticCalculator();
        geoCalc.setStartingGeographicPoint(p1.getX(), p1.getY());
        geoCalc.setDestinationGeographicPoint(p2.getX(), p2.getY());
        double km = geoCalc.getOrthodromicDistance() / 1000;

        Title t1 = c1.getBestEnglishTitle(lpDao, true);
        Title t2 = c2.getBestEnglishTitle(lpDao, true);

        // Assemble the row outside the lock so the critical section is one write.
        StringBuilder row = new StringBuilder();
        row.append(t1.getCanonicalTitle())
                .append("\t").append(c1.getUnivId())
                .append("\t").append(t2.getCanonicalTitle())
                .append("\t").append(c2.getUnivId())
                .append("\t").append(km);
        for (SRResult result : results) {
            row.append("\t").append(result.getScore());
        }
        row.append("\n");

        // Serialize writers so rows from concurrent samples are not interleaved.
        synchronized (output) {
            output.write(row.toString());
        }
    }

    /**
     * Builds the environment from the command line, loads concept locations,
     * and writes {@code NUM_SAMPLES} sampled pairs to "toblers_eval.tsv".
     */
    public static void main(String[] args) throws Exception {
        Env env = EnvBuilder.envFromArgs(args);
        SimpleToblersLawEvaluator eval = new SimpleToblersLawEvaluator(env);
        eval.retrieveLocations();
        eval.evaluate(new File("toblers_eval.tsv"), NUM_SAMPLES);
    }
}