package org.wikibrain.spatial.cookbook.tflevaluate;
import com.google.common.collect.Sets;
import com.vividsolutions.jts.geom.Geometry;
import gnu.trove.procedure.TIntProcedure;
import gnu.trove.set.TIntSet;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.dao.UniversalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.model.NameSpace;
import org.wikibrain.core.model.Title;
import org.wikibrain.core.model.UniversalPage;
import org.wikibrain.spatial.dao.SpatialContainmentDao;
import org.wikibrain.spatial.dao.SpatialDataDao;
import org.wikibrain.wikidata.WikidataDao;
import java.io.File;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Created by toby on 4/17/14.
*/
public class BipartiteEvaluatorTest {
private static final Logger LOG = LoggerFactory.getLogger(BipartiteEvaluatorTest.class);
private static Set<Integer> PickSample(Set<Integer> originalSet, Integer size){
if (size > originalSet.size()){
LOG.warn(String.format("Want %d elements, only have %d", size, originalSet.size()));
return originalSet;
}
List<Integer> list = new LinkedList<Integer>(originalSet);
Collections.shuffle(list);
return new HashSet<Integer>(list.subList(0, size));
}
public static void main(String[] args) throws Exception {
LanguageSet languageSet = new LanguageSet("simple");
Env env = EnvBuilder.envFromArgs(args);
Configurator conf = env.getConfigurator();
ToblersLawEvaluator evaluator = new ToblersLawEvaluator(env, languageSet);
SpatialDataDao sdDao = conf.get(SpatialDataDao.class);
SpatialContainmentDao scDao = conf.get(SpatialContainmentDao.class);
LocalPageDao lpDao = conf.get(LocalPageDao.class);
WikidataDao wdDao = conf.get(WikidataDao.class);
UniversalPageDao upDao = conf.get(UniversalPageDao.class);
String layerName1 = "country";
String layerName2 = "states";
Set<String> subLayers = Sets.newHashSet();
subLayers.add("wikidata");
Integer containerId1 = wdDao.getItemId(lpDao.getByTitle(new Title("China", Language.getByLangCode("simple")), NameSpace.ARTICLE));
TIntSet containedItemIds1 = scDao.getContainedItemIds(containerId1,layerName1, "earth", subLayers, SpatialContainmentDao.ContainmentOperationType.CONTAINMENT);
Integer containerId2 = wdDao.getItemId(lpDao.getByTitle(new Title("California", Language.getByLangCode("simple")), NameSpace.ARTICLE));
TIntSet containedItemIds2 = scDao.getContainedItemIds(containerId2,layerName2, "earth", subLayers, SpatialContainmentDao.ContainmentOperationType.CONTAINMENT);
Map<Integer, Geometry> geometriesToParse = new HashMap<Integer, Geometry>();
List<UniversalPage> concepts1 = new ArrayList<UniversalPage>();
List<UniversalPage> concepts2 = new ArrayList<UniversalPage>();
final Set<Integer> containedId1 = new HashSet<Integer>();
final Set<Integer> containedId2 = new HashSet<Integer>();
LOG.info(String.format("%d items from set1, %d items from set2", containedItemIds1.size(), containedItemIds2.size()));
int counter = 0;
containedItemIds1.forEach(new TIntProcedure() {
@Override
public boolean execute(int i) {
containedId1.add(i);
return true;
}
});
containedItemIds2.forEach(new TIntProcedure() {
@Override
public boolean execute(int i) {
containedId2.add(i);
return true;
}
});
Set<Integer> sampledContainedId1 = PickSample(containedId1, 500);
Set<Integer> sampledContainedId2 = PickSample(containedId2, 500);
for(Integer i : sampledContainedId1){
if(counter % 100 == 0)
LOG.info(String.format("%d geometries added out of %d", counter, sampledContainedId1.size()));
UniversalPage concept = upDao.getById(i);
if(concept != null && concept.hasAllLanguages(languageSet)){
concepts1.add(upDao.getById(i));
geometriesToParse.put(i, sdDao.getGeometry(i, "wikidata", "earth"));
}
counter ++;
}
counter = 0;
for(Integer i : sampledContainedId2){
if(counter % 100 == 0)
LOG.info(String.format("%d geometries added out of %d", counter, sampledContainedId2.size()));
UniversalPage concept = upDao.getById(i);
if(concept != null && concept.hasAllLanguages(languageSet)){
concepts2.add(upDao.getById(i));
geometriesToParse.put(i, sdDao.getGeometry(i, "wikidata", "earth"));
}
counter ++;
}
LOG.info(String.format("Now retrieving %d locations", geometriesToParse.size()));
evaluator.retrieveLocations(geometriesToParse);
evaluator.evaluateBipartite(new File("GERMANY-NY_Test.csv"), concepts1, concepts2);
}
}