package edu.illinois.lis.utils; import java.io.File; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; import com.google.gson.JsonObject; import edu.illinois.lis.query.TrecTemporalTopicSet; /** * creates a simple set of gQueries from the official TREC MB topic file * * @author Miles Efron * */ public class ExtractGqueriesFromTrecFormat { private JsonObject outputObjects = null; private String pathToTrecTopics; public ExtractGqueriesFromTrecFormat(String pathToTrecTopics) { this.pathToTrecTopics = pathToTrecTopics; outputObjects = new JsonObject(); } public void harvest() { TrecTemporalTopicSet topicsFile = null; try { topicsFile = TrecTemporalTopicSet.fromFile(new File(pathToTrecTopics)); } catch (Exception e) { e.printStackTrace(); } JsonArray outputJsonArray = new JsonArray(); for(edu.illinois.lis.query.TrecTemporalTopic query : topicsFile) { JsonObject outputQueryObject = new JsonObject(); outputQueryObject.addProperty("title", query.getId()); outputQueryObject.addProperty("text", query.getQuery()); outputQueryObject.addProperty("epoch", Double.toString(query.getEpoch())); outputQueryObject.addProperty("querytweettime", Long.toString(query.getQueryTweetTime())); String text = query.getQuery(); String[] toks = text.split(" "); JsonArray modelArray = new JsonArray(); for(String tok : toks) { JsonObject tupleObject = new JsonObject(); tupleObject.addProperty("weight", 1.0); tupleObject.addProperty("feature", tok); modelArray.add(tupleObject); } outputQueryObject.add("model", modelArray); outputJsonArray.add(outputQueryObject); } outputObjects.add("queries", outputJsonArray); } public String toString() { Gson gson = new GsonBuilder().setPrettyPrinting().create(); String json = gson.toJson(outputObjects); return json; } public static void main(String[] args) throws Exception { String trecQueryPath = args[0]; ExtractGqueriesFromTrecFormat harvester = new ExtractGqueriesFromTrecFormat(trecQueryPath); harvester.harvest(); System.out.println(harvester); } }