package org.loklak.harvester.strategy;
import org.eclipse.jetty.util.log.Log;
import org.loklak.api.search.SearchServlet;
import org.loklak.api.search.SuggestServlet;
import org.loklak.data.DAO;
import org.loklak.harvester.PushThread;
import org.loklak.harvester.TwitterAPI;
import org.loklak.harvester.TwitterScraper;
import org.loklak.objects.MessageEntry;
import org.loklak.objects.QueryEntry;
import org.loklak.objects.ResultList;
import org.loklak.objects.Timeline;
import twitter4j.Location;
import twitter4j.Trend;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import java.io.IOException;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* KaizenHarvester
*
* Kaizen is targeted to do more information and query grabbing, whether it
* uses the official Twitter API, meta-data from collected tweets, or the
* analysis of tweets.
*
* @author Kaisar Arkhan, @yuki_is_bored
*/
public class KaizenHarvester implements Harvester {
private final String BACKEND;
private final int SUGGESTIONS_COUNT;
private final int SUGGESTIONS_RANDOM;
private final int PLACE_RADIUS;
private final int QUERIES_LIMIT;
private final boolean VERBOSE;
private Random random;
private HashSet<String> queries = new HashSet<>();
private ExecutorService executorService = Executors.newFixedThreadPool(1);
private Twitter twitter = null;
public KaizenHarvester() {
BACKEND = DAO.getConfig("backend", "http://loklak.org");
SUGGESTIONS_COUNT = DAO.getConfig("harvester.kaizen.suggestions_count", 1000);
SUGGESTIONS_RANDOM = DAO.getConfig("harvester.kaizen.suggestions_random", 5);
PLACE_RADIUS = DAO.getConfig("harvester.kaizen.place_radius", 5);
QUERIES_LIMIT = DAO.getConfig("harvester.kaizen.queries_limit", 500);
VERBOSE = DAO.getConfig("harvester.kaizen.verbose", true);
random = new Random();
twitter = TwitterAPI.getAppTwitterFactory().getInstance();
if (twitter == null)
DAO.log("Kaizen can utilize Twitter API to get more queries, If you want to use it, " +
"Please add Application and Access tokens (twitterAccessToken, twitterAccessTokenSecret, " +
"client.twitterConsumerKey, client.twitterConsumerSecret)");
}
private void addQuery(String query) {
if (QUERIES_LIMIT > 0 && queries.size() > QUERIES_LIMIT)
return;
if (queries.contains(query))
return;
if (VERBOSE)
DAO.log("Adding '" + query + "' to queries");
queries.add(query);
}
private void grabInformation(Timeline timeline) {
if (VERBOSE)
DAO.log("Kaizen is going to grab more information" +
(timeline.getQuery() != null ? " from results of '" + timeline.getQuery() + "'" : ""));
for (MessageEntry message : timeline) {
for (String user : message.getMentions())
addQuery("from:" + user);
for (String hashtag : message.getHashtags())
addQuery(hashtag);
String place = message.getPlaceName();
if (!place.isEmpty())
addQuery("near:\"" + message.getPlaceName() + "\" within:" + PLACE_RADIUS + "mi");
}
}
private void pushToBackend(Timeline timeline) {
DAO.log("Pushing " + timeline.size() + " to backend ..." );
executorService.execute(new PushThread(BACKEND, timeline));
}
private int harvestMessages() {
if (VERBOSE)
DAO.log(queries.size() + " available queries, Harvest season!");
String query = queries.iterator().next();
queries.remove(query);
if (VERBOSE)
DAO.log("Kaizen is going to harvest messages with query '" + query + "'");
Timeline timeline = TwitterScraper.search(query, Timeline.Order.CREATED_AT, true, false, 400);
if (timeline == null)
timeline = new Timeline(Timeline.Order.CREATED_AT);
if (timeline.size() == 0) {
if (VERBOSE)
DAO.log(query + " gives us no result, Pushing to backend anyway ...");
timeline.setQuery(query);
pushToBackend(timeline);
return -1;
}
if (VERBOSE)
DAO.log("'" + query + "' gives us " + timeline.size() + " messages, Pushing to backend ...");
pushToBackend(timeline);
grabInformation(timeline);
return timeline.size();
}
private void grabTrending() {
try {
if (VERBOSE)
DAO.log("Kaizen is going to get trending topics ...");
for (Location location : twitter.trends().getAvailableTrends())
for (Trend trend : twitter.trends().getPlaceTrends(location.getWoeid()).getTrends())
addQuery(trend.getQuery());
} catch (TwitterException e) {
if (e.getErrorCode() != 88)
Log.getLog().warn(e);
}
}
private void grabSuggestions() {
if (VERBOSE)
DAO.log("Kaizen is going to request for queries suggestions from backend ...");
try {
ResultList<QueryEntry> suggestedQueries =
SuggestServlet.suggest(BACKEND, "", "all", SUGGESTIONS_COUNT,
"desc", "retrieval_next", 0, null, "now",
"retrieval_next", SUGGESTIONS_RANDOM);
if (VERBOSE)
DAO.log("Backend gave us " + suggestedQueries.size() + " suggested queries");
for (QueryEntry query : suggestedQueries) {
addQuery(query.getQuery());
}
if (suggestedQueries.size() == 0) {
if (VERBOSE)
DAO.log("It looks like backend doesn't have any suggested queries. "+
"Grabbing relevant context from backend collected messages ...");
Timeline timeline = SearchServlet.search(BACKEND, "", Timeline.Order.CREATED_AT, "cache",
SUGGESTIONS_RANDOM, 0, SearchServlet.backend_hash, 60000);
grabInformation(timeline);
}
} catch (IOException e) {
Log.getLog().warn(e);
}
if (twitter != null)
grabTrending();
}
@Override
public int harvest() {
if (!queries.isEmpty() && random.nextBoolean())
return harvestMessages();
grabSuggestions();
return 0;
}
@Override
public void stop() {
executorService.shutdown();
}
}