/**
* DAO
* Copyright 22.02.2015 by Michael Peter Christen, @0rb1t3r
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package org.loklak.data;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Path;
import java.security.KeyPair;
import java.security.KeyPairGenerator;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import com.fasterxml.jackson.databind.JsonNode;
import com.github.fge.jackson.JsonLoader;
import com.google.common.base.Charsets;
import org.eclipse.jetty.util.ConcurrentHashSet;
import org.eclipse.jetty.util.log.Log;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.logging.slf4j.Slf4jESLoggerFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.json.JSONException;
import org.json.JSONObject;
import org.loklak.Caretaker;
import org.loklak.api.search.SearchServlet;
import org.loklak.geo.GeoNames;
import org.loklak.harvester.TwitterScraper;
import org.loklak.http.AccessTracker;
import org.loklak.http.ClientConnection;
import org.loklak.http.RemoteAccess;
import org.loklak.objects.AbstractObjectEntry;
import org.loklak.objects.AccountEntry;
import org.loklak.objects.ImportProfileEntry;
import org.loklak.objects.MessageEntry;
import org.loklak.objects.Peers;
import org.loklak.objects.QueryEntry;
import org.loklak.objects.ResultList;
import org.loklak.objects.SourceType;
import org.loklak.objects.Timeline;
import org.loklak.objects.TimelineCache;
import org.loklak.objects.UserEntry;
import org.loklak.server.*;
import org.loklak.tools.DateParser;
import org.loklak.tools.OS;
import org.loklak.tools.storage.*;
/**
* The Data Access Object for the message project.
* This class provides only static methods so that its functionality is
* available to all other classes without instantiation.
*
* To debug, call elasticsearch directly, e.g.:
*
* get statistics
* curl localhost:9200/_stats?pretty=true
*
* get statistics for message index
* curl -XGET 'http://127.0.0.1:9200/messages?pretty=true'
*
* get mappings in message index
* curl -XGET "http://localhost:9200/messages/_mapping?pretty=true"
*
* get search result from message index
* curl -XGET 'http://127.0.0.1:9200/messages/_search?q=*&pretty=true'
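*
* get a document count from the message index (the _count API is a standard elasticsearch endpoint)
* curl -XGET 'http://127.0.0.1:9200/messages/_count?pretty=true'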
*/
public class DAO {
public final static com.fasterxml.jackson.core.JsonFactory jsonFactory = new com.fasterxml.jackson.core.JsonFactory();
public final static com.fasterxml.jackson.databind.ObjectMapper jsonMapper = new com.fasterxml.jackson.databind.ObjectMapper(DAO.jsonFactory);
public final static com.fasterxml.jackson.core.type.TypeReference<HashMap<String,Object>> jsonTypeRef = new com.fasterxml.jackson.core.type.TypeReference<HashMap<String,Object>>() {};
public final static String MESSAGE_DUMP_FILE_PREFIX = "messages_";
public final static String ACCOUNT_DUMP_FILE_PREFIX = "accounts_";
public final static String USER_DUMP_FILE_PREFIX = "users_";
public final static String ACCESS_DUMP_FILE_PREFIX = "access_";
public final static String FOLLOWERS_DUMP_FILE_PREFIX = "followers_";
public final static String FOLLOWING_DUMP_FILE_PREFIX = "following_";
private static final String IMPORT_PROFILE_FILE_PREFIX = "profile_";
public final static int CACHE_MAXSIZE = 10000;
public final static int EXIST_MAXSIZE = 4000000;
public static File conf_dir, bin_dir, html_dir;
private static File external_data, assets, dictionaries;
public static Settings public_settings, private_settings;
private static Path message_dump_dir, account_dump_dir, import_profile_dump_dir;
public static Path push_cache_dir;
public static JsonRepository message_dump;
private static JsonRepository account_dump;
private static JsonRepository import_profile_dump;
public static JsonDataset user_dump, followers_dump, following_dump;
public static AccessTracker access;
private static File schema_dir, conv_schema_dir;
private static ElasticsearchClient elasticsearch_client;
//private static Node elasticsearch_node;
//private static Client elasticsearch_client;
public static UserFactory users;
private static AccountFactory accounts;
public static MessageFactory messages;
public static MessageFactory messages_hour;
public static MessageFactory messages_day;
public static MessageFactory messages_week;
public static QueryFactory queries;
private static ImportProfileFactory importProfiles;
private static Map<String, String> config = new HashMap<>();
public static GeoNames geoNames = null;
public static Peers peers = new Peers();
public static OutgoingMessageBuffer outgoingMessages = new OutgoingMessageBuffer();
// AAA Schema for server usage
public static JsonTray authentication;
public static JsonTray authorization;
public static JsonTray accounting;
public static UserRoles userRoles;
public static JsonTray passwordreset;
public static Map<String, Accounting> accounting_temporary = new HashMap<>();
public static JsonFile login_keys;
public static TimelineCache timelineCache;
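// Names of the elasticsearch indexes. The time-sliced message indexes
// (hour/day/week) reuse the schema of the full message index, which is why
// they pass "messages.json" explicitly; all other indexes derive their schema
// file name from the enum constant name.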
public enum IndexName {
messages_hour("messages.json"), messages_day("messages.json"), messages_week("messages.json"),
messages, queries, users, accounts, import_profiles;
private String schemaFileName;
private IndexName() {
schemaFileName = this.name() + ".json";
}
private IndexName(String filename) {
schemaFileName = filename;
}
public String getSchemaFilename() {
return this.schemaFileName;
}
}
/**
* initialize the DAO
* @param configMap the configuration key-value map
* @param dataPath the path to the data directory
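*
* A minimal bootstrap sketch (these keys are read by this method; the values are illustrative):
* <pre>{@code
* Map<String, String> config = new HashMap<>();
* config.put("elasticsearch.index.number_of_shards", "1");
* config.put("elasticsearch.index.number_of_replicas", "0");
* config.put("elasticsearch_requiredClusterHealthStatus", "yellow");
* DAO.init(config, Paths.get("data"));
* }</pre>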
*/
public static void init(Map<String, String> configMap, Path dataPath) throws Exception{
log("initializing loklak DAO");
config = configMap;
conf_dir = new File("conf");
bin_dir = new File("bin");
html_dir = new File("html");
// initialize public and private keys
public_settings = new Settings(new File("data/settings/public.settings.json"));
File private_file = new File("data/settings/private.settings.json");
private_settings = new Settings(private_file);
OS.protectPath(private_file.toPath());
if (!private_settings.loadPrivateKey() || !public_settings.loadPublicKey()) {
log("Could not load the key pair, creating a new one");
// create a new RSA key pair; a NoSuchAlgorithmException cannot occur for "RSA"
// and would be propagated by the throws clause of init() if it ever did
String algorithm = "RSA";
KeyPairGenerator keyGen = KeyPairGenerator.getInstance(algorithm);
keyGen.initialize(2048);
KeyPair keyPair = keyGen.genKeyPair();
private_settings.setPrivateKey(keyPair.getPrivate(), algorithm);
public_settings.setPublicKey(keyPair.getPublic(), algorithm);
log("Key creation finished. Peer hash: " + public_settings.getPeerHashAlgorithm() + " " + public_settings.getPeerHash());
}
else{
log("Key pair loaded from file. Peer hash: " + public_settings.getPeerHashAlgorithm() + " " + public_settings.getPeerHash());
}
File datadir = dataPath.toFile();
// check if elasticsearch shall be accessed as external cluster
String transport = configMap.get("elasticsearch_transport.enabled");
if ("true".equals(transport)) { // equals on the constant is null-safe
String cluster_name = configMap.get("elasticsearch_transport.cluster.name");
String transport_addresses_string = configMap.get("elasticsearch_transport.addresses");
if (transport_addresses_string != null && transport_addresses_string.length() > 0) {
String[] transport_addresses = transport_addresses_string.split(",");
elasticsearch_client = new ElasticsearchClient(transport_addresses, cluster_name);
}
} else {
// use all config attributes with a key starting with "elasticsearch." to set elasticsearch settings
ESLoggerFactory.setDefaultFactory(new Slf4jESLoggerFactory());
org.elasticsearch.common.settings.Settings.Builder settings = org.elasticsearch.common.settings.Settings.builder();
for (Map.Entry<String, String> entry: config.entrySet()) {
String key = entry.getKey();
if (key.startsWith("elasticsearch.")) settings.put(key.substring(14), entry.getValue());
}
// patch the home path
settings.put("path.home", datadir.getAbsolutePath());
settings.put("path.data", datadir.getAbsolutePath());
// start elasticsearch
elasticsearch_client = new ElasticsearchClient(settings);
}
// open AAA storage
Path settings_dir = dataPath.resolve("settings");
settings_dir.toFile().mkdirs();
Path authentication_path = settings_dir.resolve("authentication.json");
authentication = new JsonTray(authentication_path.toFile(), 10000);
OS.protectPath(authentication_path);
Path authorization_path = settings_dir.resolve("authorization.json");
authorization = new JsonTray(authorization_path.toFile(), 10000);
OS.protectPath(authorization_path);
Path passwordreset_path = settings_dir.resolve("passwordreset.json");
passwordreset = new JsonTray(passwordreset_path.toFile(), 10000);
OS.protectPath(passwordreset_path);
Path accounting_path = settings_dir.resolve("accounting.json");
accounting = new JsonTray(accounting_path.toFile(), 10000);
OS.protectPath(accounting_path);
Path login_keys_path = settings_dir.resolve("login-keys.json");
login_keys = new JsonFile(login_keys_path.toFile());
OS.protectPath(login_keys_path);
Log.getLog().info("Initializing user roles");
Path userRoles_path = settings_dir.resolve("userRoles.json");
userRoles = new UserRoles(new JsonFile(userRoles_path.toFile()));
OS.protectPath(userRoles_path);
try{
userRoles.loadUserRolesFromObject();
Log.getLog().info("Loaded user roles from file");
}catch (IllegalArgumentException e){
Log.getLog().info("Load default user roles");
userRoles.loadDefaultUserRoles();
}
// open index
Path index_dir = dataPath.resolve("index");
if (index_dir.toFile().exists()) OS.protectPath(index_dir); // no other permissions to this path
// define the index factories
messages = new MessageFactory(elasticsearch_client, IndexName.messages.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
messages_hour = new MessageFactory(elasticsearch_client, IndexName.messages_hour.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
messages_day = new MessageFactory(elasticsearch_client, IndexName.messages_day.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
messages_week = new MessageFactory(elasticsearch_client, IndexName.messages_week.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
users = new UserFactory(elasticsearch_client, IndexName.users.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
accounts = new AccountFactory(elasticsearch_client, IndexName.accounts.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
queries = new QueryFactory(elasticsearch_client, IndexName.queries.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
importProfiles = new ImportProfileFactory(elasticsearch_client, IndexName.import_profiles.name(), CACHE_MAXSIZE, EXIST_MAXSIZE);
// create indices and set explicit mappings (elasticsearch could infer the data types, but defining them up front is more reliable)
File mappingsDir = new File(new File(conf_dir, "elasticsearch"), "mappings");
int shards = Integer.parseInt(configMap.get("elasticsearch.index.number_of_shards"));
int replicas = Integer.parseInt(configMap.get("elasticsearch.index.number_of_replicas"));
for (IndexName index: IndexName.values()) {
log("initializing index '" + index.name() + "'...");
try {
elasticsearch_client.createIndexIfNotExists(index.name(), shards, replicas);
} catch (Throwable e) {
Log.getLog().warn(e);
}
try {
elasticsearch_client.setMapping(index.name(), new File(mappingsDir, index.getSchemaFilename()));
} catch (Throwable e) {
Log.getLog().warn(e);
}
}
// elasticsearch will probably take some time until it is started up; we do other initialization work meanwhile
// create and document the data dump dir
assets = new File(datadir, "assets");
external_data = new File(datadir, "external");
dictionaries = new File(external_data, "dictionaries");
dictionaries.mkdirs();
push_cache_dir = dataPath.resolve("pushcache");
push_cache_dir.toFile().mkdirs();
// create message dump dir
String message_dump_readme =
"This directory contains dump files for messages which arrived the platform.\n" +
"There are three subdirectories for dump files:\n" +
"- own: for messages received with this peer. There is one file for each month.\n" +
"- import: hand-over directory for message dumps to be imported. Drop dumps here and they are imported.\n" +
"- imported: dump files which had been processed from the import directory are moved here.\n" +
"You can import dump files from other peers by dropping them into the import directory.\n" +
"Each dump file must start with the prefix '" + MESSAGE_DUMP_FILE_PREFIX + "' to be recognized.\n";
message_dump_dir = dataPath.resolve("dump");
message_dump = new JsonRepository(message_dump_dir.toFile(), MESSAGE_DUMP_FILE_PREFIX, message_dump_readme, JsonRepository.COMPRESSED_MODE, true, Runtime.getRuntime().availableProcessors());
account_dump_dir = dataPath.resolve("accounts");
account_dump_dir.toFile().mkdirs();
OS.protectPath(account_dump_dir); // no other permissions to this path
account_dump = new JsonRepository(account_dump_dir.toFile(), ACCOUNT_DUMP_FILE_PREFIX, null, JsonRepository.REWRITABLE_MODE, false, Runtime.getRuntime().availableProcessors());
File user_dump_dir = new File(datadir, "accounts");
user_dump_dir.mkdirs();
user_dump = new JsonDataset(
user_dump_dir,USER_DUMP_FILE_PREFIX,
new JsonDataset.Column[]{new JsonDataset.Column("id_str", false), new JsonDataset.Column("screen_name", true)},
"retrieval_date", DateParser.PATTERN_ISO8601MILLIS,
JsonRepository.REWRITABLE_MODE, false, Integer.MAX_VALUE);
followers_dump = new JsonDataset(
user_dump_dir, FOLLOWERS_DUMP_FILE_PREFIX,
new JsonDataset.Column[]{new JsonDataset.Column("screen_name", true)},
"retrieval_date", DateParser.PATTERN_ISO8601MILLIS,
JsonRepository.REWRITABLE_MODE, false, Integer.MAX_VALUE);
following_dump = new JsonDataset(
user_dump_dir, FOLLOWING_DUMP_FILE_PREFIX,
new JsonDataset.Column[]{new JsonDataset.Column("screen_name", true)},
"retrieval_date", DateParser.PATTERN_ISO8601MILLIS,
JsonRepository.REWRITABLE_MODE, false, Integer.MAX_VALUE);
Path log_dump_dir = dataPath.resolve("log");
log_dump_dir.toFile().mkdirs();
OS.protectPath(log_dump_dir); // no other permissions to this path
access = new AccessTracker(log_dump_dir.toFile(), ACCESS_DUMP_FILE_PREFIX, 60000, 3000);
access.start(); // start monitor
timelineCache = new TimelineCache(60000);
import_profile_dump_dir = dataPath.resolve("import-profiles");
import_profile_dump = new JsonRepository(import_profile_dump_dir.toFile(), IMPORT_PROFILE_FILE_PREFIX, null, JsonRepository.COMPRESSED_MODE, false, Runtime.getRuntime().availableProcessors());
// load schema folder
conv_schema_dir = new File("conf/conversion");
schema_dir = new File("conf/schema");
// load dictionaries if they are embedded here
// read the file allCountries.zip from http://download.geonames.org/export/dump/allCountries.zip
//File allCountries = new File(dictionaries, "allCountries.zip");
File cities1000 = new File(dictionaries, "cities1000.zip");
if (!cities1000.exists()) {
// download this file
ClientConnection.download("http://download.geonames.org/export/dump/cities1000.zip", cities1000);
}
if (cities1000.exists()) {
try {
geoNames = new GeoNames(cities1000, new File(conf_dir, "iso3166.json"), 1);
} catch (IOException e) {
// the file may be truncated from an earlier, interrupted download; delete it so it is fetched again on the next start
Log.getLog().warn(e.getMessage());
cities1000.delete();
geoNames = null;
}
}
// finally wait for healthy status of elasticsearch shards
ClusterHealthStatus required_status = ClusterHealthStatus.fromString(config.get("elasticsearch_requiredClusterHealthStatus"));
boolean ok;
do {
log("Waiting for elasticsearch " + required_status.name() + " status");
ok = elasticsearch_client.wait_ready(60000l, required_status);
} while (!ok);
log("elasticsearch has started up!");
// start the classifier
new Thread(){
public void run() {
log("initializing the classifier...");
try {
Classifier.init(10000, 1000);
} catch (Throwable e) {
Log.getLog().warn(e);
}
log("classifier initialized!");
}
}.start();
log("initializing queries...");
File harvestingPath = new File(datadir, "queries");
if (!harvestingPath.exists()) harvestingPath.mkdirs();
String[] list = harvestingPath.list();
if (list == null) list = new String[0]; // File.list() returns null if an IO error occurs
for (String queryfile: list) {
if (queryfile.startsWith(".") || queryfile.endsWith("~")) continue;
try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(harvestingPath, queryfile))))) {
String line;
List<IndexEntry<QueryEntry>> bulkEntries = new ArrayList<>();
while ((line = reader.readLine()) != null) {
line = line.trim().toLowerCase();
if (line.length() == 0) continue;
if (line.charAt(0) <= '9') {
// strip a leading count statistic: everything up to the first space
int p = line.indexOf(' ');
if (p < 0) continue;
line = line.substring(p + 1).trim();
}
// write line into query database
if (!existQuery(line)) {
bulkEntries.add(
new IndexEntry<QueryEntry>(
line,
SourceType.TWITTER,
new QueryEntry(line, 0, 60000, SourceType.TWITTER, false))
);
}
if (bulkEntries.size() > 1000) {
queries.writeEntries(bulkEntries);
bulkEntries.clear();
}
}
queries.writeEntries(bulkEntries);
} catch (IOException e) {
Log.getLog().warn(e);
}
}
log("queries initialized.");
log("finished DAO initialization");
}
public static boolean wait_ready(long maxtimemillis) {
ClusterHealthStatus required_status = ClusterHealthStatus.fromString(config.get("elasticsearch_requiredClusterHealthStatus"));
return elasticsearch_client.wait_ready(maxtimemillis, required_status);
}
public static String pendingClusterTasks() {
return elasticsearch_client.pendingClusterTasks();
}
public static String clusterStats() {
return elasticsearch_client.clusterStats();
}
public static Map<String, String> nodeSettings() {
return elasticsearch_client.nodeSettings();
}
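/**
* Compute the storage location of an asset file. Assets are sharded into
* subdirectories by the first two (lower-cased) characters of the screen name,
* so a call like getAssetFile("loklak", "123456", "avatar.png") would resolve
* to assets/l/o/loklak/123456_avatar.png (illustrative values).
* @param screen_name the user's screen name; assumed to have at least two characters
* @param id_str the message id the asset belongs to
* @param file the asset file name
*/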
public static File getAssetFile(String screen_name, String id_str, String file) {
String letter0 = ("" + screen_name.charAt(0)).toLowerCase();
String letter1 = ("" + screen_name.charAt(1)).toLowerCase();
File storage_path = new File(new File(new File(assets, letter0), letter1), screen_name);
return new File(storage_path, id_str + "_" + file); // all assets for one user in one file
}
public static Collection<File> getTweetOwnDumps(int count) {
return message_dump.getOwnDumps(count);
}
public static void importAccountDumps(int count) throws IOException {
Collection<File> dumps = account_dump.getImportDumps(count);
if (dumps == null || dumps.size() == 0) return;
for (File dump: dumps) {
JsonReader reader = account_dump.getDumpReader(dump);
final JsonReader dumpReader = reader;
Thread[] indexerThreads = new Thread[dumpReader.getConcurrency()];
for (int i = 0; i < dumpReader.getConcurrency(); i++) {
indexerThreads[i] = new Thread() {
public void run() {
JsonFactory accountEntry; // org.loklak.tools.storage.JsonFactory, not Jackson's JsonFactory
try {
while ((accountEntry = dumpReader.take()) != JsonStreamReader.POISON_JSON_MAP) {
try {
JSONObject json = accountEntry.getJSON();
AccountEntry a = new AccountEntry(json);
DAO.writeAccount(a, false);
} catch (IOException e) {
Log.getLog().warn(e);
}
}
} catch (InterruptedException e) {
Log.getLog().warn(e);
}
}
};
indexerThreads[i].start();
}
for (int i = 0; i < dumpReader.getConcurrency(); i++) {
try {indexerThreads[i].join();} catch (InterruptedException e) {/* ignore and join the next thread */}
}
account_dump.shiftProcessedDump(dump.getName());
}
}
/**
* close all objects in this class
*/
public static void close() {
Log.getLog().info("closing DAO");
// close the dump files
message_dump.close();
account_dump.close();
import_profile_dump.close();
user_dump.close();
followers_dump.close();
following_dump.close();
// close the tracker
access.close();
// close the index factories (flushes the caches)
messages.close();
messages_hour.close();
messages_day.close();
messages_week.close();
users.close();
accounts.close();
queries.close();
importProfiles.close();
// close the index
elasticsearch_client.close();
Log.getLog().info("closed DAO");
}
/**
* Get a configuration value, or a default value if the key is not set.
* @param key the configuration key
* @param default_val the value to return when the key is not configured
* @return the configured value, or default_val
*/
public static String getConfig(String key, String default_val) {
String value = config.get(key);
return value == null ? default_val : value;
}
public static String[] getConfig(String key, String[] default_val, String delim) {
String value = config.get(key);
return value == null || value.length() == 0 ? default_val : value.split(delim);
}
public static long getConfig(String key, long default_val) {
String value = config.get(key);
try {
return value == null ? default_val : Long.parseLong(value);
} catch (NumberFormatException e) {
return default_val;
}
}
public static double getConfig(String key, double default_val) {
String value = config.get(key);
try {
return value == null ? default_val : Double.parseDouble(value);
} catch (NumberFormatException e) {
return default_val;
}
}
public static int getConfig(String key, int default_val) {
String value = config.get(key);
try {
return value == null ? default_val : Integer.parseInt(value);
} catch (NumberFormatException e) {
return default_val;
}
}
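// Usage sketch for the typed getters above (the keys are illustrative, not required settings):
//   int httpPort = DAO.getConfig("port.http", 9000);
//   long timeout = DAO.getConfig("timeout", 10000L);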
public static void setConfig(String key, String value) {
config.put(key, value);
}
public static void setConfig(String key, long value) {
setConfig(key, Long.toString(value));
}
public static void setConfig(String key, double value) {
setConfig(key, Double.toString(value));
}
public static JsonNode getSchema(String key) throws IOException {
File schema = new File(schema_dir, key);
if (!schema.exists()) {
throw new FileNotFoundException("No schema file with name " + key + " found");
}
return JsonLoader.fromFile(schema);
}
public static JSONObject getConversionSchema(String key) throws IOException {
File schema = new File(conv_schema_dir, key);
if (!schema.exists()) {
throw new FileNotFoundException("No schema file with name " + key + " found");
}
return new JSONObject(com.google.common.io.Files.toString(schema, Charsets.UTF_8));
}
public static boolean getConfig(String key, boolean default_val) {
String value = config.get(key);
return value == null ? default_val : value.equals("true") || value.equals("on") || value.equals("1");
}
public static Set<String> getConfigKeys() {
return config.keySet();
}
public static class MessageWrapper {
public MessageEntry t;
public UserEntry u;
public boolean dump;
public MessageWrapper(MessageEntry t, UserEntry u, boolean dump) {
this.t = t;
this.u = u;
this.dump = dump;
}
}
/**
* Store a message together with a user into the search index
* @param mw a message wrapper
* @return true if the record was stored because it did not exist, false if it was not stored because the record existed already
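*
* A minimal call sketch (assumes a MessageEntry tweet and a UserEntry user parsed elsewhere, e.g. by a scraper):
* <pre>{@code
* boolean stored = DAO.writeMessage(new DAO.MessageWrapper(tweet, user, true));
* }</pre>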
*/
public static boolean writeMessage(MessageWrapper mw) {
if (mw.t == null) return false;
try {
synchronized (DAO.class) {
// record the tweet into the search index and check whether it is a new entry
boolean exists = false;
if (mw.t.getCreatedAt().after(DateParser.oneHourAgo())) {
exists = messages_hour.writeEntry(new IndexEntry<MessageEntry>(mw.t.getIdStr(), mw.t.getSourceType(), mw.t));
if (exists) return false;
}
if (mw.t.getCreatedAt().after(DateParser.oneDayAgo())) {
exists = messages_day.writeEntry(new IndexEntry<MessageEntry>(mw.t.getIdStr(), mw.t.getSourceType(), mw.t));
if (exists) return false;
}
if (mw.t.getCreatedAt().after(DateParser.oneWeekAgo())) {
exists = messages_week.writeEntry(new IndexEntry<MessageEntry>(mw.t.getIdStr(), mw.t.getSourceType(), mw.t));
if (exists) return false;
}
exists = messages.writeEntry(new IndexEntry<MessageEntry>(mw.t.getIdStr(), mw.t.getSourceType(), mw.t));
if (exists) return false;
// write the user into the index
users.writeEntry(new IndexEntry<UserEntry>(mw.u.getScreenName(), mw.t.getSourceType(), mw.u));
// record tweet into text file
if (mw.dump) message_dump.write(mw.t.toJSON(mw.u, false, Integer.MAX_VALUE, ""));
}
// teach the classifier
Classifier.learnPhrase(mw.t.getText());
} catch (IOException e) {
Log.getLog().warn(e);
}
return true;
}
public static Set<String> writeMessageBulk(Collection<MessageWrapper> mws) {
List<MessageWrapper> noDump = new ArrayList<>();
List<MessageWrapper> dump = new ArrayList<>();
for (MessageWrapper mw: mws) {
if (mw.t == null) continue;
if (mw.dump) dump.add(mw); else noDump.add(mw);
}
Set<String> createdIDs = new HashSet<>();
createdIDs.addAll(writeMessageBulkNoDump(noDump));
createdIDs.addAll(writeMessageBulkDump(dump)); // also performs a writeMessageBulkNoDump internally
return createdIDs;
}
/**
* write messages without writing them to the dump file
* @param mws a collection of message wrappers
* @return a set of message IDs which had been created with this bulk write.
*/
private static Set<String> writeMessageBulkNoDump(Collection<MessageWrapper> mws) {
if (mws.size() == 0) return new HashSet<>();
List<IndexEntry<UserEntry>> userBulk = new ArrayList<>();
List<IndexEntry<MessageEntry>> messageBulk = new ArrayList<>();
for (MessageWrapper mw: mws) {
if (messages.existsCache(mw.t.getIdStr())) continue; // we omit writing this again
synchronized (DAO.class) {
// write the user into the index
userBulk.add(new IndexEntry<UserEntry>(mw.u.getScreenName(), mw.t.getSourceType(), mw.u));
// record tweet into search index
messageBulk.add(new IndexEntry<MessageEntry>(mw.t.getIdStr(), mw.t.getSourceType(), mw.t));
}
// teach the classifier
Classifier.learnPhrase(mw.t.getText());
}
ElasticsearchClient.BulkWriteResult result = null;
try {
final Date limitDate = new Date();
List<IndexEntry<MessageEntry>> macc;
final Set<String> existed = new HashSet<>();
//DAO.log("***DEBUG messages INIT: " + messageBulk.size());
limitDate.setTime(DateParser.oneHourAgo().getTime());
macc = messageBulk.stream().filter(i -> i.getObject().getCreatedAt().after(limitDate)).collect(Collectors.toList());
//DAO.log("***DEBUG messages for HOUR: " + macc.size());
result = messages_hour.writeEntries(macc);
//DAO.log("***DEBUG messages for HOUR: " + result.getCreated().size() + " created");
for (IndexEntry<MessageEntry> i: macc) if (!(result.getCreated().contains(i.getId()))) existed.add(i.getId());
//DAO.log("***DEBUG messages for HOUR: " + existed.size() + " existed");
limitDate.setTime(DateParser.oneDayAgo().getTime());
macc = messageBulk.stream().filter(i -> !(existed.contains(i.getObject().getIdStr()))).filter(i -> i.getObject().getCreatedAt().after(limitDate)).collect(Collectors.toList());
//DAO.log("***DEBUG messages for DAY : " + macc.size());
result = messages_day.writeEntries(macc);
//DAO.log("***DEBUG messages for DAY: " + result.getCreated().size() + " created");
for (IndexEntry<MessageEntry> i: macc) if (!(result.getCreated().contains(i.getId()))) existed.add(i.getId());
//DAO.log("***DEBUG messages for DAY: " + existed.size() + " existed");
limitDate.setTime(DateParser.oneWeekAgo().getTime());
macc = messageBulk.stream().filter(i -> !(existed.contains(i.getObject().getIdStr()))).filter(i -> i.getObject().getCreatedAt().after(limitDate)).collect(Collectors.toList());
//DAO.log("***DEBUG messages for WEEK: " + macc.size());
result = messages_week.writeEntries(macc);
//DAO.log("***DEBUG messages for WEEK: " + result.getCreated().size() + " created");
for (IndexEntry<MessageEntry> i: macc) if (!(result.getCreated().contains(i.getId()))) existed.add(i.getId());
//DAO.log("***DEBUG messages for WEEK: " + existed.size() + " existed");
macc = messageBulk.stream().filter(i -> !(existed.contains(i.getObject().getIdStr()))).collect(Collectors.toList());
//DAO.log("***DEBUG messages for ALL : " + macc.size());
result = messages.writeEntries(macc);
//DAO.log("***DEBUG messages for ALL: " + result.getCreated().size() + " created");
for (IndexEntry<MessageEntry> i: macc) if (!(result.getCreated().contains(i.getId()))) existed.add(i.getId());
//DAO.log("***DEBUG messages for ALL: " + existed.size() + " existed");
users.writeEntries(userBulk);
} catch (IOException e) {
Log.getLog().warn(e);
}
if (result == null) return new HashSet<String>();
return result.getCreated();
}
private static Set<String> writeMessageBulkDump(Collection<MessageWrapper> mws) {
Set<String> created = writeMessageBulkNoDump(mws);
for (MessageWrapper mw: mws) try {
if (!created.contains(mw.t.getIdStr())) continue;
synchronized (DAO.class) {
// record tweet into text file
message_dump.write(mw.t.toJSON(mw.u, false, Integer.MAX_VALUE, ""));
}
// teach the classifier
Classifier.learnPhrase(mw.t.getText());
} catch (IOException e) {
Log.getLog().warn(e);
}
return created;
}
/**
* Store an account into the search index.
* @param a an account
* @param dump if true, the account is also written to the account dump file
* @return true (write errors are logged, not thrown)
*/
public static boolean writeAccount(AccountEntry a, boolean dump) {
try {
// record account into text file
if (dump) account_dump.write(a.toJSON(null));
// record account into search index
accounts.writeEntry(new IndexEntry<AccountEntry>(a.getScreenName(), a.getSourceType(), a));
} catch (IOException e) {
Log.getLog().warn(e);
}
return true;
}
/**
* Store an import profile into the search index.
* @param i an import profile
* @param dump if true, the profile is also written to the import profile dump file
* @return true (write errors are logged, not thrown)
*/
public static boolean writeImportProfile(ImportProfileEntry i, boolean dump) {
try {
// record import profile into text file
if (dump) import_profile_dump.write(i.toJSON());
// record import profile into search index
importProfiles.writeEntry(new IndexEntry<ImportProfileEntry>(i.getId(), i.getSourceType(), i));
} catch (IOException e) {
Log.getLog().warn(e);
}
return true;
}
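// The countLocal*Messages helpers below cascade upwards: a request is
// delegated to the smallest time-sliced index that can cover the requested
// time window (hour -> day -> week -> full message index).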
private static long countLocalHourMessages(final long millis, boolean created_at) {
if (millis > 3600000L) return countLocalDayMessages(millis, created_at);
if (created_at && millis == 3600000L) return elasticsearch_client.count(IndexName.messages_hour.name());
return elasticsearch_client.count(
created_at ? IndexName.messages_hour.name() : IndexName.messages_week.name(),
created_at ? AbstractObjectEntry.CREATED_AT_FIELDNAME : AbstractObjectEntry.TIMESTAMP_FIELDNAME,
millis);
}
private static long countLocalDayMessages(final long millis, boolean created_at) {
if (millis > 86400000L) return countLocalWeekMessages(millis, created_at);
if (created_at && millis == 86400000L) return elasticsearch_client.count(IndexName.messages_day.name());
return elasticsearch_client.count(
created_at ? IndexName.messages_day.name() : IndexName.messages.name(),
created_at ? AbstractObjectEntry.CREATED_AT_FIELDNAME : AbstractObjectEntry.TIMESTAMP_FIELDNAME,
millis);
}
private static long countLocalWeekMessages(final long millis, boolean created_at) {
if (millis > 604800000L) return countLocalMessages(millis, created_at);
if (created_at && millis == 604800000L) return elasticsearch_client.count(IndexName.messages_week.name());
return elasticsearch_client.count(
created_at ? IndexName.messages_week.name() : IndexName.messages.name(),
created_at ? AbstractObjectEntry.CREATED_AT_FIELDNAME : AbstractObjectEntry.TIMESTAMP_FIELDNAME,
millis);
}
public static long countLocalMessages(final long millis, boolean created_at) {
if (millis == 0) return 0;
if (millis > 0) {
if (millis <= 3600000L) return countLocalHourMessages(millis, created_at);
if (millis <= 86400000L) return countLocalDayMessages(millis, created_at);
if (millis <= 604800000L) return countLocalWeekMessages(millis, created_at);
}
return elasticsearch_client.count(
IndexName.messages.name(),
created_at ? AbstractObjectEntry.CREATED_AT_FIELDNAME : AbstractObjectEntry.TIMESTAMP_FIELDNAME,
millis == Long.MAX_VALUE ? -1 : millis);
}
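// Example (a sketch): count the messages created within the last 24 hours:
//   long lastDay = DAO.countLocalMessages(86400000L, true);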
public static long countLocalMessages() {
return elasticsearch_client.count(IndexName.messages.name(), AbstractObjectEntry.TIMESTAMP_FIELDNAME, -1);
}
public static long countLocalMessages(String provider_hash) {
return elasticsearch_client.countLocal(IndexName.messages.name(), provider_hash);
}
public static long countLocalUsers() {
return elasticsearch_client.count(IndexName.users.name(), AbstractObjectEntry.TIMESTAMP_FIELDNAME, -1);
}
public static long countLocalQueries() {
return elasticsearch_client.count(IndexName.queries.name(), AbstractObjectEntry.TIMESTAMP_FIELDNAME, -1);
}
public static long countLocalAccounts() {
return elasticsearch_client.count(IndexName.accounts.name(), AbstractObjectEntry.TIMESTAMP_FIELDNAME, -1);
}
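// Reads and existence checks probe the small time-sliced indexes first
// (hour, day, week) and fall back to the full message index.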
public static MessageEntry readMessage(String id) throws IOException {
MessageEntry m = null;
return messages_hour != null && ((m = messages_hour.read(id)) != null) ? m :
messages_day != null && ((m = messages_day.read(id)) != null) ? m :
messages_week != null && ((m = messages_week.read(id)) != null) ? m :
messages.read(id);
}
public static boolean existMessage(String id) {
return messages_hour != null && messages_hour.exists(id) ||
messages_day != null && messages_day.exists(id) ||
messages_week != null && messages_week.exists(id) ||
messages != null && messages.exists(id);
}
public static boolean existUser(String id) {
return users.exists(id);
}
public static boolean existQuery(String id) {
return queries.exists(id);
}
public static boolean deleteQuery(String id, SourceType sourceType) {
return queries.delete(id, sourceType);
}
public static boolean deleteImportProfile(String id, SourceType sourceType) {
return importProfiles.delete(id, sourceType);
}
public static int deleteOld(IndexName indexName, Date createDateLimit) {
RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery(AbstractObjectEntry.CREATED_AT_FIELDNAME).to(createDateLimit);
return elasticsearch_client.deleteByQuery(indexName.name(), rangeQuery);
}
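// Example (a sketch): purge entries older than one week from the week index:
//   DAO.deleteOld(IndexName.messages_week, DateParser.oneWeekAgo());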
public static class SearchLocalMessages {
public Timeline timeline;
public Map<String, List<Map.Entry<String, Long>>> aggregations;
public ElasticsearchClient.Query query;
/**
* Search the local message cache using an elasticsearch query.
* @param q - the query; for aggregations it should include a time frame in the form since:yyyy-MM-dd until:yyyy-MM-dd
* @param order_field - the field to order the results by, e.g. Timeline.Order.CREATED_AT
* @param timezoneOffset - an offset in minutes that is applied on dates given in the query of the form since:date until:date
* @param resultCount - the number of messages in the result; can be zero if only aggregations are wanted
* @param aggregationLimit - the maximum count of facet entities, not search results
* @param aggregationFields - names of the aggregation fields. If no aggregation is wanted, pass no (zero) field(s)
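*
* A call sketch (the query string and aggregation field are illustrative):
* <pre>{@code
* DAO.SearchLocalMessages result = new DAO.SearchLocalMessages(
*     "spacex since:day", Timeline.Order.CREATED_AT, 0, 100, 10, "hashtags");
* Timeline tl = result.timeline;
* }</pre>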
*/
public SearchLocalMessages(final String q, final Timeline.Order order_field, final int timezoneOffset, final int resultCount, final int aggregationLimit, final String... aggregationFields) {
this.timeline = new Timeline(order_field);
QueryEntry.ElasticsearchQuery sq = new QueryEntry.ElasticsearchQuery(q, timezoneOffset);
long interval = sq.until.getTime() - sq.since.getTime();
IndexName resultIndex;
if (aggregationFields.length > 0 && q.contains("since:")) {
if (q.contains("since:hour")) {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages_hour).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
} else if (q.contains("since:day")) {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages_day).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
} else if (q.contains("since:week")) {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages_week).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
} else {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
}
} else {
// probe increasingly larger time-sliced indexes until the result is sufficient
this.query = elasticsearch_client.query((resultIndex = IndexName.messages_hour).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
if (!q.contains("since:hour") && insufficient(this.query, resultCount, aggregationLimit, aggregationFields)) {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages_day).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
if (!q.contains("since:day") && insufficient(this.query, resultCount, aggregationLimit, aggregationFields)) {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages_week).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
if (!q.contains("since:week") && insufficient(this.query, resultCount, aggregationLimit, aggregationFields)) {
this.query = elasticsearch_client.query((resultIndex = IndexName.messages).name(), sq.queryBuilder, order_field.getMessageFieldName(), timezoneOffset, resultCount, interval, AbstractObjectEntry.CREATED_AT_FIELDNAME, aggregationLimit, aggregationFields);
}
}
}
}
timeline.setHits(query.hitCount);
timeline.setResultIndex(resultIndex);
// evaluate search result
for (Map<String, Object> map: query.result) {
MessageEntry tweet = new MessageEntry(new JSONObject(map));
try {
UserEntry user = users.read(tweet.getScreenName());
assert user != null;
if (user != null) {
timeline.add(tweet, user);
}
} catch (IOException e) {
Log.getLog().warn(e);
}
}
this.aggregations = query.aggregations;
}
private static boolean insufficient(ElasticsearchClient.Query query, int resultCount, int aggregationLimit, String... aggregationFields) {
return query.hitCount < resultCount || (aggregationFields.length > 0 && getAggregationResultLimit(query.aggregations) < aggregationLimit);
}
public JSONObject getAggregations() {
JSONObject json = new JSONObject(true);
if (aggregations == null) return json;
for (Map.Entry<String, List<Map.Entry<String, Long>>> aggregation: aggregations.entrySet()) {
JSONObject facet = new JSONObject(true);
for (Map.Entry<String, Long> a: aggregation.getValue()) {
if (a.getValue().equals(query)) continue; // omit terms that merely echo the search itself, e.g. a search for "#abc" trivially yields "#abc" as the top hashtag
facet.put(a.getKey(), a.getValue());
}
json.put(aggregation.getKey(), facet);
}
return json;
}
private static int getAggregationResultLimit(Map<String, List<Map.Entry<String, Long>>> agg) {
if (agg == null) return 0;
int l = 0;
for (List<Map.Entry<String, Long>> a: agg.values()) l = Math.max(l, a.size());
return l;
}
}
public static LinkedHashMap<String, Long> FullDateHistogram(int timezoneOffset) {
return elasticsearch_client.fullDateHistogram(IndexName.messages.name(), timezoneOffset, AbstractObjectEntry.CREATED_AT_FIELDNAME);
}
/**
* Search the local user cache using an elasticsearch query.
* @param screen_name - the screen name of the user
*/
public static UserEntry searchLocalUserByScreenName(final String screen_name) {
try {
return users.read(screen_name);
} catch (IOException e) {
Log.getLog().warn(e);
return null;
}
}
public static UserEntry searchLocalUserByUserId(final String user_id) {
if (user_id == null || user_id.length() == 0) return null;
Map<String, Object> map = elasticsearch_client.query(IndexName.users.name(), UserEntry.field_user_id, user_id);
if (map == null) return null;
return new UserEntry(new JSONObject(map));
}
/**
* Search the local account cache using an elasticsearch query.
* @param screen_name - the screen name of the account
*/
public static AccountEntry searchLocalAccount(final String screen_name) {
try {
return accounts.read(screen_name);
} catch (IOException e) {
Log.getLog().warn(e);
return null;
}
}
/**
* Search the local query cache using an elasticsearch query.
* @param q - the query, can be empty for a matchall-query
* @param resultCount - the number of queries in the result
* @param sort_field - the field name to sort the result list by, e.g. "query_first"
* @param sort_order - the sort order (usually SortOrder.DESC)
*/
public static ResultList<QueryEntry> SearchLocalQueries(final String q, final int resultCount, final String sort_field, final String default_sort_type, final SortOrder sort_order, final Date since, final Date until, final String range_field) {
ResultList<QueryEntry> queries = new ResultList<>();
ResultList<Map<String, Object>> result = elasticsearch_client.fuzzyquery(IndexName.queries.name(), "query", q, resultCount, sort_field, default_sort_type, sort_order, since, until, range_field);
queries.setHits(result.getHits());
for (Map<String, Object> map: result) {
queries.add(new QueryEntry(new JSONObject(map)));
}
return queries;
}
public static ImportProfileEntry SearchLocalImportProfiles(final String id) {
try {
return importProfiles.read(id);
} catch (IOException e) {
Log.getLog().warn(e);
return null;
}
}
public static Collection<ImportProfileEntry> SearchLocalImportProfilesWithConstraints(final Map<String, String> constraints, boolean latest) throws IOException {
List<ImportProfileEntry> rawResults = new ArrayList<>();
List<Map<String, Object>> result = elasticsearch_client.queryWithConstraints(IndexName.import_profiles.name(), "active_status", ImportProfileEntry.EntryStatus.ACTIVE.name().toLowerCase(), constraints, latest);
for (Map<String, Object> map: result) {
rawResults.add(new ImportProfileEntry(new JSONObject(map)));
}
if (!latest) {
return rawResults;
}
// filter results to display only latest profiles
Map<String, ImportProfileEntry> latests = new HashMap<>();
for (ImportProfileEntry entry : rawResults) {
String uniqueKey;
if (entry.getImporter() != null) {
uniqueKey = entry.getSourceUrl() + entry.getImporter();
} else {
uniqueKey = entry.getSourceUrl() + entry.getClientHost();
}
if (latests.containsKey(uniqueKey)) {
if (entry.getLastModified().compareTo(latests.get(uniqueKey).getLastModified()) > 0) {
latests.put(uniqueKey, entry);
}
} else {
latests.put(uniqueKey, entry);
}
}
return latests.values();
}
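/**
* Retrieve a timeline for a query. If a front peer with acceptable latency
* (below 3 seconds) is known, the search is delegated to that peer and local
* scraping is used as a fall-back; otherwise Twitter is scraped locally.
* Depending on Caretaker.acceptQuery4Retrieval(q), the query is recorded in
* or deleted from the query index.
*/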
public static Timeline scrapeTwitter(final Query post, final String q, final Timeline.Order order, final int timezoneOffset, boolean byUserQuery, long timeout, boolean recordQuery) {
// retrieve messages from remote server
ArrayList<String> remote = DAO.getFrontPeers();
Timeline tl;
if (remote.size() > 0 && (peerLatency.get(remote.get(0)) == null || peerLatency.get(remote.get(0)).longValue() < 3000)) {
long start = System.currentTimeMillis();
tl = searchOnOtherPeers(remote, q, order, 100, timezoneOffset, "all", SearchServlet.frontpeer_hash, timeout); // "all" must be selected here to catch up on tweets missed between harvesting intervals
// at this point the remote list can be empty as a side-effect of the remote search attempt
if (post != null && remote.size() > 0 && tl != null) post.recordEvent("remote_scraper_on_" + remote.get(0), System.currentTimeMillis() - start);
if (tl == null || tl.size() == 0) {
// maybe the remote peer died; fall back to scraping locally
start = System.currentTimeMillis();
tl = TwitterScraper.search(q, order, true, true, 400);
if (post != null) post.recordEvent("local_scraper_after_unsuccessful_remote", System.currentTimeMillis() - start);
} else {
tl.writeToIndex();
}
} else {
if (post != null && remote.size() > 0) post.recordEvent("omitted_scraper_latency_" + remote.get(0), peerLatency.get(remote.get(0)));
long start = System.currentTimeMillis();
tl = TwitterScraper.search(q, order, true, true, 400);
if (post != null) post.recordEvent("local_scraper", System.currentTimeMillis() - start);
}
// record the query
long start2 = System.currentTimeMillis();
QueryEntry qe = null;
try {
qe = queries.read(q);
} catch (IOException | JSONException e) {
Log.getLog().warn(e);
}
if (recordQuery && Caretaker.acceptQuery4Retrieval(q)) {
if (qe == null) {
// a new query occurred
qe = new QueryEntry(q, timezoneOffset, tl.period(), SourceType.TWITTER, byUserQuery);
} else {
// existing queries are updated
qe.update(tl.period(), byUserQuery);
}
try {
queries.writeEntry(new IndexEntry<QueryEntry>(q, qe.source_type == null ? SourceType.TWITTER : qe.source_type, qe));
} catch (IOException e) {
Log.getLog().warn(e);
}
} else {
// accept rules may change over time; if the query is no longer accepted, delete it from the index
if (qe != null) queries.delete(q, qe.source_type);
}
if (post != null) post.recordEvent("query_recorder", System.currentTimeMillis() - start2);
//log("SCRAPER: TIME LEFT after recording = " + (termination - System.currentTimeMillis()));
return tl;
}
public static final Random random = new Random(System.currentTimeMillis());
private static final Map<String, Long> peerLatency = new HashMap<>();
private static ArrayList<String> getBestPeers(Collection<String> peers) {
ArrayList<String> best = new ArrayList<>();
if (peers == null || peers.size() == 0) return best;
// peers with unknown latency are listed first (so they get measured), the
// remaining peers are ordered by latency; the "* 1000 + best.size()" term
// scales the latency and adds a rough tie-breaker against key collisions in the TreeMap
TreeMap<Long, String> o = new TreeMap<>();
for (String peer: peers) {
if (peerLatency.containsKey(peer)) {
o.put(peerLatency.get(peer) * 1000 + best.size(), peer);
} else {
best.add(peer);
}
}
best.addAll(o.values());
return best;
}
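/**
* Decay all recorded peer latencies by a factor, giving peers which were slow
* or unresponsive in the past another chance; intended to be called periodically.
* @param factor the multiplier applied to each stored latency; a value below 1.0 shrinks them
*/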
public static void healLatency(float factor) {
for (Map.Entry<String, Long> entry: peerLatency.entrySet()) {
entry.setValue((long) (factor * entry.getValue()));
}
}
private static Set<String> frontPeerCache = new HashSet<String>();
private static Set<String> backendPeerCache = new HashSet<String>();
public static void updateFrontPeerCache(RemoteAccess remoteAccess) {
if (remoteAccess.getLocalHTTPPort() >= 80) {
frontPeerCache.add("http://" + remoteAccess.getRemoteHost() + (remoteAccess.getLocalHTTPPort() == 80 ? "" : ":" + remoteAccess.getLocalHTTPPort()));
} else if (remoteAccess.getLocalHTTPSPort() >= 443) {
frontPeerCache.add("https://" + remoteAccess.getRemoteHost() + (remoteAccess.getLocalHTTPSPort() == 443 ? "" : ":" + remoteAccess.getLocalHTTPSPort()));
}
}
/**
* from all known front peers, generate a list of available peers, ordered by the peer latency
* @return a list of front peers; only the first one shall be used, the others serve as fail-over peers
*/
public static ArrayList<String> getFrontPeers() {
String[] remote = DAO.getConfig("frontpeers", new String[0], ",");
ArrayList<String> testpeers = new ArrayList<>();
if (remote.length > 0) {
for (String peer: remote) testpeers.add(peer);
return testpeers;
}
if (frontPeerCache.size() == 0) {
// dynamically add all peers that have contacted this peer
for (Map<String, RemoteAccess> hmap: RemoteAccess.history.values()) {
for (Map.Entry<String, RemoteAccess> peer: hmap.entrySet()) {
updateFrontPeerCache(peer.getValue());
}
}
}
testpeers.addAll(frontPeerCache);
return getBestPeers(testpeers);
}
public static List<String> getBackendPeers() {
List<String> testpeers = new ArrayList<>();
if (backendPeerCache.size() == 0) {
String[] remote = DAO.getConfig("backend", new String[0], ",");
for (String peer: remote) backendPeerCache.add(peer);
}
testpeers.addAll(backendPeerCache);
return getBestPeers(testpeers);
}
public static Timeline searchBackend(final String q, final Timeline.Order order, final int count, final int timezoneOffset, final String where, final long timeout) {
List<String> remote = getBackendPeers();
if (remote.size() > 0 /*&& (peerLatency.get(remote.get(0)) == null || peerLatency.get(remote.get(0)) < 3000)*/) { // latency condition deactivated because we always need at least one peer
Timeline tt = searchOnOtherPeers(remote, q, order, count, timezoneOffset, where, SearchServlet.backend_hash, timeout);
if (tt != null) tt.writeToIndex();
return tt;
}
return null;
}
private final static Random randomPicker = new Random(System.currentTimeMillis());
public static Timeline searchOnOtherPeers(final List<String> remote, final String q, final Timeline.Order order, final int count, final int timezoneOffset, final String source, final String provider_hash, final long timeout) {
// select remote peer
while (remote.size() > 0) {
int pick = randomPicker.nextInt(remote.size());
String peer = remote.get(pick);
long start = System.currentTimeMillis();
try {
Timeline tl = SearchServlet.search(peer, q, order, source, count, timezoneOffset, provider_hash, timeout);
peerLatency.put(peer, System.currentTimeMillis() - start);
// to show which peer was used for the retrieval, we move the picked peer to the front of the list
if (pick != 0) remote.add(0, remote.remove(pick));
tl.setScraperInfo(tl.getScraperInfo().length() > 0 ? peer + "," + tl.getScraperInfo() : peer);
return tl;
} catch (IOException e) {
DAO.log("searchOnOtherPeers: no IO to scraping target: " + e.getMessage());
// the remote peer seems to be unresponsive; remove it (temporarily) from the remote peer list
peerLatency.put(peer, 3600000L);
frontPeerCache.remove(peer);
backendPeerCache.remove(peer);
remote.remove(pick);
}
}
return null;
}
public final static Set<Number> newUserIds = new ConcurrentHashSet<>();
public static void announceNewUserId(Timeline tl) {
for (MessageEntry message: tl) {
UserEntry user = tl.getUser(message);
assert user != null;
if (user == null) continue;
Number id = user.getUser();
if (id != null) announceNewUserId(id);
}
}
public static void announceNewUserId(Number id) {
JsonFactory mapcapsule = DAO.user_dump.get("id_str", id.toString());
JSONObject map = null;
try {map = mapcapsule == null ? null : mapcapsule.getJSON();} catch (IOException e) {/* treat unreadable entries as unknown users */}
if (map == null) newUserIds.add(id); // announce only ids that are not in the user dump yet
}
public static Set<Number> getNewUserIdsChunk() {
if (newUserIds.size() < 100) return null;
Set<Number> chunk = new HashSet<>();
Iterator<Number> i = newUserIds.iterator();
for (int j = 0; j < 100; j++) {
chunk.add(i.next());
i.remove();
}
return chunk;
}
public static void log(String line) {
Log.getLog().info(line);
}
public static void severe(String line) {
Log.getLog().warn(line);
}
public static void severe(String line, Throwable e) {
Log.getLog().warn(line, e);
}
public static void severe(Throwable e) {
Log.getLog().warn(e);
}
}