/******************************************************************************* * Trombone is a flexible text processing and analysis library used * primarily by Voyant Tools (voyant-tools.org). * * Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell * * This file is part of Trombone. * * Trombone is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Trombone is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Trombone. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package org.voyanttools.trombone.tool.utils; import java.io.File; import java.io.IOException; import java.net.URL; import java.nio.file.Files; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.voyanttools.trombone.model.Corpus; import org.voyanttools.trombone.model.IndexedDocument; import org.voyanttools.trombone.model.Keywords; import org.voyanttools.trombone.model.VariantsDB; import org.voyanttools.trombone.storage.Storage; import org.voyanttools.trombone.util.FlexibleParameters; import com.thoughtworks.xstream.annotations.XStreamOmitField; import edu.stanford.nlp.util.StringUtils; /** * @author sgs * */ public abstract class AbstractTool implements RunnableTool { @XStreamOmitField protected FlexibleParameters parameters; @XStreamOmitField protected transient Storage storage; private static float VERSION = 5.2f; @XStreamOmitField private boolean isVerbose; @XStreamOmitField private DateFormat dateFormat = new SimpleDateFormat("HH:mm:ss.SSS"); /** * @param storage * */ public AbstractTool(Storage storage, FlexibleParameters parameters) { this.storage = storage; this.parameters = parameters; this.isVerbose = parameters.getParameterBooleanValue("verbose"); } public float getVersion() { return VERSION; } protected boolean isVerbose() { return isVerbose; } protected void log(String string) { log(string, null); } protected void log(String string, Calendar start) { if (isVerbose()) { Calendar now = Calendar.getInstance(); System.out.println(dateFormat.format(now.getTime())+"\t"+string+(start!=null ? "("+(now.getTimeInMillis()-start.getTimeInMillis())+" ms)" : "")); } } public FlexibleParameters getParameters() { return parameters; } protected Keywords getStopwords(Corpus corpus) throws IOException { Keywords keywords = new Keywords(); if (parameters.containsKey("stopList")) { if (parameters.getParameterValue("stopList", "").equals("auto")) { Set<String> langs = new HashSet<String>(); URL url = this.getClass().getResource("/org/voyanttools/trombone/keywords"); File dir = new File(url.getFile()); Map<String, String> stopLists = new HashMap<String, String>(); if (dir.exists() && dir.isDirectory()) { for (File file : dir.listFiles()) { String filename = file.getName(); if (file.isFile() && filename.startsWith("stop.")) { String langCode = filename.substring(5, filename.indexOf('.', 5)); stopLists.put(langCode, filename); } } } for (String lang : corpus.getLanguageCodes()) { if (lang.isEmpty() || lang.equals("en")) {langs.add("stop.en.taporware.txt");} else if (lang.equals("fr")) {langs.add("stop.fr.veronis.txt");} else if (lang.equals("se")) {langs.add("stop.se.long.txt");} else if (stopLists.containsKey(lang)) { langs.add(stopLists.get(lang)); } } if (langs.isEmpty()==false) { keywords.load(storage, langs.toArray(new String[0])); } } else { keywords.load(storage, parameters.getParameterValues("stopList")); } } return keywords; } protected String[] getQueries() throws IOException { return getQueries(parameters.getParameterValues("query")); } protected String[] getQueries(String[] queryStrings) throws IOException { List<String> queries = new ArrayList<String>(); VariantsDB variantsDB = null; // if (parameters.containsKey("variants")) { // variantsDB = new VariantsDB(storage, parameters.getParameterValue("variants"), true); // } for (String query : queryStrings) { // facets can be complex strings so they should be provided as individual queries and sent through String[] qs = query.startsWith("facet.") ? new String[]{query} : query.split("\\s*,\\s*"); for (String q :qs) { if (!q.trim().isEmpty()) { if (variantsDB!=null) { String[] variants = variantsDB.get(q); if (variants==null) { queries.add(q); } else { queries.add("("+StringUtils.join(variants, "|")+")"); } } else { queries.add(q); } } } } if (variantsDB!=null) {variantsDB.close();} return queries.toArray(new String[0]); } }