package de.berlin.hu.chemspot; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; import java.util.Properties; import de.berlin.hu.util.Constants.ChemicalType; public class ChemSpotConfiguration { // Constants public static enum Corpus {IOB, CRAFT, GZ, NACTEM, PATENT, DDI, XMI, CHEMDNER, TXT}; public static enum Component {TOKENIZER, SENTENCE_DETECTOR, POS_TAGGER, CRF, DICTIONARY, SUM_TAGGER, ABBREV, EUMED_TAGGER, MENTION_EXPANDER, ANNOTATION_MERGER, STOPWORD_FILTER, NORMALIZER, OPSIN, FEATURE_GENERATOR, CHEMHITS, PROFILER}; private static final Component[] DEFAULT_DEACTIVATED = {Component.FEATURE_GENERATOR, Component.CHEMHITS, Component.PROFILER}; private static final ChemicalType[] DEFAULT_DEACTIVATED_ANNOTATIONS = {}; private static final ChemicalType[] DEFAULT_DEACTIVATED_ANNOTATIONS_EUMED = {}; private static final String CORPUS_PREFIX = "corpus."; private static final String OUTPUT_PATH = "output.path"; private static final String XMI_OUTPUT_PATH = "output.path.xmi"; private static final String CONVERT_TO_IOB = "output.convertToIOB"; private static final String SENTENCE_MODEL = "sentence_model.path"; private static final String CRF_MODEL = "crf.model.path"; private static final String DICTIONARY = "dict.path"; private static final String IDS = "ids.path"; private static final String DRUG_MODEL = "drug.model.path"; private static final String EVALUATION = "evaluation"; private static final String DETAILED_EVALUATION = "evaluation.detailed"; private static final String THREADING = "threading"; private static final String THREAD_NR = "threading.number_of_threads"; private static final int DEFAULT_THREAD_NR = 4; private static final String COMPONENT_PREFIX = "component."; private static final String DICTIONARY_INITIALIZE_FROM_NORMALIZER = COMPONENT_PREFIX + Component.DICTIONARY.toString().toLowerCase() + ".initializeFromNormalizer"; private static final String DICTIONARY_FILTER_LENGTH = COMPONENT_PREFIX + Component.DICTIONARY.toString().toLowerCase() + ".filterLength"; private static final String ANNOTATIONS_PREFIX = "annotation."; private static final String ANNOTATIONS_PREFIX_EUMED = "annotation.eumed."; private static final String UPDATE_PREFIX = "update."; private static final String UPDATE_REMOVE_TEMPORARY_FILES = UPDATE_PREFIX + "removeTemporaryFiles"; private static final String UPDATE_CHEBI_SDF_URL = UPDATE_PREFIX + "chebi.sdf.url"; private static final String UPDATE_CHEBI_MUST_CONTAIN_FORMULA = UPDATE_PREFIX + "chebi.mustContainFormula"; private static final String UPDATE_PUBCHEM_SDF_URL = UPDATE_PREFIX + "pubchem.sdf.url"; private static final String UPDATE_PUBCHEM_MAX_LENGTH = UPDATE_PREFIX + "pubchem.maxLength"; // Variables private static Properties properties = null; static { properties = new Properties(); } public static void initialize() throws FileNotFoundException, IOException { if (new File("conf/chemspot.cfg").exists()) { initialize("conf/chemspot.cfg"); } else if (new File("chemspot.cfg").exists()) { initialize("chemspot.cfg"); } } public static void initialize(String configFilePath) throws FileNotFoundException, IOException { initialize(configFilePath, true); } public static void initialize(String configFilePath, boolean overwrite) throws FileNotFoundException, IOException { initialize(new FileInputStream(configFilePath), overwrite); } public static void initialize(InputStream inStream, boolean overwrite) throws IOException { if (overwrite) { properties.load(inStream); } else { Properties temp = new Properties(); temp.load(inStream); temp.putAll(properties); properties = temp; } } public static String getProperty(String property) { String result = properties.getProperty(property); if (result != null) result = result.trim(); return result; } public static String getProperty(String property, String defaultValue) { return properties.getProperty(property, defaultValue); } public static String getPathToCorpus(Corpus corpus) { return getProperty(CORPUS_PREFIX + corpus); } public static String getOutputPath() { return getProperty(OUTPUT_PATH); } public static String getXMIOutputPath() { return getProperty(XMI_OUTPUT_PATH); } public static String getUpdateOutputPath() { return getProperty(UPDATE_PREFIX + OUTPUT_PATH); } public static boolean isConvertToIob() { return "true".equals(getProperty(CONVERT_TO_IOB)); } public static String getSentenceModelPath() { return getProperty(SENTENCE_MODEL); } public static String getCRFModelPath() { return getProperty(CRF_MODEL); } public static String getDictionaryPath() { return getProperty(DICTIONARY); } public static String getDictionaryUpdatePath() { return getProperty(UPDATE_PREFIX + DICTIONARY); } public static String getIdsFilePath() { return getProperty(IDS); } public static String getIdsFileUpdatePath() { return getProperty(UPDATE_PREFIX + IDS); } public static String getDrugModelPath() { return getProperty(DRUG_MODEL); } public static boolean isEvaluate() { return "true".equals(getProperty(EVALUATION)); } public static boolean isDetailedEvaluation() { return "true".equals(getProperty(DETAILED_EVALUATION)); } public static boolean useComponent(Component component) { String defaultValue = Arrays.asList(DEFAULT_DEACTIVATED).contains(component) ? "false" : "true"; return "true".equals(getProperty(COMPONENT_PREFIX + component.toString().toLowerCase(), defaultValue).toLowerCase().trim()); } public static boolean isAnnotate(ChemicalType type) { String defaultValue = Arrays.asList(DEFAULT_DEACTIVATED_ANNOTATIONS).contains(type) ? "false" : "true"; return "true".equals(getProperty(ANNOTATIONS_PREFIX + type.toString().toLowerCase(), defaultValue).toLowerCase().trim()); } public static boolean isAnnotateEumed(ChemicalType type) { String defaultValue = Arrays.asList(DEFAULT_DEACTIVATED_ANNOTATIONS_EUMED).contains(type) ? "false" : "true"; return isAnnotate(type) && "true".equals(getProperty(ANNOTATIONS_PREFIX_EUMED + type.toString().toLowerCase(), defaultValue).toLowerCase().trim()); } public static boolean initializeDictionaryFromNormalizer() { return "true".equals(getProperty(DICTIONARY_INITIALIZE_FROM_NORMALIZER, "false").toLowerCase()); } public static int getDictionaryFilterLength() { return Integer.parseInt(getProperty(DICTIONARY_FILTER_LENGTH, "-1").toLowerCase()); } public static boolean isThreading() { return "true".equals(getProperty(THREADING)); } public static int getNumberOfThreads() { try { return Integer.valueOf(getProperty(THREAD_NR, DEFAULT_THREAD_NR + "")); } catch (NumberFormatException e) { System.out.println("ERROR: value of property '" + THREAD_NR + "' is not a number. Using defalut value " + DEFAULT_THREAD_NR); return DEFAULT_THREAD_NR; } } public static boolean isUpdate(String s) { return "true".equals(getProperty(UPDATE_PREFIX + s.toLowerCase())); } public static URL getChEBISDFUpdateURL() { String urlString = getProperty(UPDATE_CHEBI_SDF_URL); try { return new URL(urlString); } catch (MalformedURLException e) { System.err.println("The ChEBI update URL '" + urlString + "' in your configuration file is not a valid url"); e.printStackTrace(); return null; } } public static boolean isChEBIUpdateMustContainFormula() { return "true".equals(getProperty(UPDATE_CHEBI_MUST_CONTAIN_FORMULA)); } public static URL getPubChemSDFUpdateURL() { String urlString = getProperty(UPDATE_PUBCHEM_SDF_URL); try { return new URL(urlString); } catch (MalformedURLException e) { System.err.println("The PubChem update URL '" + urlString + "' in your configuration file is not a valid url"); e.printStackTrace(); return null; } } public static int getPubChemMaxLength() { return Integer.parseInt(getProperty(UPDATE_PUBCHEM_MAX_LENGTH, "40")); } public static boolean isRemoveTemporaryUpdateFiles() { return "true".equals(getProperty(UPDATE_REMOVE_TEMPORARY_FILES, "false")); } }