/**
*
*/
package org.voyanttools.trombone.nlp;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
//import de.tudarmstadt.ukp.dkpro.core.matetools.MateLemmatizer;
//import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer;
//import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordSegmenter;
//import de.tudarmstadt.ukp.dkpro.core.treetagger.TreeTaggerPosTagger;
/**
* The primary purpose of this factory class is to store reusable language models and data for NLP operations.
* Methods are synchronized to avoid multiple concurrent loading of data.
*
* @author Stéfan Sinclair
*/
public class NlpFactory {

    /** Cache of full NLP annotators, keyed by language code. */
    private Map<String, NlpAnnotator> nlpAnnotators = new HashMap<String, NlpAnnotator>();

    // Cache for minimalist lemmatization engines; disabled together with the
    // commented-out getLemmatizationAnalysisEngine method at the end of this class.
    //private Map<String, AnalysisEngine> lemmatizationAnalysisEngines = new HashMap<String, AnalysisEngine>();

    /** Cache of OpenNLP annotators, keyed by language code. */
    private Map<String, OpenNlpAnnotator> openNlpAnnotators = new HashMap<String, OpenNlpAnnotator>();

    /**
     * Get an {@link NlpAnnotator} for the specified language.
     *
     * The first request for a language code constructs a {@link StanfordNlpAnnotator}
     * and stores it; later requests for the same code return that stored instance.
     * The method is synchronized so that concurrent callers do not build the same
     * annotator more than once.
     *
     * @param languageCode the language code used both as cache key and as the annotator's constructor argument
     * @return the cached (or newly created) annotator for the language
     */
    public synchronized NlpAnnotator getNlpAnnotator(String languageCode) {
        NlpAnnotator cached = nlpAnnotators.get(languageCode);
        if (cached != null) {
            return cached;
        }
        // Only non-null instances are ever stored, so a null lookup means "not created yet".
        NlpAnnotator created = new StanfordNlpAnnotator(languageCode);
        nlpAnnotators.put(languageCode, created);
        return created;
    }

    /**
     * Get an {@link OpenNlpAnnotator} for the specified language.
     *
     * The first request for a language code constructs the annotator and stores it;
     * later requests for the same code return that stored instance. Nothing is cached
     * when construction fails. The method is synchronized so that concurrent callers
     * do not build the same annotator more than once.
     *
     * @param languageCode the language code used both as cache key and as the annotator's constructor argument
     * @return the cached (or newly created) annotator for the language
     * @throws IOException propagated from the {@link OpenNlpAnnotator} constructor
     */
    public synchronized OpenNlpAnnotator getOpenNlpAnnotator(String languageCode) throws IOException {
        OpenNlpAnnotator cached = openNlpAnnotators.get(languageCode);
        if (cached != null) {
            return cached;
        }
        // Only non-null instances are ever stored, so a null lookup means "not created yet".
        OpenNlpAnnotator created = new OpenNlpAnnotator(languageCode);
        openNlpAnnotators.put(languageCode, created);
        return created;
    }

    /*
     * DISABLED: the method below is commented out; its original documentation is kept for reference.
     *
     * Get a UIMA AnalysisEngine for the specified language (or null if the language isn't supported). This is
     * optimized for lemmatization only, as opposed to a full NLP annotation (including part-of-speech, etc.).
     *
     * At the moment this uses Mate Tools
     * (http://www.ims.uni-stuttgart.de/forschung/ressourcen/werkzeuge/matetools.en.html)
     * in a DKPro pipeline
     * (https://dkpro.github.io/dkpro-core/releases/1.8.0/docs/component-reference.html#engine-MateLemmatizer).
     *
     * param languageCode: the language of the document (currently supported: en, fr, es, de)
     * return: a UIMA AnalysisEngine for the specified language
     */
    // public synchronized AnalysisEngine getLemmatizationAnalysisEngine(String languageCode) {
    // if (languageCode.equals("en") || languageCode.equals("fr") || languageCode.equals("es") || languageCode.equals("de")) {
    // if (lemmatizationAnalysisEngines.containsKey(languageCode)==false) {
    // AnalysisEngine engine;
    // try {
    // AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(ICUSegmenter.class, new Object[0]);
    //// AnalysisEngineDescription lemmatizer = AnalysisEngineFactory.createEngineDescription(MateLemmatizer.class, new Object[0]);
    // AnalysisEngineDescription engineDescription = AnalysisEngineFactory. createEngineDescription(segmenter/*, lemmatizer*/);
    // engine = AnalysisEngineFactory.createEngine(engineDescription);
    // lemmatizationAnalysisEngines.put(languageCode, engine);
    // } catch (ResourceInitializationException e) {
    // throw new RuntimeException("Unable to initialize a needed analysis engine during lemmatization.", e);
    // }
    // }
    // return lemmatizationAnalysisEngines.get(languageCode);
    // } else {
    // return null;
    // }
    // }
}