package org.myrobotlab.document.transformer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.myrobotlab.document.Document;
import org.myrobotlab.logging.LoggerFactory;
import org.myrobotlab.string.StringUtil;
import org.slf4j.Logger;
/**
 * Pipeline stage that looks up every value of an input field in a dictionary
 * and copies the matching dictionary columns into the configured output
 * fields.
 *
 * <p>
 * Configuration properties read in {@link #startStage(StageConfiguration)}:
 * <ul>
 * <li>{@code inputField} - field whose values are used as dictionary keys
 * (default {@code "text"})</li>
 * <li>{@code outputFields} - list of fields that receive the dictionary
 * columns; the i-th output field receives the i-th column</li>
 * <li>{@code outputField} - single output field, only consulted when
 * {@code outputFields} is not set</li>
 * <li>{@code dictionaryFile} - dictionary to load (default
 * {@code "mydict.csv"})</li>
 * </ul>
 */
public class DictionaryLookup extends AbstractStage {

  public final static Logger log = LoggerFactory.getLogger(DictionaryLookup.class.getCanonicalName());

  /** Name of the field whose values are looked up in the dictionary. */
  private String inputField;
  /** Output field names; index i receives dictionary column i. */
  private List<String> outputFields;
  /** Name of the dictionary handed to the DictionaryLoader. */
  private String dictionaryFile;
  /** Lookup key to dictionary columns. */
  private HashMap<String, List<String>> dictionary;
  /** Value written when a matched row has no usable value for a column. */
  private String defaultValue = "Unknown";

  /**
   * Reads the stage configuration and loads the dictionary.
   *
   * <p>
   * A missing configuration, missing output fields or a dictionary that fails
   * to load never leave the stage in a state where
   * {@link #processDocument(Document)} would throw: the stage falls back to an
   * empty output field list and/or an empty dictionary and logs a warning.
   *
   * @param config
   *          the stage configuration; may be {@code null}
   */
  @Override
  public void startStage(StageConfiguration config) {
    if (config != null) {
      inputField = config.getProperty("inputField", "text");
      outputFields = config.getListParam("outputFields");
      dictionaryFile = config.getProperty("dictionaryFile", "mydict.csv");
      if (outputFields == null) {
        // fall back to the single valued "outputField" property
        String outputField = config.getProperty("outputField");
        if (!StringUtil.isEmpty(outputField)) {
          outputFields = new ArrayList<String>();
          outputFields.add(outputField);
        }
      }
    }
    if (outputFields == null) {
      // nothing configured: keep an empty list so processing is a no-op
      // instead of a NullPointerException
      log.warn("No output fields configured for dictionary lookup on field {}", inputField);
      outputFields = new ArrayList<String>();
    }
    if (dictionaryFile == null) {
      log.warn("No dictionary file configured, no lookups will be performed");
    } else {
      try {
        dictionary = DictionaryLoader.getInstance().loadDictionary(dictionaryFile);
      } catch (IOException e) {
        // the trailing throwable makes the logger record the stack trace
        log.warn("Error loading dictionary {} IOException {}", dictionaryFile, e.getMessage(), e);
      }
    }
    if (dictionary == null) {
      // loading failed or was skipped: use an empty dictionary so every lookup
      // simply misses
      dictionary = new HashMap<String, List<String>>();
    }
  }

  /**
   * Looks up each value of the input field and appends the matching dictionary
   * columns to the output fields.
   *
   * <pre>
   * input field values: I1, X1, I2, Y1, I3
   * output fields:      out1, out2, out3
   * dictionary:         I1 -> A1, B1, C1
   *                     I2 -> A2, B2, C2
   *                     I3 -> A3, B3, C3
   *
   * result:             out1: A1, A2, A3
   *                     out2: B1, B2, B3
   *                     out3: C1, C2, C3
   * </pre>
   *
   * Input values without a dictionary entry (X1, Y1 above) are skipped. When a
   * matched row has an empty value for a column, or has fewer columns than
   * there are output fields, the default value is written instead.
   *
   * @param doc
   *          the document to enrich in place
   * @return always {@code null}; this stage does not emit child documents
   */
  @Override
  public List<Document> processDocument(Document doc) {
    if (inputField == null || outputFields == null || dictionary == null) {
      // stage was never (successfully) configured, nothing to do
      return null;
    }
    if (!doc.hasField(inputField)) {
      return null;
    }
    // collect all matching rows first: the input field may be removed below
    // when it is also used as an output field
    List<List<String>> matchedRows = new ArrayList<List<String>>();
    for (Object value : doc.getField(inputField)) {
      if (value == null) {
        continue;
      }
      List<String> row = dictionary.get(value.toString());
      if (row != null) {
        matchedRows.add(row);
      }
    }
    for (int col = 0; col < outputFields.size(); col++) {
      String outputField = outputFields.get(col);
      if (inputField.equals(outputField)) {
        // in-place lookup: replace the original values with the looked up ones
        doc.removeField(outputField);
      }
      for (List<String> row : matchedRows) {
        // rows may be shorter than the number of output fields; treat a
        // missing column like an empty value rather than failing
        String val = col < row.size() ? row.get(col) : null;
        if (!StringUtil.isEmpty(val)) {
          doc.addToField(outputField, val);
        } else if (defaultValue != null) {
          doc.addToField(outputField, defaultValue);
        }
      }
    }
    // this stage doesn't emit child docs.
    return null;
  }

  /** No resources to release. */
  @Override
  public void stopStage() {
  }

  /** Nothing is buffered, so there is nothing to flush. */
  @Override
  public void flush() {
  }
}