package org.myrobotlab.document.transformer;

import java.util.List;
import java.util.Set;

import org.myrobotlab.document.Document;

/**
 * This stage renames the fields of a document so that the field names are
 * lowercase and use underscores in place of spaces, slashes and hyphens. Runs
 * of those characters (and of underscores) collapse to a single underscore.
 * This is useful to make the field names search engine (solr) friendly.
 *
 * @author kwatters
 *
 */
public class NormalizeFieldNames extends AbstractStage {

  /**
   * Regex matching one or more consecutive separator characters: space,
   * forward slash, underscore or hyphen. The hyphen is last in the character
   * class so it is taken literally.
   */
  private static final String SEPARATOR_RUN = "[ /_-]+";

  /**
   * No configuration is read by this stage.
   *
   * @param config
   *          the stage configuration (currently unused)
   */
  @Override
  public void startStage(StageConfiguration config) {
    // nothing to configure yet.
  }

  /**
   * Renames every field on the document to its normalized form.
   *
   * @param doc
   *          the document whose fields are renamed in place
   * @return always {@code null}; this stage only modifies the document it was
   *         given
   */
  @Override
  public List<Document> processDocument(Document doc) {
    Set<String> liveFieldNames = doc.getFields();
    // Iterate over a snapshot: getFields() may hand back a live view of the
    // document's fields, and renaming while iterating such a view can throw
    // ConcurrentModificationException or skip/revisit entries.
    String[] fieldNames = liveFieldNames.toArray(new String[0]);
    for (String fieldName : fieldNames) {
      String normalized = normalizeFieldName(fieldName);
      // Nothing to do when the name is already normalized; avoids a
      // pointless (and possibly destructive) rename of a field onto itself.
      if (!normalized.equals(fieldName)) {
        // NOTE(review): if two fields normalize to the same name, the outcome
        // depends on how renameField handles an existing target - confirm.
        doc.renameField(fieldName, normalized);
      }
    }
    return null;
  }

  /**
   * Produces the normalized form of a field name: runs of spaces, slashes,
   * hyphens and underscores become a single underscore and the result is
   * lowercased.
   *
   * @param fieldName
   *          the original field name
   * @return the normalized field name
   */
  private String normalizeFieldName(String fieldName) {
    // TODO: better field name normalization..
    String collapsed = fieldName.replaceAll(SEPARATOR_RUN, "_");
    // Locale.ROOT keeps the result independent of the JVM default locale
    // (e.g. a Turkish locale would otherwise map "I" to a dotless "i").
    return collapsed.toLowerCase(java.util.Locale.ROOT);
  }

  /**
   * Nothing to release; this stage holds no resources.
   */
  @Override
  public void stopStage() {
    // no-op
  }

  /**
   * Nothing to flush; this stage does not buffer documents.
   */
  @Override
  public void flush() {
    // no-op
  }

}