package edu.harvard.wcfia.yoshikoder.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.JOptionPane;
import javax.swing.JScrollPane;
import javax.swing.JTree;
import edu.harvard.wcfia.yoshikoder.dictionary.CategoryNode;
import edu.harvard.wcfia.yoshikoder.dictionary.CategoryNodeImpl;
import edu.harvard.wcfia.yoshikoder.dictionary.DuplicateException;
import edu.harvard.wcfia.yoshikoder.dictionary.PatternEngine;
import edu.harvard.wcfia.yoshikoder.dictionary.PatternNode;
import edu.harvard.wcfia.yoshikoder.dictionary.PatternNodeImpl;
import edu.harvard.wcfia.yoshikoder.dictionary.SimpleDictionary;
import edu.harvard.wcfia.yoshikoder.dictionary.YKDictionary;
public class VBProFileParser {
public class BadPattern {
boolean isDuplicate;
int line;
String pattern;
BadPattern(int errorLine, String errorPattern, boolean duplicate){
line=errorLine;
pattern = errorPattern;
isDuplicate = duplicate;
}
public String toString(){
return pattern + " (line " + line + ") " +
(isDuplicate ? "is a duplicate" : "could not be compiled");
}
}
protected Pattern arrows;
protected List errors;
public VBProFileParser(){
arrows = Pattern.compile("^\\>+(.+?)\\<+$");
}
public YKDictionary parse(File f, String enc) throws IOException {
String s = FileUtil.slurp(f, enc);
YKDictionary dict = parse(s);
return dict;
}
public YKDictionary parse(String s) {
errors = new ArrayList();
YKDictionary dict = new SimpleDictionary();
String newname = "Imported VBPro Dictionary";
dict.getDictionaryRoot().setName(newname);
PatternEngine rengine = dict.getPatternEngine();
dict.setName(newname);
BufferedReader in = new BufferedReader(new StringReader(s));
String line;
int ii=0;
CategoryNode cat = null; // the current category
int lineNumber = 0;
try {
while ((line = in.readLine()) != null) {
lineNumber++;
String trimmed = line.trim().toLowerCase();
if (trimmed.startsWith(">") && trimmed.endsWith("<")) {
// a category
String categoryName = stripArrows(trimmed);
if (categoryName.length() == 0) {
categoryName = "Entry_" + ii;
ii++;
}
cat = new CategoryNodeImpl(categoryName);
try {
dict.addCategory(cat, dict.getDictionaryRoot());
} catch (DuplicateException de){
errors.add(new BadPattern(lineNumber, trimmed, true));
}
} else if (trimmed.length() > 0) {
// no need to fix these for a SubString match-using dictionary
//String fixed = fixVBProWildcards(trimmed);
try {
Pattern regexp = rengine.makeRegexp(trimmed);
PatternNode pattern =
new PatternNodeImpl(trimmed, null, regexp);
dict.addPattern(pattern, cat);
} catch (DuplicateException de){
errors.add(new BadPattern(lineNumber, trimmed, true));
} catch (Exception re) {
errors.add(new BadPattern(lineNumber, trimmed, false));
}
} else {
// a blank line...
}
}
in.close(); // also strictly redundant
} catch (IOException ioe){
// redundant io catch block (we're a string!)
}
return dict;
}
public List getErrors(){
return errors;
}
protected String stripArrows(String name){
Matcher m = arrows.matcher(name);
if (m.matches())
return m.group(1);
else
return name;
}
public static void main(String[] args) {
VBProFileParser parser = new VBProFileParser();
File f = new File("/Users/will/Desktop/testfile.txt");
try {
YKDictionary dict = parser.parse(f, "GBK");
JOptionPane.showMessageDialog(null, new JScrollPane(new JTree(dict)));
} catch (IOException ioe){
ioe.printStackTrace();
}
for (Iterator iter = parser.getErrors().iterator(); iter.hasNext();) {
VBProFileParser.BadPattern bp = (VBProFileParser.BadPattern) iter.next();
System.out.println(bp);
}
System.exit(0);
}
}