package net.sf.hfst;
import java.io.DataInputStream;
import java.util.Vector;
import java.util.Hashtable;
/**
* On instantiation reads the transducer's alphabet and provides an interface
* to it. Flag diacritic parsing is also handled here.
*/
public class TransducerAlphabet
{
    /**
     * Printable form of every symbol, indexed by symbol number. Index 0
     * (epsilon) and every symbol that looks like a flag diacritic are stored
     * as the empty string.
     */
    public Vector<String> keyTable;

    /**
     * Parsed flag diacritic operations, keyed by the symbol number of the
     * flag diacritic they were read from.
     */
    public Hashtable<Integer, FlagDiacriticOperation> operations;

    /** Number of distinct flag diacritic feature names seen in the alphabet. */
    public Integer features;

    /**
     * Reads <code>number_of_symbols</code> NUL-terminated UTF-8 strings from
     * the stream and builds the key table and the flag diacritic operations.
     *
     * @param charstream stream positioned at the first symbol of the alphabet
     * @param number_of_symbols how many symbols to read; when it is zero or
     *        negative nothing is read and the tables stay empty
     * @throws java.io.IOException if the stream ends before all symbols have
     *         been read or another I/O error occurs
     */
    public TransducerAlphabet(DataInputStream charstream,
                              int number_of_symbols) throws java.io.IOException
    {
        keyTable = new Vector<String>();
        operations = new Hashtable<Integer, FlagDiacriticOperation>();
        // Feature names and values are replaced by small integers, handed
        // out in order of first appearance.
        Hashtable<String, Integer> feature_bucket = new Hashtable<String, Integer>();
        Hashtable<String, Integer> value_bucket = new Hashtable<String, Integer>();
        features = 0;
        Integer values = 1;
        value_bucket.put("", 0); // neutral value

        for (int i = 0; i < number_of_symbols; ++i) {
            String ustring = readSymbol(charstream);
            if (!looksLikeFlagDiacritic(ustring)) {
                keyTable.add(ustring);
                continue;
            }

            // Anything shaped like a flag diacritic has an empty printable
            // form, whether or not it turns out to be a valid operation.
            keyTable.add("");

            String[] parts = ustring.substring(1, ustring.length() - 1).split("\\.");
            if (parts.length < 2) {
                continue; // not a flag diacritic after all, ignore it
            }
            HfstOptimizedLookup.FlagDiacriticOperator op = parseOperator(parts[0]);
            if (op == null) {
                continue; // not a valid operator, ignore the operation
            }

            String feats = parts[1];
            // NOTE(review): only an exactly three-part flag carries a value;
            // with more than three parts the value is taken to be empty.
            String vals = (parts.length == 3) ? parts[2] : "";

            if (!value_bucket.containsKey(vals)) {
                value_bucket.put(vals, values);
                values++;
            }
            if (!feature_bucket.containsKey(feats)) {
                feature_bucket.put(feats, features);
                features++;
            }
            operations.put(i, new FlagDiacriticOperation(op,
                                                         feature_bucket.get(feats),
                                                         value_bucket.get(vals)));
        }

        // Epsilon is symbol zero. Guarded so that an empty alphabet does not
        // fail with an index-out-of-bounds error.
        if (!keyTable.isEmpty()) {
            keyTable.set(0, "");
        }
    }

    /**
     * Reads bytes up to (and consuming) the next NUL byte and decodes them as
     * UTF-8. The buffer grows as needed, so symbol length is not limited.
     */
    private static String readSymbol(DataInputStream charstream) throws java.io.IOException
    {
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        byte b;
        while ((b = charstream.readByte()) != 0) {
            buffer.write(b);
        }
        return buffer.toString("UTF-8");
    }

    /**
     * Tells whether a symbol has the outer shape of a flag diacritic:
     * longer than five characters, wrapped in '@', with '.' as third character.
     */
    private static boolean looksLikeFlagDiacritic(String symbol)
    {
        // NOTE(review): the "> 5" bound means a five-character symbol such as
        // "@D.X@" is kept as an ordinary symbol - confirm this is intended.
        return symbol.length() > 5
            && symbol.charAt(0) == '@'
            && symbol.charAt(symbol.length() - 1) == '@'
            && symbol.charAt(2) == '.';
    }

    /**
     * Maps an operator name (P, N, R, D, C or U) to its enum constant, or
     * returns null when the name is not one of those.
     */
    private static HfstOptimizedLookup.FlagDiacriticOperator parseOperator(String ops)
    {
        if (ops.equals("P")) {
            return HfstOptimizedLookup.FlagDiacriticOperator.P;
        } else if (ops.equals("N")) {
            return HfstOptimizedLookup.FlagDiacriticOperator.N;
        } else if (ops.equals("R")) {
            return HfstOptimizedLookup.FlagDiacriticOperator.R;
        } else if (ops.equals("D")) {
            return HfstOptimizedLookup.FlagDiacriticOperator.D;
        } else if (ops.equals("C")) {
            return HfstOptimizedLookup.FlagDiacriticOperator.C;
        } else if (ops.equals("U")) {
            return HfstOptimizedLookup.FlagDiacriticOperator.U;
        }
        return null;
    }
}