package net.sf.hfst;
import java.io.FileInputStream;
import java.io.DataInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Collection;
import net.sf.hfst.Transducer;
import net.sf.hfst.UnweightedTransducer;
import net.sf.hfst.WeightedTransducer;
import net.sf.hfst.NoTokenizationException;
import net.sf.hfst.FormatException;
/**
* HfstRuntimeReader takes a transducer (the name of which should
* be the first argument) of its own format (these can be generated with
* eg. hfst-runtime-convert) and reads one word at a time from standard
* input; output is a newline-separated list of analyses.
*
* This is essentially a Java port of hfst-runtime-reader
* written by Miikka Silfverberg in C++.
*
* @author sam.hardwick@iki.fi
*
*/
public class HfstOptimizedLookup {
public final static long TRANSITION_TARGET_TABLE_START = 2147483648l; // 2^31 or UINT_MAX/2 rounded up
public final static long NO_TABLE_INDEX = 4294967295l;
public final static float INFINITE_WEIGHT = (float) 4294967295l; // this is hopefully the same as
// static_cast<float>(UINT_MAX) in C++
public final static int NO_SYMBOL_NUMBER = 65535; // this is USHRT_MAX
public static enum FlagDiacriticOperator {P, N, R, D, C, U};
public static void runTransducer(Transducer t) {
System.out.println("Ready for input.");
BufferedReader stdin = new BufferedReader(new InputStreamReader(
System.in));
String str;
while (true) {
try {
str = stdin.readLine();
} catch (IOException e) {
break;
}
try {
Collection<String> analyses = t.analyze(str);
for (String analysis : analyses) {
System.out.println(str + "\t" + analysis);
}
if (analyses.isEmpty()) {
System.out.println(str + "\t+?");
}
} catch (NoTokenizationException e) {
// System.out.println(e.message());
System.out.println(str + "\t+?");
}
System.out.println();
}
}
public static void main(String[] argv) throws IOException
{
if (argv.length != 1)
{
System.err.println("Usage: java HfstRuntimeReader FILE");
System.exit(1);
}
FileInputStream transducerfile = null;
try
{ transducerfile = new FileInputStream(argv[0]); }
catch (java.io.FileNotFoundException e)
{
System.err.println("File not found: couldn't read transducer file " + argv[0] + ".");
System.exit(1);
}
System.out.println("Reading header...");
TransducerHeader h = null;
try { h = new TransducerHeader(transducerfile); }
catch (FormatException e) {
System.err.println("File must be in hfst optimized-lookup format");
System.exit(1);
}
DataInputStream charstream = new DataInputStream(transducerfile);
System.out.println("Reading alphabet...");
TransducerAlphabet a = new TransducerAlphabet(charstream, h.getSymbolCount());
System.out.println("Reading transition and index tables...");
if (h.isWeighted())
{
Transducer transducer = new WeightedTransducer(transducerfile, h, a);
runTransducer(transducer);
} else
{
Transducer transducer = new UnweightedTransducer(transducerfile, h, a);
runTransducer(transducer);
}
}
}