package net.sf.hfst;
//import java.io.DataInputStream;
import java.io.FileInputStream;
import net.sf.hfst.FormatException;
/**
* On instantiation reads the transducer's header and provides an interface
* to it.
*/
public class TransducerHeader
{
private int number_of_input_symbols;
private int number_of_symbols;
private int size_of_transition_index_table;
private int size_of_transition_target_table;
private int number_of_states;
private int number_of_transitions;
private Boolean weighted;
private Boolean deterministic;
private Boolean input_deterministic;
private Boolean minimized;
private Boolean cyclic;
private Boolean has_epsilon_epsilon_transitions;
private Boolean has_input_epsilon_transitions;
private Boolean has_input_epsilon_cycles;
private Boolean has_unweighted_input_epsilon_cycles;
private Boolean hfst3;
private Boolean intact;
/**
* Read in the (56 bytes of) header information, which unfortunately
* is mostly in little-endian unsigned form.
*/
public TransducerHeader(FileInputStream file) throws java.io.IOException, FormatException
{
hfst3 = false;
intact = true; // could add some checks to toggle this and check outside
ByteArray head = new ByteArray(5);
file.read(head.getBytes());
if (begins_hfst3_header(head)) {
read_hfst3_header(file);
file.read(head.getBytes());
hfst3 = true;
}
ByteArray b = new ByteArray(head, 56);
file.read(b.getBytes(), 5, 51);
number_of_input_symbols = b.getUShort();
number_of_symbols = b.getUShort();
size_of_transition_index_table = (int) b.getUInt();
size_of_transition_target_table = (int) b.getUInt();
number_of_states = (int) b.getUInt();
number_of_transitions = (int) b.getUInt();
weighted = b.getBool();
deterministic = b.getBool();
input_deterministic = b.getBool();
minimized = b.getBool();
cyclic = b.getBool();
has_epsilon_epsilon_transitions = b.getBool();
has_input_epsilon_transitions = b.getBool();
has_input_epsilon_cycles = b.getBool();
has_unweighted_input_epsilon_cycles = b.getBool();
}
public Boolean begins_hfst3_header(ByteArray bytes)
{
if (bytes.getSize() < 5) {
return false;
}
// HFST\0
return (bytes.getUByte() == 72 && bytes.getUByte() == 70 &&
bytes.getUByte() == 83 && bytes.getUByte() == 84 &&
bytes.getUByte() == 0);
}
public void read_hfst3_header(FileInputStream file) throws java.io.IOException, FormatException
{
// The only thing we really check is that the format begins with
// HFST_OL. First we read the two bytes giving the header size...
ByteArray len = new ByteArray(2);
file.read(len.getBytes());
// Then we read the rest...
ByteArray header = new ByteArray(len.getUShort() + 1);
file.read(header.getBytes());
// And just convert it to a String and see if the type is set to what we want
String s = new String(header.getBytes(), "UTF-8");
if (s.indexOf("type\0HFST_OL") == -1) {
throw new FormatException();
}
}
public int getInputSymbolCount()
{ return number_of_input_symbols; }
public int getSymbolCount()
{ return number_of_symbols; }
public int getIndexTableSize()
{ return size_of_transition_index_table; }
public int getTargetTableSize()
{ return size_of_transition_target_table; }
public Boolean isWeighted()
{ return weighted; }
public Boolean hasHfst3Header()
{ return hfst3; }
public Boolean isIntact()
{ return intact; }
}