//
// TextReader.java
//
/*
OME Bio-Formats package for reading and converting biological file formats.
Copyright (C) 2005-@year@ UW-Madison LOCI and Glencoe Software, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package loci.formats.in;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import loci.common.DataTools;
import loci.common.IRandomAccess;
import loci.common.Location;
import loci.common.RandomAccessInputStream;
import loci.formats.FormatException;
import loci.formats.FormatReader;
import loci.formats.FormatTools;
import loci.formats.MetadataTools;
import loci.formats.meta.MetadataStore;
/**
* Reader for text files containing tables of data. All image planes
* are stored in memory as 32-bit floats until the file is closed,
* so very large text documents will require commensurate available RAM.
*
* Text format is flexible, but assumed to be in tabular form with a consistent
* number of columns, and a labeled header line immediately preceding the data.
*
* <dl><dt><b>Source code:</b></dt>
* <dd><a href="http://trac.openmicroscopy.org.uk/ome/browser/bioformats.git/components/bio-formats/src/loci/formats/in/TextReader.java">Trac</a>,
* <a href="http://git.openmicroscopy.org/?p=bioformats.git;a=blob;f=components/bio-formats/src/loci/formats/in/TextReader.java;hb=HEAD">Gitweb</a></dd></dl>
*
* @author Curtis Rueden ctrueden at wisc.edu
*/
public class TextReader extends FormatReader {
// -- Constants --
private static final boolean LITTLE_ENDIAN = false;
private static final String LABEL_X = "x";
private static final String LABEL_Y = "y";
/** How often to report progress during initialization, in milliseconds. */
private static final long TIME_OFFSET = 2000;
// -- Fields --
/**
* Because we have no way of indexing into the text file efficiently
* in general, we cheat and store the entire file's data in a giant array.
*/
private float[][] data;
/** Current row number. */
private int row;
/** Number of tokens per row. */
private int rowLength;
/** Column index for X coordinate. */
private int xIndex = -1;
/** Column index for Y coordinate. */
private int yIndex = -1;
/** List of channel labels. */
private String[] channels;
/** Image width. */
private int sizeX;
/** Image height. */
private int sizeY;
// -- Constructor --
/** Constructs a new text reader. */
public TextReader() {
super("Text", new String[] {"txt", "csv"});
suffixSufficient = false;
}
// -- TextReader methods --
/** Gets the label for the given channel. */
public String getChannelLabel(int c) {
FormatTools.assertId(currentId, true, 1);
return channels[c];
}
// -- IFormatReader methods --
/* @see IFormatReader#isThisType(RandomAccessInputStream) */
public boolean isThisType(RandomAccessInputStream stream) throws IOException {
final int blockLen = 8192;
if (!FormatTools.validStream(stream, blockLen, false)) return false;
row = 0;
String data = stream.readString(blockLen);
List<String> lines = Arrays.asList(data.split("\n"));
String[] line = getNextLine(lines);
if (line == null) return false;
int headerRows = 0;
try {
headerRows = parseFileHeader(lines);
}
catch (FormatException e) { }
return headerRows > 0;
}
/* @see IFormatReader#openBytes(int, byte[], int, int, int, int) */
@Override
public byte[] openBytes(int no, byte[] buf, int x, int y, int w, int h)
throws FormatException, IOException
{
FormatTools.checkPlaneParameters(this, no, buf.length, x, y, w, h);
// copy floating point data into byte buffer
final float[] plane = data[no];
int q = 0;
for (int j=0; j<h; j++) {
final int yy = y + j;
for (int i=x; i<w; i++) {
final int xx = x + i;
final int index = yy * sizeX + xx;
final int bits = Float.floatToIntBits(plane[index]);
DataTools.unpackBytes(bits, buf, q, 4, LITTLE_ENDIAN);
q += 4;
}
}
return buf;
}
/* @see IFormatReader#openPlane(int, int, int, int, int int) */
@Override
public Object openPlane(int no, int x, int y, int w, int h)
throws FormatException, IOException
{
FormatTools.assertId(currentId, true, 1);
return data[no];
}
/* @see IFormatReader#close(boolean) */
@Override
public void close(boolean fileOnly) throws IOException {
super.close(fileOnly);
if (!fileOnly) {
data = null;
rowLength = 0;
xIndex = yIndex = -1;
channels = null;
sizeX = sizeY = 0;
row = 0;
}
}
// -- IFormatHandler methods --
/* @see IFormatHandler#getNativeDataType() */
@Override
public Class<?> getNativeDataType() {
return float[].class;
}
// -- Internal FormatReader API methods --
/* @see loci.formats.FormatReader#initFile(String) */
protected void initFile(String id) throws FormatException, IOException {
super.initFile(id);
// read file into memory
LOGGER.info("Reading file");
List<String> lines = readFile(id);
// parse file header
LOGGER.info("Parsing file header");
final int headerRows = parseFileHeader(lines);
LOGGER.info("Creating images");
// allocate memory for image data
final int sizeZ = 1, sizeT = 1; // no Z or T for now
final int sizeC = channels.length;
final int imageCount = sizeZ * sizeC * sizeT;
final int planeSize = sizeX * sizeY;
data = new float[imageCount][planeSize];
// flag all values as missing by default
for (int i=0; i<imageCount; i++) Arrays.fill(data[i], Float.NaN);
// read data into float array
parseTableData(lines, headerRows);
LOGGER.info("Populating metadata");
// populate core metadata
populateCoreMetadata(sizeX, sizeY, sizeZ, sizeC, sizeT);
// populate OME metadata
MetadataStore store = makeFilterMetadata();
MetadataTools.populatePixels(store, this);
}
// -- Helper methods --
private List<String> readFile(String id) throws IOException {
List<String> lines = new ArrayList<String>();
long time = System.currentTimeMillis();
IRandomAccess handle = Location.getMappedFile(id);
if (handle == null) {
// HACK: Read using vanilla BufferedReader, since it's faster.
String mapId = Location.getMappedId(id);
BufferedReader in = new BufferedReader(new FileReader(mapId));
int no = 0;
while (true) {
no++;
time = checkTime(time, no, 0, 0);
String line = in.readLine();
if (line == null) break; // eof
lines.add(line);
}
in.close();
}
else {
// read data using RandomAccessInputStream (data may not be a file)
RandomAccessInputStream in = new RandomAccessInputStream(handle);
int no = 0;
while (true) {
no++;
time = checkTime(time, no, in.getFilePointer(), in.length());
String line = in.readLine();
if (line == null) break; // eof
lines.add(line);
}
in.close();
}
return lines;
}
/**
* Parses the file looking for the file header.
* Determines image extents (sets sizeX and sizeY).
* Determines channel names (populates channels array).
*
* @return number of rows in the header
*/
private int parseFileHeader(List<String> lines) throws FormatException {
String[] lastTokens = null;
double[] rowData = null;
while (true) {
String[] tokens = getNextLine(lines);
if (tokens == null) throw new FormatException("No tabular data found");
if (tokens.length >= 3 && // need at least 3 columns of data
lastTokens != null && lastTokens.length == tokens.length)
{
// consistent number of tokens; might be the header and first data row
// allocate rowData as needed
if (rowData == null || rowData.length != tokens.length) {
rowData = new double[tokens.length];
}
// try to parse the first data row
if (getRowData(tokens, rowData)) {
LOGGER.info("Found header on line " + (row - 1));
// looks like tabular data; assume previous line is the header
parseHeaderRow(lastTokens);
break;
}
}
lastTokens = tokens;
}
final int headerRows = row - 1;
if (xIndex < 0) throw new FormatException("No X coordinate column found");
if (yIndex < 0) throw new FormatException("No Y coordinate column found");
// search remainder of tabular data for X and Y extents
boolean checkRow = true;
while (true) {
if (checkRow) {
// expand dimensional extents as needed
int x = getX(rowData);
if (x < 0) {
throw new FormatException("Row #" + row + ": invalid X: " + x);
}
if (sizeX <= x) sizeX = x + 1;
int y = getY(rowData);
if (y < 0) {
throw new FormatException("Row #" + row + ": invalid Y: " + x);
}
if (sizeY <= y) sizeY = y + 1;
}
// parse next row
String[] tokens = getNextLine(lines);
if (tokens == null) break; // eof
checkRow = getRowData(tokens, rowData);
}
return headerRows;
}
/** Reads the tabular data into the data array. */
private void parseTableData(List<String> lines, int linesToSkip) {
row = linesToSkip; // skip header lines
double[] rowData = new double[rowLength];
while (true) {
String[] tokens = getNextLine(lines);
if (tokens == null) break; // eof
if (tokens.length != rowLength) {
LOGGER.warn("Ignoring deviant row #" + row);
continue;
}
// parse values from row
boolean success = getRowData(tokens, rowData);
if (!success) {
LOGGER.warn("Ignoring non-numeric row #" + row);
continue;
}
// copy values into array
assignValues(rowData);
}
}
/** Populates the {@link CoreMetadata} values. */
private void populateCoreMetadata(int sizeX, int sizeY,
int sizeZ, int sizeC, int sizeT)
{
core[0].sizeX = sizeX;
core[0].sizeY = sizeY;
core[0].sizeZ = sizeZ;
core[0].sizeC = sizeC;
core[0].sizeT = sizeT;
core[0].pixelType = FormatTools.FLOAT;
core[0].bitsPerPixel = 32;
core[0].imageCount = sizeZ * sizeC * sizeT;
core[0].dimensionOrder = "XYZCT";
core[0].orderCertain = true;
core[0].littleEndian = LITTLE_ENDIAN;
core[0].metadataComplete = true;
}
/**
* Parses numerical row data from the given tokens.
*
* @param tokens list of token strings to parse
* @param rowData array to fill in with the data; length must match tokens
*
* @return true if the data could be parsed
*/
private boolean getRowData(String[] tokens, double[] rowData) {
try {
for (int i=0; i<tokens.length; i++) {
rowData[i] = Double.parseDouble(tokens[i]);
}
return true;
}
catch (NumberFormatException exc) {
// not a data row
return false;
}
}
/** Populates rowLength, xIndex, yIndex, and channels. */
private void parseHeaderRow(String[] tokens) {
rowLength = tokens.length;
List<String> channelsList = new ArrayList<String>();
for (int i=0; i<rowLength; i++) {
String token = tokens[i];
if (token.equals(LABEL_X)) xIndex = i;
else if (token.equals(LABEL_Y)) yIndex = i;
else {
// treat column as a channel
channelsList.add(token);
}
}
channels = channelsList.toArray(new String[0]);
}
/** Assigns values from the given row into the data array. */
private void assignValues(double[] rowData) {
int x = getX(rowData);
int y = getY(rowData);
int c = 0;
int index = sizeX * y + x;
for (int i=0; i<rowLength; i++) {
if (i == xIndex || i == yIndex) continue;
data[c++][index] = (float) rowData[i];
}
}
private long checkTime(long time, int no, long pos, long len) {
long t = System.currentTimeMillis();
if (t - time > TIME_OFFSET) {
// some time has passed; report progress
if (len > 0) {
int percent = (int) (100 * pos / len);
LOGGER.info("Reading line " + no + " (" + percent + "%)");
}
else LOGGER.info("Reading line " + no);
time = t;
}
return time;
}
private int getX(double[] rowData) { return (int) rowData[xIndex]; }
private int getY(double[] rowData) { return (int) rowData[yIndex]; }
private String[] getNextLine(List<String> lines) {
while (true) {
if (row >= lines.size()) return null; // end of list
String line = lines.get(row++);
line = line.trim();
if (line.equals("")) continue; // skip blank lines
return line.split("[\\s,]");
}
}
}