//
// TextReader.java
//

/*
OME Bio-Formats package for reading and converting biological file formats.
Copyright (C) 2005-@year@ UW-Madison LOCI and Glencoe Software, Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

package loci.formats.in;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import loci.common.DataTools;
import loci.common.IRandomAccess;
import loci.common.Location;
import loci.common.RandomAccessInputStream;
import loci.formats.FormatException;
import loci.formats.FormatReader;
import loci.formats.FormatTools;
import loci.formats.MetadataTools;
import loci.formats.meta.MetadataStore;

/**
 * Reader for text files containing tables of data. All image planes
 * are stored in memory as 32-bit floats until the file is closed,
 * so very large text documents will require commensurate available RAM.
 *
 * Text format is flexible, but assumed to be in tabular form with a consistent
 * number of columns, and a labeled header line immediately preceding the data.
 * One column must be labeled "x" and one "y"; every other column is treated
 * as a channel.
 *
 * <dl><dt><b>Source code:</b></dt>
 * <dd><a href="http://trac.openmicroscopy.org.uk/ome/browser/bioformats.git/components/bio-formats/src/loci/formats/in/TextReader.java">Trac</a>,
 * <a href="http://git.openmicroscopy.org/?p=bioformats.git;a=blob;f=components/bio-formats/src/loci/formats/in/TextReader.java;hb=HEAD">Gitweb</a></dd></dl>
 *
 * @author Curtis Rueden ctrueden at wisc.edu
 */
public class TextReader extends FormatReader {

  // -- Constants --

  /** Byte order used when packing float samples into byte buffers. */
  private static final boolean LITTLE_ENDIAN = false;

  /** Header label identifying the X coordinate column. */
  private static final String LABEL_X = "x";

  /** Header label identifying the Y coordinate column. */
  private static final String LABEL_Y = "y";

  /** How often to report progress during initialization, in milliseconds. */
  private static final long TIME_OFFSET = 2000;

  // -- Fields --

  /**
   * Because we have no way of indexing into the text file efficiently
   * in general, we cheat and store the entire file's data in a giant array.
   * Indexed as {@code data[plane][y * sizeX + x]}.
   */
  private float[][] data;

  /** Current row number (cursor into the list of lines being parsed). */
  private int row;

  /** Number of tokens per row. */
  private int rowLength;

  /** Column index for X coordinate. */
  private int xIndex = -1;

  /** Column index for Y coordinate. */
  private int yIndex = -1;

  /** List of channel labels. */
  private String[] channels;

  /** Image width. */
  private int sizeX;

  /** Image height. */
  private int sizeY;

  // -- Constructor --

  /** Constructs a new text reader. */
  public TextReader() {
    super("Text", new String[] {"txt", "csv"});
    suffixSufficient = false;
  }

  // -- TextReader methods --

  /**
   * Gets the label for the given channel.
   *
   * @param c index into the list of channel labels
   * @return the header label of that channel's column
   */
  public String getChannelLabel(int c) {
    FormatTools.assertId(currentId, true, 1);
    return channels[c];
  }

  // -- IFormatReader methods --

  /**
   * Checks whether the first block of the stream looks like a labeled table
   * with X and Y columns. The reader's own parsing state is left exactly as
   * it was before the call.
   *
   * @see IFormatReader#isThisType(RandomAccessInputStream)
   */
  public boolean isThisType(RandomAccessInputStream stream)
    throws IOException
  {
    final int blockLen = 8192;
    if (!FormatTools.validStream(stream, blockLen, false)) return false;

    // Header parsing works through instance fields. Snapshot them so that
    // probing a stream never disturbs a file this reader already has open
    // (sizeX in particular is used for pixel indexing in openBytes).
    final int savedRow = row;
    final int savedRowLength = rowLength;
    final int savedXIndex = xIndex;
    final int savedYIndex = yIndex;
    final String[] savedChannels = channels;
    final int savedSizeX = sizeX;
    final int savedSizeY = sizeY;

    try {
      resetParserState();

      final String block = stream.readString(blockLen);
      final List<String> lines = Arrays.asList(block.split("\n"));

      // reject blocks containing only blank lines
      if (getNextLine(lines) == null) return false;

      // rewind: the probe above must not consume the first line,
      // which may itself be the header row
      row = 0;

      try {
        return parseFileHeader(lines) > 0;
      }
      catch (FormatException e) {
        // no recognizable table header in this block; not our format
        return false;
      }
    }
    finally {
      row = savedRow;
      rowLength = savedRowLength;
      xIndex = savedXIndex;
      yIndex = savedYIndex;
      channels = savedChannels;
      sizeX = savedSizeX;
      sizeY = savedSizeY;
    }
  }

  /**
   * Copies the requested region of a plane into the given buffer as
   * 4-byte floats, row by row.
   *
   * @see IFormatReader#openBytes(int, byte[], int, int, int, int)
   */
  @Override
  public byte[] openBytes(int no, byte[] buf, int x, int y, int w, int h)
    throws FormatException, IOException
  {
    FormatTools.checkPlaneParameters(this, no, buf.length, x, y, w, h);

    // copy floating point data into byte buffer
    final float[] plane = data[no];
    int q = 0;
    for (int j=0; j<h; j++) {
      // index of the first requested pixel on this row of the full plane
      final int rowStart = (y + j) * sizeX + x;
      for (int i=0; i<w; i++) {
        final int bits = Float.floatToIntBits(plane[rowStart + i]);
        DataTools.unpackBytes(bits, buf, q, 4, LITTLE_ENDIAN);
        q += 4;
      }
    }

    return buf;
  }

  /**
   * Returns the in-memory float array backing the given plane.
   * Note that the x, y, w and h arguments are not consulted: the array for
   * the whole plane is returned, and it is the reader's own storage,
   * not a copy.
   *
   * @see IFormatReader#openPlane(int, int, int, int, int)
   */
  @Override
  public Object openPlane(int no, int x, int y, int w, int h)
    throws FormatException, IOException
  {
    FormatTools.assertId(currentId, true, 1);
    return data[no];
  }

  /* @see IFormatReader#close(boolean) */
  @Override
  public void close(boolean fileOnly) throws IOException {
    super.close(fileOnly);
    if (!fileOnly) {
      data = null;
      resetParserState();
    }
  }

  // -- IFormatHandler methods --

  /* @see IFormatHandler#getNativeDataType() */
  @Override
  public Class<?> getNativeDataType() {
    return float[].class;
  }

  // -- Internal FormatReader API methods --

  /* @see loci.formats.FormatReader#initFile(String) */
  @Override
  protected void initFile(String id) throws FormatException, IOException {
    super.initFile(id);

    // parse from a clean slate rather than relying on earlier calls
    // having left the cursor and column indices untouched
    data = null;
    resetParserState();

    // read file into memory
    LOGGER.info("Reading file");
    List<String> lines = readFile(id);

    // parse file header
    LOGGER.info("Parsing file header");
    final int headerRows = parseFileHeader(lines);

    LOGGER.info("Creating images");

    // allocate memory for image data
    final int sizeZ = 1, sizeT = 1; // no Z or T for now
    final int sizeC = channels.length;
    final int imageCount = sizeZ * sizeC * sizeT;
    final long pixelCount = (long) sizeX * sizeY;
    if (pixelCount > Integer.MAX_VALUE) {
      // sizeX * sizeY would overflow int and yield a bogus array length
      throw new FormatException(
        "Image plane too large: " + sizeX + " x " + sizeY);
    }
    final int planeSize = (int) pixelCount;
    data = new float[imageCount][planeSize];

    // flag all values as missing by default
    for (int i=0; i<imageCount; i++) Arrays.fill(data[i], Float.NaN);

    // read data into float array
    parseTableData(lines, headerRows);

    LOGGER.info("Populating metadata");

    // populate core metadata
    populateCoreMetadata(sizeX, sizeY, sizeZ, sizeC, sizeT);

    // populate OME metadata
    MetadataStore store = makeFilterMetadata();
    MetadataTools.populatePixels(store, this);
  }

  // -- Helper methods --

  /**
   * Resets every field written by header/table parsing to its initial value.
   * Does not touch {@link #data}.
   */
  private void resetParserState() {
    row = 0;
    rowLength = 0;
    xIndex = yIndex = -1;
    channels = null;
    sizeX = sizeY = 0;
  }

  /**
   * Reads every line of the given file into memory, periodically logging
   * progress.
   *
   * @param id name of the file to read
   * @return the lines of the file, in order
   * @throws IOException if the file cannot be read
   */
  private List<String> readFile(String id) throws IOException {
    List<String> lines = new ArrayList<String>();
    long time = System.currentTimeMillis();
    IRandomAccess handle = Location.getMappedFile(id);
    if (handle == null) {
      // HACK: Read using vanilla BufferedReader, since it's faster.
      String mapId = Location.getMappedId(id);
      BufferedReader in = new BufferedReader(new FileReader(mapId));
      try {
        int no = 0;
        while (true) {
          no++;
          time = checkTime(time, no, 0, 0);
          String line = in.readLine();
          if (line == null) break; // eof
          lines.add(line);
        }
      }
      finally {
        // release the file even if reading fails part-way
        in.close();
      }
    }
    else {
      // read data using RandomAccessInputStream (data may not be a file)
      RandomAccessInputStream in = new RandomAccessInputStream(handle);
      try {
        int no = 0;
        while (true) {
          no++;
          time = checkTime(time, no, in.getFilePointer(), in.length());
          String line = in.readLine();
          if (line == null) break; // eof
          lines.add(line);
        }
      }
      finally {
        in.close();
      }
    }
    return lines;
  }

  /**
   * Parses the file looking for the file header, starting at the current
   * {@link #row}.
   * Determines image extents (sets sizeX and sizeY).
   * Determines channel names (populates channels array).
   *
   * The header is taken to be the line immediately preceding the first
   * all-numeric line that has at least 3 tokens and the same token count
   * as that preceding line.
   *
   * @param lines the lines to scan
   * @return number of rows in the header
   * @throws FormatException if no tabular data is found, if the header lacks
   *   an X or Y column, or if a data row has a negative X or Y
   */
  private int parseFileHeader(List<String> lines) throws FormatException {
    String[] lastTokens = null;
    double[] rowData = null;
    while (true) {
      String[] tokens = getNextLine(lines);
      if (tokens == null) throw new FormatException("No tabular data found");
      if (tokens.length >= 3 && // need at least 3 columns of data
        lastTokens != null && lastTokens.length == tokens.length)
      {
        // consistent number of tokens; might be the header and first data row

        // allocate rowData as needed
        if (rowData == null || rowData.length != tokens.length) {
          rowData = new double[tokens.length];
        }

        // try to parse the first data row
        if (getRowData(tokens, rowData)) {
          LOGGER.info("Found header on line " + (row - 1));
          // looks like tabular data; assume previous line is the header
          parseHeaderRow(lastTokens);
          break;
        }
      }
      lastTokens = tokens;
    }
    // row now points just past the first data row
    final int headerRows = row - 1;

    if (xIndex < 0) throw new FormatException("No X coordinate column found");
    if (yIndex < 0) throw new FormatException("No Y coordinate column found");

    // search remainder of tabular data for X and Y extents
    boolean checkRow = true; // rowData already holds the first data row
    while (true) {
      if (checkRow) {
        // expand dimensional extents as needed
        int x = getX(rowData);
        if (x < 0) {
          throw new FormatException("Row #" + row + ": invalid X: " + x);
        }
        if (sizeX <= x) sizeX = x + 1;
        int y = getY(rowData);
        if (y < 0) {
          throw new FormatException("Row #" + row + ": invalid Y: " + y);
        }
        if (sizeY <= y) sizeY = y + 1;
      }

      // parse next row
      String[] tokens = getNextLine(lines);
      if (tokens == null) break; // eof
      // rows that are non-numeric or of the wrong length are skipped
      checkRow = getRowData(tokens, rowData);
    }

    return headerRows;
  }

  /**
   * Reads the tabular data into the data array.
   *
   * @param lines the lines of the file
   * @param linesToSkip index of the first line to parse (skips the header)
   */
  private void parseTableData(List<String> lines, int linesToSkip) {
    row = linesToSkip; // skip header lines
    double[] rowData = new double[rowLength];
    while (true) {
      String[] tokens = getNextLine(lines);
      if (tokens == null) break; // eof
      if (tokens.length != rowLength) {
        LOGGER.warn("Ignoring deviant row #" + row);
        continue;
      }

      // parse values from row
      boolean success = getRowData(tokens, rowData);
      if (!success) {
        LOGGER.warn("Ignoring non-numeric row #" + row);
        continue;
      }

      // copy values into array
      assignValues(rowData);
    }
  }

  /** Populates the {@link CoreMetadata} values. */
  private void populateCoreMetadata(int sizeX, int sizeY,
    int sizeZ, int sizeC, int sizeT)
  {
    core[0].sizeX = sizeX;
    core[0].sizeY = sizeY;
    core[0].sizeZ = sizeZ;
    core[0].sizeC = sizeC;
    core[0].sizeT = sizeT;
    core[0].pixelType = FormatTools.FLOAT;
    core[0].bitsPerPixel = 32;
    core[0].imageCount = sizeZ * sizeC * sizeT;
    core[0].dimensionOrder = "XYZCT";
    core[0].orderCertain = true;
    core[0].littleEndian = LITTLE_ENDIAN;
    core[0].metadataComplete = true;
  }

  /**
   * Parses numerical row data from the given tokens.
   *
   * @param tokens list of token strings to parse
   * @param rowData array to fill in with the data; length must match tokens
   *
   * @return true if the data could be parsed; false if the lengths differ
   *   or any token is not a number (rowData may then be partially
   *   overwritten)
   */
  private boolean getRowData(String[] tokens, double[] rowData) {
    // a row of the wrong width is not a data row; without this check a
    // longer row would overrun rowData and a shorter one would leave
    // stale values behind
    if (tokens.length != rowData.length) return false;
    try {
      for (int i=0; i<tokens.length; i++) {
        rowData[i] = Double.parseDouble(tokens[i]);
      }
      return true;
    }
    catch (NumberFormatException exc) {
      // not a data row
      return false;
    }
  }

  /**
   * Populates rowLength, xIndex, yIndex, and channels.
   * xIndex and yIndex are left at -1 when the corresponding label is absent.
   *
   * @param tokens the tokens of the header row
   */
  private void parseHeaderRow(String[] tokens) {
    rowLength = tokens.length;
    // forget indices from any earlier header so a missing column is detected
    xIndex = yIndex = -1;
    List<String> channelsList = new ArrayList<String>();
    for (int i=0; i<rowLength; i++) {
      String token = tokens[i];
      if (token.equals(LABEL_X)) xIndex = i;
      else if (token.equals(LABEL_Y)) yIndex = i;
      else {
        // treat column as a channel
        channelsList.add(token);
      }
    }
    channels = channelsList.toArray(new String[0]);
  }

  /**
   * Assigns values from the given row into the data array: each non-X,
   * non-Y column goes to the next plane, at the pixel given by the row's
   * X and Y values.
   */
  private void assignValues(double[] rowData) {
    int x = getX(rowData);
    int y = getY(rowData);
    int c = 0;
    int index = sizeX * y + x;
    for (int i=0; i<rowLength; i++) {
      if (i == xIndex || i == yIndex) continue;
      data[c++][index] = (float) rowData[i];
    }
  }

  /**
   * Logs reading progress if more than {@link #TIME_OFFSET} milliseconds
   * have elapsed since the given time.
   *
   * @param time time of the last progress report, in milliseconds
   * @param no number of the line about to be read
   * @param pos current position in the input; only used when len &gt; 0
   * @param len total length of the input, or 0 if unknown
   * @return the time of the most recent progress report
   */
  private long checkTime(long time, int no, long pos, long len) {
    long t = System.currentTimeMillis();
    if (t - time > TIME_OFFSET) {
      // some time has passed; report progress
      if (len > 0) {
        int percent = (int) (100 * pos / len);
        LOGGER.info("Reading line " + no + " (" + percent + "%)");
      }
      else LOGGER.info("Reading line " + no);
      time = t;
    }
    return time;
  }

  /** Gets the X coordinate of the given row, truncated to an integer. */
  private int getX(double[] rowData) {
    return (int) rowData[xIndex];
  }

  /** Gets the Y coordinate of the given row, truncated to an integer. */
  private int getY(double[] rowData) {
    return (int) rowData[yIndex];
  }

  /**
   * Advances {@link #row} to the next non-blank line and tokenizes it.
   *
   * @param lines the lines being parsed
   * @return the line's tokens, split on each whitespace character or comma,
   *   or null if there are no more lines
   */
  private String[] getNextLine(List<String> lines) {
    while (true) {
      if (row >= lines.size()) return null; // end of list
      String line = lines.get(row++);
      line = line.trim();
      if (line.equals("")) continue; // skip blank lines
      return line.split("[\\s,]");
    }
  }

}