// Copyright (C) 2010, 2011 Zeno Gantner, Chris Newell
//
// This file is part of MyMediaLite.
//
// MyMediaLite is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// MyMediaLite is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>.

package org.mymedialite.io;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

import org.mymedialite.data.IEntityMapping;
import org.mymedialite.datatype.SparseBooleanMatrix;

/**
 * Class that offers static methods to read (binary) attribute data into SparseBooleanMatrix objects.
 *
 * The expected (sparse) line format is:
 * ENTITY_ID SEPARATOR ATTRIBUTE_ID [SEPARATOR ATTRIBUTE_ID ...]
 * for attributes that are set.
 * SEPARATOR can be space, tab, or comma; see Constants.SPLIT_CHARS.
 * @version 2.03
 */
public class AttributeData {

  // Prevent instantiation.
  private AttributeData() {}

  /**
   * Read binary attribute data from a file.
   *
   * Each line must consist of an item identifier followed by one or more attribute identifiers.
   * There can be one or more lines per item. Empty lines are ignored.
   *
   * See Constants.SPLIT_CHARS for details of the permissible field separators.
   *
   * The file is opened by this method and is always closed again before it returns,
   * whether reading succeeds or fails.
   *
   * @param filename the name of the file to be read from
   * @param itemMapping the mapping object for the given entity type
   * @param attributeMapping the mapping object for the attributes
   * @return the attribute data
   * @throws IOException if the file cannot be opened, read or closed, or if a line has fewer
   *         than two columns; the underlying exception is attached as the cause
   */
  public static SparseBooleanMatrix read(String filename, IEntityMapping itemMapping, IEntityMapping attributeMapping) throws IOException {
    try {
      BufferedReader reader = new BufferedReader(new FileReader(filename));
      try {
        return read(reader, itemMapping, attributeMapping);
      } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
      }
    } catch (IOException e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new IOException("Unable to read file " + filename + ": " + e.getMessage(), e);
    }
  }

  /**
   * Read binary attribute data from a BufferedReader.
   *
   * Each line must consist of an item identifier followed by one or more attribute identifiers.
   * There can be one or more lines per item. Empty lines are ignored.
   *
   * See Constants.SPLIT_CHARS for details of the permissible field separators.
   *
   * The reader is consumed until the end of its input but is not closed by this method;
   * closing it remains the caller's responsibility.
   *
   * @param reader a BufferedReader to be read from
   * @param itemMapping the mapping object for the given entity type
   * @param attributeMapping the mapping object for the attributes
   * @return the attribute data
   * @throws IOException if reading fails or if a non-empty line has fewer than two columns
   */
  public static SparseBooleanMatrix read(BufferedReader reader, IEntityMapping itemMapping, IEntityMapping attributeMapping) throws IOException {
    SparseBooleanMatrix matrix = new SparseBooleanMatrix();

    String line;
    while ((line = reader.readLine()) != null) {
      line = line.trim();
      if (line.length() == 0) {
        continue;  // Blank (or whitespace-only) lines carry no data.
      }

      // A limit of 0 makes split() discard trailing empty tokens.
      String[] tokens = line.split(Constants.SPLIT_CHARS, 0);
      if (tokens.length < 2) {
        throw new IOException("Expected at least 2 columns: " + line);
      }

      // The first token identifies the entity; every following token is one of its attributes.
      int entityId = itemMapping.toInternalID(tokens[0]);
      for (int i = 1; i < tokens.length; i++) {
        int attributeId = attributeMapping.toInternalID(tokens[i]);
        matrix.set(entityId, attributeId, true);
      }
    }

    return matrix;
  }
}