// Copyright (C) 2010, 2011 Zeno Gantner
// Copyright (C) 2011 Artus Krohn-Grimberghe, Chris Newell
//
// This file is part of MyMediaLite.
//
// MyMediaLite is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// MyMediaLite is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>.
package org.mymedialite.io;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Pattern;
import org.mymedialite.data.IEntityMapping;
import org.mymedialite.data.IPosOnlyFeedback;
import org.mymedialite.data.IdentityMapping;
import org.mymedialite.data.PosOnlyFeedback;
import org.mymedialite.datatype.SparseBooleanMatrix;
/**
* Class that contains static methods for reading in implicit feedback data for ItemRecommenders.
* @version 2.03
*/
public class ItemData {

  /**
   * Read in implicit feedback data from a file.
   *
   * Each line must consist of at least two fields, the first being a user identifier, the second
   * being an item identifier. Additional fields and empty lines are ignored.
   *
   * See Constants.SPLIT_CHARS for details of the permissible field separators.
   *
   * A file opened by this method is always closed before the method returns, whether parsing
   * succeeds or fails. When reading from STDIN the underlying stream is left open.
   *
   * @param filename the name of the file to be read from or "-" if STDIN
   * @param user_mapping a user IEntityMapping object; if null, an IdentityMapping is used
   * @param item_mapping an item IEntityMapping object; if null, an IdentityMapping is used
   * @param ignore_first_line if true, ignore the first line
   * @return a IPosOnlyFeedback object with the user-wise collaborative data
   * @throws IOException if the file cannot be opened or read, or if a non-empty line has fewer than two fields
   * @throws Exception if mapping an identifier or storing the feedback fails
   */
  static public <T> IPosOnlyFeedback read(String filename, IEntityMapping user_mapping, IEntityMapping item_mapping, boolean ignore_first_line) throws Exception {
    if (filename.equals("-")) {
      // Do not close this reader: closing it would close System.in for the rest of the process.
      BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
      return read(stdin, user_mapping, item_mapping, ignore_first_line);
    }

    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      return read(reader, user_mapping, item_mapping, ignore_first_line);
    } finally {
      // Release the file handle even when parsing throws.
      reader.close();
    }
  }

  /**
   * Read in implicit feedback data from a BufferedReader.
   *
   * Each line must consist of at least two fields, the first being a user identifier, the second
   * being an item identifier. Additional fields and empty lines are ignored.
   *
   * See Constants.SPLIT_CHARS for details of the permissible field separators.
   *
   * The reader is not closed by this method; closing it is the caller's responsibility.
   *
   * @param reader the BufferedReader to be read from
   * @param user_mapping a user IEntityMapping object; if null, an IdentityMapping is used
   * @param item_mapping an item IEntityMapping object; if null, an IdentityMapping is used
   * @param ignore_first_line if true, ignore the first line
   * @return a PosOnlyFeedback object with the user-wise collaborative data
   * @throws IOException if reading fails or a non-empty line has fewer than two fields
   * @throws Exception if mapping an identifier or storing the feedback fails
   */
  static public <T> IPosOnlyFeedback read(BufferedReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping, boolean ignore_first_line) throws Exception {
    if (user_mapping == null) user_mapping = new IdentityMapping();
    if (item_mapping == null) item_mapping = new IdentityMapping();

    // Consume and discard the first line (e.g. a header) when requested.
    if (ignore_first_line) reader.readLine();

    PosOnlyFeedback<SparseBooleanMatrix> feedback = new PosOnlyFeedback<SparseBooleanMatrix>(SparseBooleanMatrix.class);

    String line;
    while ((line = reader.readLine()) != null) {
      line = line.trim();
      // Lines that are empty or contain only whitespace are skipped.
      if (line.length() == 0) continue;

      String[] tokens = line.split(Constants.SPLIT_CHARS, 0);
      if (tokens.length < 2) throw new IOException("Expected at least two columns: " + line);

      // Only the first two fields are used; any further fields are ignored.
      int user_id = user_mapping.toInternalID(tokens[0]);
      int item_id = item_mapping.toInternalID(tokens[1]);
      feedback.add(user_id, item_id);
    }
    return feedback;
  }
}