// Copyright (C) 2010, 2011 Zeno Gantner
// Copyright (C) 2011 Chris Newell, Zeno Gantner
//
//This file is part of MyMediaLite.
//
//MyMediaLite is free software: you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//MyMediaLite is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>.
package org.mymedialite.io;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Pattern;
import org.mymedialite.data.IEntityMapping;
import org.mymedialite.data.IRatings;
import org.mymedialite.data.IdentityMapping;
import org.mymedialite.data.Ratings;
/**
 * Class that offers static methods for reading in rating data.
 * @version 2.03
 */
public class RatingData {

  /**
   * Read in rating data from a file.
   *
   * Each line must consist of at least three fields, the first being a user identifier, the second being
   * an item identifier and the third being a rating value. Additional fields and empty lines are ignored.
   *
   * See Constants.SPLIT_CHARS for details of the permissible field separators.
   *
   * The file is opened with a FileReader and is always closed before this method returns,
   * whether parsing succeeds or fails.
   *
   * @param filename the name of the file to read from
   * @param userMapping mapping object for user IDs; if null, an IdentityMapping is used
   * @param itemMapping mapping object for item IDs; if null, an IdentityMapping is used
   * @param ignoreFirstLine if true, ignore the first line
   * @return the rating data
   * @throws IOException if the file cannot be opened or read, or if a non-empty line has fewer than three fields
   * @throws NumberFormatException if a rating value cannot be parsed as a double
   */
  public static IRatings read(String filename, IEntityMapping userMapping, IEntityMapping itemMapping, boolean ignoreFirstLine) throws IOException, NumberFormatException {
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      return read(reader, userMapping, itemMapping, ignoreFirstLine);
    } finally {
      // This method opened the file, so it is responsible for releasing the handle,
      // also when parsing aborts with an exception.
      reader.close();
    }
  }

  /**
   * Read in rating data from a BufferedReader.
   *
   * Each line must consist of at least three fields, the first being a user identifier, the second being
   * an item identifier and the third being a rating value. Additional fields and empty lines are ignored.
   *
   * See Constants.SPLIT_CHARS for details of the permissible field separators.
   *
   * The reader is consumed until end of input but is NOT closed by this method; closing it
   * remains the responsibility of the caller.
   *
   * @param reader the BufferedReader to read from
   * @param userMapping mapping object for user IDs; if null, an IdentityMapping is used
   * @param itemMapping mapping object for item IDs; if null, an IdentityMapping is used
   * @param ignoreFirstLine if true, ignore the first line
   * @return the rating data
   * @throws IOException if reading fails, or if a non-empty line has fewer than three fields
   * @throws NumberFormatException if a rating value cannot be parsed as a double
   */
  public static IRatings read(BufferedReader reader, IEntityMapping userMapping, IEntityMapping itemMapping, boolean ignoreFirstLine) throws IOException, NumberFormatException {
    if (userMapping == null) {
      userMapping = new IdentityMapping();
    }
    if (itemMapping == null) {
      itemMapping = new IdentityMapping();
    }

    // Discard the first line (e.g. a header) without inspecting it.
    if (ignoreFirstLine) {
      reader.readLine();
    }

    IRatings ratings = new Ratings();
    String line;
    while ((line = reader.readLine()) != null) {
      line = line.trim();
      // Lines that are empty after trimming carry no data and are skipped.
      if (line.length() == 0) {
        continue;
      }

      String[] tokens = line.split(Constants.SPLIT_CHARS, 0);
      if (tokens.length < 3) {
        throw new IOException("Expected at least three columns: " + line);
      }

      // Only the first three fields are used; any further fields are ignored.
      int userId = userMapping.toInternalID(tokens[0]);
      int itemId = itemMapping.toInternalID(tokens[1]);
      double rating = Double.parseDouble(tokens[2]);
      ratings.add(userId, itemId, rating);
    }
    return ratings;
  }
}