//Copyright (C) 2010, 2011 Zeno Gantner, Chris Newell
//
//This file is part of MyMediaLite.
//
//MyMediaLite is free software: you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//MyMediaLite is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>.
package org.mymedialite.io;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.regex.Pattern;
import org.mymedialite.data.IEntityMapping;
import org.mymedialite.data.ITimedRatings;
import org.mymedialite.data.IdentityMapping;
import org.mymedialite.data.TimedRatings;
import org.mymedialite.util.Utils;
/**
* Class that offers methods for reading in rating data with time information.
* @version 2.03
*/
public class TimedRatingData {

  /** Column separator of a data line: any run of commas and/or whitespace characters. */
  private static final Pattern COLUMN_SEPARATOR = Pattern.compile("[,\\s]+");

  /** Separator between the numeric fields of a "yyyy-mm-dd hh:mm:ss" or "yyyy-mm-dd" string. */
  private static final Pattern DATE_TIME_SEPARATOR = Pattern.compile("[\\s:-]");

  // Prevent instantiation.
  private TimedRatingData() {}

  /**
   * Read in rating data from a file.
   * The file is opened by this method and is always closed again before it returns,
   * whether reading succeeded or failed.
   * @param filename the name of the file to read from
   * @param user_mapping mapping object for user IDs; if null, an {@link IdentityMapping} is used
   * @param item_mapping mapping object for item IDs; if null, an {@link IdentityMapping} is used
   * @param ignore_first_line if true, ignore the first line
   * @return the rating data
   * @throws FileNotFoundException if the file cannot be opened for reading
   * @throws IOException if reading fails or a line has fewer than 4 columns
   * @throws ParseException if a timestamp is in none of the supported formats
   */
  public static ITimedRatings read(String filename, IEntityMapping user_mapping, IEntityMapping item_mapping, boolean ignore_first_line) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      return read(reader, user_mapping, item_mapping, ignore_first_line);
    } finally {
      // The reader was created here, so it is released here.
      reader.close();
    }
  }

  /**
   * Read in rating data from a reader.
   * Each non-blank line must contain at least 4 columns separated by commas and/or
   * whitespace: user ID, item ID, rating, timestamp. If a fifth column is present it is
   * appended to the fourth (separated by one space) to form the timestamp. A timestamp
   * enclosed in double quotes has the quotes removed. Timestamps are tried in this order:
   * <ol>
   * <li>"yyyy-mm-dd hh:mm:ss" (interpreted with the default {@link Calendar})</li>
   * <li>"yyyy-mm-dd" (midnight, interpreted with the default {@link Calendar})</li>
   * <li>a value accepted by {@code Utils.parseInteger}, interpreted as seconds since the Unix epoch</li>
   * <li>anything a default-constructed {@link SimpleDateFormat} can parse</li>
   * </ol>
   * The reader is not closed by this method.
   * @param reader the {@link BufferedReader} to read from
   * @param user_mapping mapping object for user IDs; if null, an {@link IdentityMapping} is used
   * @param item_mapping mapping object for item IDs; if null, an {@link IdentityMapping} is used
   * @param ignore_first_line if true, ignore the first line
   * @return the rating data
   * @throws IOException if reading fails or a line has fewer than 4 columns
   * @throws NumberFormatException if the rating or a date field is not numeric
   * @throws ParseException if a timestamp is in none of the supported formats
   */
  public static ITimedRatings read(BufferedReader reader, IEntityMapping user_mapping, IEntityMapping item_mapping, boolean ignore_first_line) throws Exception {
    if (user_mapping == null) {
      user_mapping = new IdentityMapping();
    }
    if (item_mapping == null) {
      item_mapping = new IdentityMapping();
    }

    if (ignore_first_line) {
      reader.readLine();
    }

    TimedRatings ratings = new TimedRatings();
    String line;
    while ((line = reader.readLine()) != null) {
      // Trim first: leading whitespace would otherwise produce an empty first token
      // and silently shift every column by one.
      String content = line.trim();
      if (content.length() == 0) {
        continue;
      }

      String[] tokens = COLUMN_SEPARATOR.split(content);
      if (tokens.length < 4) {
        throw new IOException("Expected at least 4 columns: " + line);
      }

      int user_id = user_mapping.toInternalID(tokens[0]);
      int item_id = item_mapping.toInternalID(tokens[1]);
      double rating = Double.parseDouble(tokens[2]);

      // A date and a time separated by whitespace arrive as two tokens; rejoin them.
      String dateString = tokens.length > 4 ? tokens[3] + " " + tokens[4] : tokens[3];

      // Strip surrounding quotes; the length check keeps a lone '"' from
      // producing an invalid substring range.
      if (dateString.length() >= 2 && dateString.startsWith("\"") && dateString.endsWith("\"")) {
        dateString = dateString.substring(1, dateString.length() - 1);
      }

      Date date = parseCalendarDate(dateString);
      if (date == null) {
        Integer unix_time = Utils.parseInteger(dateString);
        if (unix_time != null) {
          // Integer value, interpreted as seconds since Unix epoch.
          // Widen to long before multiplying to avoid int overflow.
          date = new Date((long) unix_time * 1000);
        } else {
          date = new SimpleDateFormat().parse(dateString);
        }
      }
      ratings.add(user_id, item_id, rating, date);

      // Progress indicator on stderr.
      if (ratings.size() % 200000 == 199999) {
        System.err.print(".");
      }
      if (ratings.size() % 12000000 == 11999999) {
        System.err.println();
      }
    }
    return ratings;
  }

  /**
   * Parse a timestamp in the format "yyyy-mm-dd hh:mm:ss" or "yyyy-mm-dd".
   * @param dateString the timestamp text, without surrounding quotes
   * @return the parsed date, or null if the string does not have the shape of
   *         one of the two formats (e.g. a 10-digit Unix timestamp)
   * @throws NumberFormatException if the string has the right shape but a field is not an integer
   */
  private static Date parseCalendarDate(String dateString) {
    boolean hasTime = dateString.length() == 19;
    boolean dateOnly = dateString.length() == 10;
    // Length alone is not enough: a 10-digit Unix timestamp is also 10 characters long,
    // so additionally require the '-' after the 4-digit year.
    if ((!hasTime && !dateOnly) || dateString.charAt(4) != '-') {
      return null;
    }

    String[] fields = DATE_TIME_SEPARATOR.split(dateString);
    if (fields.length != (hasTime ? 6 : 3)) {
      return null;
    }

    int year = Integer.parseInt(fields[0]);
    int month = Integer.parseInt(fields[1]) - 1; // Calendar months are 0-based
    int day = Integer.parseInt(fields[2]);

    Calendar calendar = Calendar.getInstance();
    // getInstance() is initialised with the current time; clear it so that fields
    // not set below (milliseconds, and the time of day for date-only input) are zero.
    calendar.clear();
    if (hasTime) {
      calendar.set(
          year,
          month,
          day,
          Integer.parseInt(fields[3]),
          Integer.parseInt(fields[4]),
          Integer.parseInt(fields[5]));
    } else {
      calendar.set(year, month, day);
    }
    return calendar.getTime();
  }
}