//Copyright (C) 2011 Zeno Gantner
//
//This file is part of MyMediaLite.
//
//MyMediaLite is free software: you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//MyMediaLite is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>.
package org.mymedialite.io.kddcup2011;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import org.mymedialite.data.IRatings;
import org.mymedialite.data.StaticByteRatings;
/**
* Class that offers static methods for reading in rating data from the KDD Cup 2011 files.
* @version 2.03
*/
public class Ratings {

  // Prevent instantiation.
  private Ratings() {}

  /**
   * Read in rating data from a file.
   *
   * The file is expected to consist of user blocks: a header line of the form
   * <code>user_id|number_of_ratings</code>, followed by that many tab-separated lines whose
   * first field is the item ID and whose second field is the rating.
   *
   * @param filename the name of the file to read from
   * @return the rating data
   * @throws IOException if the file cannot be read, ends in the middle of a user block,
   *         or contains a user header line without a '|' separator
   */
  public static IRatings read(String filename) throws IOException {
    // A first pass over the file counts the ratings; the count is handed to the constructor.
    IRatings ratings = new StaticByteRatings(countRatings(filename));

    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] tokens = splitUserHeader(line);
        int user_id = Integer.parseInt(tokens[0]);
        int num_user_ratings = Integer.parseInt(tokens[1]); // number of ratings for this user

        for (int i = 0; i < num_user_ratings; i++) {
          tokens = readRatingTokens(reader, user_id);
          int item_id = Integer.parseInt(tokens[0]);
          byte rating = Byte.parseByte(tokens[1]);
          ratings.add(user_id, item_id, rating);
        }
      }
    } finally {
      reader.close();
    }
    return ratings;
  }

  /**
   * Read in test rating data (Track 1) from a file.
   *
   * Uses the same user-block layout as {@link #read(String)}, but only the item ID (first
   * tab-separated field) of each rating line is used; every rating value is stored as 0.
   *
   * @param filename the name of the file to read from
   * @return the rating data
   * @throws IOException if the file cannot be read, ends in the middle of a user block,
   *         or contains a user header line without a '|' separator
   */
  public static IRatings readTest(String filename) throws IOException {
    IRatings ratings = new StaticByteRatings(countRatings(filename));

    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] tokens = splitUserHeader(line);
        int user_id = Integer.parseInt(tokens[0]);
        int num_user_ratings = Integer.parseInt(tokens[1]); // number of ratings for this user

        for (int i = 0; i < num_user_ratings; i++) {
          tokens = readRatingTokens(reader, user_id);
          int item_id = Integer.parseInt(tokens[0]);
          ratings.add(user_id, item_id, 0);
        }
      }
    } finally {
      reader.close();
    }
    return ratings;
  }

  /**
   * Read in rating data from a file and binarize it: a rating of 80 or more is stored as 1,
   * any other rating is stored as 0.
   *
   * Uses the same user-block layout as {@link #read(String)}.
   *
   * @param filename the name of the file to read from
   * @return the rating data
   * @throws IOException if the file cannot be read, ends in the middle of a user block,
   *         or contains a user header line without a '|' separator
   */
  public static IRatings read80Plus(String filename) throws IOException {
    IRatings ratings = new StaticByteRatings(countRatings(filename));

    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] tokens = splitUserHeader(line);
        int user_id = Integer.parseInt(tokens[0]);
        int num_user_ratings = Integer.parseInt(tokens[1]); // number of ratings for this user

        for (int i = 0; i < num_user_ratings; i++) {
          tokens = readRatingTokens(reader, user_id);
          int item_id = Integer.parseInt(tokens[0]);
          byte rating = Byte.parseByte(tokens[1]);
          ratings.add(user_id, item_id, rating >= 80 ? 1 : 0);
        }
      }
    } finally {
      reader.close();
    }
    return ratings;
  }

  /**
   * Count the rating lines readable from the given reader.
   *
   * Every line that does not contain a '|' character is counted as one rating; lines that
   * contain one are treated as user headers and skipped. The reader is consumed up to the
   * end of the stream and is NOT closed by this method.
   *
   * @param reader the reader to count rating lines from
   * @return the number of lines without a '|' character
   * @throws IOException if reading fails
   */
  static int getNumberOfRatings(BufferedReader reader) throws IOException {
    int num_ratings = 0;
    String line;
    while ((line = reader.readLine()) != null) {
      if (!line.contains("|")) {
        num_ratings++;
      }
    }
    return num_ratings;
  }

  /** Opens the file, counts its rating lines, and closes the file again. */
  private static int countRatings(String filename) throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    try {
      return getNumberOfRatings(reader);
    } finally {
      reader.close();
    }
  }

  /** Splits a user header line at the '|' separator and checks that both fields are present. */
  private static String[] splitUserHeader(String line) throws IOException {
    // String.split() takes a regular expression. An unescaped "|" is an empty alternation
    // that matches between every character, so the pipe must be escaped to split on it.
    String[] tokens = line.split("\\|");
    if (tokens.length < 2) {
      throw new IOException("Expected '<user_id>|<number_of_ratings>' but found: " + line);
    }
    return tokens;
  }

  /** Reads the next rating line and splits it at tabs; fails if the file ends prematurely. */
  private static String[] readRatingTokens(BufferedReader reader, int user_id) throws IOException {
    String line = reader.readLine();
    if (line == null) {
      throw new IOException("Unexpected end of file while reading ratings of user " + user_id);
    }
    return line.split("\t");
  }
}