// Copyright (C) 2011 Zeno Gantner, CHris Newell // // This file is part of MyMediaLite. // // MyMediaLite is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // MyMediaLite is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>. package org.mymedialite.io.kddcup2011; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import org.mymedialite.data.KDDCupItems; import org.mymedialite.taxonomy.KDDCupItemType; /** * Routines for reading in the item taxonomy of the KDD Cup 2011 data. * @version 2.03 */ public class Items { /** * Read in the item data from several files. * @param tracks_filename name of the tracks file * @param albums_filename name of the album/record file * @param artists_filename name of the artists file * @param genres_filename name of the genre file * @param track_no 1 or 2 * @return the rating data * @throws IOException */ public static KDDCupItems read(String tracks_filename, String albums_filename, String artists_filename, String genres_filename, int track_no) throws IOException { KDDCupItems items = new KDDCupItems(track_no == 1 ? 624961 : 296111); readTracks(new BufferedReader(new FileReader(tracks_filename)), items); readAlbums(new BufferedReader(new FileReader(albums_filename)), items); readArtists(new BufferedReader(new FileReader(artists_filename)), items); readGenres(new BufferedReader(new FileReader(genres_filename)), items); return items; } /** * Read the track data. * @param reader a reader object to read the data from * @param items the <see cref="KDDCupItems"/> object * @throws IOException */ public static void readTracks(BufferedReader reader, KDDCupItems items) throws IOException { String line; while ((line = reader.readLine()) != null) { String[] tokens = line.split("|"); int track_id = Integer.parseInt(tokens[0]); int album_id = tokens[1] == "None" ? -1 : Integer.parseInt(tokens[1]); int artist_id = tokens[2] == "None" ? -1 : Integer.parseInt(tokens[2]); int[] genres = new int[tokens.length - 3]; for (int i = 0; i < genres.length; i++) genres[i] = Integer.parseInt(tokens[3 + i]); items.insert(track_id, KDDCupItemType.TRACK, album_id, artist_id, genres); } } /** * Read the album data. * @param reader a reader object to read the data from * @param items the <see cref="KDDCupItems"/> object * @throws IOException */ public static void readAlbums(BufferedReader reader, KDDCupItems items) throws IOException { String line; while ((line = reader.readLine()) != null) { String[] tokens = line.split("|"); int album_id = Integer.parseInt(tokens[0]); int artist_id = tokens[1] == "None" ? -1 : Integer.parseInt(tokens[1]); int[] genres = new int[tokens.length - 2]; for (int i = 0; i < genres.length; i++) genres[i] = Integer.parseInt(tokens[2 + i]); items.insert(album_id, KDDCupItemType.ALBUM, album_id, artist_id, genres); } } /** * Read the artist data. * @param reader a reader object to read the data from * @param items the <see cref="KDDCupItems"/> object * @throws IOException */ public static void readArtists(BufferedReader reader, KDDCupItems items) throws IOException { String line; while ((line = reader.readLine()) != null) { int artist_id = Integer.parseInt(line); items.insert(artist_id, KDDCupItemType.ARTIST, -1, artist_id, null); } } /** * Read the genre data. * @param reader a reader object to read the data from * @param items the <see cref="KDDCupItems"/> object * @throws IOException */ public static void readGenres(BufferedReader reader, KDDCupItems items) throws IOException { String line; while ((line = reader.readLine()) != null) { int genre_id = Integer.parseInt(line); items.insert(genre_id, KDDCupItemType.GENRE, -1, -1, null); } } }