// Copyright (C) 2011 Zeno Gantner, Chris Newell // // This file is part of MyMediaLite. // // MyMediaLite is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // MyMediaLite is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>. // package org.mymedialite.data; import it.unimi.dsi.fastutil.ints.IntCollection; import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSet; import java.util.List; /** * Interface for different kinds of collaborative filtering data sets. * * Implementing classes/inheriting interfaces are e.g. for rating data and for positive-only implicit feedback. * * The main feature of a dataset is that it has some kind of order (not explicitly stated) * - random, chronological, user-wise, or item-wise - and that it contains tuples of users and * items (not necessarily unique tuples). * * Implementing classes and inheriting interfaces can add additional data to each user-item tuple, * e.g. the date/time of an event, location, context, etc., as well as additional index structures * to access the dataset in a certain fashion. * * @author Zeno Gantner * @version 2.03 */ public interface IDataSet { /** * @return the number of interaction events in the dataset. */ int size(); /** * @return the user entries. */ IntList users(); /** * @return the item entries. */ IntList items(); /** * @return the maximum user ID in the dataset. */ int maxUserID(); /** * @return the maximum item ID in the dataset. */ int maxItemID(); /** * @return all user IDs in the dataset. */ IntList allUsers(); /** * @return all item IDs in the dataset. */ IntList allItems(); /** * indices by user. * Should be implemented as a lazy data structure */ List<IntList> byUser(); /** * indices by item. * Should be implemented as a lazy data structure */ List<IntList> byItem(); /** * get a randomly ordered list of all indices. * Should be implemented as a lazy data structure */ IntList randomIndex(); /** Build the user indices. */ void buildUserIndices(); /** Build the item indices. */ void buildItemIndices(); /** Build the random index. */ void buildRandomIndex(); /** * Remove all events related to a given user. * @param user_id the user ID */ void removeUser(int user_id); /** * Remove all events related to a given item. * @param item_id the item ID */ void removeItem(int item_id); /** * Get all users that are referenced by a given list of indices. * @param indices the indices to take into account * @return all users referenced by the list of indices */ IntSet getUsers(IntList indices); /** * Get all items that are referenced by a given list of indices. * @param indices the indices to take into account * @return all items referenced by the list of indices */ IntSet getItems(IntList indices); /** * Get index for a given user and item. * @param user_id the user ID * @param item_id the item ID * @return the index of the first event encountered that matches the user ID and item ID */ int getIndex(int user_id, int item_id); /** * Get index for given user and item. * @param user_id the user ID * @param item_id the item ID * @param indexes the indexes to look at * @return the index of the first event encountered that matches the user ID and item ID */ int getIndex(int user_id, int item_id, IntCollection indexes); /** * Try to get the index for given user and item. * @param user_id the user ID * @param item_id the item ID * @return the index of the first event encountered that matches the user ID and item ID or null, if not found */ Integer tryGetIndex(int user_id, int item_id); /** * Try to get the index for given user and item. * @param user_id the user ID * @param item_id the item ID * @param indexes the indexes to look at * @return the index of the first event encountered that matches the user ID and item ID or null, if not found */ Integer tryGetIndex(int user_id, int item_id, IntCollection indexes); }