/* * Copyright 2011 Research Studios Austria Forschungsgesellschaft mBH * * This file is part of easyrec. * * easyrec is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * easyrec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with easyrec. If not, see <http://www.gnu.org/licenses/>. */ package org.easyrec.plugin.mahout; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefUserBasedRecommender; import org.apache.mahout.cf.taste.impl.similarity.CityBlockSimilarity; import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity; import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity; import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.Recommender; import org.apache.mahout.cf.taste.similarity.UserSimilarity; import org.easyrec.mahout.model.EasyrecDataModel; import org.easyrec.mahout.model.EasyrecInMemoryDataModel; import org.easyrec.mahout.store.MahoutDataModelMappingDAO; import org.easyrec.model.core.ItemAssocVO; import org.easyrec.model.core.ItemVO; import org.easyrec.plugin.model.Version; import org.easyrec.plugin.support.GeneratorPluginSupport; import org.easyrec.service.core.ItemAssocService; import org.easyrec.service.domain.TypeMappingService; import org.easyrec.store.dao.core.types.AssocTypeDAO; import org.easyrec.store.dao.core.types.ItemTypeDAO; import org.easyrec.store.dao.core.ItemDAO; import java.net.URI; import java.util.Date; import java.util.List; /** * @author David MAnn */ public class MahoutBooleanGenerator extends GeneratorPluginSupport<MahoutBooleanGeneratorConfig, MahoutBooleanGeneratorStats> { // ------------------------------ FIELDS ------------------------------ // the display name is the name of the generator that will show up in the admin tool when the plugin has been loaded. public static final String DISPLAY_NAME = "Mahout Boolean Generator"; // version of the generator, should be ascending for each new release public static final Version VERSION = new Version("0.98"); // The URI that uniquely identifies the plugin. While any valid URI is technically ok here, implementors // should choose their URIs wisely, ideally the URI should be 'cool' // (@see <a href="http://www.dfki.uni-kl.de/~sauermann/2006/11/cooluris/#cooluris">Cool URIs for the // Semantic Web</a>) If unsure, use an all-lowercase http URI pointing to a host/path that you control, // ending with '#[plugin-name]'. public static final URI ID = URI.create("http://www.easyrec.org/plugins/mahoutboolean"); private static final Log logger = LogFactory.getLog(MahoutBooleanGenerator.class); private MahoutDataModelMappingDAO mahoutDataModelMappingDAO; private AssocTypeDAO assocTypeDAO; private ItemTypeDAO itemTypeDAO; private ItemDAO itemDAO; public void setAssocTypeDAO(AssocTypeDAO assocTypeDAO) { this.assocTypeDAO = assocTypeDAO; } public void setItemTypeDAO(ItemTypeDAO itemTypeDAO) { this.itemTypeDAO = itemTypeDAO; } public void setItemDAO(ItemDAO itemDAO) { this.itemDAO = itemDAO; } public void setMahoutDataModelMappingDAO(MahoutDataModelMappingDAO mahoutDataModelMappingDAO) { this.mahoutDataModelMappingDAO = mahoutDataModelMappingDAO; } // --------------------------- CONSTRUCTORS --------------------------- public MahoutBooleanGenerator() { // we need to call the constructor of GeneratorPluginSupport to provide the name, id and version //additionally, we have to pass the class objects of config and stats classes. super(DISPLAY_NAME, ID, VERSION, MahoutBooleanGeneratorConfig.class, MahoutBooleanGeneratorStats.class); } // ------------------------ INTERFACE METHODS ------------------------ @Override public String getPluginDescription() { return "This is a plugin using algorihtms of the apache Mahout project using the CF code formerly known as the taste framework."; } // -------------------------- OTHER METHODS -------------------------- @Override protected void doCleanup() throws Exception { logger.info("The plugin is now being uninstalled."); // remove all tables/files/resources you created in {@link #doInitialize()}. // optional - you don't have to implement this method } @Override protected void doExecute(ExecutionControl executionControl, MahoutBooleanGeneratorStats stats) throws Exception { // when doExecute() is called, the generator has been initialized with the configuration we should use Date execution = new Date(); MahoutBooleanGeneratorConfig config = getConfiguration(); TypeMappingService typeMappingService = (TypeMappingService) super.getTypeMappingService(); ItemAssocService itemAssocService = getItemAssocService(); executionControl.updateProgress("initialize DataModel"); DataModel easyrecDataModel = new EasyrecDataModel(config.getTenantId(), typeMappingService.getIdOfActionType(config.getTenantId(), config.getActionType()), false, mahoutDataModelMappingDAO); if (config.getCacheDataInMemory() == 1) { executionControl.updateProgress("initialize EasyrecInMemoryDataModel"); easyrecDataModel = new EasyrecInMemoryDataModel(easyrecDataModel); } /*TanimotoCoefficientSimilarity is intended for "binary" data sets where a user either expresses a generic "yes" preference for an item or has no preference.*/ UserSimilarity userSimilarity = null; switch (config.getUserSimilarityMethod()) { case 1: executionControl.updateProgress("using LogLikelihoodSimilarity as UserSimilarity"); userSimilarity = new LogLikelihoodSimilarity(easyrecDataModel); break; case 2: executionControl.updateProgress("using TanimotoCoefficientSimilarity as UserSimilarity"); userSimilarity = new TanimotoCoefficientSimilarity(easyrecDataModel); break; case 3: executionControl.updateProgress("using SpearmanCorrelationSimilarity as UserSimilarity"); userSimilarity = new SpearmanCorrelationSimilarity(easyrecDataModel); break; case 4: executionControl.updateProgress("using CityBlockSimilarity as UserSimilarity"); userSimilarity = new CityBlockSimilarity(easyrecDataModel); break; } /*ThresholdUserNeighborhood is preferred in situations where we go in for a similarity measure between neighbors and not any number*/ UserNeighborhood neighborhood = null; Double userNeighborhoodSamplingRate = config.getUserNeighborhoodSamplingRate(); Double neighborhoodThreshold = config.getUserNeighborhoodThreshold(); int neighborhoodSize = config.getUserNeighborhoodSize(); double userNeighborhoodMinSimilarity = config.getUserNeighborhoodMinSimilarity(); switch (config.getUserNeighborhoodMethod()) { case 1: executionControl.updateProgress("using ThresholdUserNeighborhood as UserNeighborhood"); neighborhood = new ThresholdUserNeighborhood(neighborhoodThreshold, userSimilarity, easyrecDataModel, userNeighborhoodSamplingRate); break; case 2: executionControl.updateProgress("using NearestNUserNeighborhood as UserNeighborhood"); neighborhood = new NearestNUserNeighborhood(neighborhoodSize, userNeighborhoodMinSimilarity, userSimilarity, easyrecDataModel, userNeighborhoodSamplingRate); break; } /*GenericBooleanPrefUserBasedRecommender is appropriate for use when no notion of preference value exists in the data. */ executionControl.updateProgress("using GenericBooleanPrefUserBasedRecommender as Recommender"); Recommender recommender = new GenericBooleanPrefUserBasedRecommender(easyrecDataModel, neighborhood, userSimilarity); itemTypeDAO.insertOrUpdate(config.getTenantId(), "USER", true); Integer assocType = typeMappingService.getIdOfAssocType(config.getTenantId(), config.getAssociationType()); Integer userType = typeMappingService.getIdOfItemType(config.getTenantId(), "USER"); Integer sourceType = typeMappingService.getIdOfSourceType(config.getTenantId(), getId().toString()); Integer viewType = typeMappingService.getIdOfViewType(config.getTenantId(), config.getViewType()); stats.setNumberOfItems(easyrecDataModel.getNumItems()); int totalSteps = easyrecDataModel.getNumUsers(); int currentStep = 1; for (LongPrimitiveIterator it = easyrecDataModel.getUserIDs(); it.hasNext() && !executionControl.isAbortRequested(); ) { executionControl.updateProgress(currentStep++, totalSteps, "Saving Recommendations..."); long userId = it.nextLong(); List<RecommendedItem> recommendations = recommender.recommend(userId, config.getNumberOfRecs()); if (recommendations.isEmpty()) { logger.debug("User " + userId + " : no recommendations"); } // print the list of recommendations for each for (RecommendedItem recommendedItem : recommendations) { logger.debug("User " + userId + " : " + recommendedItem); Integer itemToId = (int) recommendedItem.getItemID(); Integer itemToType = itemDAO.getItemTypeIdOfItem(config.getTenantId(), itemToId); ItemVO<Integer, Integer> fromItem = new ItemVO<Integer, Integer>(config.getTenantId(), (int) userId, userType); Double recommendationStrength = (double) recommendedItem.getValue(); ItemVO<Integer, Integer> toItem = new ItemVO<Integer, Integer>(config.getTenantId(), itemToId, itemToType); ItemAssocVO<Integer,Integer> itemAssoc = new ItemAssocVO<Integer,Integer>( config.getTenantId(), fromItem, assocType, recommendationStrength, toItem, sourceType, "Mahout Boolean Generator", viewType, null, execution); itemAssocService.insertOrUpdateItemAssoc(itemAssoc); stats.incNumberOfRulesCreated(); } } } }