/*
* The MIT License (MIT)
* ------------------
*
* Copyright (c) 2012-2014 Philipp Nolte
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* This software was taken from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier
* and inserted into the loklak class hierarchy to be enhanced and extended
* by @0rb1t3r. After optimization in loklak it was inserted into the net.yacy.cora.bayes
* package. It shall be used to create custom search navigation filters.
* The original copyright notice was copied from the README.md
* from https://github.com/ptnplanet/Java-Naive-Bayes-Classifier/blob/master/README.md
* The original package domain was de.daslaboratorium.machinelearning.classifier
*/
package net.yacy.cora.bayes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
/**
 * A concrete implementation of the abstract Classifier class. The Bayes
 * classifier implements a naive Bayes approach to classifying a given set of
 * features: classify(feat1,...,featN) = argmax(P(cat)*PROD(P(featI|cat)))
 *
 * @author Philipp Nolte
 *
 * @see <a href="http://en.wikipedia.org/wiki/Naive_Bayes_classifier">Naive Bayes classifier</a>
 *
 * @param <T> The feature class.
 * @param <K> The category class.
 */
public class BayesClassifier<T, K> extends Classifier<T, K> {

    /**
     * Calculates the product of all feature probabilities:
     * PROD(P(featI|cat)).
     *
     * @param features The set of features to use.
     * @param category The category to test for.
     * @return The product of all feature probabilities; <code>1.0</code> if
     *         <code>features</code> is empty.
     */
    private float featuresProbabilityProduct(Collection<T> features,
            K category) {
        float product = 1.0f;
        for (T feature : features) {
            product *= this.featureWeighedAverage(feature, category);
        }
        return product;
    }

    /**
     * Calculates the probability that the features can be classified as the
     * category given: the relative frequency of the category multiplied by
     * the product of the per-feature probabilities.
     *
     * @param features The set of features to use.
     * @param category The category to test for.
     * @return The probability that the features can be classified as the
     *         category.
     */
    private float categoryProbability(Collection<T> features, K category) {
        float prior = (float) this.categoryCount(category)
                / (float) this.getCategoriesTotal();
        return prior * featuresProbabilityProduct(features, category);
    }

    /**
     * Computes, for every available category, the probability that the given
     * set of features is classified as that category.
     *
     * The result is ordered by ascending probability. Entries with equal
     * probability appear in reverse order of category iteration, so the last
     * element is the first-iterated category among those with the highest
     * probability.
     *
     * @param features The set of features to use.
     * @return A list of category-probability-entries sorted by ascending
     *         probability; empty if there are no categories.
     */
    private List<Classification<T, K>> categoryProbabilities(
            Collection<T> features) {
        // NOTE(review): assumes getCategories() yields each category once;
        // entries are not de-duplicated here - confirm against Classifier.
        List<Classification<T, K>> probabilities =
                new ArrayList<Classification<T, K>>();
        for (K category : this.getCategories()) {
            probabilities.add(new Classification<T, K>(
                    features, category,
                    this.categoryProbability(features, category)));
        }

        /*
         * Order by probability only. A comparator that breaks ties with a
         * constant -1 violates the Comparator contract
         * (sgn(compare(a, b)) must equal -sgn(compare(b, a))) and makes
         * lookups in a sorted set unreliable. Instead the list is reversed
         * first and then sorted with a stable sort, which places tied entries
         * in reverse iteration order without an illegal comparator.
         */
        Collections.reverse(probabilities);
        Collections.sort(probabilities,
                new Comparator<Classification<T, K>>() {
                    @Override
                    public int compare(Classification<T, K> o1,
                            Classification<T, K> o2) {
                        return Float.compare(
                                o1.getProbability(), o2.getProbability());
                    }
                });
        return probabilities;
    }

    /**
     * Classifies the given set of features.
     *
     * @param features The set of features to classify.
     * @return The classification with the highest probability, or
     *         <code>null</code> if there are no categories to choose from.
     */
    @Override
    public Classification<T, K> classify(Collection<T> features) {
        List<Classification<T, K>> probabilities =
                this.categoryProbabilities(features);
        if (probabilities.isEmpty()) {
            return null;
        }
        // The list is sorted ascending, so the best match is the last entry.
        return probabilities.get(probabilities.size() - 1);
    }

    /**
     * Classifies the given set of features and returns the full details of
     * the classification.
     *
     * @param features The set of features to classify.
     * @return One classification per category, ordered by ascending
     *         probability.
     */
    public Collection<Classification<T, K>> classifyDetailed(
            Collection<T> features) {
        return this.categoryProbabilities(features);
    }
}