package org.streaminer.stream.classifier.tree;
import org.streaminer.stream.data.Data;
import org.streaminer.stream.learner.Learner;
import org.streaminer.stream.learner.LearnerUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Learns a {@link HoeffdingTreeModel} representing a Hoeffding Tree as described in<br/>
 * <br/>
 * <b>Domingos/Hulten/2000: Mining High-Speed Data Streams</b>
*
* @author Tobias Beckers
*
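 * <p>
 * A minimal usage sketch (the attribute name {@code "play"} and the example collections are assumptions
 * for illustration; adapt them to the concrete {@link Data} implementation in use):
 * </p>
 * <pre>{@code
 * HoeffdingTree learner = new HoeffdingTree("play");   // "play" is the label attribute
 * learner.initialize(initialExamples);                 // Collection<Data> covering all features, values and labels
 * for (Data example : stream) {
 *     learner.learn(example);                          // incremental update of the Hoeffding tree
 * }
 * HoeffdingTreeModel model = learner.getModel();       // a valid model at any time
 * }</pre>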
*/
public class HoeffdingTree implements Learner<Data, HoeffdingTreeModel> {
    /** The serial version UID */
private static final long serialVersionUID = -2578445642027682454L;
private static final Logger logger = LoggerFactory.getLogger(HoeffdingTree.class);
// static fields to declare the quality criterion
/** use this field to define the quality criterion to be information gain */
public static final QualityCriterion INFORMATION_GAIN = QualityCriterion.INFO_GAIN;
/** use this field to define the quality criterion to be Gini-Index gain*/
public static final QualityCriterion GINI_INDEX = QualityCriterion.GINI_INDEX;
// default values
    /** The default minimum number of distinct values a feature must have to be treated as numeric: 5 */
public static final int DEFAULT_DECLARE_NUMERIC = 5;
/** The default quantiles for finding thresholds for numeric features: 0.25, 0.5, 0.75, 1 */
public static final Double[] DEFAULT_QUANTILES = {0.25, 0.5, 0.75, 1d};
    /** The default value for delta. Delta is one minus the probability of choosing the correct feature at each node: 0.0000001 */
public static final double DEFAULT_DELTA = 0.0000001;
/** defines the default quality criterion: information gain */
public static final QualityCriterion DEFAULT_QUALITY_CRITERION = INFORMATION_GAIN;
// variables to update the model
/** The resulting HoeffdingTree which is incrementally updated */
protected HoeffdingTreeModel tree;
/** contains all possible features and for each feature all possible values */
protected Map<String, List<Serializable>> featureValuePairs;
/** all possible labels */
protected List<Serializable> labels;
    /** contains all current leaves and, for each leaf, the corresponding counters */
    protected Map<HoeffdingTreeNode, NodeData> nodeDataPairs;
// parameter
    /** the numerator of the fraction under the square root in the Hoeffding bound (used for epsilon computation) */
protected double numerator;
/** the chosen {@link QualityCriterion} */
protected QualityCriterion qualityCriterion;
    /** the minimum quality value that the best feature must be guaranteed to reach before a node is split. Default: the lowest gain of the chosen criterion */
protected double minQualityForSplit;
// variables
/** indicates if the learner is initialized so it has all necessary information for learning */
protected boolean initialized;
    /** the name of the attribute that contains the class label */
    protected String labelAttribute;
    /**
     * Empty constructor to instantiate the learner without initializing it. Calls to {@link #learn(Data)} will
     * have no effect until the learner is initialized.<br/>
     * It is recommended to instantiate the learner with the constructors
     * {@link #HoeffdingTree(Collection)} or {@link #HoeffdingTree(Map, Map, List, double, QualityCriterion, double)}.
     */
    public HoeffdingTree(){
        this.labelAttribute = null;
        this.initialized = false;
    }
    /**
     * Instantiates the learner without initializing it, but sets the label attribute.
     * @param label the name of the label attribute
     */
    public HoeffdingTree( String label ) {
        this.labelAttribute = label;
        this.initialized = false;
    }
/**
* @return the labelAttribute
*/
public String getLabelAttribute() {
return labelAttribute;
}
/**
* @param labelAttribute the labelAttribute to set
*/
public void setLabelAttribute(String labelAttribute) {
this.labelAttribute = labelAttribute;
}
/**
* Most extensive constructor to define all parameters by setting them manually
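     * <p>
     * A minimal sketch of building the parameters by hand (the feature names, values, thresholds and labels below
     * are assumptions for illustration only):
     * </p>
     * <pre>{@code
     * Map<String, Class<?>> types = new HashMap<String, Class<?>>();
     * types.put("outlook", String.class);        // nominal feature
     * types.put("temperature", Double.class);    // numeric feature
     * Map<String, List<Serializable>> values = new HashMap<String, List<Serializable>>();
     * values.put("outlook", Arrays.<Serializable>asList("sunny", "overcast", "rain"));
     * values.put("temperature", Arrays.<Serializable>asList(15.0, 20.0, 25.0, 30.0)); // thresholds
     * List<Serializable> labels = Arrays.<Serializable>asList("yes", "no");
     * HoeffdingTree learner = new HoeffdingTree(types, values, labels,
     *         HoeffdingTree.DEFAULT_DELTA, HoeffdingTree.INFORMATION_GAIN,
     *         HoeffdingTree.INFORMATION_GAIN.getLowestGain());
     * }</pre>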
     * @param featureTypes maps each feature to the Java type of its values, e.g. {@code Double.class} for numeric
     *          features; use {@link LearnerUtils#getTypes(Collection)} to create this automatically
     * @param featureValuePairs must contain all possible features and, for each feature, all possible values
     *          (or thresholds, if numeric). Use {@link #constructFeatureValuePairs(Collection, Map, Double...)}
     *          to create this automatically.
     * @param labels must contain all possible class labels. Use {@link #getAllValues(Collection, String)} with the
     *          label attribute to extract them automatically.
     * @param delta the error bound parameter: one minus the desired probability of choosing the correct
     *          attribute at any given node. Default value: {@link HoeffdingTree#DEFAULT_DELTA}
     * @param qualityCriterion a {@link QualityCriterion}, e.g. {@link HoeffdingTree#INFORMATION_GAIN}.
     *          Default: {@link HoeffdingTree#DEFAULT_QUALITY_CRITERION}
     * @param minQualityForSplit used for pre-pruning: a node is not split until the Hoeffding bound guarantees
     *          that the best feature has at least the specified quality. Default should be
     *          {@link QualityCriterion#getLowestGain()} of the chosen criterion.
*/
public HoeffdingTree(Map<String, Class<?>> featureTypes, Map<String, List<Serializable>> featureValuePairs,
List<Serializable> labels, double delta, QualityCriterion qualityCriterion, double minQualityForSplit) {
this.initialize(featureTypes, featureValuePairs, labels, delta, qualityCriterion, minQualityForSplit);
}
/**
     * Constructor that sets all parameters to their defaults and initializes the learner from the given examples.
     * @param examples a collection of {@link Data} items containing all possible features, all possible values for
     *          each nominal feature (and as many as possible for numeric features) and all possible labels.
*/
public HoeffdingTree(Collection<Data> examples) {
this.initialize(examples);
}
/* (non-Javadoc)
     * @see org.streaminer.stream.learner.Learner#init()
*/
@Override
public void init() {
this.initialize( new HashSet<Data>() );
}
/**
* Initializes the learner with all necessary information. See constructor
* {@link #HoeffdingTree(Map, Map, List, double, QualityCriterion, double)} to get a description of all parameters.
* @see #HoeffdingTree(Map, Map, List, double, QualityCriterion, double)
*/
public void initialize(Map<String, Class<?>> featureTypes, Map<String, List<Serializable>> featureValuePairs,
List<Serializable> labels, double delta, QualityCriterion qualityCriterion, double minQualityForSplit) {
this.qualityCriterion = qualityCriterion;
this.minQualityForSplit = minQualityForSplit;
this.labels = labels;
this.featureValuePairs = featureValuePairs;
this.tree = new HoeffdingTreeModel(featureTypes, labels.get(0));
        this.nodeDataPairs = new HashMap<HoeffdingTreeNode, NodeData>();
// let the remaining features for the root be the initial features
        this.nodeDataPairs.put(this.tree.getLeaf(null), new NodeData( this.labelAttribute, new ArrayList<String>(featureValuePairs.keySet())));
double range = qualityCriterion.getHighestGain(labels.size());
this.numerator = range*range * Math.log(1d/delta);
this.initialized = true;
logger.debug("HoeffdingTree learner initialized with\n quality criterion: "+this.qualityCriterion+" gain,\n " +
"prepruning min quality value: "+this.minQualityForSplit+",\n " +
"delta: "+delta+"\n " +
"valid class labels: "+this.labels+",\n " +
"feature types: "+featureTypes+",\n "+
"valid (nominal) / threshold (numeric) feature values: "+this.featureValuePairs+",\n " +
"highest value of quality criterion (for hoeffding bound computation): "+range);
}
/**
     * Initializes the learner by extracting all necessary information from the specified initial example set.
     * If the collection is empty, the learner stays uninitialized and a warning is logged.
     * See constructor {@link #HoeffdingTree(Collection)} for more information.
* @see #HoeffdingTree(Collection)
*/
public void initialize(Collection<Data> examples) {
        if (examples == null || examples.isEmpty()) {
            logger.warn("Cannot initialize HoeffdingTree from an empty example collection.");
            return;
        }
        Map<String, Class<?>> featureTypes = LearnerUtils.getTypes( examples );
        this.initialize( featureTypes,
                constructFeatureValuePairs( examples, featureTypes, DEFAULT_QUANTILES ),
                getAllValues( examples, labelAttribute ),
                DEFAULT_DELTA,
                DEFAULT_QUALITY_CRITERION,
                DEFAULT_QUALITY_CRITERION.getLowestGain());
}
/**
* Method for convenient initialization.<br/>
* Returns a list of all values - including duplicates - the specified feature can have in the specified collection of examples.
* @param examples all examples
     * @param feature the feature whose values are wanted
* @return a list of all values - including duplicates - the specified feature can have in the specified collection of examples.
*/
public List<Serializable> getAllValues(Collection<Data> examples, String feature) {
List<Serializable> allValues = new ArrayList<Serializable>();
        // hidden features are skipped entirely (the check does not depend on the example)
        if (LearnerUtils.isHidden(feature)) {
            return allValues;
        }
        for (Data example : examples) {
            Serializable value = example.get(feature);
            if (value != null) {
                allValues.add(value);
            }
        }
return allValues;
}
/**
* Method for convenient initialization.<br/>
* Extracts all values from all nominal features and estimates threshold values for numeric features.<br/>
* Returns all (discretized) possible values for each feature.
* @param examples examples must contain all possible features and all possible values for nominal features
     * @param featureTypes maps each feature to the Java type of its values, e.g. {@code Double.class} for numeric
     *          features; use {@link LearnerUtils#getTypes(Collection)} to create this automatically
     * @param quantileThresholds quantiles used to find the thresholds for numeric features. Default: {@link HoeffdingTree#DEFAULT_QUANTILES}
* @return Returns all (discretized) possible values for each feature.
*/
public Map<String, List<Serializable>> constructFeatureValuePairs(Collection<Data> examples, Map<String, Class<?>> featureTypes, Double ... quantileThresholds ) {
Map<String, List<Serializable>> featureValuePairs = new HashMap<String, List<Serializable>>();
Set<String> allFeatures = getAllFeatures(examples);
for (String feature : allFeatures) {
List<Serializable> allValuesDuplicate = getAllValues(examples, feature);
if(featureTypes.get(feature) != Double.class) {
List<Serializable> allValuesUnique = new ArrayList<Serializable>(new HashSet<Serializable>(allValuesDuplicate));
featureValuePairs.put(feature, allValuesUnique);
}
else {
featureValuePairs.put(feature, getNumericThresholds(allValuesDuplicate, quantileThresholds));
}
}
return featureValuePairs;
}
/**
* Method for convenient initialization.<br/>
* Returns a set of all features seen at least once in the specified collection of examples.
     * @param examples a collection of {@link Data} items to extract the features from
* @return a set of all features seen at least once in the specified collection of examples.
*/
public Set<String> getAllFeatures(Collection<Data> examples) {
Set<String> features = new HashSet<String>();
for (Data example : examples) {
for( String feature : LearnerUtils.getAttributes( example ) ){
if( ! feature.equals( labelAttribute ) && ! LearnerUtils.isHiddenOrSpecial( feature ))
features.add( feature );
}
}
return features;
}
/**
* Method for convenient initialization.<br/>
* Returns a list of thresholds for testing values of the specified domain.<br/>
* All seen values - including duplicate values - should be specified.<br/>
     * Quantiles must lie in the interval [0,1]. The number of specified quantiles defines the number of returned thresholds
     * (if enough distinct values exist). If no quantiles are specified, {@link HoeffdingTree#DEFAULT_QUANTILES} are used.
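     * <pre>{@code
     * // illustrative example (the values below are assumptions):
     * // sorted values: 1, 2, 2, 3, 5, 8, 9, 10              (8 values, duplicates included)
     * // quantiles 0.25, 0.5, 0.75, 1.0  ->  positions 1, 3, 5, 7  ->  thresholds [2, 3, 8, 10]
     * }</pre>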
* @param values all seen values, including duplicates
* @param quantileThresholds quantiles to find the best thresholds. Default: {@link HoeffdingTree#DEFAULT_QUANTILES}
* @return Returns a list of thresholds for numeric features
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public static List<Serializable> getNumericThresholds(Collection<Serializable> values, Double ... quantileThresholds) {
        // fall back to the default quantiles if none were specified; work on a copy so the caller's array is not re-ordered
        Double[] quantiles = (quantileThresholds == null || quantileThresholds.length == 0)
                ? DEFAULT_QUANTILES.clone()
                : quantileThresholds.clone();
        Arrays.sort(quantiles);
        List<Serializable> thresholds = new ArrayList<Serializable>();
        if (values.isEmpty()) {
            return thresholds;
        }
        List<Serializable> orderedValues = new ArrayList<Serializable>(values);
        Collections.sort((List) orderedValues);
        for (int i = 0; i < quantiles.length; i++) {
            // index = quantile * #values - 1
            int nextThresholdPosition = Math.round( (float)( quantiles[i] * values.size() ) ) - 1;
            if (nextThresholdPosition < 0) {
                nextThresholdPosition = 0;
            }
            if (nextThresholdPosition >= values.size()) {
                nextThresholdPosition = values.size() - 1;
            }
Serializable nextThreshold = orderedValues.get(nextThresholdPosition);
if (thresholds.isEmpty() || !thresholds.get(thresholds.size()-1).equals(nextThreshold)) {
thresholds.add(nextThreshold);
}
}
return thresholds;
}
/**
     * Maps the specified numeric value to one of the specified nominal (threshold) values: the smallest value that is
     * greater than or equal to the numeric value, or the largest value if the numeric value exceeds all of them.<br/>
     * Numeric and nominal values must be directly comparable.
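     * <pre>{@code
     * // illustrative example (the threshold values are assumptions):
     * numericToNominal(Arrays.<Comparable>asList(2.0, 5.0, 9.0), 3.7);   // returns 5.0
     * numericToNominal(Arrays.<Comparable>asList(2.0, 5.0, 9.0), 12.0);  // returns 9.0 (largest threshold)
     * }</pre>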
* @param nominalValues possible mappings
* @param numericValue a numeric value
* @return the nominal value to which the numeric value is mapped.
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public static Comparable numericToNominal(Collection<Comparable> nominalValues, Comparable numericValue) {
Comparable smallestNominal = null;
for (Comparable nominal : nominalValues) {
if ( numericValue.compareTo(nominal) <= 0 ) {
if (smallestNominal == null || nominal.compareTo(smallestNominal) < 0) { smallestNominal = nominal; }
}
}
if (smallestNominal == null) {
// numeric value was higher than the highest nominal value, so return the highest nominal value
for (Comparable nominal : nominalValues) {
if (smallestNominal == null || smallestNominal.compareTo(nominal) < 0) { smallestNominal = nominal; }
}
}
return smallestNominal;
}
/**
     * Returns the tree learned so far. At any time during the learning process this method returns a valid tree.
     * @return the tree learned so far
*/
@Override
public HoeffdingTreeModel getModel() {
return this.tree;
}
/**
* This method implements 'The Hoeffding tree algorithm' as presented in Table 1 on page 3 in<br/>
     * <b>Domingos/Hulten/2000: Mining High-Speed Data Streams</b> as precisely as possible.<br/>
* The comments in the method are taken from the original pseudo code implementation and each describes the following line(s).
*/
@Override
// For each example (x, y_k) in S
public void learn(Data example) {
if (!this.initialized) {
logger.warn( "Learner has not been initialized!" );
return;
}
// Sort (x, y) into a leaf l using HT.
HoeffdingTreeNode leaf = this.tree.getLeaf(example);
        NodeData leafData = this.nodeDataPairs.get(leaf);
if (leafData.getRemainingFeatures().isEmpty()) {
return;
}
// For each x_ij in x such that X_i is in X_l : Increment n_ijk(l).
leafData.addExample(example);
// Label l with the majority class among the examples seen so far at l.
leaf.setLabel(leafData.getMajorityClass());
// If the examples seen so far at l are not all of the same class, then
if (!leafData.isUniformLabel()) {
// Compute G_l(X_i) for each attribute X_i in X_l - {X_null} using the counts n_ijk(l).
Map<String, Double> featureQuality = leafData.getQualityAllAttributes();
// Let X_a be the attribute with highest G_l.
String x_a = LearnerUtils.getMaximumKey( featureQuality ); //getBestFeature(featureQuality);
double x_aG_lValue = featureQuality.get(x_a);
// Let X_b be the attribute with second-highest G_l.
featureQuality.remove(x_a);
String x_b = LearnerUtils.getMaximumKey( featureQuality ); //getBestFeature(featureQuality);
double x_bG_lValue = 0d;
if (x_b != null) {
x_bG_lValue = featureQuality.get(x_b);
}
// Compute epsilon using Equation 1.
// Equation 1 - Hoeffding bound (or additive Chernoff bound): epsilon = sqrt( (R^2 * ln(1/delta)) / 2n )
// where R is the range of the estimated random variable (e.g. 1 for a probability or log2(#classes) for information gain).
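            // Worked example (assumed numbers, purely illustrative): for information gain with 2 classes,
            // R = log2(2) = 1; with delta = 1e-7 and n = 1000 examples at this leaf,
            // epsilon = sqrt( (1 * ln(1e7)) / 2000 ) ~= 0.0898.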
double epsilon = Math.sqrt( this.numerator / (2*leafData.getExampleCount()) );
            // If G_l(X_a) - G_l(X_b) > epsilon and X_a != X_null, then
            // (the second condition realizes the 'X_a != X_null' test: the best feature must beat the
            //  pre-pruning quality minQualityForSplit by at least epsilon)
            if ((x_aG_lValue - x_bG_lValue > epsilon) && ((x_aG_lValue - this.minQualityForSplit) >= epsilon)) {
// Replace l by an internal node that splits on X_a.
leaf.setFeature(x_a);
leaf.setLabel(null);
                this.nodeDataPairs.remove(leaf);
List<String> remainingFeatures = new ArrayList<String>(leafData.getRemainingFeatures());
remainingFeatures.remove(x_a);
// For each branch of the split
for (Serializable value : this.featureValuePairs.get(x_a)) {
// Add a new leaf l_m, and let X_m = X - {X_a} (I suppose it should be 'X_l' instead of 'X').
// Let G_m(X_null) be the G obtained by predicting the most frequent class at l_m. (not explicitly implemented here)
HoeffdingTreeNode newChild = new HoeffdingTreeNode(leafData.getMajorityClass(x_a, value));
leaf.addChild(newChild, value);
// For each class y_k and each value x_ij of each attribute X_i in X_m - {X_null}
// Let n_ijk(l_m) = 0.
                    this.nodeDataPairs.put(newChild, new NodeData(this.labelAttribute, remainingFeatures));
}
logger.debug("split node on feature "+x_a+"\n " +
"examples processed at this node: "+leafData.getExampleCount()+"\n " +
"Epsilon="+epsilon+", G("+x_a+")="+x_aG_lValue+", 2nd best: G("+x_b+")="+x_bG_lValue+"\n " +
"creating "+this.featureValuePairs.get(x_a).size()+" children for values: "+this.featureValuePairs.get(x_a)+"\n " +
"remaining features for each child: "+remainingFeatures+"\n " +
"new tree:\n"+this.tree.toString());
}
}
}
/**
     * Gathers all data needed for the corresponding node, including its remaining features and counters, in one object.
* @author Tobias Beckers
*
*/
protected class NodeData implements Serializable{
private static final long serialVersionUID = 1L;
/** contains all remaining features */
private final List<String> remainingFeatures;
/**
* [remaining feature i][value j for feature i][label k] = number of examples arrived in the corresponding node
* where feature i had value j and class was k.
*/
private final int[][][] counter;
/** the total number of examples arrived at the corresponding node */
private int exampleCount;
/** Contains the number of examples that will have to be seen to recompute the majority class for the corresponding node. */
private int examplesTillNextLabelUpdate;
/** contains the majority class of the last update */
private Serializable lastMajorityClass;
        /** the index (in {@link #remainingFeatures}) of the feature with the fewest values, used to speed up some feature-independent computations */
private final int fewestValuesFeature;
/** specifies if all seen examples in the corresponding node have the same label */
private boolean isUniLabel;
/** specifies the label index, if {@link #isUniLabel} is true */
private int uniLabelIndex;
        /** the name of the label attribute */
        private final String labelAttribute;
/**
* Constructs a node data object for a node with the specified remaining features. Sets all counters to '0'.
* @param remainingFeatures the remaining features for the associated node
*/
public NodeData(String labelAttribute, List<String> remainingFeatures) {
this.labelAttribute = labelAttribute;
this.exampleCount = 0;
this.examplesTillNextLabelUpdate = 0;
this.lastMajorityClass = null;
this.isUniLabel = true;
this.uniLabelIndex = -1;
this.remainingFeatures = remainingFeatures;
this.counter = new int[remainingFeatures.size()][][];
int i = 0;
int fewestValuesCount = -1;
int tempFewestValuesFeature = 0;
for (String feature : remainingFeatures) {
int valuesCount = HoeffdingTree.this.featureValuePairs.get(feature).size();
this.counter[i] = new int[valuesCount][HoeffdingTree.this.labels.size()];
if (fewestValuesCount == -1 || valuesCount < fewestValuesCount) {
tempFewestValuesFeature = i;
fewestValuesCount = valuesCount;
}
i++;
}
this.fewestValuesFeature = tempFewestValuesFeature;
this.initCounter();
}
/**
* Sets all counters to 0.
*/
private void initCounter() {
for (int i = 0; i < this.counter.length; i++) {
for (int j = 0; j < this.counter[i].length; j++) {
for (int k = 0; k < this.counter[i][j].length; k++) {
this.counter[i][j][k] = 0;
}
}
}
}
/**
         * Extracts the needed data from the specified example and adds it to the node data
         * @param example the example whose data should be added to the node data
*/
public void addExample( Data example ) {
// increment example count
this.exampleCount++;
            Serializable label = LearnerUtils.getLabel( example );
            if( label == null ){
                logger.warn( "No label found for example: {}", example );
                return;
            }
            int labelIndex = HoeffdingTree.this.labels.indexOf( label );
            if (labelIndex < 0) {
                // guard against labels that were not part of the initial label set
                logger.warn( "Unknown label '{}' ignored for example: {}", label, example );
                return;
            }
            // update the estimate of when the majority class has to be recomputed:
            // if the label matches the current majority class the recomputation can be deferred, otherwise it moves closer.
            // Note: this estimation relies on a fast equals() of the label type; if equals() is slow, always
            // decrementing examplesTillNextLabelUpdate (a poorer but cheaper estimate) is recommended instead.
            if ( label.equals(this.lastMajorityClass) ) {
                this.examplesTillNextLabelUpdate++;
            } else {
                this.examplesTillNextLabelUpdate--;
            }
            // increment the counters for all remaining features
for (String feature : this.remainingFeatures) {
this.incrementCounter(feature, example.get(feature), labelIndex);
}
// update the uniform label flag
if (this.isUniLabel) {
if (this.uniLabelIndex == -1) {
this.uniLabelIndex = labelIndex;
} else {
if (this.uniLabelIndex != labelIndex) {
this.isUniLabel = false;
this.uniLabelIndex = 0;
}
}
}
}
/**
* Returns all features remaining for the associated node
* @return all remaining features
*/
public List<String> getRemainingFeatures() {
return this.remainingFeatures;
}
/**
         * Increments the counter associated with the specified parameters. Does nothing if the specified feature
         * is not among the remaining features or the value is missing/unknown.
* @param feature a feature
* @param value a valid value for the specified feature
* @param labelIndex the index of the class label
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
        private void incrementCounter(String feature, Serializable value, int labelIndex) {
            int featureIndex = this.remainingFeatures.indexOf(feature);
            if (featureIndex == -1) {
                return;
            }
            if (value == null) {
                // the example does not provide this feature: nothing to count
                return;
            }
            List<Serializable> validValues = HoeffdingTree.this.featureValuePairs.get(feature);
            int valueIndex;
            if (tree.getType(feature) == Double.class) {
                // numeric values are mapped to the threshold (discretized) value they fall under
                valueIndex = validValues.indexOf(numericToNominal((List) validValues, (Comparable<?>) value));
            } else {
                valueIndex = validValues.indexOf(value);
            }
            if (valueIndex < 0) {
                // unknown value (not part of the initial value set): ignore it for this feature
                return;
            }
            this.counter[featureIndex][valueIndex][labelIndex]++;
        }
/**
         * Returns the label of the most frequent class among all examples seen so far in the corresponding node.
* @return the most frequent class label
*/
public Serializable getMajorityClass() {
// Speed up calculation by testing the difference between most and second most frequent label.
if (this.examplesTillNextLabelUpdate <= 0) {
if (this.isUniLabel) {
this.lastMajorityClass = HoeffdingTree.this.labels.get(this.uniLabelIndex);
this.examplesTillNextLabelUpdate = this.exampleCount + 1;
return this.lastMajorityClass;
}
int majorityClass = 0;
int highestFrequency = 0;
int secondHighestFrequency = 0;
for (int label = 0; label < HoeffdingTree.this.labels.size(); label++) {
int currentFrequency = 0;
for (int value = 0; value < this.counter[this.fewestValuesFeature].length; value++) {
currentFrequency += this.counter[this.fewestValuesFeature][value][label];
}
if (currentFrequency > highestFrequency) {
secondHighestFrequency = highestFrequency;
highestFrequency = currentFrequency;
majorityClass = label;
}
else {
if (currentFrequency > secondHighestFrequency) {
secondHighestFrequency = currentFrequency;
}
}
}
this.lastMajorityClass = HoeffdingTree.this.labels.get(majorityClass);
this.examplesTillNextLabelUpdate = highestFrequency - secondHighestFrequency + 1;
}
return this.lastMajorityClass;
}
/**
         * Returns the label of the most frequent class among the examples seen so far in the corresponding node where
* the specified feature had the specified value.
* @param feature a feature
* @param value a valid value of the feature
* @return the most frequent class label for a value of a feature
*/
public Serializable getMajorityClass(String feature, Serializable value) {
int majorityClass = 0;
int highestSum = 0;
int featureIndex = this.remainingFeatures.indexOf(feature);
int valueIndex = HoeffdingTree.this.featureValuePairs.get(feature).indexOf(value);
for (int label = 0; label < HoeffdingTree.this.labels.size(); label++) {
if (this.counter[featureIndex][valueIndex][label] > highestSum) {
highestSum = this.counter[featureIndex][valueIndex][label];
majorityClass = label;
}
}
return HoeffdingTree.this.labels.get(majorityClass);
}
/**
         * Returns the number of examples seen so far
         * @return the number of examples seen so far
*/
public int getExampleCount() {
return this.exampleCount;
}
/**
         * Returns true iff all examples seen so far in the corresponding node had the same class
         * @return true iff all examples seen so far had the same class
*/
public boolean isUniformLabel() {
return this.isUniLabel;
}
/**
         * Returns the quality gain (quality without splitting minus the weighted quality after splitting by the feature),
         * measured with the configured {@link QualityCriterion}, for each of the remaining features
         * @return the quality gain for each of the remaining features
*/
public Map<String, Double> getQualityAllAttributes() {
Map<String, Double> featureQuality = new HashMap<String, Double>();
double qualityNoSplit = this.getQualityNoSplit();
for (String feature : this.getRemainingFeatures()) {
featureQuality.put(feature, qualityNoSplit - this.getQualitySplitByFeature(feature));
}
return featureQuality;
}
/**
* Returns the quality for all examples without splitting
* @return the quality for all examples without splitting
*/
public double getQualityNoSplit() {
int totalExamples = this.getExampleCount();
// compute the probability for each class
double[] totalLabel = this.initArray(new double[HoeffdingTree.this.labels.size()]);
for (int value = 0; value < this.counter[this.fewestValuesFeature].length; value++) {
for (int label = 0; label < totalLabel.length; label++) {
totalLabel[label] += (double)this.counter[this.fewestValuesFeature][value][label];
}
}
for (int i = 0; i < totalLabel.length; i++) {
totalLabel[i] = (double)(totalLabel[i]) / totalExamples;
}
return HoeffdingTree.this.qualityCriterion.getQuality(totalLabel);
}
/**
         * Returns the weighted average quality over all values when splitting by the given feature.
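         * <p>
         * Example with assumed counts: a feature with two values, where value A was seen with 6 examples of class 1
         * and 2 of class 2, and value B with 1 example of class 1 and 3 of class 2 (12 examples in total), yields
         * {@code (8/12) * Q(0.75, 0.25) + (4/12) * Q(0.25, 0.75)} for the configured quality criterion Q.
         * </p>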
* @param feature a possible split feature
         * @return the weighted average quality when splitting by the given feature
*/
public double getQualitySplitByFeature(String feature) {
int totalExamples = this.getExampleCount();
int featureIndex = this.remainingFeatures.indexOf(feature);
// compute quality for each value of the feature and (weighted) sum them up
double qualitySplitByFeature = 0d;
for (int value = 0; value < this.counter[featureIndex].length; value++) {
// compute total number of examples having this value for the feature
int valueExamples = 0;
for (int label = 0; label < this.counter[featureIndex][value].length; label++) {
valueExamples += this.counter[featureIndex][value][label];
}
if (valueExamples > 0) {
// compute the probability for each class
double[] valueLabel = this.initArray(new double[this.counter[featureIndex][value].length]);
for (int label = 0; label < valueLabel.length; label++) {
valueLabel[label] = ((double)this.counter[featureIndex][value][label]) / valueExamples;
}
// add the weighted quality for this value
qualitySplitByFeature += ((double)valueExamples / totalExamples) * HoeffdingTree.this.qualityCriterion.getQuality(valueLabel);
}
}
return qualitySplitByFeature;
}
/**
         * Initializes the specified array by explicitly writing {@code 0d} to each position (Java already zero-initializes double arrays; this is kept for explicitness).
* @param array the array to initialize
* @return the same (but initialized) Object as specified by the parameter
*/
public double[] initArray(double[] array) {
for (int i = 0; i < array.length; i++) {
array[i] = 0d;
}
return array;
}
/**
         * Constructs a String representation of this node data object, listing all counters and the remaining features.
*/
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("NodeData [counter=");
for (int i = 0; i < this.counter.length; i++) {
for (int j = 0; j < this.counter[i].length; j++) {
for (int k = 0; k < this.counter[i][j].length; k++) {
builder.append("\n["+i+"]["+j+"]["+k+"] = "+this.counter[i][j][k]);
}
}
}
builder.append("\n remainingFeatures=");
builder.append(this.remainingFeatures);
builder.append("]");
return builder.toString();
}
}
}