/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.generator;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.math.BinaryPeakFinder;
import com.rapidminer.tools.math.Complex;
import com.rapidminer.tools.math.FastFourierTransform;
import com.rapidminer.tools.math.Peak;
import com.rapidminer.tools.math.PeakFinder;
import com.rapidminer.tools.math.SpectrumFilter;
import com.rapidminer.tools.math.WindowFunction;
/**
 * Factory class to produce new attributes based on the fourier synthesis of the
 * label mapped on an attribute dimension.
 *
 * <p>
 * Typical usage is a two step process: {@link #getAttributePeaks(ExampleSet, Attribute, Attribute)}
 * determines the most evident peaks of the frequency spectrum, and
 * {@link #generateSinusFunctions(ExampleSet, List, Random)} turns these peaks
 * into new <code>sin(frequency * attribute)</code> attributes.
 * </p>
 *
 * @author Ingo Mierswa
 */
public class SinusFactory {

    /**
     * Indicates the min evidence factor. A peak is only used if its evidence
     * is greater than this fraction of the reference (first / highest) evidence.
     */
    private static final double MIN_EVIDENCE = 0.2d;

    /** The names of the adaption types, indexed by the constants below. */
    public static final String[] ADAPTION_TYPES = { "uniformly", "uniformly_without_nu", "gaussian" };

    /** Frequencies are spread uniformly in <code>frequency +/- epsilon * frequency</code>. */
    public static final int UNIFORMLY = 0;

    /** Frequencies are spread uniformly in <code>frequency +/- epsilon</code>. */
    public static final int UNIFORMLY_WITHOUT_NU = 1;

    /** Frequencies are drawn as <code>frequency + gaussian * epsilon</code>. */
    public static final int GAUSSIAN = 2;

    /**
     * Generates this number of attributes for each peak. Necessary because
     * the FT does not deliver the correct frequency (aliasing, leakage) in all
     * cases. In later releases this should be replaced by a gradient search or
     * an evolutionary search for the correct value.
     */
    private int attributesPerPeak = 3;

    /**
     * Width of the range around a peak frequency in which the additional
     * frequencies are generated. Depending on the adaption type it is used
     * relative to the frequency ({@link #UNIFORMLY}), as absolute half width
     * ({@link #UNIFORMLY_WITHOUT_NU}) or as factor of a gaussian random number
     * ({@link #GAUSSIAN}).
     */
    private double epsilon = 0.1;

    /** Indicates the type of frequency adaption. */
    private int adaptionType = UNIFORMLY;

    /**
     * The maximal number of generated attributes for each possible attribute.
     * Corresponds to the highest peaks in the frequency spectrum of the label
     * in the source attribute's space.
     */
    private final int maxPeaks;

    /** The fast fourier transformation calculator. */
    private final FastFourierTransform fft;

    /**
     * The spectrum filter type which should be applied on the spectrum after
     * the fourier transformation.
     */
    private final SpectrumFilter filter;

    /** The algorithm to find the peaks in the frequency spectrum. */
    private final PeakFinder peakFinder;

    /**
     * Creates a new sinus factory which delivers at most <code>maxPeaks</code>
     * peaks per attribute. Uses Blackman-Harris window function, no spectrum
     * filter and a binary peak finder as default. The default adaption type
     * is {@link #UNIFORMLY} with an epsilon of 0.1. The factory produces three
     * attributes for each peak as default.
     *
     * @param maxPeaks the maximal number of peaks inspected per attribute
     */
    public SinusFactory(int maxPeaks) {
        this.maxPeaks = maxPeaks;
        this.fft = new FastFourierTransform(WindowFunction.BLACKMAN_HARRIS);
        this.filter = new SpectrumFilter(SpectrumFilter.NONE);
        this.peakFinder = new BinaryPeakFinder();
    }

    /**
     * Sets the type of frequency adaption.
     *
     * @param type one of {@link #UNIFORMLY}, {@link #UNIFORMLY_WITHOUT_NU} or
     *            {@link #GAUSSIAN}; for any other value the peak frequency is
     *            used unchanged
     */
    public void setAdaptionType(int type) {
        this.adaptionType = type;
    }

    /**
     * Sets the width of the frequency range used for the frequency adaption.
     *
     * @param epsilon the new epsilon value
     */
    public void setEpsilon(double epsilon) {
        this.epsilon = epsilon;
    }

    /**
     * Sets the number of attributes generated for each peak. With a value of 1
     * the uniform adaption types use the peak frequency unchanged; with values
     * smaller than 1 no attributes are generated at all.
     *
     * @param attributesPerPeak the number of attributes per peak
     */
    public void setAttributePerPeak(int attributesPerPeak) {
        this.attributesPerPeak = attributesPerPeak;
    }

    /**
     * Calculates the fourier transformation from the first attribute on the
     * second and inspects at most <code>maxPeaks</code> peaks in the order
     * delivered by sorting the found peaks (presumably highest first). Returns
     * a list with the attribute peaks whose evidence is greater than
     * {@link #MIN_EVIDENCE} times the reference evidence.
     *
     * <p>
     * The evidence of a peak is its magnitude relative to the average
     * magnitude of the spectrum, divided by the standard deviation of the
     * second attribute normalized by its value range. If this value is not a
     * number (NaN), the comparison fails and the peak is skipped.
     * </p>
     *
     * @param exampleSet the example set; all attribute statistics are recalculated
     * @param first the first attribute of the fourier transformation
     * @param second the attribute the resulting peaks belong to
     * @return the list of evident attribute peaks, possibly empty
     * @throws OperatorException if the underlying calculation fails
     */
    public List<AttributePeak> getAttributePeaks(ExampleSet exampleSet, Attribute first, Attribute second) throws OperatorException {
        exampleSet.recalculateAllAttributeStatistics();
        Complex[] result = fft.getFourierTransform(exampleSet, first, second);
        Peak[] spectrum = filter.filter(result, exampleSet.size());

        // average magnitude of the whole spectrum
        double average = 0.0d;
        for (Peak spectrumPeak : spectrum) {
            average += spectrumPeak.getMagnitude();
        }
        average /= spectrum.length;

        List<Peak> peaks = peakFinder.getPeaks(spectrum);
        Collections.sort(peaks);

        // standard deviation of the input attribute normalized by its range
        double inputDeviation = Math.sqrt(exampleSet.getStatistics(second, Statistics.VARIANCE)) / (exampleSet.getStatistics(second, Statistics.MAXIMUM) - exampleSet.getStatistics(second, Statistics.MINIMUM));

        // remember highest peaks
        List<AttributePeak> attributes = new LinkedList<AttributePeak>();
        Iterator<Peak> p = peaks.iterator();
        // NaN marks "no reference evidence yet"
        double maxEvidence = Double.NaN;
        for (int k = 0; k < maxPeaks && p.hasNext(); k++) {
            Peak peak = p.next();
            double evidence = (peak.getMagnitude() / average) * (1.0d / inputDeviation);
            if (Double.isNaN(maxEvidence)) {
                maxEvidence = evidence;
            }
            if (evidence > (MIN_EVIDENCE * maxEvidence)) {
                attributes.add(new AttributePeak(second, peak.getIndex(), evidence));
            }
        }
        return attributes;
    }

    /**
     * Generates a new sinus function attribute for all given attribute peaks.
     * Since the frequency cannot be calculated exactly (leakage, aliasing),
     * several new attributes may be added for each peak. These additional
     * attributes are chosen according to the adaption type (uniformly in
     * epsilon range, uniformly without nu, gaussian with epsilon as standard
     * deviation).
     *
     * <p>
     * Note that the given list is sorted in place. Only peaks with an evidence
     * greater than {@link #MIN_EVIDENCE} times the evidence of the first peak
     * after sorting are used.
     * </p>
     *
     * @param exampleSet the example set the new regular attributes are added to
     * @param attributes the attribute peaks; this list is sorted by this method
     * @param random the random generator used for the gaussian adaption
     * @throws GenerationException if one of the feature generators fails
     */
    public void generateSinusFunctions(ExampleSet exampleSet, List<AttributePeak> attributes, Random random) throws GenerationException {
        if (attributes.isEmpty()) {
            return;
        }
        Collections.sort(attributes);
        double totalMaxEvidence = attributes.get(0).getEvidence();
        double evidenceThreshold = MIN_EVIDENCE * totalMaxEvidence;
        for (AttributePeak ae : attributes) {
            if (!(ae.getEvidence() > evidenceThreshold)) {
                continue;
            }
            for (int i = 0; i < attributesPerPeak; i++) {
                double frequency = adaptFrequency(ae.getFrequency(), i, random);

                // frequency constant
                List<Attribute> frequencyResult = generateAttribute(exampleSet, new ConstantGenerator(frequency));

                // scaling with frequency
                FeatureGenerator scale = new BasicArithmeticOperationGenerator(BasicArithmeticOperationGenerator.PRODUCT);
                scale.setArguments(new Attribute[] { frequencyResult.get(0), ae.getAttribute() });
                List<Attribute> scaleResult = generateAttribute(exampleSet, scale);

                // calc sin
                FeatureGenerator sin = new TrigonometricFunctionGenerator(TrigonometricFunctionGenerator.SINUS);
                sin.setArguments(new Attribute[] { scaleResult.get(0) });
                List<Attribute> sinResult = generateAttribute(exampleSet, sin);

                // only the final sinus attributes become regular attributes
                for (Attribute attribute : sinResult) {
                    exampleSet.getAttributes().addRegular(attribute);
                }
            }
        }
    }

    /**
     * Delivers the frequency for the attribute with the given index according
     * to the current adaption type. For unknown adaption types, and for the
     * uniform types if only one attribute per peak is generated, the frequency
     * is returned unchanged.
     */
    private double adaptFrequency(double frequency, int index, Random random) {
        switch (adaptionType) {
            case UNIFORMLY:
                if (attributesPerPeak != 1) {
                    return (double) index / (double) (attributesPerPeak - 1) * 2.0d * epsilon * frequency + (frequency - epsilon * frequency);
                }
                return frequency;
            case UNIFORMLY_WITHOUT_NU:
                if (attributesPerPeak != 1) {
                    return (double) index / (double) (attributesPerPeak - 1) * 2.0d * epsilon + (frequency - epsilon);
                }
                return frequency;
            case GAUSSIAN:
                return random.nextGaussian() * epsilon + frequency;
            default:
                return frequency;
        }
    }

    /** Applies the single given generator on the example table of the example set and returns the generated attributes. */
    private List<Attribute> generateAttribute(ExampleSet exampleSet, FeatureGenerator generator) throws GenerationException {
        List<FeatureGenerator> generators = new LinkedList<FeatureGenerator>();
        generators.add(generator);
        return FeatureGenerator.generateAll(exampleSet.getExampleTable(), generators);
    }
}