/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel.logistic;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.learner.functions.kernel.AbstractMySVMLearner;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.Kernel;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.svm.SVMInterface;
import com.rapidminer.parameter.UndefinedParameterError;
/**
* The main class for the Kernel Logistic Regression.
*
* @author Stefan Rueping
*/
public class KLR implements SVMInterface {
protected Kernel kernel;
protected SVMExamples examples;
int n; // #examples
double[] target;
int n1;
int n2;
// internal vars
double[] alphas;
double[] hCache;
boolean[] atBound;
int iUp;
int iLow;
double b;
double bUp;
double bLow;
// user-defined params
double tol = 1e-3; // Genauigkeit f?r b (convergence_epsilon)
double C = 1.0d;
double epsilon; // Genauigkeit f?r t (is_zero)
double mu; // Genauigkeit f?r bound (is_zero)
int maxIterations = 100000; // maximale Anzahl Iterationen im
// Newton-Raphson-Schritt
public KLR() {}
public KLR(Operator paramOperator) throws UndefinedParameterError {
C = paramOperator.getParameterAsDouble(AbstractMySVMLearner.PARAMETER_C);
tol = paramOperator.getParameterAsDouble(AbstractMySVMLearner.PARAMETER_CONVERGENCE_EPSILON);
maxIterations = paramOperator.getParameterAsInt(AbstractMySVMLearner.PARAMETER_MAX_ITERATIONS);
}
public void init(Kernel new_kernel, SVMExamples new_examples) {
kernel = new_kernel;
examples = new_examples;
// copy examples to local vars
target = examples.get_ys();
alphas = examples.get_alphas();
n = examples.count_examples();
epsilon = C * 1e-10; // C*getParam("is_zero")
mu = epsilon;
n1 = examples.count_pos_examples();
n2 = n - n1;
};
final double dG(double alpha) {
// return dG(alpha/C)
return Math.log(alpha / (C - alpha));
};
final double dPhi(double t, int i, int j, double ai, double aj, double Kii, double Kij, double Kjj) {
// return Hi(alpha(t)) - Hj(alpha(t))
double result = 0.0;
double ydG = 0.0;
if (target[i] > 0) {
result = Kii - Kij;
ydG = dG(ai + t) - dG(ai);
} else {
result = Kij - Kii;
ydG = dG(ai) - dG(ai - t);
};
if (target[j] > 0) {
result = Kjj - Kij;
ydG -= dG(aj - t) - dG(aj);
} else {
result = Kij - Kjj;
ydG -= dG(aj) - dG(aj + t);
};
// result *= t;
result = t * (Kii - 2.0 * Kij + Kjj);
result += ydG;
result += hCache[i] - hCache[j]; // value for t=0
return result;
};
final double d2Phi(double t, int i, int j, double ai, double aj, double Kii, double Kij, double Kjj) {
double result;
double atilde;
if (target[i] > 0) {
atilde = ai + t;
} else {
atilde = ai - t;
};
result = C / ((atilde) * (C - atilde));
if (target[j] > 0) {
atilde = aj - t;
} else {
atilde = aj + t;
};
result += C / ((atilde) * (C - atilde));
result += Kii - 2.0 * Kij + Kjj; // eta
return result;
};
protected boolean takeStep(int i, int j) {
double[] kernel_row_i = kernel.get_row(i);
double[] kernel_row_j = kernel.get_row(j);
double aio = alphas[i];
double ajo = alphas[j];
double yi = target[i];
double yj = target[j];
double Hio = hCache[i];
double Hjo = hCache[j];
double Kii = kernel_row_i[i];
double Kij = kernel_row_i[j];
double Kjj = kernel_row_j[j];
int takestepFlag = 1;
// Compute t_min and t_max
double t_max;
double t_min;
double t_tmp;
if (yi > 0) {
t_min = mu / 2.0 - aio;
t_max = C - mu / 2.0 - aio;
} else {
t_max = aio - mu / 2.0;
t_min = aio - (C - mu / 2.0);
};
if (yj > 0) {
t_tmp = ajo - mu / 2.0;
if (t_tmp < t_max) {
t_max = t_tmp;
};
t_tmp = ajo - (C - mu / 2.0);
if (t_tmp > t_min) {
t_min = t_tmp;
};
} else {
t_tmp = mu / 2.0 - ajo;
if (t_tmp > t_min) {
t_min = t_tmp;
};
t_tmp = C - mu / 2.0 - ajo;
if (t_tmp < t_max) {
t_max = t_tmp;
};
};
if (t_max - t_min <= epsilon) {
return false;
};
double t = 0.0;
double the_dPhi = Hio - Hjo;
double the_d2Phi = Kii - 2.0 * Kij + Kjj + C / (aio * (C - aio)) + C / (ajo * (C - ajo));
double dPhi_left = 0.0;
double d2Phi_left = 0.0;
double dPhi_right = 0.0;
double d2Phi_right = 0.0;
double t_left = 0.0;
double t_right = 0.0;
if (the_dPhi > 0) {
// Compute dPhi(t) and d2Phi(t) at t = t_min and denoted as
// dPhi_left, d2Phi_left
dPhi_left = dPhi(t_min, i, j, aio, ajo, Kii, Kij, Kjj);
d2Phi_left = d2Phi(t_min, i, j, aio, ajo, Kii, Kij, Kjj);
if (dPhi_left < 0) {
t_left = t_min;
t_right = t;
dPhi_right = the_dPhi;
d2Phi_right = the_d2Phi;
} else {
t = t_min;
takestepFlag = 2;
};
} else if (the_dPhi < 0) {
// Compute dPhi(t) and d2Phi(t) at t = t_max and denoted as
// dPhi_right, d2Phi_right
dPhi_right = dPhi(t_max, i, j, aio, ajo, Kii, Kij, Kjj);
d2Phi_right = d2Phi(t_max, i, j, aio, ajo, Kii, Kij, Kjj);
if (dPhi_right > 0) {
t_left = t;
t_right = t_max;
dPhi_left = the_dPhi;
d2Phi_left = the_d2Phi;
} else {
t = t_max;
takestepFlag = 2;
};
} else {
return false;
};
double t0;
double dt;
double ai = 0.0;
double aj = 0.0;
double Hi;
double Hj;
if (takestepFlag == 1) {
// Choose a better start point
if (Math.abs(dPhi_left) < Math.abs(dPhi_right)) {
t0 = t_left;
the_dPhi = dPhi_left;
the_d2Phi = d2Phi_left;
} else {
t0 = t_right;
the_dPhi = dPhi_right;
the_d2Phi = d2Phi_right;
}
do {
dt = -the_dPhi / the_d2Phi; // Newton-Raphson step
t = t0 + dt;
if ((t <= t_left) || (t >= t_right)) {
t = (t_left + t_right) / 2.0; // Bisection step
};
ai = aio + t / yi;
aj = ajo - t / yj;
Hi = Hio + t * (Kii - Kij) + yi * (Math.log(ai / (C - ai)) - Math.log(aio / (C - aio)));
Hj = Hjo + t * (Kij - Kjj) + yj * (Math.log(aj / (C - aj)) - Math.log(ajo / (C - ajo)));
the_dPhi = Hi - Hj;
the_d2Phi = Kii - 2 * Kij + Kjj + C / (ai * (C - ai)) + C / (aj * (C - aj));
// Update t_left and t_right
if (the_dPhi * dPhi_left > 0) {
t_left = t;
dPhi_left = the_dPhi;
} else {
t_right = t;
dPhi_right = the_dPhi;
};
t0 = t;
} while ((Math.abs(the_dPhi) > (0.1 * tol)) && (t_left + epsilon < t_right));
} else if (takestepFlag == 2) {
ai = aio + t / yi;
aj = ajo - t / yj;
Hi = Hio + t * (Kii - Kij) + yi * (Math.log(ai / (C - ai)) - Math.log(aio / (C - aio)));
Hj = Hjo + t * (Kij - Kjj) + yj * (Math.log(aj / (C - aj)) - Math.log(ajo / (C - ajo)));
};
if (t == 0) {
return false;
};
// Save ai, aj, Hi, Hj
alphas[i] = ai;
alphas[j] = aj;
// Update the boundary property of indices i and j (evt. clip alpha)
if (ai <= mu) {
if (((target[i] > 0) && (target[j] > 0)) || ((target[i] < 0) && (target[j] < 0))) {
alphas[j] -= (mu - alphas[i]);
} else {
alphas[j] += (mu - alphas[i]);
};
alphas[i] = mu;
atBound[i] = true;
} else if (ai >= C - mu) {
if (((target[i] > 0) && (target[j] > 0)) || ((target[i] < 0) && (target[j] < 0))) {
alphas[j] -= (C - mu - alphas[i]);
} else {
alphas[j] += (C - mu - alphas[i]);
};
alphas[i] = C - mu;
atBound[i] = true;
} else {
atBound[i] = false;
};
if (aj <= mu) {
if (((target[i] > 0) && (target[j] > 0)) || ((target[i] < 0) && (target[j] < 0))) {
alphas[i] -= (mu - alphas[j]);
} else {
alphas[i] += (mu - alphas[j]);
};
alphas[j] = mu;
atBound[j] = true;
} else if (aj >= C - mu) {
if (((target[i] > 0) && (target[j] > 0)) || ((target[i] < 0) && (target[j] < 0))) {
alphas[i] -= (C - mu - alphas[j]);
} else {
alphas[i] += (C - mu - alphas[j]);
};
alphas[j] = C - mu;
atBound[j] = true;
} else {
atBound[j] = false;
};
t = ((alphas[i] - aio) * yi + (ajo - alphas[j]) * yj) / 2.0;
Hi = Hio + t * (Kii - Kij) + yi * (Math.log(alphas[i] / (C - alphas[i])) - Math.log(aio / (C - aio)));
Hj = Hjo + t * (Kij - Kjj) + yj * (Math.log(alphas[j] / (C - alphas[j])) - Math.log(ajo / (C - ajo)));
for (int k = 0; k < n; k++) {
hCache[k] += t * (kernel_row_i[k] - kernel_row_j[k]);
};
hCache[i] = Hi;
hCache[j] = Hj;
// Update i_low, i_up, b_low and b_up over indices of non-boundary group
bUp = Double.NEGATIVE_INFINITY;
bLow = Double.POSITIVE_INFINITY;
iUp = 0;
iLow = 0;
for (int l = 0; l < n; l++) {
if (!atBound[l]) {
if (hCache[l] > bUp) {
bUp = hCache[l];
iUp = l;
};
if (hCache[l] < bLow) {
bLow = hCache[l];
iLow = l;
};
};
};
return true;
}; // end takeStep procedure
public void klr() {
// main routine:
// precond: n, target, examples, kernel intialisiert
// alphas = new double[n];
examples.clearAlphas();
hCache = new double[n];
atBound = new boolean[n];
int i;
int j;
double alpha_pos = C / n1; // !!! N1 = # pos examples
double alpha_neg = C / n2; // !!! N2 = # neg examples
for (i = 0; i < n; i++) {
if (target[i] > 0) {
alphas[i] = alpha_pos;
} else {
alphas[i] = alpha_neg;
};
atBound[i] = false;
};
// initialize all Hcache[i]
double sum_pos_K;
double sum_neg_K;
double[] kernel_row;
bUp = Double.NEGATIVE_INFINITY;
bLow = Double.POSITIVE_INFINITY;
iUp = 0;
iLow = 0;
for (i = 0; i < n; i++) {
sum_pos_K = 0.0;
sum_neg_K = 0.0;
kernel_row = kernel.get_row(i);
for (j = 0; j < n; j++) {
if (target[j] > 0) {
sum_pos_K += kernel_row[j];
} else {
sum_neg_K += kernel_row[j];
};
};
hCache[i] = alpha_pos * sum_pos_K - alpha_neg * sum_neg_K + (target[i]) * dG(alphas[i]);
if (hCache[i] > bUp) {
bUp = hCache[i];
iUp = i;
};
if (hCache[i] < bLow) {
bLow = hCache[i];
iLow = i;
};
};
boolean Flag;
double Hi;
int numChange;
int it = maxIterations;
do {
// takestep with i_low and i_up
while (2.0 * tol < bUp - bLow) {
it--;
Flag = takeStep(iLow, iUp);
if (!Flag) {
break;
};
};
// cout<<endl;
// check optimality of boundary indices
numChange = 0;
for (i = 0; i < n; i++) {
if (atBound[i]) {
Hi = hCache[i];
if (Math.abs(Hi - bLow) >= Math.abs(Hi - bUp)) {
it--;
Flag = takeStep(i, iLow);
if (!Flag) {
it--;
Flag = takeStep(i, iUp);
};
} else {
it--;
Flag = takeStep(i, iUp);
if (!Flag) {
it--;
Flag = takeStep(i, iLow);
};
};
if (!atBound[i]) {
numChange++;
};
};
};
} while ((numChange != 0) && (it > 0));
b = (bLow + bUp) / 2.0;
// double targetf = 0.0;
// double tmp;
// for(int i=0;i<n;i++){
// double* kernel_row_i = kernel->get_row(i);
// for(int j=0;j<n;j++){
// targetf += 0.5*alphas[i]*alphas[j]
// *target[i]*target[j]
// *kernel_row_i[j];
// };
// tmp = alphas[i]/C;
// targetf += C*(tmp*log(tmp)+(1-tmp)*log(1-tmp));
// };
// cout<<"TARGET = "<<targetf<<endl;
};
public double predict(SVMExample sVMExample) {
int i;
SVMExample sv;
double the_sum = examples.get_b();
double alpha;
for (i = 0; i < n; i++) {
alpha = alphas[i];
if (alpha != 0) {
sv = examples.get_example(i);
the_sum += alphas[i] * kernel.calculate_K(sv, sVMExample);
};
};
the_sum = 1.0 / (1.0 + Math.exp(-the_sum));
return the_sum;
};
public void predict(SVMExamples to_predict) {
int i;
double prediction;
SVMExample sVMExample;
// int size = examples.count_examples(); // IM 04/02/12
int size = to_predict.count_examples();
for (i = 0; i < size; i++) {
sVMExample = to_predict.get_example(i);
prediction = predict(sVMExample);
to_predict.set_y(i, prediction);
};
};
public void train() {
// train the klr
klr();
b = -b; // different b in SVM and KLR, set to SVM standard
// copy local vars to training_set
// save y_i*alpha_i instead of alpha_i !!! IM 04/02/12 passiert aber
// nicht :-) !!!
examples.set_b(b);
for (int i = 0; i < n; i++) {
if (target[i] < 0) {
alphas[i] = -alphas[i];
};
};
};
/** Return the weights of the features. */
public double[] getWeights() {
int dim = examples.get_dim();
int examples_total = examples.count_examples();
double[] w = new double[dim];
for (int j = 0; j < dim; j++)
w[j] = 0;
for (int i = 0; i < examples_total; i++) {
double[] x = examples.get_example(i).toDense(dim);
double alpha = alphas[i];
for (int j = 0; j < dim; j++) {
w[j] += alpha * x[j];
}
}
return w;
}
/** Returns the value of b. */
public double getB() {
return examples.get_b();
}
};