Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactored MALSAR-based multitask algorithms and added clustered mult…
…itask logistic regression
- Loading branch information
Showing
13 changed files
with
625 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,312 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Sergey Lisitsyn | ||
* Copyright (C) 2012 Jiayu Zhou and Jieping Ye | ||
*/ | ||
|
||
#include <shogun/lib/malsar/malsar_clustered.h> | ||
#ifdef HAVE_EIGEN3 | ||
#include <shogun/mathematics/Math.h> | ||
#include <iostream> | ||
#include <shogun/lib/external/libqp.h> | ||
|
||
using namespace Eigen; | ||
|
||
namespace shogun | ||
{ | ||
|
||
static double* H_diag_matrix; | ||
static int H_diag_matrix_ld; | ||
|
||
static const double* get_col(uint32_t j) | ||
{ | ||
return H_diag_matrix + j*H_diag_matrix_ld; | ||
} | ||
|
||
malsar_result_t malsar_clustered( | ||
CDotFeatures* features, | ||
double* y, | ||
double rho1, | ||
double rho2, | ||
const malsar_options& options) | ||
{ | ||
int task; | ||
int n_feats = features->get_dim_feature_space(); | ||
SG_SDEBUG("n feats = %d\n", n_feats); | ||
int n_vecs = features->get_num_vectors(); | ||
SG_SDEBUG("n vecs = %d\n", n_vecs); | ||
int n_tasks = options.n_tasks; | ||
SG_SDEBUG("n tasks = %d\n", n_tasks); | ||
|
||
H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks); | ||
for (int i=0; i<n_tasks; i++) | ||
H_diag_matrix[i*n_tasks+i] = 2.0; | ||
H_diag_matrix_ld = n_tasks; | ||
|
||
//SGMatrix<float64_t>::display_matrix(H_diag_matrix, n_tasks, n_tasks); | ||
|
||
int iter = 0; | ||
|
||
// initialize weight vector and bias for each task | ||
MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); | ||
VectorXd Cs = VectorXd::Zero(n_tasks); | ||
for (task=0; task<n_tasks; task++) | ||
{ | ||
int n_pos = 0; | ||
int n_neg = 0; | ||
for (int i=options.ind[task]; i<options.ind[task+1]; i++) | ||
{ | ||
if (y[i] > 0) | ||
n_pos++; | ||
else | ||
n_neg++; | ||
} | ||
Cs[task] = CMath::log(double(n_pos)/n_neg); | ||
} | ||
MatrixXd Ms = MatrixXd::Identity(n_tasks, n_tasks)*options.n_clusters/n_tasks; | ||
MatrixXd IM = Ms; | ||
MatrixXd IMsqinv = Ms; | ||
MatrixXd invEtaMWt = Ms; | ||
|
||
MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; | ||
VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; | ||
MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms; | ||
MatrixXd Mzp_Pz; | ||
|
||
double eta = rho1/rho2; | ||
double c = rho1*eta*(1+eta); | ||
|
||
double t=1, t_old=0; | ||
double gamma=1, gamma_inc=2; | ||
double obj=0.0, obj_old=0.0; | ||
|
||
double* diag_H = SG_MALLOC(double, n_tasks); | ||
double* f = SG_MALLOC(double, n_tasks); | ||
double* a = SG_MALLOC(double, n_tasks); | ||
double* lb = SG_MALLOC(double, n_tasks); | ||
double* ub = SG_MALLOC(double, n_tasks); | ||
double* x = SG_CALLOC(double, n_tasks); | ||
|
||
internal::set_is_malloc_allowed(false); | ||
bool done = false; | ||
while (!done && iter <= options.max_iter) | ||
{ | ||
double alpha = double(t_old - 1)/t; | ||
|
||
// compute search point | ||
Ws = (1+alpha)*Wz - alpha*Wz_old; | ||
Cs = (1+alpha)*Cz - alpha*Cz_old; | ||
Ms = (1+alpha)*Mz - alpha*Mz_old; | ||
|
||
// zero gradient | ||
gWs.setZero(); | ||
gCs.setZero(); | ||
internal::set_is_malloc_allowed(true); | ||
IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms); | ||
IMsqinv = (IM*IM).inverse(); | ||
invEtaMWt = IM.inverse(); | ||
gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv; | ||
gWs.noalias() += 2*c*invEtaMWt.transpose(); | ||
internal::set_is_malloc_allowed(false); | ||
|
||
// compute gradient and objective at search point | ||
double Fs = 0; | ||
for (task=0; task<n_tasks; task++) | ||
{ | ||
for (int i=options.ind[task]; i<options.ind[task+1]; i++) | ||
{ | ||
double aa = -y[i]*(features->dense_dot(i, Ws.col(task).data(), n_feats)+Cs[task]); | ||
double bb = CMath::max(aa,0.0); | ||
|
||
// avoid underflow when computing exponential loss | ||
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs; | ||
double b = -y[i]*(1 - 1/(1+CMath::exp(aa)))/n_vecs; | ||
|
||
gCs[task] += b; | ||
features->add_to_dense_vec(b, i, gWs.col(task).data(), n_feats); | ||
} | ||
} | ||
|
||
// add regularizer | ||
Fs += c*(Ws*invEtaMWt).trace(); | ||
|
||
double Fzp = 0.0; | ||
|
||
int inner_iter = 0; | ||
// line search, Armijo-Goldstein scheme | ||
while (inner_iter <= 100) | ||
{ | ||
// compute singular projection of Ms - gMs/gamma with k | ||
internal::set_is_malloc_allowed(true); | ||
EigenSolver<MatrixXd> eigensolver(Ms-gMs/gamma); | ||
|
||
// solve problem | ||
// min sum_i (s_i - s*_i)^2 s.t. sum_i s_i = k, 0<=s_i<=1 | ||
for (int i=0; i<n_tasks; i++) | ||
{ | ||
diag_H[i] = 2.0; | ||
f[i] = 0.0; | ||
a[i] = 1.0; | ||
lb[i] = 0 - eigensolver.eigenvalues()[i].real(); | ||
ub[i] = 1 - eigensolver.eigenvalues()[i].real(); | ||
x[i] = ub[i]; | ||
} | ||
double b = n_tasks - eigensolver.eigenvalues().sum().real(); | ||
SG_SDEBUG("b = %f\n", b); | ||
SG_SDEBUG("Calling libqp\n"); | ||
libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL); | ||
SG_SDEBUG("Libqp objective = %f\n",problem_state.QP); | ||
SG_SDEBUG("Exit code = %d\n",problem_state.exitflag); | ||
SG_SDEBUG("%d iteration passed\n",problem_state.nIter); | ||
SG_SDEBUG("Solution is \n [ "); | ||
for (int i=0; i<n_tasks; i++) | ||
{ | ||
SG_SDEBUG("%f ", x[i]); | ||
x[i] += eigensolver.eigenvalues()[i].real(); | ||
} | ||
SG_SDEBUG("]\n"); | ||
Map<VectorXd> Mzp_DiagSigz(x,n_tasks); | ||
Mzp_Pz = eigensolver.eigenvectors().real(); | ||
Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose(); | ||
internal::set_is_malloc_allowed(false); | ||
// walk in direction of antigradient | ||
Czp = Cs - gCs/gamma; | ||
|
||
for (int i=0; i<n_tasks; i++) | ||
Mzp_DiagSigz[i] += 1.0; | ||
internal::set_is_malloc_allowed(true); | ||
invEtaMWt = (Mzp_Pz* | ||
(Mzp_DiagSigz.cwiseInverse().asDiagonal())* | ||
Mzp_Pz.transpose())* | ||
Wzp.transpose(); | ||
internal::set_is_malloc_allowed(false); | ||
// compute objective at line search point | ||
Fzp = 0.0; | ||
for (task=0; task<n_tasks; task++) | ||
{ | ||
for (int i=options.ind[task]; i<options.ind[task+1]; i++) | ||
{ | ||
double aa = -y[i]*(features->dense_dot(i, Wzp.col(task).data(), n_feats)+Cs[task]); | ||
double bb = CMath::max(aa,0.0); | ||
|
||
Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs; | ||
} | ||
} | ||
Fzp += c*(Wzp*invEtaMWt).trace(); | ||
|
||
// compute delta between line search point and search point | ||
delta_Wzp = Wzp - Ws; | ||
delta_Czp = Czp - Cs; | ||
delta_Mzp = Mzp - Ms; | ||
|
||
// norms of delta | ||
double nrm_delta_Wzp = delta_Wzp.squaredNorm(); | ||
double nrm_delta_Czp = delta_Czp.squaredNorm(); | ||
double nrm_delta_Mzp = delta_Mzp.squaredNorm(); | ||
|
||
double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3; | ||
|
||
double Fzp_gamma = 0.0; | ||
if (n_feats > n_tasks) | ||
{ | ||
Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + | ||
(delta_Czp.transpose()*gCs).trace() + | ||
(delta_Mzp.transpose()*gMs).trace() + | ||
(gamma/2)*nrm_delta_Wzp + | ||
(gamma/2)*nrm_delta_Czp + | ||
(gamma/2)*nrm_delta_Mzp; | ||
} | ||
else | ||
{ | ||
Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() + | ||
(gCs.transpose()*delta_Czp).trace() + | ||
(gMs.transpose()*delta_Mzp).trace() + | ||
(gamma/2)*nrm_delta_Wzp + | ||
(gamma/2)*nrm_delta_Czp + | ||
(gamma/2)*nrm_delta_Mzp; | ||
} | ||
|
||
// break if delta is getting too small | ||
if (r_sum <= 1e-20) | ||
{ | ||
done = true; | ||
break; | ||
} | ||
|
||
// break if objective at line searc point is smaller than Fzp_gamma | ||
if (Fzp <= Fzp_gamma) | ||
break; | ||
else | ||
gamma *= gamma_inc; | ||
|
||
inner_iter++; | ||
} | ||
|
||
Wz_old = Wz; | ||
Cz_old = Cz; | ||
Mz_old = Mz; | ||
Wz = Wzp; | ||
Cz = Czp; | ||
Mz = Mzp; | ||
|
||
// compute objective value | ||
obj_old = obj; | ||
obj = Fzp; | ||
|
||
// check if process should be terminated | ||
switch (options.termination) | ||
{ | ||
case 0: | ||
if (iter>=2) | ||
{ | ||
if ( CMath::abs(obj-obj_old) <= options.tolerance ) | ||
done = true; | ||
} | ||
break; | ||
case 1: | ||
if (iter>=2) | ||
{ | ||
if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) | ||
done = true; | ||
} | ||
break; | ||
case 2: | ||
if (CMath::abs(obj) <= options.tolerance) | ||
done = true; | ||
break; | ||
case 3: | ||
if (iter>=options.max_iter) | ||
done = true; | ||
break; | ||
} | ||
|
||
iter++; | ||
t_old = t; | ||
t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); | ||
} | ||
SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj); | ||
|
||
SG_FREE(H_diag_matrix); | ||
SG_FREE(diag_H); | ||
SG_FREE(f); | ||
SG_FREE(a); | ||
SG_FREE(lb); | ||
SG_FREE(ub); | ||
SG_FREE(x); | ||
|
||
SGMatrix<float64_t> tasks_w(n_feats, n_tasks); | ||
for (int i=0; i<n_feats; i++) | ||
{ | ||
for (task=0; task<n_tasks; task++) | ||
tasks_w[i] = Wzp(i,task); | ||
} | ||
SGVector<float64_t> tasks_c(n_tasks); | ||
for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i]; | ||
return malsar_result_t(tasks_w, tasks_c); | ||
}; | ||
}; | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Sergey Lisitsyn | ||
* Copyright (C) 2012 Jiayu Zhou and Jieping Ye | ||
*/ | ||
|
||
#ifndef MALSAR_CLUSTERED_H_ | ||
#define MALSAR_CLUSTERED_H_ | ||
#include <shogun/lib/config.h> | ||
#ifdef HAVE_EIGEN3 | ||
#include <shogun/lib/malsar/malsar_options.h> | ||
#include <shogun/features/DotFeatures.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
malsar_result_t malsar_clustered( | ||
CDotFeatures* features, | ||
double* y, | ||
double rho1, | ||
double rho2, | ||
const malsar_options& options); | ||
|
||
}; | ||
#endif | ||
#endif /* ----- #ifndef MALSAR_CLUSTERED_H_ ----- */ |
Oops, something went wrong.