Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Multitask Trace Logistic Regression
- Loading branch information
Showing
8 changed files
with
390 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Sergey Lisitsyn | ||
* Copyright (C) 2012 Jiayu Zhou and Jieping Ye | ||
*/ | ||
|
||
#include <shogun/lib/slep/malsar_low_rank.h> | ||
#ifdef HAVE_EIGEN3 | ||
#include <shogun/mathematics/Math.h> | ||
#include <iostream> | ||
|
||
using namespace Eigen; | ||
|
||
namespace shogun | ||
{ | ||
|
||
slep_result_t malsar_low_rank( | ||
CDotFeatures* features, | ||
double* y, | ||
double rho, | ||
const slep_options& options) | ||
{ | ||
int task; | ||
int n_feats = features->get_dim_feature_space(); | ||
SG_SDEBUG("n feats = %d\n", n_feats); | ||
int n_vecs = features->get_num_vectors(); | ||
SG_SDEBUG("n vecs = %d\n", n_vecs); | ||
int n_tasks = options.n_tasks; | ||
SG_SDEBUG("n tasks = %d\n", n_tasks); | ||
|
||
int iter = 0; | ||
|
||
// initialize weight vector and bias for each task | ||
MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); | ||
VectorXd Cs = VectorXd::Zero(n_tasks); | ||
for (task=0; task<n_tasks; task++) | ||
{ | ||
int n_pos = 0; | ||
int n_neg = 0; | ||
for (int i=options.ind[task]; i<options.ind[task+1]; i++) | ||
{ | ||
if (y[i] > 0) | ||
n_pos++; | ||
else | ||
n_neg++; | ||
} | ||
Cs[task] = CMath::log(double(n_pos)/n_neg); | ||
} | ||
|
||
MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; | ||
VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; | ||
|
||
double t=1, t_old=0; | ||
double gamma=1, gamma_inc=2; | ||
double obj=0.0, obj_old=0.0; | ||
|
||
internal::set_is_malloc_allowed(false); | ||
bool done = false; | ||
while (!done && iter <= options.max_iter) | ||
{ | ||
double alpha = double(t_old - 1)/t; | ||
|
||
// compute search point | ||
Ws = (1+alpha)*Wz - alpha*Wz_old; | ||
Cs = (1+alpha)*Cz - alpha*Cz_old; | ||
|
||
// zero gradient | ||
gWs.setZero(); | ||
gCs.setZero(); | ||
|
||
// compute gradient and objective at search point | ||
double Fs = 0; | ||
for (task=0; task<n_tasks; task++) | ||
{ | ||
for (int i=options.ind[task]; i<options.ind[task+1]; i++) | ||
{ | ||
double aa = -y[i]*(features->dense_dot(i, Ws.col(task).data(), n_feats)+Cs[task]); | ||
double bb = CMath::max(aa,0.0); | ||
|
||
// avoid underflow when computing exponential loss | ||
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs; | ||
double b = -y[i]*(1 - 1/(1+CMath::exp(aa)))/n_vecs; | ||
|
||
gCs[task] += b; | ||
features->add_to_dense_vec(b, i, gWs.col(task).data(), n_feats); | ||
} | ||
} | ||
gWs.noalias() += 2*rho*Ws; | ||
|
||
// add regularizer | ||
Fs += rho*Ws.squaredNorm(); | ||
|
||
double Fzp = 0.0; | ||
|
||
// line search, Armijo-Goldstein scheme | ||
while (true) | ||
{ | ||
// compute trace projection of Ws - gWs/gamma with 2*rho/gamma | ||
internal::set_is_malloc_allowed(true); | ||
Wzp.setZero(); | ||
JacobiSVD<MatrixXd> svd(Ws - gWs/gamma,ComputeThinU | ComputeThinV); | ||
for (int i=0; i<svd.singularValues().size(); i++) | ||
{ | ||
if (svd.singularValues()[i] > 2*rho/gamma) | ||
Wzp += svd.matrixU().col(i)* | ||
svd.singularValues()[i]* | ||
svd.matrixV().col(i).transpose(); | ||
} | ||
internal::set_is_malloc_allowed(false); | ||
// walk in direction of antigradient | ||
Czp = Cs - gCs/gamma; | ||
|
||
// compute objective at line search point | ||
Fzp = 0.0; | ||
for (task=0; task<n_tasks; task++) | ||
{ | ||
for (int i=options.ind[task]; i<options.ind[task+1]; i++) | ||
{ | ||
double aa = -y[i]*(features->dense_dot(i, Wzp.col(task).data(), n_feats)+Cs[task]); | ||
double bb = CMath::max(aa,0.0); | ||
|
||
Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs; | ||
} | ||
} | ||
Fzp += rho*Wzp.squaredNorm(); | ||
|
||
// compute delta between line search point and search point | ||
delta_Wzp = Wzp - Ws; | ||
delta_Czp = Czp - Cs; | ||
|
||
// norms of delta | ||
double nrm_delta_Wzp = delta_Wzp.squaredNorm(); | ||
double nrm_delta_Czp = delta_Czp.squaredNorm(); | ||
|
||
double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2; | ||
|
||
double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + | ||
(delta_Czp.transpose()*gCs).trace() + | ||
(gamma/2)*nrm_delta_Wzp + | ||
(gamma/2)*nrm_delta_Czp; | ||
|
||
// break if delta is getting too small | ||
if (r_sum <= 1e-20) | ||
{ | ||
done = true; | ||
break; | ||
} | ||
|
||
// break if objective at line searc point is smaller than Fzp_gamma | ||
if (Fzp <= Fzp_gamma) | ||
break; | ||
else | ||
gamma *= gamma_inc; | ||
} | ||
|
||
Wz_old = Wz; | ||
Cz_old = Cz; | ||
Wz = Wzp; | ||
Cz = Czp; | ||
|
||
// compute objective value | ||
obj_old = obj; | ||
obj = Fzp; | ||
internal::set_is_malloc_allowed(true); | ||
JacobiSVD<MatrixXd> svd(Wzp, EigenvaluesOnly); | ||
obj += rho*svd.singularValues().sum(); | ||
internal::set_is_malloc_allowed(false); | ||
|
||
// check if process should be terminated | ||
switch (options.termination) | ||
{ | ||
case 0: | ||
if (iter>=2) | ||
{ | ||
if ( CMath::abs(obj-obj_old) <= options.tolerance ) | ||
done = true; | ||
} | ||
break; | ||
case 1: | ||
if (iter>=2) | ||
{ | ||
if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) | ||
done = true; | ||
} | ||
break; | ||
case 2: | ||
if (CMath::abs(obj) <= options.tolerance) | ||
done = true; | ||
break; | ||
case 3: | ||
if (iter>=options.max_iter) | ||
done = true; | ||
break; | ||
} | ||
|
||
iter++; | ||
t_old = t; | ||
t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); | ||
} | ||
SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj); | ||
|
||
SGMatrix<float64_t> tasks_w(n_feats, n_tasks); | ||
for (int i=0; i<n_feats; i++) | ||
{ | ||
for (task=0; task<n_tasks; task++) | ||
tasks_w[i] = Wzp(i,task); | ||
} | ||
SGVector<float64_t> tasks_c(n_tasks); | ||
for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i]; | ||
return slep_result_t(tasks_w, tasks_c); | ||
}; | ||
}; | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Sergey Lisitsyn | ||
* Copyright (C) 2012 Jiayu Zhou and Jieping Ye | ||
*/ | ||
|
||
#ifndef MALSAR_LOW_RANK_H_ | ||
#define MALSAR_LOW_RANK_H_ | ||
#include <shogun/lib/config.h> | ||
#ifdef HAVE_EIGEN3 | ||
#include <shogun/lib/slep/slep_options.h> | ||
#include <shogun/features/DotFeatures.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
slep_result_t malsar_low_rank( | ||
CDotFeatures* features, | ||
double* y, | ||
double rho, | ||
const slep_options& options); | ||
|
||
}; | ||
#endif | ||
#endif /* ----- #ifndef MALSAR_LOW_RANK_H_ ----- */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
src/shogun/transfer/multitask/MultitaskTraceLogisticRegression.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Copyright (C) 2012 Sergey Lisitsyn | ||
*/ | ||
|
||
#include <shogun/transfer/multitask/MultitaskTraceLogisticRegression.h> | ||
#include <shogun/lib/slep/malsar_low_rank.h> | ||
#include <shogun/lib/slep/slep_options.h> | ||
#include <shogun/lib/IndexBlockGroup.h> | ||
#include <shogun/lib/SGVector.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression() : | ||
CMultitaskLogisticRegression(), m_rho(0.0) | ||
{ | ||
} | ||
|
||
CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression( | ||
float64_t rho, CDotFeatures* train_features, | ||
CBinaryLabels* train_labels, CIndexBlockRelation* task_relation) : | ||
CMultitaskLogisticRegression(0.0,train_features,train_labels,task_relation) | ||
{ | ||
set_rho(rho); | ||
} | ||
|
||
void CMultitaskTraceLogisticRegression::set_rho(float64_t rho) | ||
{ | ||
m_rho = rho; | ||
} | ||
|
||
CMultitaskTraceLogisticRegression::~CMultitaskTraceLogisticRegression() | ||
{ | ||
} | ||
|
||
bool CMultitaskTraceLogisticRegression::train_machine(CFeatures* data) | ||
{ | ||
if (data && (CDotFeatures*)data) | ||
set_features((CDotFeatures*)data); | ||
|
||
ASSERT(features); | ||
ASSERT(m_labels); | ||
|
||
SGVector<float64_t> y(m_labels->get_num_labels()); | ||
for (int32_t i=0; i<y.vlen; i++) | ||
y[i] = ((CBinaryLabels*)m_labels)->get_label(i); | ||
|
||
slep_options options = slep_options::default_options(); | ||
options.termination = m_termination; | ||
options.tolerance = m_tolerance; | ||
options.max_iter = m_max_iter; | ||
SGVector<index_t> ind = ((CIndexBlockGroup*)m_task_relation)->get_SLEP_ind(); | ||
options.ind = ind.vector; | ||
options.n_tasks = ind.vlen-1; | ||
|
||
#ifdef HAVE_EIGEN3 | ||
slep_result_t model = malsar_low_rank( | ||
features, y.vector, m_rho, options); | ||
|
||
m_tasks_w = model.w; | ||
m_tasks_c = model.c; | ||
#endif | ||
|
||
ASSERT(m_task_relation); | ||
|
||
return true; | ||
} | ||
|
||
} |
Oops, something went wrong.