Skip to content

Commit

Permalink
Added Multitask Trace Logistic Regression
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn committed Jul 16, 2012
1 parent f205180 commit 7b9ac25
Show file tree
Hide file tree
Showing 8 changed files with 390 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/interfaces/modular/Transfer.i
Expand Up @@ -21,6 +21,7 @@
%rename(MultitaskLSRegression) CMultitaskLSRegression;
%rename(MultitaskLogisticRegression) CMultitaskLogisticRegression;
%rename(MultitaskL1L2LogisticRegression) CMultitaskL1L2LogisticRegression;
%rename(MultitaskTraceLogisticRegression) CMultitaskTraceLogisticRegression;

%rename(LibLinearMTL) CLibLinearMTL;

Expand All @@ -44,6 +45,7 @@
%include <shogun/transfer/multitask/MultitaskLSRegression.h>
%include <shogun/transfer/multitask/MultitaskLogisticRegression.h>
%include <shogun/transfer/multitask/MultitaskL1L2LogisticRegression.h>
%include <shogun/transfer/multitask/MultitaskTraceLogisticRegression.h>

%include <shogun/transfer/multitask/LibLinearMTL.h>

Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Transfer_includes.i
Expand Up @@ -12,6 +12,7 @@
#include <shogun/transfer/multitask/MultitaskLSRegression.h>
#include <shogun/transfer/multitask/MultitaskLogisticRegression.h>
#include <shogun/transfer/multitask/MultitaskL1L2LogisticRegression.h>
#include <shogun/transfer/multitask/MultitaskTraceLogisticRegression.h>

#ifdef USE_SVMLIGHT
#include <shogun/transfer/domain_adaptation/DomainAdaptationSVM.h>
Expand Down
4 changes: 0 additions & 4 deletions src/shogun/lib/slep/malsar_joint_feature_learning.cpp
Expand Up @@ -13,10 +13,6 @@
#include <shogun/mathematics/Math.h>
#include <iostream>

#define EIGEN_RUNTIME_NO_MALLOC
#define NDEBUG
#include <eigen3/Eigen/Dense>

using namespace Eigen;

namespace shogun
Expand Down
217 changes: 217 additions & 0 deletions src/shogun/lib/slep/malsar_low_rank.cpp
@@ -0,0 +1,217 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Sergey Lisitsyn
* Copyright (C) 2012 Jiayu Zhou and Jieping Ye
*/

#include <shogun/lib/slep/malsar_low_rank.h>
#ifdef HAVE_EIGEN3
#include <shogun/mathematics/Math.h>
#include <iostream>

using namespace Eigen;

namespace shogun
{

slep_result_t malsar_low_rank(
CDotFeatures* features,
double* y,
double rho,
const slep_options& options)
{
int task;
int n_feats = features->get_dim_feature_space();
SG_SDEBUG("n feats = %d\n", n_feats);
int n_vecs = features->get_num_vectors();
SG_SDEBUG("n vecs = %d\n", n_vecs);
int n_tasks = options.n_tasks;
SG_SDEBUG("n tasks = %d\n", n_tasks);

int iter = 0;

// initialize weight vector and bias for each task
MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
VectorXd Cs = VectorXd::Zero(n_tasks);
for (task=0; task<n_tasks; task++)
{
int n_pos = 0;
int n_neg = 0;
for (int i=options.ind[task]; i<options.ind[task+1]; i++)
{
if (y[i] > 0)
n_pos++;
else
n_neg++;
}
Cs[task] = CMath::log(double(n_pos)/n_neg);
}

MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;

double t=1, t_old=0;
double gamma=1, gamma_inc=2;
double obj=0.0, obj_old=0.0;

internal::set_is_malloc_allowed(false);
bool done = false;
while (!done && iter <= options.max_iter)
{
double alpha = double(t_old - 1)/t;

// compute search point
Ws = (1+alpha)*Wz - alpha*Wz_old;
Cs = (1+alpha)*Cz - alpha*Cz_old;

// zero gradient
gWs.setZero();
gCs.setZero();

// compute gradient and objective at search point
double Fs = 0;
for (task=0; task<n_tasks; task++)
{
for (int i=options.ind[task]; i<options.ind[task+1]; i++)
{
double aa = -y[i]*(features->dense_dot(i, Ws.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

// avoid underflow when computing exponential loss
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
double b = -y[i]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;

gCs[task] += b;
features->add_to_dense_vec(b, i, gWs.col(task).data(), n_feats);
}
}
gWs.noalias() += 2*rho*Ws;

// add regularizer
Fs += rho*Ws.squaredNorm();

double Fzp = 0.0;

// line search, Armijo-Goldstein scheme
while (true)
{
// compute trace projection of Ws - gWs/gamma with 2*rho/gamma
internal::set_is_malloc_allowed(true);
Wzp.setZero();
JacobiSVD<MatrixXd> svd(Ws - gWs/gamma,ComputeThinU | ComputeThinV);
for (int i=0; i<svd.singularValues().size(); i++)
{
if (svd.singularValues()[i] > 2*rho/gamma)
Wzp += svd.matrixU().col(i)*
svd.singularValues()[i]*
svd.matrixV().col(i).transpose();
}
internal::set_is_malloc_allowed(false);
// walk in direction of antigradient
Czp = Cs - gCs/gamma;

// compute objective at line search point
Fzp = 0.0;
for (task=0; task<n_tasks; task++)
{
for (int i=options.ind[task]; i<options.ind[task+1]; i++)
{
double aa = -y[i]*(features->dense_dot(i, Wzp.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
}
}
Fzp += rho*Wzp.squaredNorm();

// compute delta between line search point and search point
delta_Wzp = Wzp - Ws;
delta_Czp = Czp - Cs;

// norms of delta
double nrm_delta_Wzp = delta_Wzp.squaredNorm();
double nrm_delta_Czp = delta_Czp.squaredNorm();

double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;

double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
(delta_Czp.transpose()*gCs).trace() +
(gamma/2)*nrm_delta_Wzp +
(gamma/2)*nrm_delta_Czp;

// break if delta is getting too small
if (r_sum <= 1e-20)
{
done = true;
break;
}

// break if objective at line searc point is smaller than Fzp_gamma
if (Fzp <= Fzp_gamma)
break;
else
gamma *= gamma_inc;
}

Wz_old = Wz;
Cz_old = Cz;
Wz = Wzp;
Cz = Czp;

// compute objective value
obj_old = obj;
obj = Fzp;
internal::set_is_malloc_allowed(true);
JacobiSVD<MatrixXd> svd(Wzp, EigenvaluesOnly);
obj += rho*svd.singularValues().sum();
internal::set_is_malloc_allowed(false);

// check if process should be terminated
switch (options.termination)
{
case 0:
if (iter>=2)
{
if ( CMath::abs(obj-obj_old) <= options.tolerance )
done = true;
}
break;
case 1:
if (iter>=2)
{
if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
done = true;
}
break;
case 2:
if (CMath::abs(obj) <= options.tolerance)
done = true;
break;
case 3:
if (iter>=options.max_iter)
done = true;
break;
}

iter++;
t_old = t;
t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
}
SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);

SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
for (int i=0; i<n_feats; i++)
{
for (task=0; task<n_tasks; task++)
tasks_w[i] = Wzp(i,task);
}
SGVector<float64_t> tasks_c(n_tasks);
for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
return slep_result_t(tasks_w, tasks_c);
};
};
#endif
29 changes: 29 additions & 0 deletions src/shogun/lib/slep/malsar_low_rank.h
@@ -0,0 +1,29 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Sergey Lisitsyn
* Copyright (C) 2012 Jiayu Zhou and Jieping Ye
*/

#ifndef MALSAR_LOW_RANK_H_
#define MALSAR_LOW_RANK_H_
#include <shogun/lib/config.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/slep/slep_options.h>
#include <shogun/features/DotFeatures.h>

namespace shogun
{

slep_result_t malsar_low_rank(
CDotFeatures* features,
double* y,
double rho,
const slep_options& options);

};
#endif
#endif /* ----- #ifndef MALSAR_LOW_RANK_H_ ----- */
3 changes: 3 additions & 0 deletions src/shogun/mathematics/Math.h
Expand Up @@ -36,6 +36,9 @@
#include <ieeefp.h>
#endif

#define EIGEN_RUNTIME_NO_MALLOC
#include <eigen3/Eigen/Dense>

/// workaround for log2 being a define on cygwin
#ifdef log2
#define cygwin_log2 log2
Expand Down
74 changes: 74 additions & 0 deletions src/shogun/transfer/multitask/MultitaskTraceLogisticRegression.cpp
@@ -0,0 +1,74 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Copyright (C) 2012 Sergey Lisitsyn
*/

#include <shogun/transfer/multitask/MultitaskTraceLogisticRegression.h>
#include <shogun/lib/slep/malsar_low_rank.h>
#include <shogun/lib/slep/slep_options.h>
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/lib/SGVector.h>

namespace shogun
{

CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression() :
CMultitaskLogisticRegression(), m_rho(0.0)
{
}

CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression(
float64_t rho, CDotFeatures* train_features,
CBinaryLabels* train_labels, CIndexBlockRelation* task_relation) :
CMultitaskLogisticRegression(0.0,train_features,train_labels,task_relation)
{
set_rho(rho);
}

void CMultitaskTraceLogisticRegression::set_rho(float64_t rho)
{
m_rho = rho;
}

CMultitaskTraceLogisticRegression::~CMultitaskTraceLogisticRegression()
{
}

bool CMultitaskTraceLogisticRegression::train_machine(CFeatures* data)
{
if (data && (CDotFeatures*)data)
set_features((CDotFeatures*)data);

ASSERT(features);
ASSERT(m_labels);

SGVector<float64_t> y(m_labels->get_num_labels());
for (int32_t i=0; i<y.vlen; i++)
y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

slep_options options = slep_options::default_options();
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
SGVector<index_t> ind = ((CIndexBlockGroup*)m_task_relation)->get_SLEP_ind();
options.ind = ind.vector;
options.n_tasks = ind.vlen-1;

#ifdef HAVE_EIGEN3
slep_result_t model = malsar_low_rank(
features, y.vector, m_rho, options);

m_tasks_w = model.w;
m_tasks_c = model.c;
#endif

ASSERT(m_task_relation);

return true;
}

}

0 comments on commit 7b9ac25

Please sign in to comment.