Changed task indexing of MALSAR-based algorithms and the task API in general
lisitsyn committed Jul 21, 2012
1 parent 6eea837 commit 2b6dcd3
Showing 14 changed files with 199 additions and 97 deletions.
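The thread running through the files shown: the MALSAR solvers previously addressed each task's examples through a SLEP-style boundary array (options.ind, task t owning the contiguous rows ind[t]..ind[t+1]-1); after this commit they receive an explicit index vector per task, so a task's examples no longer need to be contiguous in the feature matrix. A minimal sketch of the two access patterns, using illustrative std:: types and a hypothetical process_example callback rather than the actual Shogun API:

    #include <cstddef>
    #include <vector>

    void process_example(int row); // hypothetical per-example work

    // Old scheme: one cumulative boundary array of length n_tasks+1;
    // task t owns the contiguous row range [ind[t], ind[t+1]).
    void visit_task_old(const int* ind, int task)
    {
        for (int i=ind[task]; i<ind[task+1]; i++)
            process_example(i);
    }

    // New scheme: one index vector per task; rows may be arbitrary
    // and in particular non-contiguous.
    void visit_task_new(const std::vector<std::vector<int> >& tasks_indices,
                        int task)
    {
        const std::vector<int>& idx = tasks_indices[task];
        for (std::size_t i=0; i<idx.size(); i++)
            process_example(idx[i]);
    }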
56 changes: 33 additions & 23 deletions src/shogun/lib/malsar/malsar_clustered.cpp
@@ -15,6 +15,7 @@
#include <shogun/lib/external/libqp.h>

using namespace Eigen;
+using namespace std;

namespace shogun
{
@@ -47,8 +48,6 @@ malsar_result_t malsar_clustered(
H_diag_matrix[i*n_tasks+i] = 2.0;
H_diag_matrix_ld = n_tasks;

-//SGMatrix<float64_t>::display_matrix(H_diag_matrix, n_tasks, n_tasks);
-
int iter = 0;

// initialize weight vector and bias for each task
@@ -58,9 +57,10 @@ malsar_result_t malsar_clustered(
{
int n_pos = 0;
int n_neg = 0;
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-if (y[i] > 0)
+if (y[task_idx[i]] > 0)
n_pos++;
else
n_neg++;
@@ -77,7 +77,7 @@ malsar_result_t malsar_clustered(
MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms;
MatrixXd Mzp_Pz;

-double eta = rho1/rho2;
+double eta = rho2/rho1;
double c = rho1*eta*(1+eta);

double t=1, t_old=0;
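A note on the eta swap above: it is a real fix, not cosmetics. Reading it together with the trace regularizer computed later in this file (Fs += c*(Ws*invEtaMWt).trace(), with IM = eta*I + Ms and invEtaMWt = IM.inverse()*Ws.transpose()), the solver appears to minimize the convex clustered multi-task objective

    \min_{W,M}\ \mathcal{L}(W) + c\,\mathrm{tr}\!\big(W(\eta I + M)^{-1}W^{\top}\big)
    \quad\text{s.t.}\quad \mathrm{tr}(M)=k,\ \ 0 \preceq M \preceq I,

with eta = rho2/rho1, c = rho1*eta*(1+eta) and k = options.n_clusters. This is a hedged reading of the surrounding code, but both the constant c computed here and the QP constraints further down match that convention.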
@@ -96,6 +96,7 @@ malsar_result_t malsar_clustered(
while (!done && iter <= options.max_iter)
{
double alpha = double(t_old - 1)/t;
SG_SDEBUG("alpha=%f\n",alpha);

// compute search point
Ws = (1+alpha)*Wz - alpha*Wz_old;
@@ -106,9 +107,13 @@ malsar_result_t malsar_clustered(
gWs.setZero();
gCs.setZero();
internal::set_is_malloc_allowed(true);
SG_SDEBUG("Computing gradient\n");
IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms);
// cout << "M" << endl << Ms << endl;
// cout << "IM" << endl << IM << endl;
IMsqinv = (IM*IM).inverse();
-invEtaMWt = IM.inverse();
+invEtaMWt = IM.inverse()*Ws.transpose();
+//cout << "invEtaMWt" << endl << invEtaMWt << endl;
gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv;
gWs.noalias() += 2*c*invEtaMWt.transpose();
internal::set_is_malloc_allowed(false);
@@ -117,22 +122,27 @@ malsar_result_t malsar_clustered(
double Fs = 0;
for (task=0; task<n_tasks; task++)
{
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+int n_vecs_task = task_idx.vlen;
+for (int i=0; i<n_vecs_task; i++)
{
-double aa = -y[i]*(features->dense_dot(i, Ws.col(task).data(), n_feats)+Cs[task]);
+double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

// avoid underflow when computing exponential loss
-Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
-double b = -y[i]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;
+Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
+double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task;
gCs[task] += b;
-features->add_to_dense_vec(b, i, gWs.col(task).data(), n_feats);
+features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
}
}
//cout << "gWs" << endl << gWs << endl;
//cout << "gCs" << endl << gCs << endl;
SG_SDEBUG("Computed gradient\n");

// add regularizer
Fs += c*(Ws*invEtaMWt).trace();
SG_SDEBUG("Fs = %f \n", Fs);

double Fzp = 0.0;

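Throughout these loss loops only the row being read changed; the per-example term itself is the standard log-sum-exp-guarded logistic loss. A standalone sketch of the identity the aa/bb pair relies on (hypothetical helper name):

    #include <algorithm>
    #include <cmath>

    // Computes log(1 + exp(aa)) without overflow: with bb = max(aa, 0),
    // both exponents below are <= 0, and
    // log(exp(-bb) + exp(aa-bb)) + bb == log(1 + exp(aa)).
    double logistic_loss_term(double aa)
    {
        double bb = std::max(aa, 0.0);
        return std::log(std::exp(-bb) + std::exp(aa - bb)) + bb;
    }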
@@ -149,13 +159,13 @@ malsar_result_t malsar_clustered(
for (int i=0; i<n_tasks; i++)
{
diag_H[i] = 2.0;
-f[i] = 0.0;
+f[i] = -2*eigensolver.eigenvalues()[i].real();
a[i] = 1.0;
-lb[i] = 0 - eigensolver.eigenvalues()[i].real();
-ub[i] = 1 - eigensolver.eigenvalues()[i].real();
-x[i] = ub[i];
+lb[i] = 0.0;
+ub[i] = 1.0;
+x[i] = double(options.n_clusters)/n_tasks;
}
-double b = n_tasks - eigensolver.eigenvalues().sum().real();
+double b = options.n_clusters;//eigensolver.eigenvalues().sum().real();
SG_SDEBUG("b = %f\n", b);
SG_SDEBUG("Calling libqp\n");
libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL);
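The rewritten QP setup changes the problem, not just constants. With diag_H[i] = 2, f[i] = -2*lambda_i (lambda_i the real eigenvalues of the search point, per the eigensolver used here), box constraints [0,1] and equality sum_i x_i = b = n_clusters, the GSMO call solves, up to the additive constant sum_i lambda_i^2,

    \min_x\ \sum_i (x_i-\lambda_i)^2
    \quad\text{s.t.}\quad \sum_i x_i = k,\ \ 0 \le x_i \le 1,

a Euclidean projection of the spectrum onto the capped simplex, since (1/2)x^T H x + f^T x = sum_i (x_i^2 - 2*lambda_i*x_i). The removed lines posed the projection in shifted variables, hence the x[i] += lambda_i restore step deleted just below, but pinned the trace to n_tasks rather than n_clusters; this hunk corrects that target as well. This is a hedged reading of the code, not a statement of the MALSAR reference.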
@@ -164,10 +174,7 @@ malsar_result_t malsar_clustered(
SG_SDEBUG("%d iteration passed\n",problem_state.nIter);
SG_SDEBUG("Solution is \n [ ");
for (int i=0; i<n_tasks; i++)
-{
SG_SDEBUG("%f ", x[i]);
-x[i] += eigensolver.eigenvalues()[i].real();
-}
SG_SDEBUG("]\n");
Map<VectorXd> Mzp_DiagSigz(x,n_tasks);
Mzp_Pz = eigensolver.eigenvectors().real();
@@ -188,12 +195,14 @@ malsar_result_t malsar_clustered(
Fzp = 0.0;
for (task=0; task<n_tasks; task++)
{
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+int n_vecs_task = task_idx.vlen;
+for (int i=0; i<n_vecs_task; i++)
{
-double aa = -y[i]*(features->dense_dot(i, Wzp.col(task).data(), n_feats)+Cs[task]);
+double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

-Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
+Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
}
}
Fzp += c*(Wzp*invEtaMWt).trace();
@@ -304,6 +313,7 @@ malsar_result_t malsar_clustered(
for (task=0; task<n_tasks; task++)
tasks_w[i] = Wzp(i,task);
}
+tasks_w.display_matrix();
SGVector<float64_t> tasks_c(n_tasks);
for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
return malsar_result_t(tasks_w, tasks_c);
19 changes: 11 additions & 8 deletions src/shogun/lib/malsar/malsar_joint_feature_learning.cpp
@@ -42,9 +42,10 @@ malsar_result_t malsar_joint_feature_learning(
{
int n_pos = 0;
int n_neg = 0;
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-if (y[i] > 0)
+if (y[task_idx[i]] > 0)
n_pos++;
else
n_neg++;
@@ -77,17 +78,18 @@ malsar_result_t malsar_joint_feature_learning(
double Fs = 0;
for (task=0; task<n_tasks; task++)
{
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-double aa = -y[i]*(features->dense_dot(i, Ws.col(task).data(), n_feats)+Cs[task]);
+double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

// avoid underflow when computing exponential loss
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
-double b = -y[i]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;
+double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;

gCs[task] += b;
-features->add_to_dense_vec(b, i, gWs.col(task).data(), n_feats);
+features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
}
}
gWs.noalias() += 2*rho2*Ws;
@@ -115,9 +117,10 @@ malsar_result_t malsar_joint_feature_learning(
Fzp = 0.0;
for (task=0; task<n_tasks; task++)
{
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-double aa = -y[i]*(features->dense_dot(i, Wzp.col(task).data(), n_feats)+Cs[task]);
+double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
21 changes: 12 additions & 9 deletions src/shogun/lib/malsar/malsar_low_rank.cpp
@@ -41,9 +41,10 @@ malsar_result_t malsar_low_rank(
{
int n_pos = 0;
int n_neg = 0;
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-if (y[i] > 0)
+if (y[task_idx[i]] > 0)
n_pos++;
else
n_neg++;
@@ -76,17 +77,18 @@ malsar_result_t malsar_low_rank(
double Fs = 0;
for (task=0; task<n_tasks; task++)
{
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-double aa = -y[i]*(features->dense_dot(i, Ws.col(task).data(), n_feats)+Cs[task]);
+double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

// avoid underflow when computing exponential loss
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
-double b = -y[i]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;
+double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;

gCs[task] += b;
-features->add_to_dense_vec(b, i, gWs.col(task).data(), n_feats);
+features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
}
}
gWs.noalias() += 2*rho*Ws;
@@ -119,9 +121,10 @@ malsar_result_t malsar_low_rank(
Fzp = 0.0;
for (task=0; task<n_tasks; task++)
{
-for (int i=options.ind[task]; i<options.ind[task+1]; i++)
+SGVector<index_t> task_idx = options.tasks_indices[task];
+for (int i=0; i<task_idx.vlen; i++)
{
-double aa = -y[i]*(features->dense_dot(i, Wzp.col(task).data(), n_feats)+Cs[task]);
+double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
@@ -151,7 +154,7 @@ malsar_result_t malsar_low_rank(
break;
}

-// break if objective at line searc point is smaller than Fzp_gamma
+// break if objective at line search point is smaller than Fzp_gamma
if (Fzp <= Fzp_gamma)
break;
else
4 changes: 2 additions & 2 deletions src/shogun/lib/malsar/malsar_options.h
@@ -33,7 +33,7 @@ IGNORE_IN_CLASSLIST struct malsar_options
int max_iter;
int n_tasks;
int n_clusters;
-int* ind;
+SGVector<int>* tasks_indices;
malsar_loss loss;

static malsar_options default_options()
@@ -42,7 +42,7 @@ IGNORE_IN_CLASSLIST struct malsar_options
opts.termination = 2;
opts.tolerance = 1e-3;
opts.max_iter = 1000;
-opts.ind = NULL;
+opts.tasks_indices = NULL;
opts.n_clusters = 2;
opts.loss = MALSAR_LOGISTIC;
return opts;
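With the struct change above, a caller hands the solver an array of n_tasks index vectors instead of one boundary array. (The header declares SGVector<int>* while the solvers read SGVector<index_t>; the two coincide on the platforms Shogun targets.) A hedged sketch of filling the new field by hand, with toy indices; in the regressors below the array actually comes from CTaskGroup::get_tasks_indices():

    malsar_options opts = malsar_options::default_options();
    opts.n_tasks = 2;
    opts.tasks_indices = new SGVector<index_t>[2];

    SGVector<index_t> t0(3);            // task 0 owns rows 0, 2, 4
    t0[0] = 0; t0[1] = 2; t0[2] = 4;
    opts.tasks_indices[0] = t0;

    SGVector<index_t> t1(2);            // task 1 owns rows 1, 3
    t1[0] = 1; t1[1] = 3;
    opts.tasks_indices[1] = t1;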
1 change: 1 addition & 0 deletions src/shogun/lib/slep/slep_solver.cpp
@@ -78,6 +78,7 @@ double compute_regularizer(double* w, int n_vecs, int n_feats,
for (int i=0; i<n_feats; i++)
regularizer += CMath::abs(w[i]);
}
+break;
default:
SG_SERROR("WHOA?\n");
}
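The one-line slep_solver.cpp change is a classic missing-break fix: without it, control falls out of the regularizer branch straight into default and raises the error. In context, with the switch variable and case label illustrative since the hunk shows only the loop and the default arm:

    switch (regularization)                  // switch variable illustrative
    {
    case 1:                                  // label illustrative
        for (int i=0; i<n_feats; i++)
            regularizer += CMath::abs(w[i]);
        break;                               // the added line: stops the fallthrough
    default:
        SG_SERROR("WHOA?\n");
    }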
src/shogun/transfer/multitask/MultitaskClusteredLogisticRegression.cpp
@@ -80,9 +80,8 @@ bool CMultitaskClusteredLogisticRegression::train_machine(CFeatures* data)
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
-SGVector<index_t> ind = ((CTaskGroup*)m_task_relation)->get_SLEP_ind();
-options.ind = ind.vector;
-options.n_tasks = ind.vlen-1;
+options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
+options.tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();
options.n_clusters = m_num_clusters;

#ifdef HAVE_EIGEN3
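The same three-line wiring swap recurs in MultitaskL1L2LogisticRegression and MultitaskTraceLogisticRegression below; condensed, the migration in each train_machine is (lines taken verbatim from the hunks):

    // before: SLEP-style cumulative boundaries
    SGVector<index_t> ind = ((CTaskGroup*)m_task_relation)->get_SLEP_ind();
    options.ind = ind.vector;
    options.n_tasks = ind.vlen-1;

    // after: explicit per-task index vectors
    options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
    options.tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();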
src/shogun/transfer/multitask/MultitaskL1L2LogisticRegression.cpp
@@ -59,9 +59,8 @@ bool CMultitaskL1L2LogisticRegression::train_machine(CFeatures* data)
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
-SGVector<index_t> ind = ((CTaskGroup*)m_task_relation)->get_SLEP_ind();
-options.ind = ind.vector;
-options.n_tasks = ind.vlen-1;
+options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
+options.tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();

#ifdef HAVE_EIGEN3
malsar_result_t model = malsar_joint_feature_learning(
4 changes: 2 additions & 2 deletions src/shogun/transfer/multitask/MultitaskLogisticRegression.cpp
@@ -113,8 +113,8 @@ bool CMultitaskLogisticRegression::train_machine(CFeatures* data)
CTaskTree* task_tree = (CTaskTree*)m_task_relation;

CTask* root_task = (CTask*)task_tree->get_root_task();
-if (root_task->get_max_index() > features->get_num_vectors())
-SG_ERROR("Root task covers more vectors than available\n");
+//if (root_task->get_max_index() > features->get_num_vectors())
+// SG_ERROR("Root task covers more vectors than available\n");
SG_UNREF(root_task);

SGVector<index_t> ind = task_tree->get_SLEP_ind();
src/shogun/transfer/multitask/MultitaskTraceLogisticRegression.cpp
@@ -54,9 +54,8 @@ bool CMultitaskTraceLogisticRegression::train_machine(CFeatures* data)
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
-SGVector<index_t> ind = ((CTaskGroup*)m_task_relation)->get_SLEP_ind();
-options.ind = ind.vector;
-options.n_tasks = ind.vlen-1;
+options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
+options.tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();

#ifdef HAVE_EIGEN3
malsar_result_t model = malsar_low_rank(
