A few fixes for malsar based algorithms
lisitsyn committed Jul 22, 2012
1 parent 3d873b4 commit aaa603d
Showing 3 changed files with 31 additions and 18 deletions.
12 changes: 7 additions & 5 deletions src/shogun/lib/malsar/malsar_joint_feature_learning.cpp
@@ -79,14 +79,15 @@ malsar_result_t malsar_joint_feature_learning(
for (task=0; task<n_tasks; task++)
{
SGVector<index_t> task_idx = options.tasks_indices[task];
for (int i=0; i<task_idx.vlen; i++)
int n_task_vecs = task_idx.vlen;
for (int i=0; i<n_task_vecs; i++)
{
double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

// avoid underflow when computing exponential loss
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;

gCs[task] += b;
features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
@@ -118,12 +119,13 @@ malsar_result_t malsar_joint_feature_learning(
for (task=0; task<n_tasks; task++)
{
SGVector<index_t> task_idx = options.tasks_indices[task];
for (int i=0; i<task_idx.vlen; i++)
int n_task_vecs = task_idx.vlen;
for (int i=0; i<n_task_vecs; i++)
{
double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
}
}
Fzp += Wzp.squaredNorm();
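The change in this file replaces the global normalizer n_vecs with the per-task count n_task_vecs, so each task's logistic loss is averaged over that task's own examples; the same fix is applied in malsar_low_rank.cpp below. For reference, a minimal self-contained sketch of what the corrected loop computes for one task (illustrative only, not the Shogun code; X, y, task_idx, w and c are hypothetical stand-ins for the dense features, labels, per-task index list, task weight column and task bias):

#include <algorithm>
#include <cmath>
#include <vector>

// Per-task averaged logistic loss with the shifted evaluation of
// log(1 + exp(aa)) used in the diff above, which avoids overflow of exp(aa).
double task_logistic_loss(const std::vector<std::vector<double> >& X,
                          const std::vector<double>& y,
                          const std::vector<int>& task_idx,
                          const std::vector<double>& w, double c)
{
    const int n_task_vecs = (int)task_idx.size();  // was n_vecs before the fix
    double loss = 0.0;
    for (int i = 0; i < n_task_vecs; i++)
    {
        const int idx = task_idx[i];
        double dot = c;
        for (size_t j = 0; j < w.size(); j++)
            dot += X[idx][j] * w[j];
        double aa = -y[idx] * dot;       // negative margin
        double bb = std::max(aa, 0.0);   // shift so neither exponential overflows
        // log(1 + exp(aa)) == log(exp(-bb) + exp(aa - bb)) + bb
        loss += (std::log(std::exp(-bb) + std::exp(aa - bb)) + bb) / n_task_vecs;
    }
    return loss;
}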
32 changes: 19 additions & 13 deletions src/shogun/lib/malsar/malsar_low_rank.cpp
@@ -49,6 +49,7 @@ malsar_result_t malsar_low_rank(
else
n_neg++;
}
SG_SDEBUG("There are %d positive and %d negative examples in task %d\n",n_pos,n_neg,task);
Cs[task] = CMath::log(double(n_pos)/n_neg);
}

@@ -59,6 +60,8 @@
double gamma=1, gamma_inc=2;
double obj=0.0, obj_old=0.0;

double rho_L2 = 0.0;

internal::set_is_malloc_allowed(false);
bool done = false;
while (!done && iter <= options.max_iter)
@@ -78,23 +81,25 @@
for (task=0; task<n_tasks; task++)
{
SGVector<index_t> task_idx = options.tasks_indices[task];
for (int i=0; i<task_idx.vlen; i++)
int n_task_vecs = task_idx.vlen;
for (int i=0; i<n_task_vecs; i++)
{
double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

// avoid underflow when computing exponential loss
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs;
Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;

gCs[task] += b;
features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
}
}
gWs.noalias() += 2*rho*Ws;

gWs.noalias() += 2*rho_L2*Ws;
//SG_SDEBUG("gWs=%f\n",gWs.squaredNorm());

// add regularizer
Fs += rho*Ws.squaredNorm();
Fs += rho_L2*Ws.squaredNorm();

double Fzp = 0.0;

@@ -105,13 +110,13 @@
// compute trace projection of Ws - gWs/gamma with 2*rho/gamma
internal::set_is_malloc_allowed(true);
Wzp.setZero();
JacobiSVD<MatrixXd> svd(Ws - gWs/gamma,ComputeThinU | ComputeThinV);
JacobiSVD<MatrixXd> svd((Ws - gWs/gamma).transpose(),ComputeThinU | ComputeThinV);
for (int i=0; i<svd.singularValues().size(); i++)
{
if (svd.singularValues()[i] > 2*rho/gamma)
Wzp += svd.matrixU().col(i)*
if (svd.singularValues()[i] > rho/gamma)
Wzp += (svd.matrixU().col(i)*
svd.singularValues()[i]*
svd.matrixV().col(i).transpose();
svd.matrixV().col(i).transpose()).transpose();
}
internal::set_is_malloc_allowed(false);
// walk in direction of antigradient
@@ -122,15 +127,16 @@
for (task=0; task<n_tasks; task++)
{
SGVector<index_t> task_idx = options.tasks_indices[task];
for (int i=0; i<task_idx.vlen; i++)
int n_task_vecs = task_idx.vlen;
for (int i=0; i<n_task_vecs; i++)
{
double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
double bb = CMath::max(aa,0.0);

Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
}
}
Fzp += rho*Wzp.squaredNorm();
Fzp += rho_L2*Wzp.squaredNorm();

// compute delta between line search point and search point
delta_Wzp = Wzp - Ws;
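Besides the per-task averaging, this file changes the low-rank projection step: the SVD is now taken of the transposed search point (Ws - gWs/gamma), the threshold drops from 2*rho/gamma to rho/gamma, and a separate rho_L2 (initialised to zero in the diff) takes over the squared-Frobenius term, so rho only controls the thresholding. A minimal sketch of the corrected projection, assuming Eigen is available; this mirrors the loop in the diff but is not the Shogun source itself:

#include <Eigen/Dense>

// Keep only singular components above rho/gamma, at their original magnitude,
// working on the transposed search point and transposing the result back.
Eigen::MatrixXd low_rank_projection(const Eigen::MatrixXd& search_point,
                                    double rho, double gamma)
{
    using namespace Eigen;
    JacobiSVD<MatrixXd> svd(search_point.transpose(), ComputeThinU | ComputeThinV);
    MatrixXd Wzp = MatrixXd::Zero(search_point.rows(), search_point.cols());
    for (int i = 0; i < svd.singularValues().size(); i++)
    {
        if (svd.singularValues()[i] > rho / gamma)
            Wzp += (svd.matrixU().col(i) * svd.singularValues()[i] *
                    svd.matrixV().col(i).transpose()).transpose();
    }
    return Wzp;
}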
@@ -57,6 +57,11 @@ bool CMultitaskTraceLogisticRegression::train_machine(CFeatures* data)
options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
options.tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();

SG_DEBUG("Starting malsar solver\n");
SG_DEBUG("N tasks = %d \n", options.n_tasks);
for (int32_t i=0; i<options.n_tasks; i++)
options.tasks_indices[i].display_vector();

#ifdef HAVE_EIGEN3
malsar_result_t model = malsar_low_rank(
features, y.vector, m_rho, options);
