Commit

Multitask crossvalidation support
lisitsyn committed Jul 30, 2012
1 parent 77433fe commit 1cee68b
Showing 13 changed files with 235 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/shogun/lib/malsar/malsar_clustered.cpp
@@ -285,6 +285,7 @@ malsar_result_t malsar_clustered(
t_old = t;
t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
}
internal::set_is_malloc_allowed(true);
SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);

SG_FREE(H_diag_matrix);
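The malsar solvers bracket their optimization loops with Eigen's runtime malloc guard: internal::set_is_malloc_allowed(false) before the loop (visible in the joint feature learning hunk below) and, with this commit, internal::set_is_malloc_allowed(true) once the loop exits. A minimal standalone sketch of that guard pattern, not part of this commit, assuming Eigen is compiled with EIGEN_RUNTIME_NO_MALLOC so the debug assertion is active:

// Sketch only: Eigen's runtime malloc guard, available when
// EIGEN_RUNTIME_NO_MALLOC is defined before including Eigen.
#define EIGEN_RUNTIME_NO_MALLOC
#include <Eigen/Dense>

void hot_loop(Eigen::MatrixXd& W, const Eigen::MatrixXd& G)
{
    Eigen::internal::set_is_malloc_allowed(false); // Eigen asserts if it allocates
    for (int iter = 0; iter < 100; iter++)
        W -= 0.01 * G;                             // in-place update, no heap allocation
    Eigen::internal::set_is_malloc_allowed(true);  // restore before returning
}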
7 changes: 5 additions & 2 deletions src/shogun/lib/malsar/malsar_joint_feature_learning.cpp
@@ -10,6 +10,7 @@

#include <shogun/lib/malsar/malsar_joint_feature_learning.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <iostream>
@@ -49,7 +50,7 @@ malsar_result_t malsar_joint_feature_learning(

internal::set_is_malloc_allowed(false);
bool done = false;
while (!done && iter <= options.max_iter)
while (!done && iter <= options.max_iter && !CSignal::cancel_computations())
{
double alpha = double(t_old - 1)/t;

@@ -166,7 +167,7 @@ malsar_result_t malsar_joint_feature_learning(
//for (task=0; task<n_tasks; task++)
// obj += rho1*(Wz.col(task).norm());
SG_SDEBUG("Obj = %f\n",obj);

//SG_SABS_PROGRESS(obj,0.0);
// check if process should be terminated
switch (options.termination)
{
@@ -201,6 +202,8 @@ malsar_result_t malsar_joint_feature_learning(
t_old = t;
t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
}
internal::set_is_malloc_allowed(true);
SG_SDONE();
SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);

SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
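The new loop condition in malsar_joint_feature_learning lets a pending Ctrl+C, routed through CSignal, stop the solver between iterations instead of being ignored until convergence. A standalone sketch of that cancellation-aware loop, not part of this commit; do_step() and converged() are hypothetical placeholders:

// Sketch only: the cancellation-aware loop pattern added above.
#include <shogun/lib/Signal.h>

bool do_step();    // one optimization step (placeholder)
bool converged();  // termination check (placeholder)

void solve(int max_iter)
{
    int iter = 1;
    bool done = false;
    while (!done && iter <= max_iter && !shogun::CSignal::cancel_computations())
    {
        do_step();
        done = converged();
        iter++;
    }
}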
1 change: 1 addition & 0 deletions src/shogun/lib/malsar/malsar_low_rank.cpp
@@ -197,6 +197,7 @@ malsar_result_t malsar_low_rank(
t_old = t;
t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
}
internal::set_is_malloc_allowed(true);
SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);

SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
1 change: 1 addition & 0 deletions src/shogun/machine/Machine.cpp
@@ -146,6 +146,7 @@ void CMachine::data_lock(CLabels* labs, CFeatures* features)
}

m_data_locked=true;
post_lock();
SG_DEBUG("leaving %s::data_lock\n", get_name());
}

3 changes: 3 additions & 0 deletions src/shogun/machine/Machine.h
@@ -252,6 +252,9 @@ class CMachine : public CSGObject
*/
virtual void data_lock(CLabels* labs, CFeatures* features);

/** post lock */
virtual void post_lock() { };

/** Unlocks a locked machine and restores previous state */
virtual void data_unlock();

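CMachine::data_lock() now finishes by calling the virtual post_lock() hook, so a subclass can precompute per-lock state that depends on the locked labels and features; MultitaskLogisticRegression uses it below to cache per-task index sets. A minimal sketch of a subclass using the hook, not part of this commit; CMyMachine and m_num_cached are hypothetical:

// Sketch only: overriding the new post_lock() hook.
#include <shogun/machine/Machine.h>

class CMyMachine : public shogun::CMachine
{
public:
    virtual const char* get_name() const { return "MyMachine"; }

    virtual void post_lock()
    {
        // runs at the end of data_lock(), once m_labels/features are fixed
        m_num_cached = m_labels->get_num_labels();
    }

private:
    int32_t m_num_cached;
};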
@@ -64,6 +64,35 @@ CMultitaskClusteredLogisticRegression::~CMultitaskClusteredLogisticRegression()
{
}

bool CMultitaskClusteredLogisticRegression::train_locked_implementation(SGVector<index_t> indices,
SGVector<index_t>* tasks)
{
SGVector<float64_t> y(m_labels->get_num_labels());
for (int32_t i=0; i<y.vlen; i++)
y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

malsar_options options = malsar_options::default_options();
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
options.tasks_indices = tasks;
options.n_clusters = m_num_clusters;

#ifdef HAVE_EIGEN3
malsar_result_t model = malsar_clustered(
features, y.vector, m_rho1, m_rho2, options);

m_tasks_w = model.w;
m_tasks_c = model.c;
#else
SG_WARNING("Please install Eigen3 to use MultitaskClusteredLogisticRegression\n");
m_tasks_w = SGMatrix<float64_t>(((CDotFeatures*)features)->get_dim_feature_space(), options.n_tasks);
m_tasks_c = SGVector<float64_t>(options.n_tasks);
#endif
return true;
}

bool CMultitaskClusteredLogisticRegression::train_machine(CFeatures* data)
{
if (data && (CDotFeatures*)data)
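Together with the train_locked()/apply_locked_binary() machinery further down, this locked training path is what makes the multitask machines usable from cross-validation, which is the point of the commit. A usage sketch, not part of this commit, assuming the CCrossValidation, CStratifiedCrossValidationSplitting and CContingencyTableEvaluation API of this Shogun generation; constructor and setter details may differ:

// Sketch only: exercising the locked train/apply path through cross-validation.
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/transfer/multitask/MultitaskClusteredLogisticRegression.h>

using namespace shogun;

void crossvalidate_multitask(CDotFeatures* features, CBinaryLabels* labels,
                             CTaskGroup* task_group)
{
    CMultitaskClusteredLogisticRegression* machine =
        new CMultitaskClusteredLogisticRegression();
    machine->set_features(features);
    machine->set_labels(labels);
    machine->set_task_relation(task_group);

    CStratifiedCrossValidationSplitting* splitting =
        new CStratifiedCrossValidationSplitting(labels, 5);
    CContingencyTableEvaluation* metric =
        new CContingencyTableEvaluation(ACCURACY);

    // with autolock enabled, cross-validation data-locks the machine once and
    // each fold goes through train_locked() / apply_locked_binary()
    CCrossValidation* cv =
        new CCrossValidation(machine, features, labels, splitting, metric);
    cv->evaluate();

    SG_UNREF(cv);
}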
@@ -72,6 +72,10 @@ class CMultitaskClusteredLogisticRegression : public CMultitaskLogisticRegression

/** train machine */
virtual bool train_machine(CFeatures* data=NULL);

/** train locked implementation */
virtual bool train_locked_implementation(SGVector<index_t> indices,
SGVector<index_t>* tasks);

protected:

36 changes: 36 additions & 0 deletions src/shogun/transfer/multitask/MultitaskL1L2LogisticRegression.cpp
@@ -18,6 +18,7 @@ namespace shogun
CMultitaskL1L2LogisticRegression::CMultitaskL1L2LogisticRegression() :
CMultitaskLogisticRegression(), m_rho1(0.0), m_rho2(0.0)
{
init();
}

CMultitaskL1L2LogisticRegression::CMultitaskL1L2LogisticRegression(
@@ -27,6 +28,13 @@ CMultitaskL1L2LogisticRegression::CMultitaskL1L2LogisticRegression(
{
set_rho1(rho1);
set_rho2(rho2);
init();
}

void CMultitaskL1L2LogisticRegression::init()
{
SG_ADD(&m_rho1,"rho1","rho L1/L2 regularization parameter",MS_AVAILABLE);
SG_ADD(&m_rho2,"rho2","rho L2 regularization parameter",MS_AVAILABLE);
}

void CMultitaskL1L2LogisticRegression::set_rho1(float64_t rho1)
@@ -43,6 +51,34 @@ CMultitaskL1L2LogisticRegression::~CMultitaskL1L2LogisticRegression()
{
}

bool CMultitaskL1L2LogisticRegression::train_locked_implementation(SGVector<index_t> indices,
SGVector<index_t>* tasks)
{
SGVector<float64_t> y(m_labels->get_num_labels());
for (int32_t i=0; i<y.vlen; i++)
y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

malsar_options options = malsar_options::default_options();
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
options.tasks_indices = tasks;
#ifdef HAVE_EIGEN3
malsar_result_t model = malsar_joint_feature_learning(
features, y.vector, m_rho1, m_rho2, options);

m_tasks_w = model.w;
m_tasks_c = model.c;
#else
SG_WARNING("Please install Eigen3 to use MultitaskL1L2LogisticRegression\n");
m_tasks_w = SGMatrix<float64_t>(((CDotFeatures*)features)->get_dim_feature_space(), options.n_tasks);
m_tasks_c = SGVector<float64_t>(options.n_tasks);
#endif

return true;
}

bool CMultitaskL1L2LogisticRegression::train_machine(CFeatures* data)
{
if (data && (CDotFeatures*)data)
@@ -55,10 +55,18 @@ class CMultitaskL1L2LogisticRegression : public CMultitaskLogisticRegression
return "MultitaskL1L2LogisticRegression";
}

private:

void init();

protected:

/** train machine */
virtual bool train_machine(CFeatures* data=NULL);

/** train locked implementation */
virtual bool train_locked_implementation(SGVector<index_t> indices,
SGVector<index_t>* tasks);

protected:

80 changes: 80 additions & 0 deletions src/shogun/transfer/multitask/MultitaskLogisticRegression.cpp
@@ -142,6 +142,86 @@ bool CMultitaskLogisticRegression::train_machine(CFeatures* data)
return true;
}

void CMultitaskLogisticRegression::post_lock()
{
int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
SGVector<index_t>* tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();

m_tasks_indices.clear();
for (int32_t i=0; i<n_tasks; i++)
{
set<index_t> indices_set;
SGVector<index_t> task_indices = tasks_indices[i];
for (int32_t j=0; j<task_indices.vlen; j++)
indices_set.insert(task_indices[j]);

m_tasks_indices.push_back(indices_set);
}

for (int32_t i=0; i<n_tasks; i++)
tasks_indices[i].~SGVector<index_t>();
SG_FREE(tasks_indices);
}

bool CMultitaskLogisticRegression::train_locked(SGVector<index_t> indices)
{
int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
ASSERT((int)m_tasks_indices.size()==n_tasks);
vector< vector<index_t> > cutted_task_indices;
for (int32_t i=0; i<n_tasks; i++)
cutted_task_indices.push_back(vector<index_t>());
for (int32_t i=0; i<indices.vlen; i++)
{
for (int32_t j=0; j<n_tasks; j++)
{
if (m_tasks_indices[j].count(indices[i]))
{
cutted_task_indices[j].push_back(indices[i]);
break;
}
}
}
SGVector<index_t>* tasks = SG_MALLOC(SGVector<index_t>, n_tasks);
for (int32_t i=0; i<n_tasks; i++)
{
new (&tasks[i]) SGVector<index_t>(cutted_task_indices[i].size());
for (int32_t j=0; j<(int)cutted_task_indices[i].size(); j++)
tasks[i][j] = cutted_task_indices[i][j];
//tasks[i].display_vector();
}
bool res = train_locked_implementation(indices,tasks);
for (int32_t i=0; i<n_tasks; i++)
tasks[i].~SGVector<index_t>();
SG_FREE(tasks);
return res;
}

bool CMultitaskLogisticRegression::train_locked_implementation(SGVector<index_t> indices,
SGVector<index_t>* tasks)
{
SG_NOTIMPLEMENTED;
return false;
}

CBinaryLabels* CMultitaskLogisticRegression::apply_locked_binary(SGVector<index_t> indices)
{
int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
SGVector<float64_t> result(indices.vlen);
for (int32_t i=0; i<indices.vlen; i++)
{
for (int32_t j=0; j<n_tasks; j++)
{
if (m_tasks_indices[j].count(indices[i]))
{
set_current_task(j);
result[i] = apply_one(i);
break;
}
}
}
return new CBinaryLabels(result);
}

SGVector<index_t>* CMultitaskLogisticRegression::get_subset_tasks_indices()
{
int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
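Because SG_MALLOC only hands back raw storage, train_locked() above constructs each per-task SGVector with placement new and destroys it explicitly before SG_FREE. The same pattern in isolation, not part of this commit:

// Sketch only: placement-new / explicit-destructor pattern for an
// SG_MALLOC'ed array of SGVector, as used by train_locked() above.
#include <new>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/memory.h>

using namespace shogun;

void sgvector_array_demo(int32_t n)
{
    // raw storage: SG_MALLOC does not run constructors
    SGVector<index_t>* vecs = SG_MALLOC(SGVector<index_t>, n);
    for (int32_t i = 0; i < n; i++)
        new (&vecs[i]) SGVector<index_t>(10);   // construct in place

    // ... fill and use vecs[i] ...

    for (int32_t i = 0; i < n; i++)
        vecs[i].~SGVector<index_t>();           // destroy in place
    SG_FREE(vecs);                              // release raw storage
}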
25 changes: 24 additions & 1 deletion src/shogun/transfer/multitask/MultitaskLogisticRegression.h
@@ -17,6 +17,11 @@
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/Task.h>

#include <vector>
#include <set>

using namespace std;

namespace shogun
{
/** @brief */
@@ -68,12 +73,27 @@ class CMultitaskLogisticRegression : public CSLEPMachine
* @param task_tree task tree
*/
void set_task_relation(CTaskRelation* task_relation);


/** @return whether machine supports locking */
virtual bool supports_locking() const { return true; }

/** post lock */
virtual void post_lock();

/** train on given indices */
virtual bool train_locked(SGVector<index_t> indices);

/** applies on given indices */
virtual CBinaryLabels* apply_locked_binary(SGVector<index_t> indices);

protected:

/** train machine */
virtual bool train_machine(CFeatures* data=NULL);

/** train locked implementation */
virtual bool train_locked_implementation(SGVector<index_t> indices, SGVector<index_t>* tasks);

/** subset mapped task indices */
SGVector<index_t>* get_subset_tasks_indices();

@@ -96,6 +116,9 @@ class CMultitaskLogisticRegression : public CSLEPMachine
/** tasks intercepts */
SGVector<float64_t> m_tasks_c;

/** vector of sets of indices */
vector< set<index_t> > m_tasks_indices;

};
}
#endif
35 changes: 35 additions & 0 deletions src/shogun/transfer/multitask/MultitaskTraceLogisticRegression.cpp
@@ -19,6 +19,7 @@ namespace shogun
CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression() :
CMultitaskLogisticRegression(), m_rho(0.0)
{
init();
}

CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression(
@@ -27,6 +28,12 @@ CMultitaskTraceLogisticRegression::CMultitaskTraceLogisticRegression(
CMultitaskLogisticRegression(0.0,train_features,train_labels,(CTaskRelation*)task_group)
{
set_rho(rho);
init();
}

void CMultitaskTraceLogisticRegression::init()
{
SG_ADD(&m_rho,"rho","rho",MS_AVAILABLE);
}

void CMultitaskTraceLogisticRegression::set_rho(float64_t rho)
@@ -38,6 +45,34 @@ CMultitaskTraceLogisticRegression::~CMultitaskTraceLogisticRegression()
{
}

bool CMultitaskTraceLogisticRegression::train_locked_implementation(SGVector<index_t> indices,
SGVector<index_t>* tasks)
{
SGVector<float64_t> y(m_labels->get_num_labels());
for (int32_t i=0; i<y.vlen; i++)
y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

malsar_options options = malsar_options::default_options();
options.termination = m_termination;
options.tolerance = m_tolerance;
options.max_iter = m_max_iter;
options.n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
options.tasks_indices = tasks;

#ifdef HAVE_EIGEN3
malsar_result_t model = malsar_low_rank(
features, y.vector, m_rho, options);

m_tasks_w = model.w;
m_tasks_c = model.c;
#else
SG_WARNING("Please install Eigen3 to use MultitaskTraceLogisticRegression\n");
m_tasks_w = SGMatrix<float64_t>(((CDotFeatures*)features)->get_dim_feature_space(), options.n_tasks);
m_tasks_c = SGVector<float64_t>(options.n_tasks);
#endif
return true;
}

bool CMultitaskTraceLogisticRegression::train_machine(CFeatures* data)
{
if (data && (CDotFeatures*)data)
