Skip to content

Commit

Permalink
Added train_state support for MC liblinear, making it possible to retrain…
Browse files Browse the repository at this point in the history
… svm
  • Loading branch information
lisitsyn committed Mar 16, 2012
1 parent 173493d commit 8976502
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 75 deletions.
105 changes: 62 additions & 43 deletions src/shogun/classifier/svm/SVM_linear.cpp
Expand Up @@ -42,6 +42,7 @@
#include <shogun/mathematics/Math.h>
#include <shogun/classifier/svm/SVM_linear.h>
#include <shogun/classifier/svm/Tron.h>
#include <shogun/lib/Time.h>

using namespace shogun;

Expand Down Expand Up @@ -340,7 +341,10 @@ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
#define GETI(i) (prob->y[i])
// To support weights for instances, use GETI(i) (i)

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted_C, double epsilon, int max_it)
Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class,
double *weighted_C, double epsilon,
int max_it, double max_time,
mcsvm_state* given_state)
{
this->w_size = p->n;
this->l = p->l;
Expand All @@ -349,14 +353,12 @@ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted
this->max_iter = max_it;
this->prob = p;
this->C = weighted_C;
this->B = SG_MALLOC(double, nr_class);
this->G = SG_MALLOC(double, nr_class);
this->max_train_time = max_time;
this->state = given_state;
}

// Destructor. As listed here it releases the B and G buffers with SG_FREE.
// NOTE(review): the constructor text visible in this view contains both the
// SG_MALLOC of B/G and the assignment of an external mcsvm_state pointer, and
// mcsvm_state frees its own B and G in its destructor. If B and G now live in
// that state object, these two frees must not remain here — confirm against
// the post-commit file.
Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
SG_FREE(B);
SG_FREE(G);
}

int compare_double(const void *a, const void *b)
Expand All @@ -371,7 +373,7 @@ int compare_double(const void *a, const void *b)
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
int r;
double *D=CMath::clone_vector(B, active_i);
double *D=CMath::clone_vector(state->B, active_i);

if(yi < active_i)
D[yi] += A_i*C_yi;
Expand All @@ -385,9 +387,9 @@ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int act
for(r=0;r<active_i;r++)
{
if(r == yi)
alpha_new[r] = CMath::min(C_yi, (beta-B[r])/A_i);
alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i);
else
alpha_new[r] = CMath::min((double)0, (beta - B[r])/A_i);
alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i);
}
SG_FREE(D);
}
Expand All @@ -397,44 +399,68 @@ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double min
double bound = 0;
if(m == yi)
bound = C[GETI(i)];
if(alpha_i == bound && G[m] < minG)
if(alpha_i == bound && state->G[m] < minG)
return true;
return false;
}

void Solver_MCSVM_CS::Solve(double *w)
void Solver_MCSVM_CS::solve()
{
int i, m, s;
int iter = 0;
double *alpha = SG_MALLOC(double, l*nr_class);
double *alpha_new = SG_MALLOC(double, nr_class);
int *index = SG_MALLOC(int, l);
double *QD = SG_MALLOC(double, l);
int *d_ind = SG_MALLOC(int, nr_class);
double *d_val = SG_MALLOC(double, nr_class);
int *alpha_index = SG_MALLOC(int, nr_class*l);
int *y_index = SG_MALLOC(int, l);
double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val;
int *index,*d_ind,*alpha_index,*y_index,*active_size_i;

if (!state->allocated)
{
state->w = SG_CALLOC(double, nr_class*w_size);
state->B = SG_CALLOC(double, nr_class);
state->G = SG_CALLOC(double, nr_class);
state->alpha = SG_CALLOC(double, l*nr_class);
state->alpha_new = SG_CALLOC(double, nr_class);
state->index = SG_CALLOC(int, l);
state->QD = SG_CALLOC(double, l);
state->d_ind = SG_CALLOC(int, nr_class);
state->d_val = SG_CALLOC(double, nr_class);
state->alpha_index = SG_CALLOC(int, nr_class*l);
state->y_index = SG_CALLOC(int, l);
state->active_size_i = SG_CALLOC(int, l);
state->allocated = true;
}
w = state->w;
B = state->B;
G = state->G;
alpha = state->alpha;
alpha_new = state->alpha_new;
index = state->index;
QD = state->QD;
d_ind = state->d_ind;
d_val = state->d_val;
alpha_index = state->alpha_index;
y_index = state->y_index;
active_size_i = state->active_size_i;

int active_size = l;
int *active_size_i = SG_MALLOC(int, l);
double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
bool start_from_all = true;
CTime start_time;
// initial
for(i=0;i<l*nr_class;i++)
alpha[i] = 0;
for(i=0;i<w_size*nr_class;i++)
w[i] = 0;
for(i=0;i<l;i++)
if (!state->inited)
{
for(m=0;m<nr_class;m++)
alpha_index[i*nr_class+m] = m;
for(i=0;i<l;i++)
{
for(m=0;m<nr_class;m++)
alpha_index[i*nr_class+m] = m;

QD[i] = prob->x->dot(i, prob->x,i);
if (prob->use_bias)
QD[i] += 1.0;
QD[i] = prob->x->dot(i, prob->x,i);
if (prob->use_bias)
QD[i] += 1.0;

active_size_i[i] = nr_class;
y_index[i] = prob->y[i];
index[i] = i;
active_size_i[i] = nr_class;
y_index[i] = prob->y[i];
index[i] = i;
}
state->inited = true;
}

while(iter < max_iter)
Expand Down Expand Up @@ -593,6 +619,9 @@ void Solver_MCSVM_CS::Solve(double *w)
}
else
start_from_all = false;

if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff())
break;
}

SG_SINFO("\noptimization finished, #iter = %d\n",iter);
Expand All @@ -613,18 +642,8 @@ void Solver_MCSVM_CS::Solve(double *w)
}
for(i=0;i<l;i++)
v -= alpha[i*nr_class+prob->y[i]];
SG_SINFO("Objective value = %lf\n",v);
SG_SINFO("Objective value = %f\n",v);
SG_SINFO("nSV = %d\n",nSV);

SG_FREE(alpha);
SG_FREE(alpha_new);
SG_FREE(index);
SG_FREE(QD);
SG_FREE(d_ind);
SG_FREE(d_val);
SG_FREE(alpha_index);
SG_FREE(y_index);
SG_FREE(active_size_i);
}

//
Expand Down
61 changes: 58 additions & 3 deletions src/shogun/classifier/svm/SVM_linear.h
Expand Up @@ -230,21 +230,76 @@ class l2r_l2_svc_fun : public function
const problem *prob;
};

/** Working state of Solver_MCSVM_CS that can outlive a single solver run.
 *
 * The solver allocates the buffers lazily on its first run (and then sets
 * `allocated`), fills the index/QD tables once (and then sets `inited`), and
 * otherwise reuses whatever is stored here. This struct owns every buffer and
 * releases them with SG_FREE on destruction.
 *
 * Buffer sizes below are the ones the solver uses when it allocates
 * (l = number of examples, nr_class = number of classes,
 * w_size = feature dimension of the problem).
 *
 * Copying is disabled: a member-wise copy would make two objects free the
 * same buffers.
 */
struct mcsvm_state
{
	/** weight matrix, nr_class*w_size entries */
	double* w;
	/** per-class work buffer, nr_class entries */
	double* B;
	/** per-class work buffer (compared against minG when shrinking), nr_class entries */
	double* G;
	/** dual variables, l*nr_class entries */
	double* alpha;
	/** scratch for the updated dual variables of one example, nr_class entries */
	double* alpha_new;
	/** example index table, l entries */
	int* index;
	/** dot product of each example with itself (+1.0 when bias is used), l entries */
	double* QD;
	/** per-class integer scratch, nr_class entries */
	int* d_ind;
	/** per-class floating point scratch, nr_class entries */
	double* d_val;
	/** per-example class index table, nr_class*l entries */
	int* alpha_index;
	/** label of each example, l entries */
	int* y_index;
	/** number of active classes of each example, l entries */
	int* active_size_i;
	/** whether the buffers were allocated / whether the tables were filled */
	bool allocated,inited;

	/** Creates an empty state: no buffers, nothing allocated or initialized. */
	mcsvm_state() :
		w(NULL), B(NULL), G(NULL),
		alpha(NULL), alpha_new(NULL),
		index(NULL), QD(NULL),
		d_ind(NULL), d_val(NULL),
		alpha_index(NULL), y_index(NULL),
		active_size_i(NULL),
		allocated(false), inited(false)
	{
	}

	/** Releases every buffer owned by the state. */
	~mcsvm_state()
	{
		SG_FREE(w);
		SG_FREE(B);
		SG_FREE(G);
		SG_FREE(alpha);
		SG_FREE(alpha_new);
		SG_FREE(index);
		SG_FREE(QD);
		SG_FREE(d_ind);
		SG_FREE(d_val);
		SG_FREE(alpha_index);
		SG_FREE(y_index);
		SG_FREE(active_size_i);
	}

private:
	// Declared but intentionally not defined: the state owns raw buffers, so
	// an implicit member-wise copy would end in a double free.
	mcsvm_state(const mcsvm_state&);
	mcsvm_state& operator=(const mcsvm_state&);
};

/** Solver for the multiclass linear SVM problem.
 *
 * NOTE(review): this view lists older and newer declarations next to each
 * other (two constructors, Solve(double*) and solve(), and `C` declared in
 * two member lines). Only one of each pair can exist in a compilable header —
 * confirm against the actual file before relying on any single line.
 */
class Solver_MCSVM_CS
{
public:
/* constructor taking the problem, the number of classes and the C array,
 * with default eps and max_iter */
Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
/* constructor that additionally takes a training time limit (the solver
 * treats 0.0 as "no limit") and the state object holding the work buffers
 * and the resulting weights */
Solver_MCSVM_CS(const problem *prob, int nr_class, double *C,
double eps, int max_iter,
double train_time, mcsvm_state* given_state);
~Solver_MCSVM_CS();
/* runs the optimization, writing the weights to the caller's buffer w */
void Solve(double *w);
/* runs the optimization; buffers and weights are kept in the state object */
void solve();
private:
/* computes alpha_new[0..active_i) for one example from the B buffer */
void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
/* true when alpha_i sits at its bound (C of the example if m == yi,
 * otherwise 0) and G[m] is below minG */
bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
double *B, *C, *G;
/* per-example C values as passed to the constructor; not freed by the
 * visible solver code */
double *C;
/* feature dimension (problem n) and number of examples (problem l) */
int w_size, l;
int nr_class;
/* upper bound on the number of outer iterations */
int max_iter;
/* stopping tolerance */
double eps;
/* training time limit compared against elapsed time; 0.0 disables the check */
double max_train_time;
const problem *prob;
/* externally owned state the solver reads from and writes to */
mcsvm_state* state;
};


Expand Down
66 changes: 55 additions & 11 deletions src/shogun/multiclass/MulticlassLibLinear.cpp
Expand Up @@ -16,7 +16,42 @@

using namespace shogun;

struct problem;
/** Default constructor: builds the base machine with its own defaults and
 * then applies this class' default settings.
 */
CMulticlassLibLinear::CMulticlassLibLinear()
	: CLinearMulticlassMachine()
{
	this->init_defaults();
}

/** Constructor.
 *
 * @param C regularization constant
 * @param features features handed to the one-vs-rest base machine
 * @param labs labels handed to the one-vs-rest base machine
 */
CMulticlassLibLinear::CMulticlassLibLinear(float64_t C, CDotFeatures* features, CLabels* labs)
	: CLinearMulticlassMachine(ONE_VS_REST_STRATEGY, features, NULL, labs)
{
	// apply the defaults first, then override the default C with the given one
	this->init_defaults();
	this->set_C(C);
}

void CMulticlassLibLinear::init_defaults()
{
set_C(1.0);
set_epsilon(1e-2);
set_max_iter(10000);
set_use_bias(false);
set_save_train_state(false);
m_train_state = NULL;
}

/** Registers the tunable members with the parameter list (m_parameters),
 * each under its member name together with a short description.
 */
void CMulticlassLibLinear::register_parameters()
{
	m_parameters->add(&m_C, "m_C", "regularization constant");
	m_parameters->add(&m_epsilon, "m_epsilon", "tolerance epsilon");
	m_parameters->add(&m_max_iter, "m_max_iter", "max number of iterations");
	m_parameters->add(&m_use_bias, "m_use_bias", "indicates whether bias should be used");
	// the description used to be a copy of the m_use_bias one
	m_parameters->add(&m_save_train_state, "m_save_train_state", "indicates whether train state should be saved");
}

/** Destructor: resets the train state via reset_train_state(). */
CMulticlassLibLinear::~CMulticlassLibLinear()
{
	this->reset_train_state();
}

bool CMulticlassLibLinear::train_machine(CFeatures* data)
{
Expand All @@ -25,42 +60,51 @@ bool CMulticlassLibLinear::train_machine(CFeatures* data)

int32_t num_vectors = m_features->get_num_vectors();
int32_t num_classes = m_labels->get_num_classes();
int32_t bias_n = m_use_bias ? 1 : 0;

problem mc_problem;
mc_problem.l = num_vectors;
mc_problem.n = m_features->get_dim_feature_space()+1;
mc_problem.n = m_features->get_dim_feature_space() + bias_n;
mc_problem.y = SG_MALLOC(int32_t, mc_problem.l);
for (int32_t i=0; i<num_vectors; i++)
mc_problem.y[i] = m_labels->get_int_label(i);

mc_problem.x = m_features;
mc_problem.use_bias = m_use_bias;

float64_t* w = SG_MALLOC(float64_t, mc_problem.n*num_classes);
if (!m_train_state)
m_train_state = new mcsvm_state();

float64_t* C = SG_MALLOC(float64_t, num_vectors);
for (int32_t i=0; i<num_vectors; i++)
C[i] = m_C;

Solver_MCSVM_CS solver(&mc_problem,num_classes,C,m_epsilon,m_max_iter);
solver.Solve(w);
Solver_MCSVM_CS solver(&mc_problem,num_classes,C,m_epsilon,
m_max_iter,m_max_train_time,m_train_state);
solver.solve();

clear_machines();
m_machines = SGVector<CMachine*>(num_classes);
for (int32_t i=0; i<num_classes; i++)
{
CLinearMachine* machine = new CLinearMachine();
float64_t* cw = SG_MALLOC(float64_t, mc_problem.n);
for (int32_t j=0; j<mc_problem.n-1; j++)
cw[j] = w[j*num_classes+i];
machine->set_w(SGVector<float64_t>(cw,mc_problem.n-1));
//CMath::display_vector(cw,mc_problem.n);
machine->set_bias(w[(mc_problem.n-1)*num_classes+i]);

for (int32_t j=0; j<mc_problem.n-bias_n; j++)
cw[j] = m_train_state->w[j*num_classes+i];

machine->set_w(SGVector<float64_t>(cw,mc_problem.n-bias_n));

if (m_use_bias)
machine->set_bias(m_train_state->w[(mc_problem.n-bias_n)*num_classes+i]);

m_machines[i] = machine;
}

if (!m_save_train_state)
reset_train_state();

SG_FREE(C);
SG_FREE(w);
SG_FREE(mc_problem.y);

return true;
Expand Down

0 comments on commit 8976502

Please sign in to comment.