Added train_state support for MC liblinear, making it possible to retrain svm
shogun-toolbox · Mar 16, 2012 · 8976502 · 8976502
1 parent 173493d
commit 8976502
Show file tree

Hide file tree

Showing 4 changed files with 231 additions and 75 deletions.
diff --git a/src/shogun/classifier/svm/SVM_linear.cpp b/src/shogun/classifier/svm/SVM_linear.cpp
@@ -42,6 +42,7 @@
 #include <shogun/mathematics/Math.h>
 #include <shogun/classifier/svm/SVM_linear.h>
 #include <shogun/classifier/svm/Tron.h>
+#include <shogun/lib/Time.h>
 
 using namespace shogun;
 
@@ -340,7 +341,10 @@ void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
 #define GETI(i) (prob->y[i])
 // To support weights for instances, use GETI(i) (i)
 
-Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted_C, double epsilon, int max_it)
+Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, 
+                                 double *weighted_C, double epsilon, 
+                                 int max_it, double max_time,
+                                 mcsvm_state* given_state)
 {
 	this->w_size = p->n;
 	this->l = p->l;
@@ -349,14 +353,12 @@ Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted
 	this->max_iter = max_it;
 	this->prob = p;
 	this->C = weighted_C;
-	this->B = SG_MALLOC(double, nr_class);
-	this->G = SG_MALLOC(double, nr_class);
+	this->max_train_time = max_time;
+	this->state = given_state;
 }
 
 Solver_MCSVM_CS::~Solver_MCSVM_CS() // nothing left to release here: B and G are now members of mcsvm_state
 {
-	SG_FREE(B);
-	SG_FREE(G);
 }
 
 int compare_double(const void *a, const void *b)
@@ -371,7 +373,7 @@ int compare_double(const void *a, const void *b)
 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
 {
 	int r;
-	double *D=CMath::clone_vector(B, active_i);
+	double *D=CMath::clone_vector(state->B, active_i);
 
 	if(yi < active_i)
 		D[yi] += A_i*C_yi;
@@ -385,9 +387,9 @@ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int act
 	for(r=0;r<active_i;r++)
 	{
 		if(r == yi)
-			alpha_new[r] = CMath::min(C_yi, (beta-B[r])/A_i);
+			alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i);
 		else
-			alpha_new[r] = CMath::min((double)0, (beta - B[r])/A_i);
+			alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i);
 	}
 	SG_FREE(D);
 }
@@ -397,44 +399,68 @@ bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double min
 	double bound = 0;
 	if(m == yi)
 		bound = C[GETI(i)];
-	if(alpha_i == bound && G[m] < minG)
+	if(alpha_i == bound && state->G[m] < minG)
 		return true;
 	return false;
 }
 
-void Solver_MCSVM_CS::Solve(double *w)
+void Solver_MCSVM_CS::solve()
 {
 	int i, m, s;
 	int iter = 0;
-	double *alpha =  SG_MALLOC(double, l*nr_class);
-	double *alpha_new = SG_MALLOC(double, nr_class);
-	int *index = SG_MALLOC(int, l);
-	double *QD = SG_MALLOC(double, l);
-	int *d_ind = SG_MALLOC(int, nr_class);
-	double *d_val = SG_MALLOC(double, nr_class);
-	int *alpha_index = SG_MALLOC(int, nr_class*l);
-	int *y_index = SG_MALLOC(int, l);
+	double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val;
+	int *index,*d_ind,*alpha_index,*y_index,*active_size_i;
+
+	if (!state->allocated)
+	{
+		state->w = SG_CALLOC(double, nr_class*w_size);
+		state->B = SG_CALLOC(double, nr_class);
+		state->G = SG_CALLOC(double, nr_class);
+		state->alpha = SG_CALLOC(double, l*nr_class);
+		state->alpha_new = SG_CALLOC(double, nr_class);
+		state->index = SG_CALLOC(int, l);
+		state->QD = SG_CALLOC(double, l);
+		state->d_ind = SG_CALLOC(int, nr_class);
+		state->d_val = SG_CALLOC(double, nr_class);
+		state->alpha_index = SG_CALLOC(int, nr_class*l);
+		state->y_index = SG_CALLOC(int, l);
+		state->active_size_i = SG_CALLOC(int, l);
+		state->allocated = true;
+	}
+	w = state->w;
+	B = state->B;
+	G = state->G;
+	alpha = state->alpha;
+	alpha_new = state->alpha_new;
+	index = state->index;
+	QD = state->QD;
+	d_ind = state->d_ind;
+	d_val = state->d_val;
+	alpha_index = state->alpha_index;
+	y_index = state->y_index;
+	active_size_i = state->active_size_i;
+
 	int active_size = l;
-	int *active_size_i = SG_MALLOC(int, l);
 	double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
 	bool start_from_all = true;
+	CTime start_time;
 	// initial
-	for(i=0;i<l*nr_class;i++)
-		alpha[i] = 0;
-	for(i=0;i<w_size*nr_class;i++)
-		w[i] = 0; 
-	for(i=0;i<l;i++)
+	if (!state->inited)
 	{
-		for(m=0;m<nr_class;m++)
-			alpha_index[i*nr_class+m] = m;
+		for(i=0;i<l;i++)
+		{
+			for(m=0;m<nr_class;m++)
+				alpha_index[i*nr_class+m] = m;
 
-		QD[i] = prob->x->dot(i, prob->x,i);
-		if (prob->use_bias)
-			QD[i] += 1.0;
+			QD[i] = prob->x->dot(i, prob->x,i);
+			if (prob->use_bias)
+				QD[i] += 1.0;
 
-		active_size_i[i] = nr_class;
-		y_index[i] = prob->y[i];
-		index[i] = i;
+			active_size_i[i] = nr_class;
+			y_index[i] = prob->y[i];
+			index[i] = i;
+		}
+		state->inited = true;
 	}
 
 	while(iter < max_iter) 
@@ -593,6 +619,9 @@ void Solver_MCSVM_CS::Solve(double *w)
 		}
 		else
 			start_from_all = false;
+
+		if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff())
+			break;
 	}
 
 	SG_SINFO("\noptimization finished, #iter = %d\n",iter);
@@ -613,18 +642,8 @@ void Solver_MCSVM_CS::Solve(double *w)
 	}
 	for(i=0;i<l;i++)
 		v -= alpha[i*nr_class+prob->y[i]];
-	SG_SINFO("Objective value = %lf\n",v);
+	SG_SINFO("Objective value = %f\n",v);
 	SG_SINFO("nSV = %d\n",nSV);
-
-	SG_FREE(alpha);
-	SG_FREE(alpha_new);
-	SG_FREE(index);
-	SG_FREE(QD);
-	SG_FREE(d_ind);
-	SG_FREE(d_val);
-	SG_FREE(alpha_index);
-	SG_FREE(y_index);
-	SG_FREE(active_size_i);
 }
 
 //

diff --git a/src/shogun/classifier/svm/SVM_linear.h b/src/shogun/classifier/svm/SVM_linear.h
@@ -230,21 +230,76 @@ class l2r_l2_svc_fun : public function
 	const problem *prob;
 };
 
+struct mcsvm_state // working buffers of Solver_MCSVM_CS::solve(), kept outside the solver so a later solve() can reuse them
+{
+	double* w; // nr_class*w_size entries
+	double* B; // nr_class entries
+	double* G; // nr_class entries
+	double* alpha; // l*nr_class entries
+	double* alpha_new; // nr_class entries
+	int* index; // l entries; initialised to index[i] = i
+	double* QD; // l entries; QD[i] = dot(x_i,x_i), plus 1.0 when prob->use_bias is set
+	int* d_ind; // nr_class entries
+	double* d_val; // nr_class entries
+	int* alpha_index; // nr_class*l entries; initialised to alpha_index[i*nr_class+m] = m
+	int* y_index; // l entries; initialised from prob->y
+	int* active_size_i; // l entries; initialised to nr_class
+	bool allocated,inited; // set by solve() once the arrays are allocated / first initialised; NOTE(review): solve() skips both steps when these are already true, so a reused state presumably must match the original l, nr_class and w_size - confirm
+
+	mcsvm_state() // starts with no buffers; solve() allocates them on first use
+	{
+		w = NULL;
+		B = NULL;
+		G = NULL;
+		alpha = NULL;
+		alpha_new = NULL;
+		index = NULL;
+		QD = NULL;
+		d_ind = NULL;
+		d_val = NULL;
+		alpha_index = NULL;
+		y_index = NULL;
+		active_size_i = NULL;
+		allocated = false;
+		inited = false;
+	}
+
+	~mcsvm_state() // releases every buffer; NOTE(review): no copy constructor/assignment is declared, so a copied state would free the same pointers twice
+	{
+		SG_FREE(w);
+		SG_FREE(B);
+		SG_FREE(G);
+		SG_FREE(alpha);
+		SG_FREE(alpha_new);
+		SG_FREE(index);
+		SG_FREE(QD);
+		SG_FREE(d_ind);
+		SG_FREE(d_val);
+		SG_FREE(alpha_index);
+		SG_FREE(y_index);
+		SG_FREE(active_size_i);
+	}
+};
+
 class Solver_MCSVM_CS // multiclass solver whose working arrays live in a caller-supplied mcsvm_state
 {
 	public:
-		Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
+		Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, 
+		                double eps, int max_iter, 
+		                double train_time, mcsvm_state* given_state);
 		~Solver_MCSVM_CS();
-		void Solve(double *w);
+		void solve(); // no output argument any more: the weight array is state->w
 	private:
 		void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
 		bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
-		double *B, *C, *G;
+		double *C;
 		int w_size, l;
 		int nr_class;
 		int max_iter;
 		double eps;
+		double max_train_time; // solve() leaves its loop once the elapsed time exceeds this; 0.0 disables the check
 		const problem *prob;
+		mcsvm_state* state; // pointer handed in through the constructor; the destructor does not free it
 };
 
 

diff --git a/src/shogun/multiclass/MulticlassLibLinear.cpp b/src/shogun/multiclass/MulticlassLibLinear.cpp
@@ -16,7 +16,42 @@
 
 using namespace shogun;
 
-struct problem;
+CMulticlassLibLinear::CMulticlassLibLinear() : // default instance: base default constructor plus init_defaults()
+	CLinearMulticlassMachine()
+{
+	init_defaults();
+}
+
+CMulticlassLibLinear::CMulticlassLibLinear(float64_t C, CDotFeatures* features, CLabels* labs) : // one-vs-rest base machine over the given features and labels
+	CLinearMulticlassMachine(ONE_VS_REST_STRATEGY,features,NULL,labs)
+{
+	init_defaults();
+	set_C(C); // must follow init_defaults(), which sets C to 1.0
+}
+
+void CMulticlassLibLinear::init_defaults() // resets every setting to its default and clears the train-state pointer
+{
+	set_C(1.0);
+	set_epsilon(1e-2);
+	set_max_iter(10000);
+	set_use_bias(false);
+	set_save_train_state(false);
+	m_train_state = NULL; // only nulls the pointer; NOTE(review): assumes no state is held when this is called
+}
+
+void CMulticlassLibLinear::register_parameters() // registers the tunable members with m_parameters under their member names
+{
+	m_parameters->add(&m_C, "m_C", "regularization constant");
+	m_parameters->add(&m_epsilon, "m_epsilon", "tolerance epsilon");
+	m_parameters->add(&m_max_iter, "m_max_iter", "max number of iterations");
+	m_parameters->add(&m_use_bias, "m_use_bias", "indicates whether bias should be used");
+	m_parameters->add(&m_save_train_state, "m_save_train_state", "indicates whether train state should be kept after training");
+}
+
+CMulticlassLibLinear::~CMulticlassLibLinear() // drops any train state still held via reset_train_state()
+{
+	reset_train_state();
+}
 
 bool CMulticlassLibLinear::train_machine(CFeatures* data)
 {
@@ -25,42 +60,51 @@ bool CMulticlassLibLinear::train_machine(CFeatures* data)
 
 	int32_t num_vectors = m_features->get_num_vectors();
 	int32_t num_classes = m_labels->get_num_classes();
+	int32_t bias_n = m_use_bias ? 1 : 0;
 
 	problem mc_problem;
 	mc_problem.l = num_vectors;
-	mc_problem.n = m_features->get_dim_feature_space()+1;
+	mc_problem.n = m_features->get_dim_feature_space() + bias_n;
 	mc_problem.y = SG_MALLOC(int32_t, mc_problem.l);
 	for (int32_t i=0; i<num_vectors; i++)
 		mc_problem.y[i] = m_labels->get_int_label(i);
 
 	mc_problem.x = m_features;
 	mc_problem.use_bias = m_use_bias;
 
-	float64_t* w = SG_MALLOC(float64_t, mc_problem.n*num_classes);
+	if (!m_train_state)
+		m_train_state = new mcsvm_state();
+
 	float64_t* C = SG_MALLOC(float64_t, num_vectors);
 	for (int32_t i=0; i<num_vectors; i++)
 		C[i] = m_C;
 
-	Solver_MCSVM_CS solver(&mc_problem,num_classes,C,m_epsilon,m_max_iter);
-	solver.Solve(w);
+	Solver_MCSVM_CS solver(&mc_problem,num_classes,C,m_epsilon,
+	                       m_max_iter,m_max_train_time,m_train_state);
+	solver.solve();
 
 	clear_machines();
 	m_machines = SGVector<CMachine*>(num_classes);
 	for (int32_t i=0; i<num_classes; i++)
 	{
 		CLinearMachine* machine = new CLinearMachine();
 		float64_t* cw = SG_MALLOC(float64_t, mc_problem.n);
-		for (int32_t j=0; j<mc_problem.n-1; j++)
-			cw[j] = w[j*num_classes+i];
-		machine->set_w(SGVector<float64_t>(cw,mc_problem.n-1));
-		//CMath::display_vector(cw,mc_problem.n);
-		machine->set_bias(w[(mc_problem.n-1)*num_classes+i]);
+
+		for (int32_t j=0; j<mc_problem.n-bias_n; j++)
+			cw[j] = m_train_state->w[j*num_classes+i];
+
+		machine->set_w(SGVector<float64_t>(cw,mc_problem.n-bias_n));
+
+		if (m_use_bias)
+			machine->set_bias(m_train_state->w[(mc_problem.n-bias_n)*num_classes+i]);
 
 		m_machines[i] = machine;
 	}
 
+	if (!m_save_train_state)
+		reset_train_state();
+
 	SG_FREE(C);
-	SG_FREE(w);
 	SG_FREE(mc_problem.y);
 
 	return true;