Commit c612a4c — Merge pull request #373 from karlnapf/master

a bunch of improvements

Soeren Sonnenburg committed Feb 16, 2012
2 parents 0338587 + f202026
Showing 26 changed files with 417 additions and 188 deletions.
12 changes: 8 additions & 4 deletions examples/undocumented/libshogun/Makefile
@@ -15,8 +15,14 @@ CC=c++

VALGRIND_LOG = valgrind.log

-TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
-    classifier_mklmulticlass kernel_gaussian kernel_revlin \
+TARGETS = basic_minimal \
+    classifier_libsvm \
+    classifier_minimal_svm \
+    classifier_mklmulticlass \
+    classifier_conjugateindex \
+    classifier_gaussiannaivebayes \
+    classifier_libsvmmulticlass \
+    kernel_gaussian kernel_revlin \
    library_dyn_int library_gc_array library_indirect_object \
    library_hash parameter_set_from_parameters \
    parameter_iterate_float64 parameter_iterate_sgobject \
@@ -63,8 +69,6 @@ TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
    converter_linearlocaltangentspacealignment \
    converter_localitypreservingprojections \
    serialization_basic_tests \
-    classifier_conjugateindex \
-    classifier_gaussiannaivebayes \
    library_cover_tree \
    kernel_machine_train_locked \

62 changes: 62 additions & 0 deletions examples/undocumented/libshogun/classifier_libsvmmulticlass.cpp
@@ -0,0 +1,62 @@
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVMMultiClass.h>
#include <shogun/base/init.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
    init_shogun(&print_message);
    index_t num_vec=3;
    index_t num_feat=2;
    index_t num_class=2;

    // create some data
    SGMatrix<float64_t> matrix(num_feat, num_vec);
    CMath::range_fill_vector(matrix.matrix, num_feat*num_vec);

    // create vectors
    // shogun will now own the matrix created
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t>(matrix);

    // create three labels
    CLabels* labels=new CLabels(num_vec);
    for (index_t i=0; i<num_vec; ++i)
        labels->set_label(i, i%num_class);

    // create gaussian kernel with cache 10MB, width 0.5
    CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
    kernel->init(features, features);

    // create libsvm with C=10 and train
    CLibSVMMultiClass* svm = new CLibSVMMultiClass(10, kernel, labels);
    svm->train();

    // classify on training examples
    CLabels* output=svm->apply();
    CMath::display_vector(output->get_labels().vector, output->get_num_labels(),
            "batch output");

    /* assert that batch apply and apply(index_t) give same result */
    for (index_t i=0; i<output->get_num_labels(); ++i)
    {
        float64_t label=svm->apply(i);
        SG_SPRINT("single output[%d]=%f\n", i, label);
        ASSERT(output->get_label(i)==label);
    }
    SG_UNREF(output);

    // free up memory
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
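(The Makefile hunk above adds a matching classifier_libsvmmulticlass target, so this example should build alongside the other libshogun examples — presumably via make classifier_libsvmmulticlass from examples/undocumented/libshogun, assuming the usual shogun example build setup.)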

@@ -107,17 +107,10 @@ void test_cross_validation()
    cross->set_num_runs(100);
    cross->set_conf_int_alpha(0.05);

-    /* this is optional and speeds everything up since the kernel matrix is
-     * precomputed. May not work though. */
-    svm->data_lock(features, labels);
-
    /* actual evaluation */
    CrossValidationResult result=cross->evaluate();
    result.print_result();

-    /* see above */
-    svm->data_unlock();
-
    /* clean up */
    SG_UNREF(cross);
    SG_UNREF(features);
Expand Up @@ -101,34 +101,38 @@ void test_cross_validation()
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);

cross->set_num_runs(20);
cross->set_num_runs(10);
cross->set_conf_int_alpha(0.05);

/* actual evaluation without fixex kernel matrix */

index_t repetitions=1;
/* no locking */
index_t repetitions=3;
SG_SPRINT("unlocked x-val\n");
kernel->init(features, features);
cross->set_autolock(false);
CTime time;
time.start();
for (index_t i=0; i<repetitions; ++i)
{
CTime time;
time.start();
cross->evaluate().print_result();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
}
cross->evaluate();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());

/* auto_locking in every iteration of this loop (better, not so nice) */
SG_SPRINT("locked in every iteration x-val\n");
cross->set_autolock(true);
time.start();
for (index_t i=0; i<repetitions; ++i)
cross->evaluate();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());

/* actual evaluation with five kernel matrix (restore features first) */
svm->data_lock(features, labels);
/* lock once before, (no locking/unlocking in this loop) */
svm->data_lock(labels, features);
SG_SPRINT("locked x-val\n");
time.start();
for (index_t i=0; i<repetitions; ++i)
{
CTime time;
time.start();
cross->evaluate().print_result();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
}
cross->evaluate();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());

/* clean up */
SG_UNREF(cross);
Expand Down
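For context: the rewritten example above benchmarks three strategies — no locking, autolocking inside every evaluate() call, and locking once up front. A minimal sketch of the lock-once pattern, assuming the data-locking API as it appears in this PR (note the new argument order, labels before features):

    /* sketch only: assumes data_lock()/data_unlock() as used in this PR */
    svm->data_lock(labels, features);   // precompute and lock the kernel matrix once
    for (index_t i=0; i<repetitions; ++i)
        cross->evaluate();              // no per-iteration lock/unlock overhead
    svm->data_unlock();                 // release the locked data when done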
@@ -94,18 +94,11 @@ void test_cross_validation()
    cross->set_num_runs(100);
    cross->set_conf_int_alpha(0.05);

-    /* this is optional and speeds everything up since the kernel matrix is
-     * precomputed. May not work though.*/
-    krr->data_lock(features, labels);
-
    /* actual evaluation */
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("cross_validation estimate:\n");
    result.print_result();

-    /* see above */
-    krr->data_unlock();
-
    /* same crude assertion as for above evaluation */
    ASSERT(result.mean<2);
@@ -82,7 +82,7 @@ void test()
    /* now train a few times on different subsets on data and assert that
     * results are correc (data linear separable) */

-    svm->data_lock(features, labels);
+    svm->data_lock(labels, features);

    SGVector<index_t> indices(4);
    indices.vector[0]=1;
@@ -141,10 +141,9 @@ int main(int argc, char **argv)
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

-    bool lock_data=true;
    bool print_state=true;
    CParameterCombination* best_combination=grid_search->select_model(
-            print_state, lock_data);
+            print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();
@@ -129,9 +129,8 @@ void test_cross_validation()

    /* print current combination */
    bool print_state=true;
-    bool lock_data=true;
    CParameterCombination* best_combination=grid_search->select_model(
-            print_state, lock_data);
+            print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

@@ -140,7 +139,6 @@
    /* larger number of runs to have tighter confidence intervals */
    cross->set_num_runs(10);
    cross->set_conf_int_alpha(0.01);
-    krr->data_lock(features, labels);
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("result: ");
    result.print_result();
@@ -83,7 +83,6 @@ int main(int argc, char **argv)
     * Dont worry if yours is not included, simply write to the mailing list */
    classifier->print_modsel_params();

-
    /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

@@ -92,6 +91,9 @@ int main(int argc, char **argv)
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

+    /* set autolocking to false to get rid of warnings */
+    cross->set_autolock(false);
+
    CParameterCombination* best_combination=grid_search->select_model();
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();
@@ -135,9 +135,8 @@ int main(int argc, char **argv)
            param_tree, cross);

    bool print_state=true;
-    bool lock_data=true;
    CParameterCombination* best_combination=grid_search->select_model(
-            print_state, lock_data);
+            print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

@@ -146,7 +145,7 @@ int main(int argc, char **argv)
    /* larger number of runs to have tighter confidence intervals */
    cross->set_num_runs(10);
    cross->set_conf_int_alpha(0.01);
-    classifier->data_lock(features, labels);
+    classifier->data_lock(labels, features);
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("result: ");
    result.print_result();
@@ -54,7 +54,7 @@ def evaluation_cross_validation_regression(fm_train=traindat,fm_test=testdat,lab
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) tell machine to precompute kernel matrix. speeds up. may not work
-    predictor.data_lock(features, labels)
+    predictor.data_lock(labels, features)

    # perform cross-validation and print results
    result=cross_validation.evaluate()
@@ -82,10 +82,7 @@ def evaluation_cross_validation_classification(fm_train=traindat,fm_test=testdat
    # print the current parameter combination, if no parameter nothing is printed
    print_state=True

-    # tell modelselection to lock data before (optional, speeds up since kernel
-    # matrix is precomputed, may not work)
-    lock_data=True
-    best_parameters=model_selection.select_model(print_state, lock_data)
+    best_parameters=model_selection.select_model(print_state)

    # print best parameters
    print "best parameters:"
@@ -96,11 +96,11 @@ def evaluation_cross_validation_classification(fm_train=traindat,fm_test=testdat
    print "starting model selection"
    # print the current parameter combination, if no parameter nothing is printed
    print_state=True
-    # tell modelselection to not lock data before (since kernel matrix does not
-    # change here, just lock before model selection)
-    lock_data=False
-    predictor.data_lock(features_train, labels)
-    best_parameters=model_selection.select_model(print_state, lock_data)
+    # lock data before since model selection will not change the kernel matrix
+    # (use with care) This avoids that the kernel matrix is recomputed in every
+    # iteration of the model search
+    predictor.data_lock(labels, features_train)
+    best_parameters=model_selection.select_model(print_state)

    # print best parameters
    print "best parameters:"
27 changes: 22 additions & 5 deletions src/shogun/classifier/svm/LibSVMMultiClass.cpp
@@ -32,30 +32,44 @@ bool CLibSVMMultiClass::train_machine(CFeatures* data)
{
    struct svm_node* x_space;

    problem = svm_problem();

    ASSERT(labels && labels->get_num_labels());
    int32_t num_classes = labels->get_num_classes();
    problem.l=labels->get_num_labels();
    SG_INFO( "%d trainlabels, %d classes\n", problem.l, num_classes);

+    /* ensure that there are only positive labels, otherwise, train_machine
+     * will produce memory errors since svm index gets wrong */
+    for (index_t i=0; i<labels->get_num_labels(); ++i)
+    {
+        if (labels->get_label(i)<0)
+        {
+            SG_ERROR("Only labels >= 0 allowed for %s::train_machine!\n",
+                    get_name());
+        }
+    }
+
    if (data)
    {
        if (labels->get_num_labels() != data->get_num_vectors())
-            SG_ERROR("Number of training vectors does not match number of labels\n");
+        {
+            SG_ERROR("Number of training vectors does not match number of "
+                    "labels\n");
+        }
        kernel->init(data, data);
    }

    problem.y=SG_MALLOC(float64_t, problem.l);
    problem.x=SG_MALLOC(struct svm_node*, problem.l);
    problem.pv=SG_MALLOC(float64_t, problem.l);
    problem.C=SG_MALLOC(float64_t, problem.l);

    x_space=SG_MALLOC(struct svm_node, 2*problem.l);

    for (int32_t i=0; i<problem.l; i++)
    {
        problem.pv[i]=-1.0;
        problem.y[i]=labels->get_label(i);
        problem.x[i]=&x_space[2*i];
        x_space[2*i].index=i;
@@ -167,7 +181,6 @@ bool CLibSVMMultiClass::train_machine(CFeatures* data)
            // idx=((num_classes-1)*model->label[j]+model->label[i])/2;
            //
            SG_DEBUG("svm[%d] has %d sv (total: %d), b=%f label:(%d,%d) -> svm[%d]\n", s, num_sv, model->l, bias, model->label[i], model->label[j], idx);
-
            set_svm(idx, svm);
            s++;
        }
@@ -179,12 +192,16 @@ bool CLibSVMMultiClass::train_machine(CFeatures* data)
    SG_FREE(problem.x);
    SG_FREE(problem.y);
    SG_FREE(x_space);
+    SG_FREE(problem.pv);
+    SG_FREE(problem.C);

    svm_destroy_model(model);
    model=NULL;

    /* the features needed for the model are all support vectors for now,
     * which means that a copy of the features is stored in lhs */
+    /* TODO this can be done better, ie only store sv of underlying svms
+     * and map indices */
    m_svs.destroy_vector();
    m_svs=SGVector<index_t>(kernel->get_num_vec_lhs());
    m_svs.range_fill();
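Note: the added SG_FREE(problem.pv) and SG_FREE(problem.C) calls release buffers that train_machine allocates with SG_MALLOC earlier in this function but, judging by this diff, previously leaked.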
