Commit c612a4c — Merge pull request #373 from karlnapf/master

a bunch of improvements

Soeren Sonnenburg committed Feb 16, 2012
2 parents 0338587 + f202026
Showing 26 changed files with 417 additions and 188 deletions.
12 changes: 8 additions & 4 deletions examples/undocumented/libshogun/Makefile
@@ -15,8 +15,14 @@ CC=c++

VALGRIND_LOG = valgrind.log

-TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
-    classifier_mklmulticlass kernel_gaussian kernel_revlin \
+TARGETS = basic_minimal \
+    classifier_libsvm \
+    classifier_minimal_svm \
+    classifier_mklmulticlass \
+    classifier_conjugateindex \
+    classifier_gaussiannaivebayes \
+    classifier_libsvmmulticlass \
+    kernel_gaussian kernel_revlin \
    library_dyn_int library_gc_array library_indirect_object \
    library_hash parameter_set_from_parameters \
    parameter_iterate_float64 parameter_iterate_sgobject \
@@ -63,8 +69,6 @@ TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
    converter_linearlocaltangentspacealignment \
    converter_localitypreservingprojections \
    serialization_basic_tests \
-    classifier_conjugateindex \
-    classifier_gaussiannaivebayes \
    library_cover_tree \
    kernel_machine_train_locked \

62 changes: 62 additions & 0 deletions examples/undocumented/libshogun/classifier_libsvmmulticlass.cpp
@@ -0,0 +1,62 @@
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVMMultiClass.h>
#include <shogun/base/init.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
    init_shogun(&print_message);
    index_t num_vec=3;
    index_t num_feat=2;
    index_t num_class=2;

    // create some data
    SGMatrix<float64_t> matrix(num_feat, num_vec);
    CMath::range_fill_vector(matrix.matrix, num_feat*num_vec);

    // create vectors
    // shogun will now own the matrix created
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t>(matrix);

    // create three labels
    CLabels* labels=new CLabels(num_vec);
    for (index_t i=0; i<num_vec; ++i)
        labels->set_label(i, i%num_class);

    // create gaussian kernel with cache 10MB, width 0.5
    CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
    kernel->init(features, features);

    // create libsvm with C=10 and train
    CLibSVMMultiClass* svm = new CLibSVMMultiClass(10, kernel, labels);
    svm->train();

    // classify on training examples
    CLabels* output=svm->apply();
    CMath::display_vector(output->get_labels().vector, output->get_num_labels(),
            "batch output");

    /* assert that batch apply and apply(index_t) give same result */
    for (index_t i=0; i<output->get_num_labels(); ++i)
    {
        float64_t label=svm->apply(i);
        SG_SPRINT("single output[%d]=%f\n", i, label);
        ASSERT(output->get_label(i)==label);
    }
    SG_UNREF(output);

    // free up memory
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
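(The Makefile hunk above adds a matching classifier_libsvmmulticlass target, so this example should build alongside the other libshogun examples — presumably via make classifier_libsvmmulticlass from examples/undocumented/libshogun, assuming the usual shogun example build setup.)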

@@ -107,17 +107,10 @@ void test_cross_validation()
    cross->set_num_runs(100);
    cross->set_conf_int_alpha(0.05);

-    /* this is optional and speeds everything up since the kernel matrix is
-     * precomputed. May not work though. */
-    svm->data_lock(features, labels);
-
    /* actual evaluation */
    CrossValidationResult result=cross->evaluate();
    result.print_result();

-    /* see above */
-    svm->data_unlock();
-
    /* clean up */
    SG_UNREF(cross);
    SG_UNREF(features);
Expand Up @@ -101,34 +101,38 @@ void test_cross_validation()
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);

cross->set_num_runs(20);
cross->set_num_runs(10);
cross->set_conf_int_alpha(0.05);

/* actual evaluation without fixex kernel matrix */

index_t repetitions=1;
/* no locking */
index_t repetitions=3;
SG_SPRINT("unlocked x-val\n");
kernel->init(features, features);
cross->set_autolock(false);
CTime time;
time.start();
for (index_t i=0; i<repetitions; ++i)
{
CTime time;
time.start();
cross->evaluate().print_result();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
}
cross->evaluate();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());

/* auto_locking in every iteration of this loop (better, not so nice) */
SG_SPRINT("locked in every iteration x-val\n");
cross->set_autolock(true);
time.start();
for (index_t i=0; i<repetitions; ++i)
cross->evaluate();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());

/* actual evaluation with five kernel matrix (restore features first) */
svm->data_lock(features, labels);
/* lock once before, (no locking/unlocking in this loop) */
svm->data_lock(labels, features);
SG_SPRINT("locked x-val\n");
time.start();
for (index_t i=0; i<repetitions; ++i)
{
CTime time;
time.start();
cross->evaluate().print_result();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
}
cross->evaluate();
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());

/* clean up */
SG_UNREF(cross);
Expand Down
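For context: the rewritten example above benchmarks three strategies — no locking, autolocking inside every evaluate() call, and locking once up front. A minimal sketch of the lock-once pattern, assuming the data-locking API as it appears in this PR (note the new argument order, labels before features):

    /* sketch only: assumes data_lock()/data_unlock() as used in this PR */
    svm->data_lock(labels, features);   // precompute and lock the kernel matrix once
    for (index_t i=0; i<repetitions; ++i)
        cross->evaluate();              // no per-iteration lock/unlock overhead
    svm->data_unlock();                 // release the locked data when done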
@@ -94,18 +94,11 @@ void test_cross_validation()
    cross->set_num_runs(100);
    cross->set_conf_int_alpha(0.05);

-    /* this is optional and speeds everything up since the kernel matrix is
-     * precomputed. May not work though.*/
-    krr->data_lock(features, labels);
-
    /* actual evaluation */
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("cross_validation estimate:\n");
    result.print_result();

-    /* see above */
-    krr->data_unlock();
-
    /* same crude assertion as for above evaluation */
    ASSERT(result.mean<2);
@@ -82,7 +82,7 @@ void test()
    /* now train a few times on different subsets on data and assert that
     * results are correc (data linear separable) */

-    svm->data_lock(features, labels);
+    svm->data_lock(labels, features);

    SGVector<index_t> indices(4);
    indices.vector[0]=1;
@@ -141,10 +141,9 @@ int main(int argc, char **argv)
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

-    bool lock_data=true;
    bool print_state=true;
    CParameterCombination* best_combination=grid_search->select_model(
-            print_state, lock_data);
+            print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();
@@ -129,9 +129,8 @@ void test_cross_validation()

    /* print current combination */
    bool print_state=true;
-    bool lock_data=true;
    CParameterCombination* best_combination=grid_search->select_model(
-            print_state, lock_data);
+            print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

@@ -140,7 +139,6 @@
    /* larger number of runs to have tighter confidence intervals */
    cross->set_num_runs(10);
    cross->set_conf_int_alpha(0.01);
-    krr->data_lock(features, labels);
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("result: ");
    result.print_result();
@@ -83,7 +83,6 @@ int main(int argc, char **argv)
     * Dont worry if yours is not included, simply write to the mailing list */
    classifier->print_modsel_params();

-
    /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

@@ -92,6 +91,9 @@ int main(int argc, char **argv)
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

+    /* set autolocking to false to get rid of warnings */
+    cross->set_autolock(false);
+
    CParameterCombination* best_combination=grid_search->select_model();
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();
@@ -135,9 +135,8 @@ int main(int argc, char **argv)
            param_tree, cross);

    bool print_state=true;
-    bool lock_data=true;
    CParameterCombination* best_combination=grid_search->select_model(
-            print_state, lock_data);
+            print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

@@ -146,7 +145,7 @@ int main(int argc, char **argv)
    /* larger number of runs to have tighter confidence intervals */
    cross->set_num_runs(10);
    cross->set_conf_int_alpha(0.01);
-    classifier->data_lock(features, labels);
+    classifier->data_lock(labels, features);
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("result: ");
    result.print_result();
@@ -54,7 +54,7 @@ def evaluation_cross_validation_regression(fm_train=traindat,fm_test=testdat,lab
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) tell machine to precompute kernel matrix. speeds up. may not work
-    predictor.data_lock(features, labels)
+    predictor.data_lock(labels, features)

    # perform cross-validation and print results
    result=cross_validation.evaluate()
@@ -82,10 +82,7 @@ def evaluation_cross_validation_classification(fm_train=traindat,fm_test=testdat
    # print the current parameter combination, if no parameter nothing is printed
    print_state=True

-    # tell modelselection to lock data before (optional, speeds up since kernel
-    # matrix is precomputed, may not work)
-    lock_data=True
-    best_parameters=model_selection.select_model(print_state, lock_data)
+    best_parameters=model_selection.select_model(print_state)

    # print best parameters
    print "best parameters:"
@@ -96,11 +96,11 @@ def evaluation_cross_validation_classification(fm_train=traindat,fm_test=testdat
    print "starting model selection"
    # print the current parameter combination, if no parameter nothing is printed
    print_state=True
-    # tell modelselection to not lock data before (since kernel matrix does not
-    # change here, just lock before model selection)
-    lock_data=False
-    predictor.data_lock(features_train, labels)
-    best_parameters=model_selection.select_model(print_state, lock_data)
+    # lock data before since model selection will not change the kernel matrix
+    # (use with care) This avoids that the kernel matrix is recomputed in every
+    # iteration of the model search
+    predictor.data_lock(labels, features_train)
+    best_parameters=model_selection.select_model(print_state)

    # print best parameters
    print "best parameters:"
27 changes: 22 additions & 5 deletions src/shogun/classifier/svm/LibSVMMultiClass.cpp
@@ -32,30 +32,44 @@ bool CLibSVMMultiClass::train_machine(CFeatures* data)
{
    struct svm_node* x_space;

    problem = svm_problem();

    ASSERT(labels && labels->get_num_labels());
    int32_t num_classes = labels->get_num_classes();
    problem.l=labels->get_num_labels();
    SG_INFO( "%d trainlabels, %d classes\n", problem.l, num_classes);

+    /* ensure that there are only positive labels, otherwise, train_machine
+     * will produce memory errors since svm index gets wrong */
+    for (index_t i=0; i<labels->get_num_labels(); ++i)
+    {
+        if (labels->get_label(i)<0)
+        {
+            SG_ERROR("Only labels >= 0 allowed for %s::train_machine!\n",
+                    get_name());
+        }
+    }
+
    if (data)
    {
        if (labels->get_num_labels() != data->get_num_vectors())
-            SG_ERROR("Number of training vectors does not match number of labels\n");
+        {
+            SG_ERROR("Number of training vectors does not match number of "
+                    "labels\n");
+        }
        kernel->init(data, data);
    }

    problem.y=SG_MALLOC(float64_t, problem.l);
    problem.x=SG_MALLOC(struct svm_node*, problem.l);
    problem.pv=SG_MALLOC(float64_t, problem.l);
    problem.C=SG_MALLOC(float64_t, problem.l);

    x_space=SG_MALLOC(struct svm_node, 2*problem.l);

    for (int32_t i=0; i<problem.l; i++)
    {
        problem.pv[i]=-1.0;
        problem.y[i]=labels->get_label(i);
        problem.x[i]=&x_space[2*i];
        x_space[2*i].index=i;
@@ -167,7 +181,6 @@ bool CLibSVMMultiClass::train_machine(CFeatures* data)
            // idx=((num_classes-1)*model->label[j]+model->label[i])/2;
            //
            SG_DEBUG("svm[%d] has %d sv (total: %d), b=%f label:(%d,%d) -> svm[%d]\n", s, num_sv, model->l, bias, model->label[i], model->label[j], idx);
-
            set_svm(idx, svm);
            s++;
        }
@@ -179,12 +192,16 @@ bool CLibSVMMultiClass::train_machine(CFeatures* data)
    SG_FREE(problem.x);
    SG_FREE(problem.y);
    SG_FREE(x_space);
+    SG_FREE(problem.pv);
+    SG_FREE(problem.C);

    svm_destroy_model(model);
    model=NULL;

    /* the features needed for the model are all support vectors for now,
     * which means that a copy of the features is stored in lhs */
+    /* TODO this can be done better, ie only store sv of underlying svms
+     * and map indices */
    m_svs.destroy_vector();
    m_svs=SGVector<index_t>(kernel->get_num_vec_lhs());
    m_svs.range_fill();
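Note: the added SG_FREE(problem.pv) and SG_FREE(problem.C) calls release buffers that train_machine allocates with SG_MALLOC earlier in this function but, judging by this diff, previously leaked.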
