
Merge pull request #361 from karlnapf/master
KRR/SVR and cross-validation/model-selection
Soeren Sonnenburg committed Feb 1, 2012
2 parents 0be4076 + f044974 commit 013c234
Showing 31 changed files with 999 additions and 122 deletions.
2 changes: 2 additions & 0 deletions examples/undocumented/libshogun/Makefile
@@ -21,6 +21,8 @@ TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
    library_hash parameter_set_from_parameters \
    parameter_iterate_float64 parameter_iterate_sgobject \
    parameter_modsel_parameters \
    evaluation_cross_validation_classification \
    evaluation_cross_validation_regression \
    modelselection_parameter_combination_test \
    modelselection_model_selection_parameters_test \
    modelselection_parameter_tree \
131 changes: 131 additions & 0 deletions examples/undocumented/libshogun/evaluation_cross_validation_classification.cpp
@@ -0,0 +1,131 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/

#include <shogun/base/init.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/Labels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void test_cross_validation()
{
    /* data matrix dimensions */
    index_t num_vectors=40;
    index_t num_features=5;

    /* data means -1, 1 in all components, std deviation of 3 */
    SGVector<float64_t> mean_1(num_features);
    SGVector<float64_t> mean_2(num_features);
    CMath::fill_vector(mean_1.vector, mean_1.vlen, -1.0);
    CMath::fill_vector(mean_2.vector, mean_2.vlen, 1.0);
    float64_t sigma=3;

    CMath::display_vector(mean_1.vector, mean_1.vlen, "mean 1");
    CMath::display_vector(mean_2.vector, mean_2.vlen, "mean 2");

    /* fill data matrix with gaussian noise around one of the two means */
    SGMatrix<float64_t> train_dat(num_features, num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
    {
        for (index_t j=0; j<num_features; ++j)
        {
            /* first half of the vectors belongs to cluster 1, rest to cluster 2 */
            float64_t mean=i<num_vectors/2 ? mean_1.vector[j] : mean_2.vector[j];
            train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
        }
    }

    /* training features */
    CSimpleFeatures<float64_t>* features=
            new CSimpleFeatures<float64_t>(train_dat);
    SG_REF(features);

    /* training labels +/-1 for each cluster */
    SGVector<float64_t> lab(num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
        lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;

    CLabels* labels=new CLabels(lab);

    /* gaussian kernel */
    int32_t kernel_cache=100;
    float64_t width=10;
    CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, width);
    kernel->init(features, features);

    /* create svm via libsvm */
    float64_t svm_C=10;
    float64_t svm_eps=0.0001;
    CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
    svm->set_epsilon(svm_eps);

    /* train and print training output */
    svm->train(features);
    CLabels* output=svm->apply(features);
    for (index_t i=0; i<num_vectors; ++i)
        SG_SPRINT("i=%d, class=%f\n", i, output->get_label(i));

    /* evaluation criterion: accuracy from the contingency table */
    CContingencyTableEvaluation* eval_crit=
            new CContingencyTableEvaluation(ACCURACY);

    /* evaluate training accuracy */
    float64_t eval_result=eval_crit->evaluate(output, labels);
    SG_SPRINT("training accuracy: %f\n", eval_result);
    SG_UNREF(output);

    /* assert that classification "works". this is not guaranteed to always
     * hold, but serves as a coarse check that everything went approximately
     * right: accuracy on these well-separated clusters should beat chance */
    ASSERT(eval_result>0.5);

    /* splitting strategy: stratified 5-fold cross-validation */
    index_t n_folds=5;
    CStratifiedCrossValidationSplitting* splitting=
            new CStratifiedCrossValidationSplitting(labels, n_folds);

    /* cross-validation instance, 100 runs, 95% confidence interval */
    CCrossValidation* cross=new CCrossValidation(svm, features, labels,
            splitting, eval_crit);

    cross->set_num_runs(100);
    cross->set_conf_int_alpha(0.05);

    /* actual evaluation */
    CrossValidationResult result=cross->evaluate();
    result.print_result();

    /* clean up */
    SG_UNREF(cross);
    SG_UNREF(features);
    mean_1.destroy_vector();
    mean_2.destroy_vector();
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_cross_validation();

    exit_shogun();

    return 0;
}
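The example only prints the estimate via result.print_result(). For consuming it programmatically, a minimal sketch follows; only result.mean is confirmed by this commit (the regression example below asserts on it), while has_conf_int, conf_int_low and conf_int_up are hypothetical field names suggested by the set_conf_int_alpha(0.05) call above:

/* minimal sketch: read the cross-validation estimate from the result
 * struct instead of printing it. result.mean appears in the regression
 * example below; the conf_int_* fields are assumptions, not confirmed API. */
CrossValidationResult result=cross->evaluate();
SG_SPRINT("mean accuracy: %f\n", result.mean);
if (result.has_conf_int)
    SG_SPRINT("95%% CI: [%f, %f]\n", result.conf_int_low, result.conf_int_up);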

120 changes: 120 additions & 0 deletions examples/undocumented/libshogun/evaluation_cross_validation_regression.cpp
@@ -0,0 +1,120 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/

#include <shogun/base/init.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/Labels.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/regression/KRR.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/MeanSquaredError.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void test_cross_validation()
{
    /* data matrix dimensions */
    index_t num_vectors=100;
    index_t num_features=1;

    /* training label data */
    SGVector<float64_t> lab(num_vectors);

    /* fill data matrix and labels */
    SGMatrix<float64_t> train_dat(num_features, num_vectors);
    CMath::range_fill_vector(train_dat.matrix, num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
    {
        /* labels are linear plus gaussian noise */
        lab.vector[i]=i+CMath::normal_random(0.0, 1.0);
    }

    /* training features */
    CSimpleFeatures<float64_t>* features=
            new CSimpleFeatures<float64_t>(train_dat);
    SG_REF(features);

    /* training labels */
    CLabels* labels=new CLabels(lab);

    /* kernel */
    CLinearKernel* kernel=new CLinearKernel();
    kernel->init(features, features);

    /* kernel ridge regression */
    float64_t tau=0.0001;
    CKRR* krr=new CKRR(tau, kernel, labels);

    /* evaluation criterion: mean squared error */
    CMeanSquaredError* eval_crit=new CMeanSquaredError();

    /* train and print training output */
    krr->train(features);
    CLabels* output=krr->apply();
    for (index_t i=0; i<num_vectors; ++i)
    {
        SG_SPRINT("x=%f, train=%f, predict=%f\n", train_dat.matrix[i],
                labels->get_label(i), output->get_label(i));
    }

    /* evaluate training error (MSE) */
    float64_t eval_result=eval_crit->evaluate(output, labels);
    SG_SPRINT("training error: %f\n", eval_result);
    SG_UNREF(output);

    /* assert that regression "works". this is not guaranteed to always hold,
     * but serves as a coarse check that everything went approximately right:
     * with unit-variance label noise, the training MSE should stay small */
    ASSERT(eval_result<2);

    /* splitting strategy: 5-fold cross-validation */
    index_t n_folds=5;
    CCrossValidationSplitting* splitting=
            new CCrossValidationSplitting(labels, n_folds);

    /* cross-validation instance, 100 runs, 95% confidence interval */
    CCrossValidation* cross=new CCrossValidation(krr, features, labels,
            splitting, eval_crit);

    cross->set_num_runs(100);
    cross->set_conf_int_alpha(0.05);

    /* actual evaluation */
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("cross_validation estimate:\n");
    result.print_result();

    /* same crude assertion as for the training error above */
    ASSERT(result.mean<2);

    /* clean up */
    SG_UNREF(cross);
    SG_UNREF(features);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_cross_validation();

    exit_shogun();

    return 0;
}
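The commit title also mentions model-selection, and the Makefile hunk above adds several modelselection_* example targets. The sketch below shows how the cross-validation instance built above could plug into a grid search over the KRR regularization constant; the class names are taken from those Makefile targets, but the exact signatures of build_values() and select_model(), and the registered parameter name "tau", are assumptions rather than API verified against this commit:

#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>

/* sketch: select tau of the KRR machine via grid search, reusing the
 * CCrossValidation instance "cross" from the regression example above.
 * class names follow the modelselection_* Makefile targets; signatures
 * and the parameter name "tau" are assumptions. */
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* tau_param=new CModelSelectionParameters("tau");
root->append_child(tau_param);
tau_param->build_values(-5.0, 0.0, R_EXP); /* exponential grid for tau */

CGridSearchModelSelection* grid_search=
        new CGridSearchModelSelection(root, cross);
CParameterCombination* best_combination=grid_search->select_model();
best_combination->apply_to_machine(krr);
SG_UNREF(best_combination);
SG_UNREF(grid_search);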
