Merge pull request #737 from karlnapf/master

added a class to store mkl kernel weights during cross-validation

karlnapf committed Aug 17, 2012
2 parents c4e6009 + 65d1a5f commit ca1d2d0
Showing 13 changed files with 558 additions and 33 deletions.
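In short, the new CCrossValidationMKLStorage listener records the learned MKL subkernel weights of every evaluated fold. A minimal usage sketch, distilled from the new example file below (it assumes an already configured CCrossValidation instance named cross, as set up there):

/* sketch only; see the full example file below for the complete setup */
CCrossValidationMKLStorage* mkl_storage=new CCrossValidationMKLStorage();
cross->add_cross_validation_output(mkl_storage);
CEvaluationResult* result=cross->evaluate();

/* one row per subkernel, one column per evaluated fold */
SGMatrix<float64_t> weights=mkl_storage->get_mkl_weights();
weights.display_matrix("mkl weights");
SG_UNREF(result);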
1 change: 1 addition & 0 deletions examples/undocumented/libshogun/Makefile
@@ -35,6 +35,7 @@ TARGETS = basic_minimal \
evaluation_cross_validation_locked_comparison \
evaluation_cross_validation_multiclass \
evaluation_cross_validation_multiclass_mkl \
evaluation_cross_validation_mkl_weight_storage \
modelselection_parameter_combination_test \
regression_gaussian_process_fitc \
regression_gaussian_process_gaussian \
136 changes: 136 additions & 0 deletions examples/undocumented/libshogun/evaluation_cross_validation_mkl_weight_storage.cpp
@@ -0,0 +1,136 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationPrintOutput.h>
#include <shogun/evaluation/CrossValidationMKLStorage.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

void gen_rand_data(SGVector<float64_t> lab, SGMatrix<float64_t> feat,
        float64_t dist)
{
    index_t dims=feat.num_rows;
    index_t num=lab.vlen;

    for (int32_t i=0; i<num; i++)
    {
        if (i<num/2)
        {
            lab[i]=-1.0;

            for (int32_t j=0; j<dims; j++)
                feat(j, i)=CMath::random(0.0, 1.0)+dist;
        }
        else
        {
            lab[i]=1.0;

            for (int32_t j=0; j<dims; j++)
                feat(j, i)=CMath::random(0.0, 1.0)-dist;
        }
    }
    lab.display_vector("lab");
    feat.display_matrix("feat");
}

void test_mkl_cross_validation()
{
    /* generate random data */
    index_t num=10;
    index_t dims=2;
    float64_t dist=0.5;
    SGVector<float64_t> lab(num);
    SGMatrix<float64_t> feat(dims, num);
    gen_rand_data(lab, feat, dist);

    /* create train labels */
    CLabels* labels=new CBinaryLabels(lab);

    /* create train features */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    features->set_feature_matrix(feat);
    SG_REF(features);

    /* create combined features */
    CCombinedFeatures* comb_features=new CCombinedFeatures();
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);
    SG_REF(comb_features);

    /* create multiple Gaussian kernels */
    CCombinedKernel* kernel=new CCombinedKernel();
    kernel->append_kernel(new CGaussianKernel(10, 0.1));
    kernel->append_kernel(new CGaussianKernel(10, 1));
    kernel->append_kernel(new CGaussianKernel(10, 2));
    kernel->init(comb_features, comb_features);
    SG_REF(kernel);

    /* create MKL instance using LibSVM; due to a memory bug, interleaved
     * optimization is not possible here */
    CMKLClassification* svm=new CMKLClassification(new CLibSVM());
    svm->set_interleaved_optimization_enabled(false);
    svm->set_kernel(kernel);
    SG_REF(svm);

    /* create cross-validation instance */
    index_t num_folds=3;
    CSplittingStrategy* split=new CStratifiedCrossValidationSplitting(labels,
            num_folds);
    CEvaluation* eval=new CContingencyTableEvaluation(ACCURACY);
    CCrossValidation* cross=new CCrossValidation(svm, comb_features, labels, split, eval, false);

    /* add print output listener and mkl storage listener */
    cross->add_cross_validation_output(new CCrossValidationPrintOutput());
    CCrossValidationMKLStorage* mkl_storage=new CCrossValidationMKLStorage();
    cross->add_cross_validation_output(mkl_storage);

    /* perform cross-validation; this prints lots of information (caused by
     * the CCrossValidationPrintOutput instance attached to it) */
    CEvaluationResult* result=cross->evaluate();

    /* print mkl weights */
    SGMatrix<float64_t> weights=mkl_storage->get_mkl_weights();
    weights.display_matrix("mkl weights");

    /* print mean and variance of each kernel weight. These could, for
     * example, be used to compute confidence intervals */
    CStatistics::mean(weights, false).display_vector("mean per kernel");
    CStatistics::variance(weights, false).display_vector("variance per kernel");
    CStatistics::std_deviation(weights, false).display_vector("std-dev per kernel");

    /* clean up */
    SG_UNREF(result);
    SG_UNREF(cross);
    SG_UNREF(kernel);
    SG_UNREF(features);
    SG_UNREF(comb_features);
    SG_UNREF(svm);
}

int main()
{
    init_shogun_with_defaults();

    // sg_io->set_loglevel(MSG_DEBUG);

    test_mkl_cross_validation();

    exit_shogun();
    return 0;
}
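The per-kernel mean and standard deviation printed above can be turned into normal-approximation confidence intervals. A minimal sketch, assuming the fold-wise weights are roughly normal (this helper is hypothetical and not part of the commit):

/* hypothetical helper, not part of this commit: prints a 95% normal-
 * approximation confidence interval per kernel weight; weights has one
 * row per kernel and one column per cross-validation fold/run */
void print_weight_confidence_intervals(SGMatrix<float64_t> weights)
{
    SGVector<float64_t> mean=CStatistics::mean(weights, false);
    SGVector<float64_t> std=CStatistics::std_deviation(weights, false);
    float64_t n=(float64_t)weights.num_cols;

    for (index_t k=0; k<mean.vlen; ++k)
    {
        /* 1.96 is the two-sided 95% quantile of the standard normal */
        float64_t delta=1.96*std[k]/CMath::sqrt(n);
        SG_SPRINT("kernel %d: %f +- %f\n", k, mean[k], delta);
    }
}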

68 changes: 68 additions & 0 deletions examples/undocumented/libshogun/statistics.cpp
@@ -17,6 +17,72 @@

using namespace shogun;

void test_mean()
{
    SGMatrix<float64_t> X(3,5);

    for (index_t i=0; i<X.num_rows*X.num_cols; ++i)
    {
        X.matrix[i]=i;
    }
    X.display_matrix("X");

    SGVector<float64_t> mean=CStatistics::mean(X, true);
    mean.display_vector("mean");
    ASSERT(mean.vlen==5);
    ASSERT(mean[0]==1);
    ASSERT(mean[1]==4);
    ASSERT(mean[2]==7);
    ASSERT(mean[3]==10);
    ASSERT(mean[4]==13);

    float64_t mean2=CStatistics::mean(mean);
    ASSERT(mean2==7);

    mean=CStatistics::mean(X, false);
    mean.display_vector("mean");
    ASSERT(mean.vlen==3);
    ASSERT(mean[0]==6);
    ASSERT(mean[1]==7);
    ASSERT(mean[2]==8);

    mean2=CStatistics::mean(mean);
    ASSERT(mean2==7);
}

void test_variance()
{
    SGMatrix<float64_t> X(3,5);

    for (index_t i=0; i<X.num_rows*X.num_cols; ++i)
    {
        X.matrix[i]=i;
    }
    X.display_matrix("X");

    SGVector<float64_t> var=CStatistics::variance(X, true);
    var.display_vector("variance");
    ASSERT(var.vlen==5);
    ASSERT(var[0]==1);
    ASSERT(var[1]==1);
    ASSERT(var[2]==1);
    ASSERT(var[3]==1);
    ASSERT(var[4]==1);

    float64_t var2=CStatistics::variance(var);
    ASSERT(var2==0);

    var=CStatistics::variance(X, false);
    var.display_vector("variance");
    ASSERT(var.vlen==3);
    ASSERT(var[0]==22.5);
    ASSERT(var[1]==22.5);
    ASSERT(var[2]==22.5);

    var2=CStatistics::variance(var);
    ASSERT(var2==0);
}
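The expected values follow from the column-major fill: column j of X holds {3j, 3j+1, 3j+2} and row i holds {i, i+3, i+6, i+9, i+12}. A tiny standalone check of the row-wise case (hypothetical, not part of the commit):

/* hypothetical check, not part of the commit: row i of X above has mean
 * i+6 and unbiased variance (36+9+0+9+36)/4 = 22.5, independent of i */
void check_row_stats(float64_t i)
{
    float64_t vals[5]={i, i+3, i+6, i+9, i+12};

    float64_t mean=0;
    for (index_t j=0; j<5; ++j)
        mean+=vals[j];
    mean/=5;
    ASSERT(mean==i+6);

    float64_t var=0;
    for (index_t j=0; j<5; ++j)
        var+=(vals[j]-mean)*(vals[j]-mean);
    var/=4; /* unbiased estimator divides by n-1 */
    ASSERT(var==22.5);
}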

void test_confidence_intervals()
{
    int32_t data_size=100;
@@ -244,6 +310,8 @@ int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    test_mean();
    test_variance();
    test_confidence_intervals();
    test_inverse_student_t();
    test_incomplete_gamma();
3 changes: 2 additions & 1 deletion src/interfaces/modular/Evaluation.i
@@ -46,7 +46,7 @@
%rename(CrossValidationResult) CCrossValidationResult;
%rename(CrossValidationOutput) CCrossValidationOutput;
%rename(CrossValidationPrintOutput) CCrossValidationPrintOutput;

%rename(CrossValidationMKLStorage) CCrossValidationMKLStorage;

/* Include Class Headers to make them visible from within the target language */
%include <shogun/evaluation/EvaluationResult.h>
@@ -74,3 +74,4 @@
%include <shogun/evaluation/CrossValidationSplitting.h>
%include <shogun/evaluation/CrossValidationOutput.h>
%include <shogun/evaluation/CrossValidationPrintOutput.h>
%include <shogun/evaluation/CrossValidationMKLStorage.h>
2 changes: 2 additions & 0 deletions src/interfaces/modular/Evaluation_includes.i
@@ -23,5 +23,7 @@
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/CrossValidationOutput.h>
#include <shogun/evaluation/CrossValidationPrintOutput.h>
#include <shogun/evaluation/CrossValidationMKLStorage.h>
%}

28 changes: 24 additions & 4 deletions src/shogun/evaluation/CrossValidation.cpp
@@ -217,6 +217,17 @@ float64_t CCrossValidation::evaluate_one_run()
    /* do actual cross-validation */
    for (index_t i=0; i<num_subsets; ++i)
    {
        /* possibly update xvalidation output class */
        CCrossValidationOutput* current=(CCrossValidationOutput*)
                m_xval_outputs->get_first_element();
        while (current)
        {
            current->update_fold_index(i);
            SG_UNREF(current);
            current=(CCrossValidationOutput*)
                    m_xval_outputs->get_next_element();
        }

        /* index subset for training, will be freed below */
        SGVector<index_t> inverse_subset_indices=
                m_splitting_strategy->generate_subset_inverse(i);
@@ -229,8 +240,7 @@
                m_splitting_strategy->generate_subset_indices(i);

        /* possibly update xvalidation output class */
        CCrossValidationOutput* current=(CCrossValidationOutput*)
                m_xval_outputs->get_first_element();
        current=(CCrossValidationOutput*)m_xval_outputs->get_first_element();
        while (current)
        {
            current->update_train_indices(inverse_subset_indices, "\t");
@@ -283,6 +293,17 @@ float64_t CCrossValidation::evaluate_one_run()
    /* do actual cross-validation */
    for (index_t i=0; i<num_subsets; ++i)
    {
        /* possibly update xvalidation output class */
        CCrossValidationOutput* current=(CCrossValidationOutput*)
                m_xval_outputs->get_first_element();
        while (current)
        {
            current->update_fold_index(i);
            SG_UNREF(current);
            current=(CCrossValidationOutput*)
                    m_xval_outputs->get_next_element();
        }

        /* set feature subset for training */
        SGVector<index_t> inverse_subset_indices=
                m_splitting_strategy->generate_subset_inverse(i);
@@ -310,8 +331,7 @@ float64_t CCrossValidation::evaluate_one_run()
        SG_DEBUG("finished training\n");

        /* possibly update xvalidation output class */
        CCrossValidationOutput* current=(CCrossValidationOutput*)
                m_xval_outputs->get_first_element();
        current=(CCrossValidationOutput*)m_xval_outputs->get_first_element();
        while (current)
        {
            current->update_train_indices(inverse_subset_indices, "\t");
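These hooks make the output mechanism pluggable: before each fold, every registered CCrossValidationOutput is told the fold index, and after training it receives the trained machine. A minimal sketch of a custom listener (hypothetical class, not part of the commit; the overridden signature is inferred from the calls above):

/* hypothetical listener, not part of this commit: logs each fold start */
class CFoldLogger : public CCrossValidationOutput
{
public:
    virtual const char* get_name() const { return "FoldLogger"; }

    virtual void update_fold_index(index_t fold_index)
    {
        /* keep the base class bookkeeping, then log */
        CCrossValidationOutput::update_fold_index(fold_index);
        SG_SPRINT("starting fold %d\n", fold_index);
    }
};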
50 changes: 50 additions & 0 deletions src/shogun/evaluation/CrossValidationMKLStorage.cpp
@@ -0,0 +1,50 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Sergey Lisitsyn
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/evaluation/CrossValidationMKLStorage.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/classifier/mkl/MKL.h>

using namespace shogun;

void CCrossValidationMKLStorage::update_trained_machine(
        CMachine* machine, const char* prefix)
{
    if (!dynamic_cast<CMKL*>(machine))
    {
        SG_ERROR("%s::update_trained_machine(): This method is only usable "
                "with CMKL derived machines. This one is \"%s\"\n", get_name(),
                machine->get_name());
    }

    CMKL* mkl=(CMKL*)machine;
    CCombinedKernel* kernel=dynamic_cast<CCombinedKernel*>(
            mkl->get_kernel());
    ASSERT(kernel);

    SGVector<float64_t> w=kernel->get_subkernel_weights();

    /* possibly allocate memory (first call) */
    if (!m_mkl_weights.matrix)
    {
        SG_PRINT("allocating memory for mkl weight matrix\n");
        m_mkl_weights=SGMatrix<float64_t>(w.vlen, m_num_folds*m_num_runs);
    }

    /* put current mkl weights into matrix; copy memory vector-wise to make
     * things fast. The matrix is column-major with one column per
     * (run, fold) pair, so the column index is run*num_folds+fold */
    index_t col=m_current_run_index*m_num_folds+m_current_fold_index;
    index_t first_idx=col*w.vlen;
    SG_DEBUG("run %d, fold %d, matrix index %d\n", m_current_run_index,
            m_current_fold_index, first_idx);
    memcpy(&m_mkl_weights.matrix[first_idx], w.vector,
            w.vlen*sizeof(float64_t));

    SG_UNREF(kernel);
}
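Given that layout, reading back the weights of a particular run and fold is a matter of picking the right column. A quick sketch (hypothetical code, not part of the commit; run, fold and num_folds are assumed known from the cross-validation setup):

/* hypothetical read-back, not part of this commit: weights is the matrix
 * returned by get_mkl_weights(), with one row per kernel and one column
 * per (run, fold) pair, stored column-major */
SGMatrix<float64_t> weights=mkl_storage->get_mkl_weights();
index_t col=run*num_folds+fold;
for (index_t k=0; k<weights.num_rows; ++k)
    SG_SPRINT("kernel %d weight: %f\n", k, weights(k, col));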
