Merge pull request #686 from karlnapf/master
precomputed kernel matrices
karlnapf committed Aug 1, 2012
2 parents 6a311bd + 621145a commit 1434a79
Showing 9 changed files with 213 additions and 43 deletions.
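The change lets the kernel two-sample tests bootstrap on a precomputed kernel matrix: the kernel is evaluated once, wrapped in a CCustomKernel, and each bootstrap iteration then only permutes rows and columns of the stored matrix via subsets instead of re-initialising the kernel. A minimal sketch of the resulting usage, condensed from the new example and test files below (assumes a Shogun build at roughly this revision; the function name and iteration count are illustrative):

#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/statistics/QuadraticTimeMMD.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

/* features holds the merged samples of p and q, m vectors per distribution */
void bootstrap_with_precomputed_kernel(CFeatures* features, index_t m)
{
    /* evaluate the kernel once on the merged samples ... */
    CKernel* kernel=new CGaussianKernel();
    kernel->init(features, features);

    /* ... and store the result as a precomputed matrix */
    CCustomKernel* precomputed=new CCustomKernel(kernel);

    /* bootstrap_null() detects the custom kernel and permutes its rows and
     * columns via subsets instead of re-computing kernel values */
    CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(precomputed, features, m);
    mmd->set_statistic_type(UNBIASED);
    mmd->set_bootstrap_iterations(1000);
    SGVector<float64_t> null_samples=mmd->bootstrap_null();
    SG_SPRINT("mean of null distribution: %f\n", CStatistics::mean(null_samples));

    SG_UNREF(mmd);
    SG_UNREF(kernel);
}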
2 changes: 1 addition & 1 deletion examples/undocumented/libshogun/Makefile
@@ -25,7 +25,7 @@ TARGETS = basic_minimal \
classifier_multiclasslibsvm \
classifier_qda \
classifier_multiclasslinearmachine \
kernel_gaussian kernel_revlin \
kernel_gaussian kernel_revlin kernel_custom\
library_dyn_int library_gc_array library_indirect_object \
library_hash parameter_set_from_parameters \
parameter_iterate_float64 parameter_iterate_sgobject \
78 changes: 78 additions & 0 deletions examples/undocumented/libshogun/kernel_custom.cpp
@@ -0,0 +1,78 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DataGenerator.h>

using namespace shogun;

void test_custom_kernel_subsets()
{
    /* create some data */
    index_t m=10;
    CFeatures* features=
            new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data(
            m, 2, 1));
    SG_REF(features);

    /* create a custom kernel */
    CKernel* k=new CGaussianKernel();
    k->init(features, features);
    CCustomKernel* l=new CCustomKernel(k);

    /* create a random permutation */
    SGVector<index_t> subset(m);

    for (index_t run=0; run<100; ++run)
    {
        subset.range_fill();
        subset.permute();
        // subset.display_vector("permutation");
        features->add_subset(subset);
        k->init(features, features);
        l->add_row_subset(subset);
        l->add_col_subset(subset);
        // k->get_kernel_matrix().display_matrix("K");
        // l->get_kernel_matrix().display_matrix("L");
        for (index_t i=0; i<m; ++i)
        {
            for (index_t j=0; j<m; ++j)
            {
                SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j,
                        l->kernel(i, j));
                ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8);
            }
        }

        features->remove_subset();
        l->remove_row_subset();
        l->remove_col_subset();
    }

    SG_UNREF(k);
    SG_UNREF(l);
    SG_UNREF(features);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    // sg_io->set_loglevel(MSG_DEBUG);

    test_custom_kernel_subsets();

    exit_shogun();
    return 0;
}


25 changes: 25 additions & 0 deletions examples/undocumented/libshogun/statistics_quadratic_time_mmd.cpp
@@ -10,6 +10,7 @@
#include <shogun/base/init.h>
#include <shogun/statistics/QuadraticTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/features/DataGenerator.h>
@@ -67,6 +68,9 @@ void test_quadratic_mmd_bootstrap()
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);
mmd->set_statistic_type(UNBIASED);
mmd->set_bootstrap_iterations(num_iterations);

/* use fixed seed */
CMath::init_random(1);
SGVector<float64_t> null_samples=mmd->bootstrap_null();

float64_t mean=CStatistics::mean(null_samples);
@@ -84,7 +88,26 @@ void test_quadratic_mmd_bootstrap()
// ASSERT(var>2.194192869469228e-05);
// ASSERT(var<2.936672859339959e-05);

/* now again but with a precomputed kernel, same features.
* This avoids re-computing the kernel matrix in every bootstrapping
* iteration and should be num_iterations times faster */
SG_REF(features);
CCustomKernel* precomputed_kernel=new CCustomKernel(kernel);
SG_UNREF(mmd);
mmd=new CQuadraticTimeMMD(precomputed_kernel, features, m);
mmd->set_statistic_type(UNBIASED);
mmd->set_bootstrap_iterations(num_iterations);
CMath::init_random(1);
null_samples=mmd->bootstrap_null();

/* assert that results do not change */
SG_SPRINT("mean %f, var %f\n", CStatistics::mean(null_samples),
CStatistics::variance(null_samples));
ASSERT(CMath::abs(mean-CStatistics::mean(null_samples))<10E-5);
ASSERT(CMath::abs(var-CStatistics::variance(null_samples))<10E-5);

SG_UNREF(mmd);
SG_UNREF(features);
}

#ifdef HAVE_LAPACK
@@ -204,6 +227,8 @@ int main(int argc, char** argv)
{
init_shogun_with_defaults();

// sg_io->set_loglevel(MSG_DEBUG);

/* all tests have been sped up by reducing the number of runs/samples.
* If you have any doubts about the results, set all num_runs back to their
* original values and activate the asserts. If they fail, something is wrong. */
5 changes: 5 additions & 0 deletions src/shogun/lib/SGVector.cpp
@@ -394,6 +394,11 @@ void SGVector<T>::permute(T* vec, int32_t n)
CMath::swap(vec[i], vec[CMath::random(i, n-1)]);
}

template<class T> void SGVector<T>::permute()
{
    SGVector<T>::permute(vector, vlen);
}

template <class T>
void SGVector<T>::permute_vector(SGVector<T> vec)
{
3 changes: 3 additions & 0 deletions src/shogun/lib/SGVector.h
@@ -168,6 +168,9 @@ template<class T> class SGVector : public SGReferencedData

static void permute_vector(SGVector<T> vec);

/** create a random permutation in place */
void permute();


/** resize array from old_size to new_size (keeping as much array
* content as possible intact)
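The in-place permute() declared above complements the existing range_fill(); a minimal usage sketch (a hypothetical standalone snippet, not part of this commit, assuming a Shogun build that includes this change):

#include <shogun/base/init.h>
#include <shogun/lib/SGVector.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    /* fill with 0..9, then shuffle in place (Fisher-Yates via CMath::random) */
    SGVector<index_t> subset(10);
    subset.range_fill();
    subset.permute();
    subset.display_vector("permutation");

    exit_shogun();
    return 0;
}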
67 changes: 67 additions & 0 deletions src/shogun/statistics/KernelTwoSampleTestStatistic.cpp
@@ -10,6 +10,7 @@
#include <shogun/statistics/KernelTwoSampleTestStatistic.h>
#include <shogun/features/Features.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/kernel/CustomKernel.h>

using namespace shogun;

@@ -27,6 +28,9 @@ CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,

m_kernel=kernel;
SG_REF(kernel);

/* init kernel once in the beginning */
m_kernel->init(m_p_and_q, m_p_and_q);
}

CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,
@@ -36,6 +40,9 @@ CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,

m_kernel=kernel;
SG_REF(kernel);

/* init kernel once in the beginning */
m_kernel->init(m_p_and_q, m_p_and_q);
}

CKernelTwoSampleTestStatistic::~CKernelTwoSampleTestStatistic()
@@ -49,3 +56,63 @@ void CKernelTwoSampleTestStatistic::init()
MS_AVAILABLE);
m_kernel=NULL;
}

SGVector<float64_t> CKernelTwoSampleTestStatistic::bootstrap_null()
{
    /* compute bootstrap statistics for null distribution */
    SGVector<float64_t> results(m_bootstrap_iterations);

    /* memory for index permutations, allocated once here since allocating it
     * inside the loop would slow it down */
    SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
    ind_permutation.range_fill();

    /* check if kernel is a custom kernel. In that case, we do not change the
     * features but just subset the precomputed kernel itself */
    CCustomKernel* custom_kernel;
    if (m_kernel->get_kernel_type()==K_CUSTOM)
    {
        custom_kernel=(CCustomKernel*)m_kernel;

        for (index_t i=0; i<m_bootstrap_iterations; ++i)
        {
            /* idea: merge features of p and q, shuffle, and compute statistic.
             * This is done using subsets here. The subsets are added to the
             * custom kernel since it has no features to subset. The
             * CustomKernel does not have to be re-initialised after each
             * subset is set - in fact, re-initialising would remove all
             * subsets */
            SGVector<int32_t>::permute_vector(ind_permutation);
            custom_kernel->add_row_subset(ind_permutation);
            custom_kernel->add_col_subset(ind_permutation);

            /* compute statistic for this permutation of mixed samples */
            results[i]=compute_statistic();

            /* remove subsets */
            custom_kernel->remove_row_subset();
            custom_kernel->remove_col_subset();
        }
    }
    else
    {
        for (index_t i=0; i<m_bootstrap_iterations; ++i)
        {
            /* idea: merge features of p and q, shuffle, and compute statistic.
             * This is done using subsets here */
            SGVector<int32_t>::permute_vector(ind_permutation);
            m_p_and_q->add_subset(ind_permutation);

            /* kernel has to be re-initialised after each subset setting */
            m_kernel->init(m_p_and_q, m_p_and_q);

            /* compute statistic for this permutation of mixed samples */
            results[i]=compute_statistic();

            /* remove subset */
            m_p_and_q->remove_subset();
        }
    }

    /* clean up, re-init kernel on original data and return */
    m_kernel->init(m_p_and_q, m_p_and_q);
    return results;
}
9 changes: 9 additions & 0 deletions src/shogun/statistics/KernelTwoSampleTestStatistic.h
@@ -60,6 +60,15 @@ class CKernelTwoSampleTestStatistic : public CTwoDistributionsTestStatistic

virtual ~CKernelTwoSampleTestStatistic();

/** merges both sets of samples and computes the test statistic
* m_bootstrap_iterations times. This version checks if a precomputed
* custom kernel is used, and, if so, just permutes it instead of
* re-computing it in every iteration.
*
* @return vector of all statistics
*/
virtual SGVector<float64_t> bootstrap_null();

inline virtual const char* get_name() const=0;

private:
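The precomputed kernel passed to bootstrap_null() does not have to come from another Shogun kernel object. If memory serves, CCustomKernel also accepts a full kernel matrix directly (this constructor is an assumption, it is not part of this diff), so a matrix computed outside Shogun can be bootstrapped the same way:

#include <shogun/kernel/CustomKernel.h>
#include <shogun/lib/SGMatrix.h>

using namespace shogun;

/* km is an externally computed, symmetric (2m x 2m) kernel matrix over the
 * merged samples of p and q; the SGMatrix constructor used here is assumed,
 * see the note above */
CCustomKernel* kernel_from_matrix(SGMatrix<float64_t> km)
{
    return new CCustomKernel(km);
}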
6 changes: 3 additions & 3 deletions src/shogun/statistics/LinearTimeMMD.cpp
@@ -86,7 +86,7 @@ float64_t CLinearTimeMMD::compute_statistic()
SG_DEBUG("m_q_start=%d\n", m_q_start);

/* compute traces of kernel matrices for linear MMD */
m_kernel->init(m_p_and_q, m_p_and_q);
// m_kernel->init(m_p_and_q, m_p_and_q);

float64_t pp=0;
float64_t qq=0;
@@ -176,7 +176,7 @@ float64_t CLinearTimeMMD::compute_variance_estimate()
index_t m=m_q_start;
index_t m_2=m/2;

m_kernel->init(m_p_and_q, m_p_and_q);
// m_kernel->init(m_p_and_q, m_p_and_q);

/* allocate memory for traces */
SGVector<float64_t> traces(m_2);
@@ -238,7 +238,7 @@ void CLinearTimeMMD::optimize_kernel_weights()
}

/* init kernel with features */
m_kernel->init(m_p_and_q, m_p_and_q);
// m_kernel->init(m_p_and_q, m_p_and_q);

/* number of kernels and data */
index_t num_kernels=combined_kernel->get_num_subkernels();
