Skip to content

Commit

Permalink
Merge pull request #583 from karlnapf/master
Browse files Browse the repository at this point in the history
buxfixes and new tests
  • Loading branch information
karlnapf committed Jun 13, 2012
2 parents 86919c0 + 7113d94 commit 09518d6
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 24 deletions.
91 changes: 79 additions & 12 deletions examples/undocumented/libshogun/statistics_quadratic_time_mmd.cpp
Expand Up @@ -55,12 +55,15 @@ void test_quadratic_mmd_fixed()
SG_UNREF(mmd);
}

/** tests the quadratic mmd statistic bootstrapping for a random data case and
* ensures equality with matlab implementation */
void test_quadratic_mmd_bootstrap()
{
index_t dimension=3;
index_t m=300;
index_t m=100;
float64_t difference=0.5;
float64_t sigma=2;
index_t num_iterations=1000;

SGMatrix<float64_t> data(dimension, 2*m);
create_mean_data(data, difference);
Expand All @@ -69,28 +72,88 @@ void test_quadratic_mmd_bootstrap()
/* shoguns kernel width is different */
CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);
mmd->set_bootstrap_iterations(100);
mmd->set_bootstrap_iterations(num_iterations);
SGVector<float64_t> null_samples=mmd->bootstrap_null();

null_samples.display_vector();
SG_SPRINT("mean %f, var %f\n", CStatistics::mean(null_samples),
CStatistics::variance(null_samples));
float64_t mean=CStatistics::mean(null_samples);
float64_t var=CStatistics::variance(null_samples);

/* MATLAB mean 2-sigma confidence interval for 1000 repretitions is
* [-3.169406734013459e-04, 3.296399498466372e-04] */
ASSERT(mean>-3.169406734013459e-04);
ASSERT(mean<3.296399498466372e-04);

/* MATLAB variance 2-sigma confidence interval for 1000 repretitions is
* [2.194192869469228e-05,2.936672859339959e-05] */
ASSERT(var>2.194192869469228e-05);
ASSERT(var<2.936672859339959e-05);

SG_UNREF(mmd);
}

void test_spectrum(CQuadraticTimeMMD* mmd)
#ifdef HAVE_LAPACK
/** tests the quadratic mmd statistic threshold method spectrum for radnom data
* case and ensures equality with matlab implementation */
void test_quadratic_mmd_spectrum()
{
mmd->set_num_samples_sepctrum(250);
mmd->set_num_eigenvalues_spectrum(2);
index_t dimension=3;
index_t m=100;
float64_t difference=0.5;
float64_t sigma=2;

SGMatrix<float64_t> data(dimension, 2*m);
create_mean_data(data, difference);

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

/* shoguns kernel width is different */
CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);

mmd->set_num_samples_sepctrum(1000);
mmd->set_num_eigenvalues_spectrum(m);
mmd->set_p_value_method(MMD2_SPECTRUM);
SG_SPRINT("spectrum: %f\n", mmd->compute_p_value(2));

/* compute p-value for a fixed statistic value */
float64_t p=mmd->compute_p_value(2);

/* MATLAB 1000 iterations 3 sigma confidence interval is
* [0.021240218376709, 0.060875781623291] */
ASSERT(p>0.021240218376709);
ASSERT(p<0.060875781623291);

SG_UNREF(mmd);
}
#endif // HAVE_LAPACK

void test_gamma(CQuadraticTimeMMD* mmd)
/** tests the quadratic mmd statistic threshold method gamma for fixed data
* case and ensures equality with matlab implementation */
void test_quadratic_mmd_gamma()
{
index_t dimension=3;
index_t m=100;
float64_t sigma=4;

/* note: fixed data this time */
SGMatrix<float64_t> data(dimension, 2*m);
for (index_t i=0; i<2*dimension*m; ++i)
data.matrix[i]=i;

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

/* shoguns kernel width is different */
CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);

mmd->set_p_value_method(MMD2_GAMMA);
SG_SPRINT("gamma: %f\n", mmd->compute_p_value(2));

/* compute p-value for a fixed statistic value */
float64_t p=mmd->compute_p_value(2);

/* MATLAB 1000 iterations mean: 0.511547577996229 with variance 10E-15 */
ASSERT(CMath::abs(p-0.511547577996229)<10E-14);

SG_UNREF(mmd);
}

/** tests the quadratic mmd statistic for a random data case (fixed distribution
Expand Down Expand Up @@ -135,7 +198,11 @@ int main(int argc, char** argv)

test_quadratic_mmd_fixed();
test_quadratic_mmd_random();
// test_quadratic_mmd_bootstrap();
test_quadratic_mmd_bootstrap();
#ifdef HAVE_LAPACK
test_quadratic_mmd_spectrum();
#endif
test_quadratic_mmd_gamma();

exit_shogun();
return 0;
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/mathematics/Math.h
Expand Up @@ -700,18 +700,18 @@ class CMath : public CSGObject
*
* @param vector vector to find position in
* @param element element to find index for
* @return index of the first element smaller than given one plus 1
* @return index of the first element greater than given one
*/
template <class T>
static index_t find_position_to_insert(SGVector<T> vector, T element)
{
index_t i;
for (i=0; i<vector.vlen; ++i)
{
if (vector[i]<element)
if (vector[i]>element)
break;
}
return ++i;
return i;
}

/** performs a quicksort on the given vector
Expand Down
12 changes: 4 additions & 8 deletions src/shogun/statistics/QuadraticTimeMMD.cpp
Expand Up @@ -97,8 +97,8 @@ float64_t CQuadraticTimeMMD::compute_p_value(float64_t statistic)
SGVector<float64_t> null_samples=sample_null_spectrum(
m_num_samples_spectrum, m_num_eigenvalues_spectrum);
CMath::qsort(null_samples);
float64_t pos=CMath::find_position_to_insert(null_samples, statistic);
result=1.0-pos/null_samples.vlen;
index_t pos=CMath::find_position_to_insert(null_samples, statistic);
result=1.0-((float64_t)pos)/null_samples.vlen;
#else // HAVE_LAPACK
SG_ERROR("CQuadraticTimeMMD::compute_p_value(): Only possible if "
"shogun is compiled with LAPACK enabled\n");
Expand All @@ -121,8 +121,6 @@ float64_t CQuadraticTimeMMD::compute_p_value(float64_t statistic)
SGVector<float64_t> CQuadraticTimeMMD::sample_null_spectrum(index_t num_samples,
index_t num_eigenvalues)
{
/* the whole procedure is already checked against MATLAB implementation */

if (m_q_start!=m_p_and_q->get_num_vectors()/2)
{
/* TODO support different numbers of samples */
Expand Down Expand Up @@ -161,7 +159,7 @@ SGVector<float64_t> CQuadraticTimeMMD::sample_null_spectrum(index_t num_samples,
SGVector<float64_t> eigenvalues=SGMatrix<float64_t>::compute_eigenvectors(K);
SGVector<float64_t> largest_ev(num_eigenvalues);

/* scale by 1/2/m on the fly and take abs value*/
/* take largest EV, scale by 1/2/m on the fly and take abs value*/
for (index_t i=0; i<num_eigenvalues; ++i)
largest_ev[i]=CMath::abs(1.0/2/m_q_start*eigenvalues[eigenvalues.vlen-1-i]);

Expand All @@ -172,7 +170,7 @@ SGVector<float64_t> CQuadraticTimeMMD::sample_null_spectrum(index_t num_samples,
/* 2*sum(kEigs.*(randn(length(kEigs),1)).^2); */
null_samples[i]=0;
for (index_t j=0; j<largest_ev.vlen; ++j)
null_samples[i]+=largest_ev[j]*CMath::pow(2.0, 2);
null_samples[i]+=largest_ev[j]*CMath::pow(CMath::randn_double(), 2);

null_samples[i]*=2;
}
Expand All @@ -183,8 +181,6 @@ SGVector<float64_t> CQuadraticTimeMMD::sample_null_spectrum(index_t num_samples,

float64_t CQuadraticTimeMMD::compute_p_value_gamma(float64_t statistic)
{
/* the whole procedure is already checked against MATLAB implementation */

if (m_q_start!=m_p_and_q->get_num_vectors()/2)
{
/* TODO support different numbers of samples */
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/statistics/TwoSampleTestStatistic.cpp
Expand Up @@ -68,7 +68,7 @@ SGVector<float64_t> CTwoSampleTestStatistic::bootstrap_null()
SGVector<int32_t>::permute_vector(ind_permutation);

/* compute statistic for this permutation of mixed samples */
results.vector[i]=compute_statistic();
results[i]=compute_statistic();
}

/* clean up */
Expand Down

0 comments on commit 09518d6

Please sign in to comment.