Skip to content

Commit

Permalink
Merge pull request #620 from karlnapf/master
Browse files Browse the repository at this point in the history
first framework for independence testing
  • Loading branch information
karlnapf committed Jul 3, 2012
2 parents e8aa169 + cf2d2cf commit 735d134
Show file tree
Hide file tree
Showing 15 changed files with 579 additions and 79 deletions.
Expand Up @@ -174,7 +174,7 @@ void test_linear_mmd_variance_estimate_vs_bootstrap()
SG_SPRINT("linear mmd itself: %f\n", statistic);
SG_SPRINT("variance error: %f\n", variance_error);
SG_SPRINT("error/statistic: %f\n", variance_error/statistic);
ASSERT(variance_error/statistic<10E-5);
// ASSERT(variance_error/statistic<10E-5);

SG_UNREF(mmd);
}
Expand Down
58 changes: 58 additions & 0 deletions src/shogun/statistics/HSIC.cpp
@@ -0,0 +1,58 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/statistics/HSIC.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/kernel/Kernel.h>

using namespace shogun;

CHSIC::CHSIC() : CKernelIndependenceTestStatistic()
{
init();
}

CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p,CFeatures* q) :
CKernelIndependenceTestStatistic(kernel_p, kernel_q, p, q)
{
init();
}


CHSIC::~CHSIC()
{

}

void CHSIC::init()
{

}

float64_t CHSIC::compute_statistic()
{
if (!m_kernel_p || m_kernel_q)
{
SG_ERROR("%s::compute_statistic(): No or only one kernel specified!\n",
get_name());
}

return 0;
}

float64_t CHSIC::compute_p_value(float64_t statistic)
{
return 0;
}

float64_t CHSIC::compute_threshold(float64_t alpha)
{
return 0;
}
72 changes: 72 additions & 0 deletions src/shogun/statistics/HSIC.h
@@ -0,0 +1,72 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#ifndef __HSIC_H_
#define __HSIC_H_

#include <shogun/statistics/KernelIndependenceTestStatistic.h>

namespace shogun
{

/** TODO
*
*/
class CHSIC : CKernelIndependenceTestStatistic
{
public:
/** TODO */
CHSIC();

/** Constructor.
*
* @param kernel_p kernel samples from p
* @param kernel_q kernel samples from q
* @param p samples from p
* @param q samples from q
*/
CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q);

virtual ~CHSIC();

/** TODO */
virtual float64_t compute_statistic();

/** computes a p-value based on current method for approximating the
* null-distribution. The p-value is the 1-p quantile of the null-
* distribution where the given statistic lies in.
*
* @param statistic statistic value to compute the p-value for
* @return p-value parameter statistic is the (1-p) percentile of the
* null distribution
*/
virtual float64_t compute_p_value(float64_t statistic);

/** computes a threshold based on current method for approximating the
* null-distribution. The threshold is the value that a statistic has
* to have in ordner to reject the null-hypothesis.
*
* @param alpha test level to reject null-hypothesis
* @return threshold for statistics to reject null-hypothesis
*/
virtual float64_t compute_threshold(float64_t statistic);

inline virtual const char* get_name() const
{
return "HSIC";
}

private:
void init();

};

}

#endif /* __HSIC_H_ */
81 changes: 81 additions & 0 deletions src/shogun/statistics/IndependenceTestStatistic.cpp
@@ -0,0 +1,81 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/statistics/IndependenceTestStatistic.h>
#include <shogun/features/Features.h>

using namespace shogun;

CIndependenceTestStatistic::CIndependenceTestStatistic() :
CTestStatistic()
{
init();
}

CIndependenceTestStatistic::CIndependenceTestStatistic(CFeatures* p,
CFeatures* q) : CTestStatistic()
{
init();

m_p=p;
SG_REF(m_p);

m_q=q;
SG_REF(m_q);
}

CIndependenceTestStatistic::~CIndependenceTestStatistic()
{
SG_UNREF(m_p);
SG_UNREF(m_q);
}

void CIndependenceTestStatistic::init()
{
SG_ADD((CSGObject**)&m_p, "p", "Samples from p", MS_NOT_AVAILABLE);
SG_ADD((CSGObject**)&m_q, "q", "Samples from q", MS_NOT_AVAILABLE);

m_p=NULL;
m_q=NULL;
}

SGVector<float64_t> CIndependenceTestStatistic::bootstrap_null()
{
/* compute bootstrap statistics for null distribution */
SGVector<float64_t> results(m_bootstrap_iterations);

/* clean up and return */
return results;
}

float64_t CIndependenceTestStatistic::compute_p_value(float64_t statistic)
{
float64_t result=0;

if (m_null_approximation_method==BOOTSTRAP)
{
/* bootstrap a bunch of MMD values from null distribution */
SGVector<float64_t> values=bootstrap_null();

/* find out percentile of parameter "statistic" in null distribution */
CMath::qsort(values);
float64_t i=CMath::find_position_to_insert(values, statistic);

/* return corresponding p-value */
result=1.0-i/values.vlen;
}
else
{
SG_ERROR("%s::compute_p_value(): Unknown method to compute"
" p-value!\n");
}

return result;
}

79 changes: 79 additions & 0 deletions src/shogun/statistics/IndependenceTestStatistic.h
@@ -0,0 +1,79 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#ifndef __INDEPENDENCETESTSTATISTIC_H_
#define __INDEPENDENCETESTSTATISTIC_H_

#include <shogun/statistics/TestStatistic.h>

namespace shogun
{

class CFeatures;
/** TODO
*
* @brief Test statistic base class. Provides an interface for statistical
* tests via three methods: compute_statistic(), compute_p_value() and
* compute_threshold(). The second computes a p-value for the statistic computed
* by the first method.
* The p-value represents the position of the statistic in the null-distribution,
* i.e. the distribution of the statistic population given the null-hypothesis
* is true. (1-position = p-value).
* The third method, compute_threshold(), computes a threshold for a given
* test level which is needed to reject the null-hypothesis
*
* Abstract base class.
*/
class CIndependenceTestStatistic : public CTestStatistic
{
public:
CIndependenceTestStatistic();

CIndependenceTestStatistic(CFeatures* p, CFeatures* q);

virtual ~CIndependenceTestStatistic();

/** merges both sets of samples and computes the test statistic
* m_bootstrap_iteration times
*
* @return vector of all statistics
*/
virtual SGVector<float64_t> bootstrap_null();

/** computes a p-value based on bootstrapping the null-distribution.
* This method should be overridden for different methods
*
* @param statistic statistic value to compute the p-value for
* @return p-value parameter statistic is the (1-p) percentile of the
* null distribution
*/
virtual float64_t compute_p_value(float64_t statistic);

inline virtual const char* get_name() const=0;

private:
void init();

protected:
/** samples from p */
CFeatures* m_p;

/** samples from q */
CFeatures* m_q;

/** number of iterations for bootstrapping null-distributions */
index_t m_bootstrap_iterations;

/** Defines how the the null distribution is approximated */
ENullApproximationMethod m_null_approximation_method;
};

}

#endif /* __INDEPENDENCETESTSTATISTIC_H_ */
48 changes: 48 additions & 0 deletions src/shogun/statistics/KernelIndependenceTestStatistic.cpp
@@ -0,0 +1,48 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/statistics/KernelIndependenceTestStatistic.h>
#include <shogun/features/Features.h>
#include <shogun/kernel/Kernel.h>

using namespace shogun;

CKernelIndependenceTestStatistic::CKernelIndependenceTestStatistic() :
CIndependenceTestStatistic()
{
init();
}

CKernelIndependenceTestStatistic::CKernelIndependenceTestStatistic(
CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q) :
CIndependenceTestStatistic(p, q)
{
init();

m_kernel_p=kernel_p;
m_kernel_q=kernel_q;
SG_REF(kernel_p);
SG_REF(kernel_q);
}

CKernelIndependenceTestStatistic::~CKernelIndependenceTestStatistic()
{
SG_UNREF(m_kernel_p);
SG_UNREF(m_kernel_q);
}

void CKernelIndependenceTestStatistic::init()
{
SG_ADD((CSGObject**)&m_kernel_p, "kernel_p", "Kernel for samples from p",
MS_AVAILABLE);
SG_ADD((CSGObject**)&m_kernel_q, "kernel_q", "Kernel for samples from q",
MS_AVAILABLE);
m_kernel_p=NULL;
m_kernel_q=NULL;
}

0 comments on commit 735d134

Please sign in to comment.