Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #620 from karlnapf/master
first framework for independence testing
- Loading branch information
Showing
15 changed files
with
579 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Heiko Strathmann | ||
*/ | ||
|
||
#include <shogun/statistics/HSIC.h> | ||
#include <shogun/features/Features.h> | ||
#include <shogun/mathematics/Statistics.h> | ||
#include <shogun/kernel/Kernel.h> | ||
|
||
using namespace shogun; | ||
|
||
/* Default constructor: delegates to the default constructor of the kernel
 * independence test base class and then runs the class-local init(). */
CHSIC::CHSIC()
	: CKernelIndependenceTestStatistic()
{
	init();
}
|
||
/* Constructor taking both kernels and both sample sets. All four arguments
 * are handed unchanged to the CKernelIndependenceTestStatistic base class;
 * afterwards the class-local init() is run. */
CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p,
		CFeatures* q)
	: CKernelIndependenceTestStatistic(kernel_p, kernel_q, p, q)
{
	init();
}
|
||
|
||
/* Destructor: CHSIC has nothing of its own to release here. */
CHSIC::~CHSIC()
{
}
|
||
/* Shared initialisation hook called by both constructors. It is currently
 * empty because CHSIC declares no members of its own. */
void CHSIC::init()
{
}
|
||
/* Computes the HSIC test statistic.
 *
 * Both kernels (for samples from p and from q) are required; an error is
 * reported via SG_ERROR if either of them is missing.
 *
 * NOTE: the actual statistic is not implemented yet, 0 is returned.
 *
 * @return test statistic (currently always 0)
 */
float64_t CHSIC::compute_statistic()
{
	/* report an error if at least one of the two kernels is not set. The
	 * previous check was missing the negation on m_kernel_q and therefore
	 * complained exactly when the second kernel WAS specified */
	if (!m_kernel_p || !m_kernel_q)
	{
		SG_ERROR("%s::compute_statistic(): No or only one kernel specified!\n",
				get_name());
	}

	return 0;
}
|
||
/* Placeholder for the p-value computation: the passed statistic is not
 * looked at yet and a constant zero is returned.
 *
 * @param statistic statistic value to compute the p-value for (unused)
 * @return always 0
 */
float64_t CHSIC::compute_p_value(float64_t statistic)
{
	return 0.0;
}
|
||
/* Placeholder for the threshold computation: the test level is not looked
 * at yet and a constant zero is returned.
 *
 * @param alpha test level to reject the null-hypothesis (unused)
 * @return always 0
 */
float64_t CHSIC::compute_threshold(float64_t alpha)
{
	return 0.0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Heiko Strathmann | ||
*/ | ||
|
||
#ifndef __HSIC_H_ | ||
#define __HSIC_H_ | ||
|
||
#include <shogun/statistics/KernelIndependenceTestStatistic.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
/** TODO | ||
* | ||
*/ | ||
class CHSIC : CKernelIndependenceTestStatistic | ||
{ | ||
public: | ||
/** TODO */ | ||
CHSIC(); | ||
|
||
/** Constructor. | ||
* | ||
* @param kernel_p kernel samples from p | ||
* @param kernel_q kernel samples from q | ||
* @param p samples from p | ||
* @param q samples from q | ||
*/ | ||
CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q); | ||
|
||
virtual ~CHSIC(); | ||
|
||
/** TODO */ | ||
virtual float64_t compute_statistic(); | ||
|
||
/** computes a p-value based on current method for approximating the | ||
* null-distribution. The p-value is the 1-p quantile of the null- | ||
* distribution where the given statistic lies in. | ||
* | ||
* @param statistic statistic value to compute the p-value for | ||
* @return p-value parameter statistic is the (1-p) percentile of the | ||
* null distribution | ||
*/ | ||
virtual float64_t compute_p_value(float64_t statistic); | ||
|
||
/** computes a threshold based on current method for approximating the | ||
* null-distribution. The threshold is the value that a statistic has | ||
* to have in ordner to reject the null-hypothesis. | ||
* | ||
* @param alpha test level to reject null-hypothesis | ||
* @return threshold for statistics to reject null-hypothesis | ||
*/ | ||
virtual float64_t compute_threshold(float64_t statistic); | ||
|
||
inline virtual const char* get_name() const | ||
{ | ||
return "HSIC"; | ||
} | ||
|
||
private: | ||
void init(); | ||
|
||
}; | ||
|
||
} | ||
|
||
#endif /* __HSIC_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Heiko Strathmann | ||
*/ | ||
|
||
#include <shogun/statistics/IndependenceTestStatistic.h> | ||
#include <shogun/features/Features.h> | ||
|
||
using namespace shogun; | ||
|
||
/* Default constructor: no samples are attached, init() registers the
 * members and resets both sample pointers to NULL. */
CIndependenceTestStatistic::CIndependenceTestStatistic()
	: CTestStatistic()
{
	init();
}
|
||
/* Constructor that attaches the two sample sets.
 *
 * init() has to run first since it resets m_p and m_q to NULL; afterwards
 * both feature objects are stored and SG_REF is applied to each of them.
 *
 * @param p samples from p
 * @param q samples from q
 */
CIndependenceTestStatistic::CIndependenceTestStatistic(CFeatures* p,
		CFeatures* q)
	: CTestStatistic()
{
	init();

	/* samples from p */
	m_p=p;
	SG_REF(m_p);

	/* samples from q */
	m_q=q;
	SG_REF(m_q);
}
|
||
/* Destructor: applies SG_UNREF to both sample sets, matching the SG_REF
 * calls done in the constructor. */
CIndependenceTestStatistic::~CIndependenceTestStatistic()
{
	/* samples from p first, then samples from q */
	SG_UNREF(m_p);
	SG_UNREF(m_q);
}
|
||
void CIndependenceTestStatistic::init() | ||
{ | ||
SG_ADD((CSGObject**)&m_p, "p", "Samples from p", MS_NOT_AVAILABLE); | ||
SG_ADD((CSGObject**)&m_q, "q", "Samples from q", MS_NOT_AVAILABLE); | ||
|
||
m_p=NULL; | ||
m_q=NULL; | ||
} | ||
|
||
/* Bootstraps the null distribution.
 *
 * NOTE: only the result vector is allocated so far, no statistic is
 * computed and stored in it yet.
 *
 * @return vector with m_bootstrap_iterations entries
 */
SGVector<float64_t> CIndependenceTestStatistic::bootstrap_null()
{
	/* one entry per bootstrap iteration */
	SGVector<float64_t> null_samples(m_bootstrap_iterations);

	/* TODO fill the vector with bootstrapped statistics */
	return null_samples;
}
|
||
/* Computes a p-value for the given statistic.
 *
 * For the BOOTSTRAP method, the null distribution is sampled via
 * bootstrap_null(), the samples are sorted and the p-value is one minus the
 * relative position of the statistic within the sorted samples. Any other
 * method is reported via SG_ERROR.
 *
 * @param statistic statistic value to compute the p-value for
 * @return p-value for the statistic (0 if the method is unknown and
 * SG_ERROR returns)
 */
float64_t CIndependenceTestStatistic::compute_p_value(float64_t statistic)
{
	float64_t result=0;

	if (m_null_approximation_method==BOOTSTRAP)
	{
		/* bootstrap a bunch of statistic values from null distribution */
		SGVector<float64_t> values=bootstrap_null();

		/* find out percentile of parameter "statistic" in null distribution.
		 * The position is kept as float64_t in order to have a floating
		 * point division below */
		CMath::qsort(values);
		float64_t i=CMath::find_position_to_insert(values, statistic);

		/* return corresponding p-value */
		result=1.0-i/values.vlen;
	}
	else
	{
		/* the format string contains a %s, so the class name has to be
		 * passed along (it was missing before) */
		SG_ERROR("%s::compute_p_value(): Unknown method to compute"
				" p-value!\n", get_name());
	}

	return result;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Heiko Strathmann | ||
*/ | ||
|
||
#ifndef __INDEPENDENCETESTSTATISTIC_H_ | ||
#define __INDEPENDENCETESTSTATISTIC_H_ | ||
|
||
#include <shogun/statistics/TestStatistic.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
class CFeatures; | ||
/** TODO | ||
* | ||
* @brief Test statistic base class. Provides an interface for statistical | ||
* tests via three methods: compute_statistic(), compute_p_value() and | ||
* compute_threshold(). The second computes a p-value for the statistic computed | ||
* by the first method. | ||
* The p-value represents the position of the statistic in the null-distribution, | ||
* i.e. the distribution of the statistic population given the null-hypothesis | ||
* is true. (1-position = p-value). | ||
* The third method, compute_threshold(), computes a threshold for a given | ||
* test level which is needed to reject the null-hypothesis | ||
* | ||
* Abstract base class. | ||
*/ | ||
class CIndependenceTestStatistic : public CTestStatistic | ||
{ | ||
public: | ||
CIndependenceTestStatistic(); | ||
|
||
CIndependenceTestStatistic(CFeatures* p, CFeatures* q); | ||
|
||
virtual ~CIndependenceTestStatistic(); | ||
|
||
/** merges both sets of samples and computes the test statistic | ||
* m_bootstrap_iteration times | ||
* | ||
* @return vector of all statistics | ||
*/ | ||
virtual SGVector<float64_t> bootstrap_null(); | ||
|
||
/** computes a p-value based on bootstrapping the null-distribution. | ||
* This method should be overridden for different methods | ||
* | ||
* @param statistic statistic value to compute the p-value for | ||
* @return p-value parameter statistic is the (1-p) percentile of the | ||
* null distribution | ||
*/ | ||
virtual float64_t compute_p_value(float64_t statistic); | ||
|
||
inline virtual const char* get_name() const=0; | ||
|
||
private: | ||
void init(); | ||
|
||
protected: | ||
/** samples from p */ | ||
CFeatures* m_p; | ||
|
||
/** samples from q */ | ||
CFeatures* m_q; | ||
|
||
/** number of iterations for bootstrapping null-distributions */ | ||
index_t m_bootstrap_iterations; | ||
|
||
/** Defines how the the null distribution is approximated */ | ||
ENullApproximationMethod m_null_approximation_method; | ||
}; | ||
|
||
} | ||
|
||
#endif /* __INDEPENDENCETESTSTATISTIC_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Heiko Strathmann | ||
*/ | ||
|
||
#include <shogun/statistics/KernelIndependenceTestStatistic.h> | ||
#include <shogun/features/Features.h> | ||
#include <shogun/kernel/Kernel.h> | ||
|
||
using namespace shogun; | ||
|
||
/* Default constructor: neither kernels nor samples are attached; init()
 * registers the kernel members and resets them to NULL. */
CKernelIndependenceTestStatistic::CKernelIndependenceTestStatistic()
	: CIndependenceTestStatistic()
{
	init();
}
|
||
/* Constructor that attaches both kernels and both sample sets.
 *
 * The samples are handled by the CIndependenceTestStatistic base class.
 * init() has to run before the kernels are stored since it resets both
 * kernel pointers to NULL.
 *
 * @param kernel_p kernel for samples from p
 * @param kernel_q kernel for samples from q
 * @param p samples from p
 * @param q samples from q
 */
CKernelIndependenceTestStatistic::CKernelIndependenceTestStatistic(
		CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q)
	: CIndependenceTestStatistic(p, q)
{
	init();

	/* store both kernels before referencing either of them */
	m_kernel_p=kernel_p;
	m_kernel_q=kernel_q;

	/* the members now hold the same pointers as the arguments */
	SG_REF(m_kernel_p);
	SG_REF(m_kernel_q);
}
|
||
/* Destructor: applies SG_UNREF to both kernels, matching the SG_REF calls
 * done in the constructor. */
CKernelIndependenceTestStatistic::~CKernelIndependenceTestStatistic()
{
	/* kernel for p first, then kernel for q */
	SG_UNREF(m_kernel_p);
	SG_UNREF(m_kernel_q);
}
|
||
void CKernelIndependenceTestStatistic::init() | ||
{ | ||
SG_ADD((CSGObject**)&m_kernel_p, "kernel_p", "Kernel for samples from p", | ||
MS_AVAILABLE); | ||
SG_ADD((CSGObject**)&m_kernel_q, "kernel_q", "Kernel for samples from q", | ||
MS_AVAILABLE); | ||
m_kernel_p=NULL; | ||
m_kernel_q=NULL; | ||
} |
Oops, something went wrong.