Navigation Menu

Skip to content

Commit

Permalink
Merge pull request #574 from karlnapf/master
Browse files Browse the repository at this point in the history
Finished Gamma-based computation of p-value for quadratic time MMD
  • Loading branch information
karlnapf committed Jun 7, 2012
2 parents 3abbb99 + 887d699 commit 631c3e7
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
10 changes: 4 additions & 6 deletions src/shogun/statistics/QuadraticTimeMMD.cpp
Expand Up @@ -163,6 +163,8 @@ SGVector<float64_t> CQuadraticTimeMMD::sample_null_spectrum(index_t num_samples,

float64_t CQuadraticTimeMMD::compute_p_value_gamma(float64_t statistic)
{
/* the whole procedure is already checked against MATLAB implementation */

if (m_q_start!=m_p_and_q->get_num_vectors()/2)
{
/* TODO support different numbers of samples */
Expand All @@ -175,7 +177,6 @@ float64_t CQuadraticTimeMMD::compute_p_value_gamma(float64_t statistic)
* works since X and Y are concatenated here */
m_kernel->init(m_p_and_q, m_p_and_q);
SGMatrix<float64_t> K=m_kernel->get_kernel_matrix();
CMath::display_matrix(K, "kernel matrix");

/* compute mean under H0 of MMD, which is
* meanMMD = 2/m * ( 1 - 1/m*sum(diag(KL)) );
Expand All @@ -194,7 +195,6 @@ float64_t CQuadraticTimeMMD::compute_p_value_gamma(float64_t statistic)
K(m_q_start+i, i)=0;
}
mean_mmd=2.0/m_q_start*(1.0-1.0/m_q_start*mean_mmd);
SG_PRINT("mean mmd: %f\n", mean_mmd);

/* compute variance under H0 of MMD, which is
* varMMD = 2/m/(m-1) * 1/m/(m-1) * sum(sum( (K + L - KL - KL').^2 ));
Expand All @@ -213,14 +213,12 @@ float64_t CQuadraticTimeMMD::compute_p_value_gamma(float64_t statistic)
}
}
var_mmd*=2.0/m_q_start/(m_q_start-1)*1.0/m_q_start/(m_q_start-1);
SG_PRINT("var mmd: %f\n", var_mmd);

/* parameters for gamma distribution */
float64_t a=CMath::pow(mean_mmd, 2)/var_mmd;
float64_t b=var_mmd*m_q_start / mean_mmd;

SG_PRINT("a=%f, b=%f\n", a,b);

/* return: cdf('gam',statistic,al,bet) (MATLAB)
* which will get the position in the null distribution */

return CStatistics::gamma_cdf(statistic, a, b);
}
15 changes: 14 additions & 1 deletion src/shogun/statistics/QuadraticTimeMMD.h
Expand Up @@ -36,7 +36,7 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic

/* returns a set of samples of an estimate of the null distribution
* using the Eigen-spectrum of the centered kernel matrix of the merged
* samples of p and q.
* samples of p and q. May be used to compute p_value (easy)
*
* Works well if the kernel matrix is NOT diagonal dominant.
* See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
Expand All @@ -53,6 +53,19 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic
SGVector<float64_t> sample_null_spectrum(index_t num_samples,
index_t num_eigenvalues=-1);

/** Approximates the null-distribution by the two parameter gamma
* distribution. It works in O(m^2) where m is the number of samples
* from each distribution. Its very fast, but may be inaccurate.
* However, there are cases where it performs very well.
* Returns the p-value for a given statistic value in the
* null-distribution.
*
* See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
* A fast, consistent kernel two-sample test.
*
* @param statistic MMD value to compute the p-value for.
* @return p-value of the given statistic
*/
float64_t compute_p_value_gamma(float64_t statistic);

private:
Expand Down

0 comments on commit 631c3e7

Please sign in to comment.