Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
changed the quadratic time MMD to return m*MMD in compute_statistic()
since all null-approximations also fir m*MMD distirbution
  • Loading branch information
karlnapf committed Aug 11, 2012
1 parent d02ad13 commit 2f96a8a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 19 deletions.
26 changes: 12 additions & 14 deletions src/shogun/statistics/QuadraticTimeMMD.cpp
Expand Up @@ -69,7 +69,6 @@ float64_t CQuadraticTimeMMD::compute_unbiased_statistic()
{
/* split computations into three terms from JLMR paper (see documentation )*/
index_t m=m_q_start;
index_t n=m_p_and_q->get_num_vectors()-m_q_start;

/* init kernel with features */
m_kernel->init(m_p_and_q, m_p_and_q);
Expand All @@ -82,26 +81,26 @@ float64_t CQuadraticTimeMMD::compute_unbiased_statistic()
for (index_t j=0; j<m; ++j)
first+=i==j ? 0 : m_kernel->kernel(i,j);
}
first/=m*(m-1);
first/=(m-1);

/* second term */
float64_t second=0;
for (index_t i=m_q_start; i<m_q_start+n; ++i)
for (index_t i=m_q_start; i<m_q_start+m; ++i)
{
/* ensure i!=j while adding up */
for (index_t j=m_q_start; j<m_q_start+n; ++j)
for (index_t j=m_q_start; j<m_q_start+m; ++j)
second+=i==j ? 0 : m_kernel->kernel(i,j);
}
second/=n*(n-1);
second/=(m-1);

/* third term */
float64_t third=0;
for (index_t i=0; i<m; ++i)
{
for (index_t j=m_q_start; j<m_q_start+n; ++j)
for (index_t j=m_q_start; j<m_q_start+m; ++j)
third+=m_kernel->kernel(i,j);
}
third*=2.0/(m*n);
third*=2.0/m;

return first+second-third;
}
Expand All @@ -110,7 +109,6 @@ float64_t CQuadraticTimeMMD::compute_biased_statistic()
{
/* split computations into three terms from JLMR paper (see documentation )*/
index_t m=m_q_start;
index_t n=m_p_and_q->get_num_vectors()-m_q_start;

/* init kernel with features */
m_kernel->init(m_p_and_q, m_p_and_q);
Expand All @@ -122,25 +120,25 @@ float64_t CQuadraticTimeMMD::compute_biased_statistic()
for (index_t j=0; j<m; ++j)
first+=m_kernel->kernel(i,j);
}
first/=(m*m);
first/=m;

/* second term */
float64_t second=0;
for (index_t i=m_q_start; i<m_q_start+n; ++i)
for (index_t i=m_q_start; i<m_q_start+m; ++i)
{
for (index_t j=m_q_start; j<m_q_start+n; ++j)
for (index_t j=m_q_start; j<m_q_start+m; ++j)
second+=m_kernel->kernel(i,j);
}
second/=(n*n);
second/=m;

/* third term */
float64_t third=0;
for (index_t i=0; i<m; ++i)
{
for (index_t j=m_q_start; j<m_q_start+n; ++j)
for (index_t j=m_q_start; j<m_q_start+m; ++j)
third+=m_kernel->kernel(i,j);
}
third*=2.0/(m*n);
third*=2.0/m;

return first+second-third;
}
Expand Down
14 changes: 9 additions & 5 deletions src/shogun/statistics/QuadraticTimeMMD.h
Expand Up @@ -51,6 +51,8 @@ enum EQuadraticMMDType
* \f]
*
* The type (biased/unbiased) can be selected via set_statistic_type().
* Note that computing the statistic returns m*MMD; same holds for the null
* distribution samples.
*
* Along with the statistic comes a method to compute a p-value based on
* different methods. Bootstrapping, is also possible. If unsure which one to
Expand Down Expand Up @@ -107,7 +109,7 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic

/** Computes the squared quadratic time MMD for the current data. Note
* that the type (biased/unbiased) can be specified with
* set_statistic_type() method.
* set_statistic_type() method. Note that it returns m*MMD.
*
* @return (biased or unbiased) squared quadratic time MMD
*/
Expand Down Expand Up @@ -151,7 +153,8 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic
* kernel matrix needs to be stored in memory
*
* Note that the provided statistic HAS to be the biased version
* (see paper for details)
* (see paper for details). Note that m*Null-distribution is returned,
* which is fine since the statistic is also m*MMD:
*
* Works well if the kernel matrix is NOT diagonal dominant.
* See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
Expand Down Expand Up @@ -199,7 +202,8 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic
*
* Note that when being used for constructing a test, the provided
* statistic HAS to be the biased version
* (see paper for details)
* (see paper for details). Note that m*Null-distribution is fitted,
* which is fine since the statistic is also m*MMD.
*
* See Gretton, A., Fukumizu, K., & Harchaoui, Z. (2011).
* A fast, consistent kernel two-sample test.
Expand All @@ -210,10 +214,10 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic
SGVector<float64_t> fit_null_gamma();

protected:
/** helper method to compute unbiased squared quadratic time MMD */
/** helper method to compute m*unbiased squared quadratic time MMD */
virtual float64_t compute_unbiased_statistic();

/** helper method to compute biased squared quadratic time MMD */
/** helper method to compute m*biased squared quadratic time MMD */
virtual float64_t compute_biased_statistic();

private:
Expand Down

0 comments on commit 2f96a8a

Please sign in to comment.