Skip to content

Commit

Permalink
Merge pull request #678 from karlnapf/master
Browse files Browse the repository at this point in the history
more work towards threshold computations
  • Loading branch information
karlnapf committed Jul 25, 2012
2 parents 0f8b2a9 + 83f385e commit 5b3d566
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 16 deletions.
6 changes: 3 additions & 3 deletions src/shogun/mathematics/Statistics.cpp
Expand Up @@ -263,7 +263,7 @@ float64_t CStatistics::inverse_incomplete_beta(float64_t a, float64_t b,
{
dithresh=1.0e-4;
}
yp=-inverse_normal_distribution(y);
yp=-inverse_normal_cdf(y);
if (greater(y, 0.5))
{
rflg=1;
Expand Down Expand Up @@ -741,7 +741,7 @@ float64_t CStatistics::incomplete_beta(float64_t a, float64_t b, float64_t x)
return result;
}

float64_t CStatistics::inverse_normal_distribution(float64_t y0)
float64_t CStatistics::inverse_normal_cdf(float64_t y0)
{
float64_t expm2;
float64_t s2pi;
Expand Down Expand Up @@ -1285,7 +1285,7 @@ float64_t CStatistics::inverse_incomplete_gamma_completed(float64_t a,
yh=1;
dithresh=5*igammaepsilon;
d=1/(9*a);
y=1-d-inverse_normal_distribution(y0)*CMath::sqrt(d);
y=1-d-inverse_normal_cdf(y0)*CMath::sqrt(d);
x=a*y*y*y;
lgm=lgamma(a);
i=0;
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/mathematics/Statistics.h
Expand Up @@ -155,7 +155,7 @@ class CStatistics: public CSGObject
*
* Taken from ALGLIB under gpl2+
*/
static float64_t inverse_normal_distribution(float64_t y0);
static float64_t inverse_normal_cdf(float64_t y0);

/** @return natural logarithm of the gamma function of input */
static inline float64_t lgamma(float64_t x)
Expand Down
2 changes: 2 additions & 0 deletions src/shogun/statistics/HSIC.cpp
Expand Up @@ -110,6 +110,7 @@ float64_t CHSIC::compute_p_value(float64_t statistic)
}

default:
/* bootstrapping is handled there */
result=CTwoDistributionsTestStatistic::compute_p_value(statistic);
break;
}
Expand All @@ -131,6 +132,7 @@ float64_t CHSIC::compute_threshold(float64_t alpha)
}

default:
/* bootstrapping is handled there */
result=CTwoDistributionsTestStatistic::compute_threshold(alpha);
break;
}
Expand Down
51 changes: 39 additions & 12 deletions src/shogun/statistics/LinearTimeMMD.cpp
Expand Up @@ -74,10 +74,9 @@ float64_t CLinearTimeMMD::compute_statistic()
{
SG_DEBUG("entering CLinearTimeMMD::compute_statistic()\n");

if (!m_kernel)
SG_ERROR("%s::compute_statistic(): No kernel specified!\n", get_name());
REQUIRE(m_p_and_q, "%s::compute_statistic: features needed!\n", get_name());

/* TODO features with a different number of vectors should be allowed */
REQUIRE(m_kernel, "%s::compute_statistic: kernel needed!\n", get_name());

/* m is number of samples from each distribution, m_2 is half of it
* using names from JLMR paper (see class documentation) */
Expand Down Expand Up @@ -117,20 +116,15 @@ float64_t CLinearTimeMMD::compute_p_value(float64_t statistic)
switch (m_null_approximation_method)
{
case MMD1_GAUSSIAN:
if (m_p_and_q->get_num_vectors()<10000)
{
SG_WARNING("CLinearTimeMMD::compute_p_value: The number of samples"
" should be very large (at least 10000) in order to get a"
" good Gaussian approximation using MMD1_GAUSSIAN.\n");
}

{
/* compute variance and use to estimate Gaussian distribution */
float64_t std_dev=CMath::sqrt(compute_variance_estimate());
result=1.0-CStatistics::normal_cdf(statistic, std_dev);
}
break;

default:
/* bootstrapping is handled here */
result=CKernelTwoSampleTestStatistic::compute_p_value(statistic);
break;
}
Expand All @@ -140,12 +134,45 @@ float64_t CLinearTimeMMD::compute_p_value(float64_t statistic)

float64_t CLinearTimeMMD::compute_threshold(float64_t alpha)
{
SG_ERROR("%s::compute_threshold is not yet implemented!\n");
return 0;
float64_t result=0;

switch (m_null_approximation_method)
{
case MMD1_GAUSSIAN:
{
/* compute variance and use to estimate Gaussian distribution */
SG_WARNING("CLinearTimeMMD::compute_threshold(): not yet"
"implemented for MMD1_GAUSSIAN");
// float64_t std_dev=CMath::sqrt(compute_variance_estimate());
// result=1.0-CStatistics::inverse_normal_cdf(1-alpha, std_dev);
}
break;

default:
/* bootstrapping is handled here */
result=CKernelTwoSampleTestStatistic::compute_threshold(alpha);
break;
}

return result;
}

float64_t CLinearTimeMMD::compute_variance_estimate()
{
REQUIRE(m_p_and_q, "%s::compute_variance_estimate: features needed!\n",
get_name());

REQUIRE(m_kernel, "%s::compute_variance_estimate: kernel needed!\n",
get_name());

if (m_p_and_q->get_num_vectors()<1000)
{
SG_WARNING("%s::compute_variance_estimate: The number of samples"
" should be very large (at least 1000) in order to get a"
" good Gaussian approximation using MMD1_GAUSSIAN.\n",
get_name());
}

/* this corresponds to computing the statistic itself, however, the
* difference is that all terms (of the traces) have to be stored */
index_t m=m_q_start;
Expand Down

0 comments on commit 5b3d566

Please sign in to comment.