Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
added threshold computation in a better way
added gamma fitting for threshold of HSIC
  • Loading branch information
karlnapf committed Jul 25, 2012
1 parent 8f57540 commit adc555a
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 13 deletions.
39 changes: 29 additions & 10 deletions src/shogun/statistics/HSIC.cpp
Expand Up @@ -102,8 +102,12 @@ float64_t CHSIC::compute_p_value(float64_t statistic)
switch (m_null_approximation_method)
{
case HSIC_GAMMA:
result=compute_p_value_gamma(statistic);
{
/* fit gamma and return cdf at statistic */
SGVector<float64_t> params=fit_null_gamma();
result=CStatistics::gamma_cdf(statistic, params[0], params[1]);
break;
}

default:
result=CTwoDistributionsTestStatistic::compute_p_value(statistic);
Expand All @@ -115,15 +119,31 @@ float64_t CHSIC::compute_p_value(float64_t statistic)

float64_t CHSIC::compute_threshold(float64_t alpha)
{
return 0;
float64_t result=0;
switch (m_null_approximation_method)
{
case HSIC_GAMMA:
{
/* fit gamma and return inverse cdf at statistic */
SGVector<float64_t> params=fit_null_gamma();
result=CStatistics::inverse_gamma_cdf(alpha, params[0], params[1]);
break;
}

default:
result=CTwoDistributionsTestStatistic::compute_threshold(alpha);
break;
}

return result;
}

float64_t CHSIC::compute_p_value_gamma(float64_t statistic)
SGVector<float64_t> CHSIC::fit_null_gamma()
{
SG_DEBUG(("entering %s::compute_p_value_gamma()\n"), get_name());
SG_DEBUG(("entering %s::fit_null_gamma()\n"), get_name());
if (!m_kernel_p || !m_kernel_q)
{
SG_ERROR("%s::compute_statistic(): No or only one kernel specified!\n",
SG_ERROR("%s::fit_null_gamma(): No or only one kernel specified!\n",
get_name());
}

Expand Down Expand Up @@ -208,11 +228,10 @@ float64_t CHSIC::compute_p_value_gamma(float64_t statistic)
float64_t b=var_hsic*m/m_hsic;
SG_DEBUG("a: %f, b: %f\n", a, b);

/* return: cdf('gam',statistic,al,bet) (MATLAB)
* which will get the position in the null distribution */
float64_t result=CStatistics::gamma_cdf(statistic, a, b);
SG_DEBUG("gamma_cdf(%f, %f, %f): %f\n", statistic, a, b, result);
SGVector<float64_t> result(2);
result[0]=a;
result[1]=b;

SG_DEBUG(("leaving %s::compute_p_value_gamma()\n"), get_name());
SG_DEBUG(("leaving %s::fit_null_gamma()\n"), get_name());
return result;
}
5 changes: 4 additions & 1 deletion src/shogun/statistics/HSIC.h
Expand Up @@ -92,8 +92,11 @@ class CHSIC : public CKernelIndependenceTestStatistic
*
* Called by compute_p_value() if null approximation method is set to
* MMD2_GAMMA.
*
* @return vector with two parameter for gamma distribution. To use:
* call gamma_cdf(statistic, a, b).
*/
float64_t compute_p_value_gamma(float64_t statistic);
SGVector<float64_t> fit_null_gamma();

private:
void init();
Expand Down
24 changes: 22 additions & 2 deletions src/shogun/statistics/TwoDistributionsTestStatistic.cpp
Expand Up @@ -103,10 +103,30 @@ float64_t CTwoDistributionsTestStatistic::compute_p_value(float64_t statistic)
}
else
{
SG_ERROR("CTwoDistributionsTestStatistics::compute_p_value(): Unknown method "
"to approximate null-distribution!\n");
SG_ERROR("CTwoDistributionsTestStatistics::compute_p_value(): Unknown"
"method to approximate null distribution!\n");
}

return result;
}

float64_t CTwoDistributionsTestStatistic::compute_threshold(float64_t alpha)
{
float64_t result=0;

if (m_null_approximation_method==BOOTSTRAP)
{
/* bootstrap a bunch of MMD values from null distribution */
SGVector<float64_t> values=bootstrap_null();

/* return value of (1-alpha) quantile */
result=values[CMath::floor(values.vlen*(1-alpha))];
}
else
{
SG_ERROR("CTwoDistributionsTestStatistics::compute_threshold():"
"Unknown method to approximate null distribution!\n");
}

return result;
}
10 changes: 10 additions & 0 deletions src/shogun/statistics/TwoDistributionsTestStatistic.h
Expand Up @@ -75,6 +75,16 @@ class CTwoDistributionsTestStatistic : public CTestStatistic
*/
virtual float64_t compute_p_value(float64_t statistic);

/** computes a threshold based on current method for approximating the
* null-distribution. The threshold is the argument of the \f$1-\alpha\f$
* quantile of the null. \f$\alpha\f$ is provided.
*
* @param alpha \f$\alpha\f$ quantile to get the threshold for
* @return threshold which is the \f$1-\alpha\f$ quantile of the null
* distribution
*/
virtual float64_t compute_threshold(float64_t alpha);

inline virtual const char* get_name() const=0;

private:
Expand Down

0 comments on commit adc555a

Please sign in to comment.