Merge pull request #627 from karlnapf/master

little more work on hsic_gamma
shogun-toolbox · Jul 7, 2012 · 05e12d2 · 05e12d2
2 parents 564295a + 377658c
commit 05e12d2
Show file tree

Hide file tree

Showing 6 changed files with 79 additions and 32 deletions.
diff --git a/examples/undocumented/libshogun/statistics_hsic.cpp b/examples/undocumented/libshogun/statistics_hsic.cpp
@@ -31,35 +31,46 @@ void create_mean_data(SGMatrix<float64_t> target, float64_t difference)
 	}
 }
 
-/** tests the hsic statistic for a single fixed data case and ensures
- * equality with matlab implementation */
-void test_hsic_fixed()
+void create_fixed_data_kernel(CFeatures*& features_p,
+		CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q)
 {
 	index_t m=2;
 	index_t d=3;
-	float64_t sigma_x=2;
-	float64_t sq_sigma_x_twice=sigma_x*sigma_x*2;
-	float64_t sigma_y=3;
-	float64_t sq_sigma_y_twice=sigma_y*sigma_y*2;
 
 	SGMatrix<float64_t> p(d,2*m);
 	for (index_t i=0; i<2*d*m; ++i)
 		p.matrix[i]=i;
 
-//	p.display_matrix("p");
+	p.display_matrix("p");
 
 	SGMatrix<float64_t> q(d,2*m);
 	for (index_t i=0; i<2*d*m; ++i)
 		q.matrix[i]=i+10;
 
-//	q.display_matrix("q");
+	q.display_matrix("q");
 
-	CDenseFeatures<float64_t>* features_p=new CDenseFeatures<float64_t>(p);
-	CDenseFeatures<float64_t>* features_q=new CDenseFeatures<float64_t>(q);
+	features_p=new CDenseFeatures<float64_t>(p);
+	features_q=new CDenseFeatures<float64_t>(q);
+
+	float64_t sigma_x=2;
+	float64_t sigma_y=3;
+	float64_t sq_sigma_x_twice=sigma_x*sigma_x*2;
+	float64_t sq_sigma_y_twice=sigma_y*sigma_y*2;
 
 	/* shoguns kernel width is different */
-	CGaussianKernel* kernel_p=new CGaussianKernel(10, sq_sigma_x_twice);
-	CGaussianKernel* kernel_q=new CGaussianKernel(10, sq_sigma_y_twice);
+	kernel_p=new CGaussianKernel(10, sq_sigma_x_twice);
+	kernel_q=new CGaussianKernel(10, sq_sigma_y_twice);
+}
+
+/** tests the hsic statistic for a single fixed data case and ensures
+ * equality with matlab implementation */
+void test_hsic_fixed()
+{
+	CFeatures* features_p=NULL;
+	CFeatures* features_q=NULL;
+	CKernel* kernel_p=NULL;
+	CKernel* kernel_q=NULL;
+	create_fixed_data_kernel(features_p, features_q, kernel_p, kernel_q);
 
 	CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
 
@@ -71,15 +82,34 @@ void test_hsic_fixed()
 	SG_UNREF(hsic);
 }
 
+void test_hsic_gamma()
+{
+	CFeatures* features_p=NULL;
+	CFeatures* features_q=NULL;
+	CKernel* kernel_p=NULL;
+	CKernel* kernel_q=NULL;
+	create_fixed_data_kernel(features_p, features_q, kernel_p, kernel_q);
+
+	CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
+	hsic->set_null_approximation_method(HSIC_GAMMA);
+
+	hsic->compute_p_value(1);
+
+	SG_UNREF(hsic);
+}
+
 int main(int argc, char** argv)
 {
 	init_shogun_with_defaults();
 
+//	sg_io->set_loglevel(MSG_DEBUG);
+
 	/* all tests have been "speed up" by reducing the number of runs/samples.
 	 * If you have any doubts in the results, set all num_runs to original
 	 * numbers and activate asserts. If they fail, something is wrong.
 	 */
 	test_hsic_fixed();
+	test_hsic_gamma();
 
 	exit_shogun();
 	return 0;

diff --git a/src/shogun/kernel/Kernel.cpp b/src/shogun/kernel/Kernel.cpp
@@ -102,8 +102,11 @@ bool CKernel::init(CFeatures* l, CFeatures* r)
 	SG_REF(r);
 
 	//make sure features were indeed supplied
-	ASSERT(l);
-	ASSERT(r);
+	if (!l)
+		SG_ERROR("%s::init(): Features on left side are NULL\n", get_name());
+
+	if (!r)
+		SG_ERROR("%s::init(): Features on right side are NULL\n", get_name());
 
 	//make sure features are compatible
 	ASSERT(l->get_feature_class()==r->get_feature_class());

diff --git a/src/shogun/statistics/HSIC.cpp b/src/shogun/statistics/HSIC.cpp
@@ -23,7 +23,7 @@ CHSIC::CHSIC() :
 CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p, CFeatures* q) :
 		CKernelIndependenceTestStatistic(kernel_p, kernel_q, p, q)
 {
-	if (p->get_num_vectors()!=q->get_num_vectors())
+	if (p && q && p->get_num_vectors()!=q->get_num_vectors())
 	{
 		SG_ERROR("%s: Only features with equal number of vectors are currently "
 				"possible\n", get_name());
@@ -98,6 +98,7 @@ float64_t CHSIC::compute_threshold(float64_t alpha)
 
 float64_t CHSIC::compute_p_value_gamma(float64_t statistic)
 {
+	SG_DEBUG(("entering %s::compute_p_value_gamma()\n"), get_name());
 	if (!m_kernel_p || !m_kernel_q)
 	{
 		SG_ERROR("%s::compute_statistic(): No or only one kernel specified!\n",
@@ -120,6 +121,9 @@ float64_t CHSIC::compute_p_value_gamma(float64_t statistic)
 	SGMatrix<float64_t> K=m_kernel_p->get_kernel_matrix();
 	SGMatrix<float64_t> L=m_kernel_q->get_kernel_matrix();
 
+	K.display_matrix("K");
+	L.display_matrix("L");
+
 	/* compute sum and trace of uncentered kernel matrices, needed later */
 	float64_t trace_K=0;
 	float64_t trace_L=0;
@@ -131,21 +135,27 @@ float64_t CHSIC::compute_p_value_gamma(float64_t statistic)
 		trace_L+=L(i,i);
 		for (index_t j=0; j<m; ++j)
 		{
-			sum_K=K(i,j);
-			sum_L=L(i,j);
+			sum_K+=K(i,j);
+			sum_L+=L(i,j);
 		}
 	}
+	SG_DEBUG("sum_K: %f, sum_L: %f, trace_K: %f, trace_L: %f\n", sum_K, sum_L,
+			trace_K, trace_L);
 
 	/* center both matrices: K=H*K*H, L=H*L*H in MATLAB */
 	K.center();
 	L.center();
 
+	K.display_matrix("K_centered");
+	L.display_matrix("L_centered");
+
 	/* compute the trace of MATLAB: (1/6 * Kc.*Lc).^2 Ü */
 	float64_t trace=0;
 	for (index_t i=0; i<m; ++i)
 		trace+=CMath::pow(K(i,i)*L(i,i), 2);
 
 	trace/=36.0;
+	SG_DEBUG("trace %f\n", trace);
 
 	/* compute sum of elements of MATLAB: (1/6 * Kc.*Lc).^2 */
 	float64_t sum=0;
@@ -155,28 +165,38 @@ float64_t CHSIC::compute_p_value_gamma(float64_t statistic)
 			sum+=CMath::pow(K(i,j)*L(i,j), 2);
 	}
 	sum/=36.0;
+	SG_DEBUG("sum %f\n", sum);
 
 	/* compute MATLAB: 1/m/(m-1)*(sum(sum(varHSIC)) - sum(diag(varHSIC))),
 	 * second term is bias correction */
-	float64_t var_hsic=1.0/m/m-1*(sum-trace);
+	float64_t var_hsic=1.0/m/(m-1)*(sum-trace);
+	SG_DEBUG("1.0/m/(m-1)*(sum-trace): %f\n", var_hsic);
 
 	/* finally, compute variance of hsic under H0
 	 * MATLAB: varHSIC = 72*(m-4)*(m-5)/m/(m-1)/(m-2)/(m-3)  *  varHSIC */
 	var_hsic=72.0*(m-4)*(m-5)/m/(m-1)/(m-2)/(m-2)*var_hsic;
+	SG_DEBUG("var_hsic: %f\n", var_hsic);
 
 	/* compute mean of matrices with diagonal elements zero on the base of sums
 	 * and trace from K and L which were computed above */
-	float64_t mu_x=sum_K-trace_K;
-	float64_t mu_y=sum_L-trace_L;
+	float64_t mu_x=1.0/m/(m-1)*(sum_K-trace_K);
+	float64_t mu_y=1.0/m/(m-1)*(sum_L-trace_L);
+	SG_DEBUG("mu_x: %f, mu_y: %f\n", mu_x, mu_y);
 
 	/* compute mean under H0, MATLAB: 1/m * ( 1 +muX*muY  - muX - muY ) */
-	float64_t m_hsic=1.0/m*(1+mu_y*mu_y-mu_x-mu_y);
+	float64_t m_hsic=1.0/m*(1+mu_x*mu_y-mu_x-mu_y);
+	SG_DEBUG("m_hsic: %f\n", m_hsic);
 
 	/* finally, compute parameters of gamma distirbution */
 	float64_t a=CMath::pow(m_hsic, 2)/var_hsic;
 	float64_t b=var_hsic*m/m_hsic;
+	SG_DEBUG("a: %f, b: %f\n", a, b);
 
 	/* return: cdf('gam',statistic,al,bet) (MATLAB)
 	 * which will get the position in the null distribution */
-	return CStatistics::gamma_cdf(statistic, a, b);
+	float64_t result=CStatistics::gamma_cdf(statistic, a, b);
+	SG_DEBUG("result: %f\n", result);
+
+	SG_DEBUG(("leaving %s::compute_p_value_gamma()\n"), get_name());
+	return result;
 }
diff --git a/src/shogun/statistics/IndependenceTestStatistic.h b/src/shogun/statistics/IndependenceTestStatistic.h
@@ -60,12 +60,6 @@ class CIndependenceTestStatistic : public CTestStatistic
 
 		/** samples from q */
 		CFeatures* m_q;
-
-		/** number of iterations for bootstrapping null-distributions */
-		index_t m_bootstrap_iterations;
-
-		/** Defines how the the null distribution is approximated */
-		ENullApproximationMethod m_null_approximation_method;
 };
 
 }

diff --git a/src/shogun/statistics/LinearTimeMMD.cpp b/src/shogun/statistics/LinearTimeMMD.cpp
@@ -25,7 +25,7 @@ CLinearTimeMMD::CLinearTimeMMD(CKernel* kernel, CFeatures* p_and_q,
 {
 	init();
 
-	if (q_start!=p_and_q->get_num_vectors()/2)
+	if (p_and_q && q_start!=p_and_q->get_num_vectors()/2)
 	{
 		SG_ERROR("CLinearTimeMMD: Only features with equal number of vectors "
 				"are currently possible\n");

diff --git a/src/shogun/statistics/QuadraticTimeMMD.cpp b/src/shogun/statistics/QuadraticTimeMMD.cpp
@@ -25,7 +25,7 @@ CQuadraticTimeMMD::CQuadraticTimeMMD(CKernel* kernel, CFeatures* p_and_q,
 {
 	init();
 
-	if (q_start!=p_and_q->get_num_vectors()/2)
+	if (p_and_q && q_start!=p_and_q->get_num_vectors()/2)
 	{
 		SG_ERROR("CQuadraticTimeMMD: Only features with equal number of vectors "
 				"are currently possible\n");
@@ -37,7 +37,7 @@ CQuadraticTimeMMD::CQuadraticTimeMMD(CKernel* kernel, CFeatures* p,
 {
 	init();
 
-	if (p->get_num_vectors()!=q->get_num_vectors())
+	if (p && q && p->get_num_vectors()!=q->get_num_vectors())
 	{
 		SG_ERROR("CQuadraticTimeMMD: Only features with equal number of vectors "
 				"are currently possible\n");