Merge pull request #637 from karlnapf/master
optimization for kernel weights with libqp for linear time mmd
karlnapf committed Jul 10, 2012
2 parents 2aba715 + b8b8336 commit 90c4bca
Showing 4 changed files with 244 additions and 8 deletions.
1 change: 1 addition & 0 deletions examples/undocumented/libshogun/Makefile
@@ -85,6 +85,7 @@ TARGETS = basic_minimal \
transfer_multitasklogisticregression \
statistics_quadratic_time_mmd \
statistics_linear_time_mmd \
statistics_linear_time_mmd_kernel_choice \
statistics_hsic \
transfer_multitasklsregression \
transfer_multitasklogisticregression \
87 changes: 87 additions & 0 deletions examples/undocumented/libshogun/statistics_linear_time_mmd_kernel_choice.cpp
@@ -0,0 +1,87 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/base/init.h>
#include <shogun/statistics/LinearTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

void create_mean_data(SGMatrix<float64_t> target, float64_t difference)
{
/* create data matrix for P and Q. P is a standard normal, Q is the same but
* has a mean difference in one dimension */
for (index_t i=0; i<target.num_rows; ++i)
{
for (index_t j=0; j<target.num_cols/2; ++j)
target(i,j)=CMath::randn_double();

/* add mean difference in first dimension of second half of data */
for (index_t j=target.num_cols/2; j<target.num_cols; ++j)
target(i,j)=CMath::randn_double() + (i==0 ? difference : 0);
}
}

SGMatrix<float64_t> create_fixed_data(index_t m, index_t dim)
{
SGMatrix<float64_t> data(dim,2*m);
for (index_t i=0; i<2*dim*m; ++i)
data.matrix[i]=i*i;

data.display_matrix("data");

return data;
}

void test_linear_mmd_optimize_weights()
{
index_t m=8;
index_t dim=2;
SGMatrix<float64_t> data=create_fixed_data(m, dim);

/* create a number of kernels with different widths */
SGVector<float64_t> sigmas(3);
SGVector<float64_t> shogun_sigmas(sigmas.vlen);

CCombinedKernel* kernel=new CCombinedKernel();
CCombinedFeatures* features=new CCombinedFeatures();
for (index_t i=0; i<sigmas.vlen; ++i)
{
sigmas[i]=CMath::pow(2.0, i-2)*1000;
shogun_sigmas[i]=sigmas[i]*sigmas[i]*2;
kernel->append_kernel(new CGaussianKernel(10, shogun_sigmas[i]));
features->append_feature_obj(new CDenseFeatures<float64_t>(data));
}

sigmas.display_vector("sigmas");

CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, features, m);
mmd->optimize_kernel_weights();

kernel->get_subkernel_weights().display_vector("weights");

SG_UNREF(mmd);
}

int main(int argc, char** argv)
{
init_shogun_with_defaults();

sg_io->set_loglevel(MSG_DEBUG);

test_linear_mmd_optimize_weights();

exit_shogun();
return 0;
}

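Background note, not part of the diff: CLinearTimeMMD is built around the linear-time (streaming) MMD estimate. Assuming m samples from each of p and q and the first-half/second-half pairing suggested by the loop indices in optimize_kernel_weights below (the exact index convention is inferred, not spelled out in the visible hunk), the per-kernel h-statistic and its mean are

h_j = k(x_j, x_{j+m/2}) + k(y_j, y_{j+m/2}) - k(x_j, y_{j+m/2}) - k(x_{j+m/2}, y_j),   j = 1, ..., m/2
linear-time MMD^2 estimate = (2/m) * sum_{j=1}^{m/2} h_j

The mmds vector below holds one such estimate per baseline kernel, and the hs matrix holds the individual h_j terms, from which the covariance matrix Q is computed.
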
136 changes: 128 additions & 8 deletions src/shogun/statistics/LinearTimeMMD.cpp
@@ -13,6 +13,8 @@
#include <shogun/features/CombinedFeatures.h>
#include <shogun/kernel/CombinedKernel.h>

#include <shogun/lib/external/libqp.h>

using namespace shogun;

CLinearTimeMMD::CLinearTimeMMD() :
@@ -53,7 +55,19 @@ CLinearTimeMMD::~CLinearTimeMMD()

void CLinearTimeMMD::init()
{

SG_ADD(&m_opt_max_iterations, "opt_max_iterations", "Maximum number of "
"iterations for qp solver", MS_NOT_AVAILABLE);
SG_ADD(&m_opt_epsilon, "opt_epsilon", "Stopping criterion for qp solver",
MS_NOT_AVAILABLE);
SG_ADD(&m_opt_low_cut, "opt_low_cut", "Low cut value for optimized "
"kernel weights", MS_NOT_AVAILABLE);
SG_ADD(&m_opt_regularization_eps, "opt_regularization_eps", "Regularization"
" value that is added to diagonal of Q matrix", MS_NOT_AVAILABLE);

m_opt_max_iterations=10000;
m_opt_epsilon=10E-15;
m_opt_low_cut=10E-7;
m_opt_regularization_eps=10E-5;
}

float64_t CLinearTimeMMD::compute_statistic()
@@ -226,7 +240,6 @@ void CLinearTimeMMD::optimize_kernel_weights()
qp=current->kernel(m2+j, m_q_start+j);
hs(j, i)=pp+qq-pq-qp;
mmds[i]+=hs(j, i);
// SG_DEBUG("hs(%d,%d)=%f+%f-%f-%f\n", j, i, pp, qq, pq, qp);
}

/* mmd is simply mean. This is the unbiased linear time estimate */
@@ -235,15 +248,122 @@
SG_UNREF(current);
}

mmds.display_vector("mmds");
// hs.display_matrix("hs");

/* compute covariance matrix of h vector, in place is safe now since h
* is not needed anymore */
SGMatrix<float64_t> Q=CStatistics::covariance_matrix(hs, true);
Q.display_matrix("Q");
m_Q=CStatistics::covariance_matrix(hs, true);

/* possibly regularize to avoid numerical problems (the ratio of MMD and
* std-dev blows up when the variance is small) */
if (m_opt_regularization_eps)
{
SG_DEBUG("regularizing matrix Q by adding %f to diagonal\n",
m_opt_regularization_eps);
for (index_t i=0; i<num_kernels; ++i)
m_Q(i,i)+=m_opt_regularization_eps;
}

if (sg_io->get_loglevel()==MSG_DEBUG)
{
m_Q.display_matrix("(possibly regularized) Q");
mmds.display_vector("mmds");
}

/* compute sum of mmds to generate feasible point for convex program */
float64_t sum_mmds=0;
for (index_t i=0; i<mmds.vlen; ++i)
sum_mmds+=mmds[i];

/* QP: 0.5*x'*Q*x + f'*x
* subject to
* mmds'*x = b
* LB[i] <= x[i] <= UB[i] for all i=1..n */
SGVector<float64_t> Q_diag(num_kernels);
SGVector<float64_t> f(num_kernels);
SGVector<float64_t> lb(num_kernels);
SGVector<float64_t> ub(num_kernels);
SGVector<float64_t> x(num_kernels);

/* init everything; there are two possible cases: i) at least one mmd
* is positive, ii) all mmds are negative */
bool one_pos;
for (index_t i=0; i<mmds.vlen; ++i)
{
if (mmds[i]>0)
{
SG_DEBUG("found at least one positive MMD\n");
one_pos=true;
break;
}
one_pos=false;
}

if (!one_pos)
{
SG_WARNING("All mmd estimates are negative. This is technically possible,"
" although extremely rare. Current problem might be bad\n");

/* if no element is positive, Q has to be replaced by -Q */
for (index_t i=0; i<num_kernels*num_kernels; ++i)
m_Q.matrix[i]*=-1;
}

/* init vectors */
for (index_t i=0; i<num_kernels; ++i)
{
Q_diag[i]=m_Q(i,i);
f[i]=0;
lb[i]=0;
ub[i]=CMath::INFTY;

/* initial point has to be feasible, i.e. mmds'*x = b */
x[i]=1.0/sum_mmds;
}

/* start libqp solver with desired parameters */
SG_DEBUG("starting libqp\n");
libqp_state_T qp_exitflag=libqp_gsmo_solver(&get_Q_col, Q_diag.vector,
f.vector, mmds.vector,
one_pos ? 1 : -1,
lb.vector, ub.vector,
x.vector, num_kernels, m_opt_max_iterations,
m_opt_epsilon, &print_state);

SG_DEBUG("libqp returns: nIts=%d, exit_flag: %d\n", qp_exitflag.nIter,
qp_exitflag.exitflag);

/* set really small entries to zero and sum up for normalization */
float64_t sum_weights=0;
for (index_t i=0; i<x.vlen; ++i)
{
if (x[i]<m_opt_low_cut)
{
SG_DEBUG("lowcut: weight[%i]=%f<%f; setting to zero\n", i, x[i],
m_opt_low_cut);
x[i]=0;
}

sum_weights+=x[i];
}

/* TODO from here solve QP */
/* normalize (allowed since problem is scale invariant) */
for (index_t i=0; i<x.vlen; ++i)
x[i]/=sum_weights;

/* set weights to kernel */
m_kernel->set_subkernel_weights(x);
}

SGMatrix<float64_t> CLinearTimeMMD::m_Q=SGMatrix<float64_t>();

const float64_t* CLinearTimeMMD::get_Q_col(uint32_t i)
{
return &m_Q[m_Q.num_rows*i];
}

void CLinearTimeMMD::print_state(libqp_state_T state)
{
SG_SDEBUG("libqp state: primal=%f\n", state.QP);
}

#endif //HAVE_LAPACK

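To restate the optimization problem the code above assembles (notation assumed: eta is the mmds vector of per-kernel linear-time MMD estimates, Q the possibly regularized covariance matrix of the h-statistics):

minimize    0.5 * x'*Q*x
subject to  eta'*x = 1      (with Q negated and eta'*x = -1 if all estimates are negative)
            0 <= x[i] < infinity    for all i

The initial point x[i] = 1/sum(eta) is chosen so that eta'*x = 1. After libqp's GSMO solver returns, weights below m_opt_low_cut are set to zero and the remaining weights are rescaled to sum to one, which is allowed since the problem is scale invariant; the result is written back as the subkernel weights of the combined kernel.
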
28 changes: 28 additions & 0 deletions src/shogun/statistics/LinearTimeMMD.h
@@ -13,6 +13,8 @@
#include <shogun/statistics/KernelTwoSampleTestStatistic.h>
#include <shogun/kernel/Kernel.h>

#include <shogun/lib/external/libqp.h>

namespace shogun
{

@@ -118,13 +120,39 @@ class CLinearTimeMMD: public CKernelTwoSampleTestStatistic
virtual void optimize_kernel_weights();
#endif //HAVE_LAPACK


inline virtual const char* get_name() const
{
return "LinearTimeMMD";
}

private:
void init();

public:
/** return pointer to i-th column of m_Q. Helper for libqp */
static const float64_t* get_Q_col(uint32_t i);

/** helper function that prints the current solver state */
static void print_state(libqp_state_T state);

protected:
/** maximum number of iterations of qp solver */
index_t m_opt_max_iterations;

/** stopping accuracy of qp solver */
float64_t m_opt_epsilon;

/** low cut for weights; weights below this value are set to zero */
float64_t m_opt_low_cut;

/** regularization epsilon that is added to diagonal of Q matrix */
float64_t m_opt_regularization_eps;

#ifdef HAVE_LAPACK
/** matrix for selection of kernel weights (static because of libqp) */
static SGMatrix<float64_t> m_Q;
#endif //HAVE_LAPACK
};

}

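A note on the static members introduced above, not part of the diff: libqp's GSMO solver reads the Q matrix only through a plain function pointer taking a column index and returning a pointer to that column, with no user-data argument, so the matrix must be reachable without a this pointer; that is why m_Q and get_Q_col are static. A minimal sketch of the pattern with hypothetical names (g_Q, get_col), assuming column-major storage as in SGMatrix:

/* hypothetical static storage for the Hessian handed to libqp */
static SGMatrix<float64_t> g_Q;

/* column accessor with the function-pointer signature libqp expects */
static const float64_t* get_col(uint32_t i)
{
	/* column i starts at offset num_rows*i in column-major storage */
	return &g_Q.matrix[g_Q.num_rows*i];
}

One consequence of this design is that concurrent weight optimizations within the same process would share m_Q.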