Cleaned DR preprocessors, removed effective target dim computation

shogun-toolbox · Oct 10, 2011 · 00b7950 · 00b7950
1 parent 85a967c
commit 00b7950
Show file tree

Hide file tree

Showing 19 changed files with 50 additions and 238 deletions.
diff --git a/src/shogun/preprocessor/DiffusionMaps.cpp b/src/shogun/preprocessor/DiffusionMaps.cpp
@@ -85,21 +85,14 @@ SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)
 	SG_REF(features);
 
 	// get dimensionality and number of vectors of data
-	int32_t dim = simple_features->get_num_features();
-	int32_t target_dim = calculate_effective_target_dim(dim);
-	if (target_dim==-1)
-		SG_ERROR("Trying to decrease dimensionality to negative value, not possible.\n");
-	if (target_dim>dim)
-		SG_ERROR("Cannot increase dimensionality: target dimensionality is %d while given features dimensionality is %d.\n",
-		         target_dim, dim);
 	int32_t N = simple_features->get_num_vectors();
+	int32_t dim;
 
 	// loop variables
 	int32_t i,j;
 
 	float64_t* feature_matrix = simple_features->get_feature_matrix(dim,N);
 
-
 	float64_t features_min = feature_matrix[0];
 	float64_t features_max = feature_matrix[0];
 	for (i=0; i<dim*N; i++)
@@ -117,6 +110,7 @@ SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)
 	ASSERT(m_kernel);
 	m_kernel->init(simple_features,simple_features);
 	SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
+	m_kernel->cleanup();
 
 	float64_t* p_vector = SG_CALLOC(float64_t, N);
 	for (i=0; i<N; i++)
@@ -165,7 +159,7 @@ SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)
 
 	int32_t info = 0;
 
-	wrap_dsyevr('V','U',N,kkt_matrix,N,N-target_dim,N,s_values,kernel_matrix.matrix,&info);
+	wrap_dsyevr('V','U',N,kkt_matrix,N,N-m_target_dim,N,s_values,kernel_matrix.matrix,&info);
 	if (info)
 		SG_ERROR("DGESVD failed with %d code", info);
 
@@ -176,16 +170,16 @@ SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)
 */
 
 	SG_FREE(kkt_matrix);
-	float64_t* new_feature_matrix = SG_MALLOC(float64_t, N*target_dim);
+	float64_t* new_feature_matrix = SG_MALLOC(float64_t, N*m_target_dim);
 
-	for (i=0; i<target_dim; i++)
+	for (i=0; i<m_target_dim; i++)
 	{
 		for (j=0; j<N; j++)
-			new_feature_matrix[j*target_dim+i] = kernel_matrix.matrix[(target_dim-i-1)*N+j]/kernel_matrix.matrix[(target_dim)*N+j];
+			new_feature_matrix[j*m_target_dim+i] = kernel_matrix.matrix[(m_target_dim-i-1)*N+j]/kernel_matrix.matrix[(m_target_dim)*N+j];
 	}
 	kernel_matrix.destroy_matrix();
 
-	simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,target_dim,N));
+	simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,N));
 	SG_UNREF(features);
 	return simple_features->get_feature_matrix();
 }

diff --git a/src/shogun/preprocessor/DiffusionMaps.h b/src/shogun/preprocessor/DiffusionMaps.h
@@ -22,7 +22,9 @@ namespace shogun
 class CFeatures;
 class CKernel;
 
-/** @brief */
+/** @brief CDiffusionMaps preprocesses the given data using the
+  * diffusion maps dimensionality reduction technique.
+  */
 class CDiffusionMaps: public CDimensionReductionPreprocessor<float64_t>
 {
 public:

diff --git a/src/shogun/preprocessor/DimensionReductionPreprocessor.cpp b/src/shogun/preprocessor/DimensionReductionPreprocessor.cpp
@@ -42,6 +42,7 @@ EPreprocessorType CDimensionReductionPreprocessor<ST>::get_type() const { return
 template<class ST>
 void CDimensionReductionPreprocessor<ST>::set_target_dim(int32_t dim)
 {
+	ASSERT(dim>0);
 	m_target_dim = dim;
 }
 
@@ -51,22 +52,6 @@ int32_t CDimensionReductionPreprocessor<ST>::get_target_dim() const
 	return m_target_dim;
 }
 
-template<class ST>
-int32_t CDimensionReductionPreprocessor<ST>::calculate_effective_target_dim(int32_t dim)
-{
-	if (m_target_dim<0)
-	{
-		if (dim+m_target_dim>0)
-		{
-			return dim+m_target_dim;
-		}
-		else
-			return -1;
-	}
-	else
-		return m_target_dim;
-}
-
 template<class ST>
 void CDimensionReductionPreprocessor<ST>::set_distance(CDistance* distance)
 {

diff --git a/src/shogun/preprocessor/DimensionReductionPreprocessor.h b/src/shogun/preprocessor/DimensionReductionPreprocessor.h
@@ -97,13 +97,6 @@ class CDimensionReductionPreprocessor: public CSimplePreprocessor<ST>
 
 protected:
 
-	/** calculates effective target dimensionality
-	 * according to set m_target_dim
-	 * @param dim dimensionality of 
-	 * @return effective target dimensionality
-	 */
-	int32_t calculate_effective_target_dim(int32_t dim);
-
 	/** default init */
 	void init();
 

diff --git a/src/shogun/preprocessor/HessianLocallyLinearEmbedding.cpp b/src/shogun/preprocessor/HessianLocallyLinearEmbedding.cpp
@@ -80,15 +80,6 @@ CHessianLocallyLinearEmbedding::~CHessianLocallyLinearEmbedding()
 {
 }
 
-bool CHessianLocallyLinearEmbedding::init(CFeatures* features)
-{
-	return true;
-}
-
-void CHessianLocallyLinearEmbedding::cleanup()
-{
-}
-
 const char* CHessianLocallyLinearEmbedding::get_name() const 
 { 
 	return "HessianLocallyLinearEmbedding";
@@ -105,9 +96,8 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
 {
 	int32_t N = simple_features->get_num_vectors();
 	int32_t dim = simple_features->get_num_features();
-	int32_t target_dim = calculate_effective_target_dim(dim);
-	int32_t dp = target_dim*(target_dim+1)/2;
-	if (m_k<(1+target_dim+dp))
+	int32_t dp = m_target_dim*(m_target_dim+1)/2;
+	if (m_k<(1+m_target_dim+dp))
 		SG_ERROR("K parameter should have value greater than 1+target dimensionality+dp.\n");
 	int32_t t;
 #ifdef HAVE_PTHREAD
@@ -124,12 +114,12 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
 	// init matrices to be used
 	float64_t* local_feature_matrix = SG_MALLOC(float64_t, m_k*dim*num_threads);
 	float64_t* s_values_vector = SG_MALLOC(float64_t, dim*num_threads);
-	int32_t tau_len = CMath::min((1+target_dim+dp), m_k);
+	int32_t tau_len = CMath::min((1+m_target_dim+dp), m_k);
 	float64_t* tau = SG_MALLOC(float64_t, tau_len*num_threads);
 	float64_t* mean_vector = SG_MALLOC(float64_t, dim*num_threads);
 	float64_t* q_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
 	float64_t* w_sum_vector = SG_MALLOC(float64_t, dp*num_threads);
-	float64_t* Yi_matrix = SG_MALLOC(float64_t, m_k*(1+target_dim+dp)*num_threads);
+	float64_t* Yi_matrix = SG_MALLOC(float64_t, m_k*(1+m_target_dim+dp)*num_threads);
 	// get feature matrix
 	SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
 
@@ -147,13 +137,13 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
 		parameters[t].idx_stop = N;
 		parameters[t].m_k = m_k;
 		parameters[t].dim = dim;
-		parameters[t].target_dim = target_dim;
+		parameters[t].target_dim = m_target_dim;
 		parameters[t].N = N;
 		parameters[t].dp = dp;
 		parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
 		parameters[t].feature_matrix = feature_matrix.matrix;
 		parameters[t].local_feature_matrix = local_feature_matrix + (m_k*dim)*t;
-		parameters[t].Yi_matrix = Yi_matrix + (m_k*(1+target_dim+dp))*t;
+		parameters[t].Yi_matrix = Yi_matrix + (m_k*(1+m_target_dim+dp))*t;
 		parameters[t].mean_vector = mean_vector + dim*t;
 		parameters[t].s_values_vector = s_values_vector + dim*t;
 		parameters[t].tau = tau+tau_len*t;
@@ -176,7 +166,7 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
 	single_thread_param.idx_stop = N;
 	single_thread_param.m_k = m_k;
 	single_thread_param.dim = dim;
-	single_thread_param.target_dim = target_dim;
+	single_thread_param.target_dim = m_target_dim;
 	single_thread_param.N = N;
 	single_thread_param.dp = dp;
 	single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;

diff --git a/src/shogun/preprocessor/HessianLocallyLinearEmbedding.h b/src/shogun/preprocessor/HessianLocallyLinearEmbedding.h
@@ -49,15 +49,6 @@ class CHessianLocallyLinearEmbedding: public CLocallyLinearEmbedding
 	/** destructor */
 	virtual ~CHessianLocallyLinearEmbedding();
 
-	/** init
-	 * @param features
-	 */
-	virtual bool init(CFeatures* features);
-
-	/** cleanup
-	 */
-	virtual void cleanup();
-
 	/** get name */
 	virtual const char* get_name() const;
 

diff --git a/src/shogun/preprocessor/Isomap.cpp b/src/shogun/preprocessor/Isomap.cpp
@@ -91,15 +91,6 @@ EPreprocessorType CIsomap::get_type() const
 	return P_ISOMAP;
 }
 
-bool CIsomap::init(CFeatures* features)
-{
-	return true;
-}
-
-void CIsomap::cleanup()
-{
-}
-
 CSimpleFeatures<float64_t>* CIsomap::apply_to_distance(CDistance* distance)
 {
 	ASSERT(distance);
@@ -127,6 +118,7 @@ SGMatrix<float64_t> CIsomap::apply_to_feature_matrix(CFeatures* features)
 	ASSERT(m_distance);
 	m_distance->init(simple_features, simple_features);
 	SGMatrix<float64_t> geodesic_distance_matrix = isomap_distance(m_distance->get_distance_matrix());
+	m_distance->remove_lhs_and_rhs();
 	SGMatrix<float64_t> new_features;
 	if (m_landmark) 
 		new_features = CMultidimensionalScaling::landmark_embedding(geodesic_distance_matrix);

diff --git a/src/shogun/preprocessor/Isomap.h b/src/shogun/preprocessor/Isomap.h
@@ -54,15 +54,6 @@ class CIsomap: public CMultidimensionalScaling
 	/* destructor */
 	virtual ~CIsomap();
 
-	/** empty init
-	 * @param features
-	 */
-	virtual bool init(CFeatures* features);
-
-	/** empty cleanup
-	 */
-	virtual void cleanup();
-
 	/** apply preprocessor to CDistance
 	 * @param distance distance
 	 * @return embedded features 

diff --git a/src/shogun/preprocessor/KernelLocalTangentSpaceAlignment.cpp b/src/shogun/preprocessor/KernelLocalTangentSpaceAlignment.cpp
@@ -69,15 +69,6 @@ CKernelLocalTangentSpaceAlignment::~CKernelLocalTangentSpaceAlignment()
 {
 }
 
-bool CKernelLocalTangentSpaceAlignment::init(CFeatures* features)
-{
-	return true;
-}
-
-void CKernelLocalTangentSpaceAlignment::cleanup()
-{
-}
-
 const char* CKernelLocalTangentSpaceAlignment::get_name() const
 { 
 	return "KernelLocalTangentSpaceAlignment"; 
@@ -89,8 +80,7 @@ EPreprocessorType CKernelLocalTangentSpaceAlignment::get_type() const
 };
 
 SGMatrix<float64_t> CKernelLocalTangentSpaceAlignment::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix,
-                                                                               SGMatrix<int32_t> neighborhood_matrix,
-                                                                               int32_t target_dim)
+                                                                               SGMatrix<int32_t> neighborhood_matrix)
 {
 	int32_t N = kernel_matrix.num_cols;
 	int32_t t;
@@ -106,7 +96,7 @@ SGMatrix<float64_t> CKernelLocalTangentSpaceAlignment::construct_weight_matrix(S
 
 	// init matrices and norm factor to be used
 	float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
-	float64_t* G_matrix = SG_MALLOC(float64_t, m_k*(1+target_dim)*num_threads);
+	float64_t* G_matrix = SG_MALLOC(float64_t, m_k*(1+m_target_dim)*num_threads);
 	float64_t* W_matrix = SG_CALLOC(float64_t, N*N);
 	float64_t* ev_vector = SG_MALLOC(float64_t, m_k*num_threads);
 
@@ -119,9 +109,9 @@ SGMatrix<float64_t> CKernelLocalTangentSpaceAlignment::construct_weight_matrix(S
 
 	for (t=0; t<num_threads; t++)
 	{
-		KLTSA_THREAD_PARAM params = {t,num_threads,N,m_k,target_dim,N,neighborhood_matrix.matrix,
+		KLTSA_THREAD_PARAM params = {t,num_threads,N,m_k,m_target_dim,N,neighborhood_matrix.matrix,
 		                            kernel_matrix.matrix,local_gram_matrix+(m_k*m_k)*t,ev_vector+m_k*t,
-		                            G_matrix+(m_k*(1+target_dim))*t,W_matrix,&W_matrix_lock};
+		                            G_matrix+(m_k*(1+m_target_dim))*t,W_matrix,&W_matrix_lock};
 		parameters[t] = params;
 		pthread_create(&threads[t], &attr, run_kltsa_thread, (void*)&parameters[t]);
 	}
@@ -131,7 +121,7 @@ SGMatrix<float64_t> CKernelLocalTangentSpaceAlignment::construct_weight_matrix(S
 	SG_FREE(parameters);
 	SG_FREE(threads);
 #else
-	KLTSA_THREAD_PARAM single_thread_param = {0,1,N,m_k,target_dim,neighborhood_matrix.matrix,
+	KLTSA_THREAD_PARAM single_thread_param = {0,1,N,m_k,m_target_dim,neighborhood_matrix.matrix,
 	                                          kernel_matrix.matrix,local_gram_matrix,ev_vector,
 	                                          G_matrix,W_matrix};
 	run_kltsa_thread((void*)&single_thread_param);

diff --git a/src/shogun/preprocessor/KernelLocalTangentSpaceAlignment.h b/src/shogun/preprocessor/KernelLocalTangentSpaceAlignment.h
@@ -41,15 +41,6 @@ class CKernelLocalTangentSpaceAlignment: public CKernelLocallyLinearEmbedding
 	/** destructor */
 	virtual ~CKernelLocalTangentSpaceAlignment();
 
-	/** init
-	 * @param features
-	 */
-	virtual bool init(CFeatures* features);
-
-	/** cleanup
-	 */
-	virtual void cleanup();
-
 	/** get name */
 	virtual const char* get_name() const;
 
@@ -62,8 +53,7 @@ class CKernelLocalTangentSpaceAlignment: public CKernelLocallyLinearEmbedding
 	/** construct weight matrix 
 	 */
 	virtual SGMatrix<float64_t> construct_weight_matrix(SGMatrix<float64_t> kernel_matrix, 
-	                                                    SGMatrix<int32_t> neighborhood_matrix,
-	                                                    int32_t target_dim);
+	                                                    SGMatrix<int32_t> neighborhood_matrix);
 
 /// THREADS
 protected:

diff --git a/src/shogun/preprocessor/KernelLocallyLinearEmbedding.cpp b/src/shogun/preprocessor/KernelLocallyLinearEmbedding.cpp
@@ -111,23 +111,10 @@ EPreprocessorType CKernelLocallyLinearEmbedding::get_type() const
 	return P_KERNELLOCALLYLINEAREMBEDDING;
 };
 
-void CKernelLocallyLinearEmbedding::init()
-{
-}
-
 CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding()
 {
 }
 
-bool CKernelLocallyLinearEmbedding::init(CFeatures* features)
-{
-	return true;
-}
-
-void CKernelLocallyLinearEmbedding::cleanup()
-{
-}
-
 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::apply_to_feature_matrix(CFeatures* features)
 {
 	ASSERT(features);
@@ -136,18 +123,6 @@ SGMatrix<float64_t> CKernelLocallyLinearEmbedding::apply_to_feature_matrix(CFeat
 	// get dimensionality and number of vectors of data
 	bool is_simple = ((features->get_feature_class()==C_SIMPLE) && (features->get_feature_type()==F_DREAL));
 	int32_t N = features->get_num_vectors();
-	int32_t target_dim = 0;
-	if (is_simple)
-		target_dim = calculate_effective_target_dim(((CSimpleFeatures<float64_t>*)features)->get_num_features());
-	else
-	{
-		if (m_target_dim<=0)
-			SG_ERROR("Cannot decrease dimensionality of given features by %d.\n", -m_target_dim);
-		else
-			target_dim = m_target_dim;
-	}
-	if (target_dim<=0)
-		SG_ERROR("Trying to decrease dimensionality to non-positive value, not possible.\n");
 	if (m_k>=N)
 		SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n",
 		         m_k, N);
@@ -160,29 +135,27 @@ SGMatrix<float64_t> CKernelLocallyLinearEmbedding::apply_to_feature_matrix(CFeat
 	m_kernel->cleanup();
 
 	// init W (weight) matrix
-	SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix,target_dim);
+	SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix);
 	neighborhood_matrix.destroy_matrix();
 
-	SGMatrix<float64_t> nullspace = find_null_space(M_matrix,target_dim);
+	SGMatrix<float64_t> nullspace = find_null_space(M_matrix,m_target_dim);
 	M_matrix.destroy_matrix();
 
+	SG_UNREF(features);
 	if (is_simple)
 	{
 		((CSimpleFeatures<float64_t>*)features)->set_feature_matrix(nullspace);
-		SG_UNREF(features);
 		return ((CSimpleFeatures<float64_t>*)features)->get_feature_matrix();
 	}
 	else
 	{
-		SG_UNREF(features);
 		SG_WARNING("Can't set feature matrix, returning feature matrix.\n");
 		return nullspace;
 	}
 }
 
 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix, 
-                                                                           SGMatrix<int32_t> neighborhood_matrix,
-                                                                           int32_t target_dim)
+                                                                           SGMatrix<int32_t> neighborhood_matrix)
 {
 	int32_t N = kernel_matrix.num_cols;
 	// loop variables