Skip to content

Commit

Permalink
Introduced DiffusionMaps dimension reduction preprocessor
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn committed Sep 24, 2011
1 parent be97a43 commit 3308301
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 1 deletion.
@@ -0,0 +1,27 @@
from tools.load import LoadMatrix

# Loader object used to read the example data file below.
lm=LoadMatrix()
# Numbers read from the example data file; passed as `data` to the example function.
data = lm.load_numbers('../data/fm_train_real.dat')

# Argument lists (data, t) for preprocessor_diffusionmaps_modular;
# the two entries differ only in t (10 and 20).
parameter_list = [[data,10],[data,20]]

def preprocessor_diffusionmaps_modular(data,t):
	"""Run the DiffusionMaps preprocessor on `data` and return the features.

	data -- matrix handed to RealFeatures
	t    -- value passed to DiffusionMaps.set_t

	The preprocessor is configured with target dimension 1 and a
	GaussianKernel(10,10.0), then applied to the feature matrix of the
	RealFeatures object, which is returned afterwards.
	"""
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import DiffusionMaps
	from shogun.Kernel import GaussianKernel

	feats = RealFeatures(data)

	dmaps = DiffusionMaps()
	dmaps.set_target_dim(1)
	gaussian = GaussianKernel(10,10.0)
	dmaps.set_kernel(gaussian)
	dmaps.set_t(t)
	# the call replaces the feature matrix held by `feats`
	dmaps.apply_to_feature_matrix(feats)

	return feats


# Run the example with the first parameter set when executed as a script.
if __name__=='__main__':
	# A parenthesised single-argument print gives the same output on
	# Python 2 and also works on Python 3.
	print('DiffusionMaps')
	preprocessor_diffusionmaps_modular(*parameter_list[0])

2 changes: 2 additions & 0 deletions src/interfaces/modular/Preprocessor.i
Expand Up @@ -26,6 +26,7 @@
%rename(LocalTangentSpaceAlignment) CLocalTangentSpaceAlignment;
%rename(LaplacianEigenmaps) CLaplacianEigenmaps;
%rename(Isomap) CIsomap;
%rename(DiffusionMaps) CDiffusionMaps;

%rename(SortUlongString) CSortUlongString;
%rename(SortWordString) CSortWordString;
Expand Down Expand Up @@ -84,6 +85,7 @@ namespace shogun
%include <shogun/preprocessor/LocalTangentSpaceAlignment.h>
%include <shogun/preprocessor/LaplacianEigenmaps.h>
%include <shogun/preprocessor/Isomap.h>
%include <shogun/preprocessor/DiffusionMaps.h>

%include <shogun/preprocessor/SortUlongString.h>
%include <shogun/preprocessor/SortWordString.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Preprocessor_includes.i
Expand Up @@ -20,6 +20,7 @@
#include <shogun/preprocessor/LocalTangentSpaceAlignment.h>
#include <shogun/preprocessor/LaplacianEigenmaps.h>
#include <shogun/preprocessor/Isomap.h>
#include <shogun/preprocessor/DiffusionMaps.h>

#include <shogun/preprocessor/StringPreprocessor.h>
#include <shogun/preprocessor/DecompressString.h>
Expand Down
141 changes: 141 additions & 0 deletions src/shogun/preprocessor/DiffusionMaps.cpp
@@ -0,0 +1,141 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

#include <shogun/preprocessor/DiffusionMaps.h>
#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/mathematics/lapack.h>
#include <shogun/mathematics/Math.h>
#include <shogun/io/SGIO.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/lib/Signal.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

/** Default constructor: sets the number of steps t to 10 and registers
 * the parameter via init().
 */
CDiffusionMaps::CDiffusionMaps() :
		CDimensionReductionPreprocessor(), m_t(10)
{
	init();
}

/** Default init: adds m_t to m_parameters under the name "t". */
void CDiffusionMaps::init()
{
m_parameters->add(&m_t, "t", "number of steps");
}

/** Destructor; the body is empty. */
CDiffusionMaps::~CDiffusionMaps()
{
}

/** Preprocessor init hook.
 * @param features not used by this implementation
 * @return always true
 */
bool CDiffusionMaps::init(CFeatures* features)
{
return true;
}

/** Preprocessor cleanup hook; the body is empty. */
void CDiffusionMaps::cleanup()
{
}

/** Replace the feature matrix of the given features with an embedding built
 * from the singular vectors of a normalised kernel matrix.
 *
 * With K the N x N kernel matrix of the data with itself (column-major):
 *  1. p_i = sum_j K[j*N+i]; every K(i,j) is divided by (p_i*p_j)^m_t;
 *  2. q_i = sqrt(sum_j K[j*N+i]) on the result; K is divided by dot(q,q);
 *  3. DGESVD with jobu='O' overwrites K with its left singular vectors;
 *  4. singular vectors 1..m_target_dim, divided element-wise by singular
 *     vector 0, become the rows of the new m_target_dim x N feature matrix.
 *
 * @param features features of class C_SIMPLE and type F_DREAL; their
 *        feature matrix is replaced by the embedding
 * @return the feature matrix stored in the features after replacement
 */
SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)
{
	ASSERT(features);
	if (!(features->get_feature_class()==C_SIMPLE &&
	      features->get_feature_type()==F_DREAL))
	{
		SG_ERROR("Given features are not of SimpleRealFeatures type.\n");
	}
	// shorthand for simplefeatures
	CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*) features;

	// Validate everything before taking a reference or allocating, so that
	// a failing check cannot leak either.
	int32_t dim = simple_features->get_num_features();
	if (m_target_dim>dim)
		SG_ERROR("Cannot increase dimensionality: target dimensionality is %d while given features dimensionality is %d.\n",
		         m_target_dim, dim);
	int32_t N = simple_features->get_num_vectors();
	// The embedding reads singular vectors 1..m_target_dim of an N x N
	// matrix, so m_target_dim+1 columns must exist.
	if (m_target_dim>=N)
		SG_ERROR("Target dimensionality %d must be less than the number of vectors %d.\n",
		         m_target_dim, N);
	ASSERT(m_kernel);

	SG_REF(features);

	// compute kernel matrix
	m_kernel->init(simple_features,simple_features);
	SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();

	// p_vector[i] = sum_j K[j*N+i]
	float64_t* p_vector = SG_CALLOC(float64_t, N);
	for (int32_t i=0; i<N; i++)
	{
		for (int32_t j=0; j<N; j++)
			p_vector[i] += kernel_matrix.matrix[j*N+i];
	}

	// p_matrix = p*p' (rank-one update of a zeroed matrix)
	float64_t* p_matrix = SG_CALLOC(float64_t, N*N);
	cblas_dger(CblasColMajor,N,N,1.0,p_vector,1,p_vector,1,p_matrix,N);
	for (int32_t i=0; i<N*N; i++)
	{
		// the exponent is the number of steps m_t
		kernel_matrix.matrix[i] /= CMath::pow(p_matrix[i], m_t);
	}
	SG_FREE(p_matrix);

	// second normalisation: divide by dot(q,q), q_i = sqrt(sum_j K[j*N+i])
	for (int32_t i=0; i<N; i++)
	{
		p_vector[i] = 0.0;
		for (int32_t j=0; j<N; j++)
			p_vector[i] += kernel_matrix.matrix[j*N+i];
		p_vector[i] = CMath::sqrt(p_vector[i]);
	}
	float64_t ppt = cblas_ddot(N,p_vector,1,p_vector,1);
	SG_FREE(p_vector);

	for (int32_t i=0; i<N*N; i++)
		kernel_matrix.matrix[i] /= ppt;

	// jobu='O': left singular vectors overwrite kernel_matrix,
	// jobvt='N': right singular vectors are not computed
	float64_t* s_values = SG_MALLOC(float64_t, N);
	int32_t info = 0;
	wrap_dgesvd('O','N',N,N,kernel_matrix.matrix,N,s_values,NULL,1,NULL,1,&info);
	// the singular values themselves are not used
	SG_FREE(s_values);
	if (info)
	{
		// release what this function holds before reporting the failure
		kernel_matrix.destroy_matrix();
		SG_UNREF(features);
		SG_ERROR("DGESVD failed with %d code", info);
	}

	float64_t* new_feature_matrix = SG_MALLOC(float64_t, N*m_target_dim);
	for (int32_t i=0; i<m_target_dim; i++)
	{
		for (int32_t j=0; j<N; j++)
			new_feature_matrix[j*m_target_dim+i] = kernel_matrix.matrix[(i+1)*N+j]/kernel_matrix.matrix[j];
	}
	kernel_matrix.destroy_matrix();

	simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,N));
	// fetch the result while the reference taken above is still held
	SGMatrix<float64_t> embedding = simple_features->get_feature_matrix();
	SG_UNREF(features);
	return embedding;
}

/** Not implemented for this preprocessor: invokes SG_NOTIMPLEMENTED.
 * @param vector feature vector
 * @return the statement after SG_NOTIMPLEMENTED returns the argument unchanged
 */
SGVector<float64_t> CDiffusionMaps::apply_to_feature_vector(SGVector<float64_t> vector)
{
SG_NOTIMPLEMENTED;
return vector;
}

#endif /* HAVE_LAPACK */
91 changes: 91 additions & 0 deletions src/shogun/preprocessor/DiffusionMaps.h
@@ -0,0 +1,91 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

#ifndef DIFFUSIONMAPS_H_
#define DIFFUSIONMAPS_H_
#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/features/Features.h>
#include <shogun/distance/Distance.h>

namespace shogun
{

class CFeatures;
class CKernel;

/** @brief Diffusion maps dimension reduction preprocessor.
 *
 * Operates on feature matrices through apply_to_feature_matrix and has one
 * parameter of its own, the number of steps t.
 */
class CDiffusionMaps: public CDimensionReductionPreprocessor
{
public:

	/** constructor */
	CDiffusionMaps();

	/** destructor */
	virtual ~CDiffusionMaps();

	/** init
	 * @param features features to initialize with
	 * @return whether initialization succeeded
	 */
	virtual bool init(CFeatures* features);

	/** cleanup */
	virtual void cleanup();

	/** apply preprocessor to features
	 * @param features features whose feature matrix is processed
	 * @return resulting feature matrix
	 */
	virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

	/** apply preprocessor to feature vector, not supported for DiffusionMaps
	 * @param vector feature vector
	 * @return feature vector
	 */
	virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);

	/** setter for t parameter
	 *
	 * The number of steps is stored as int32_t, so a fractional value is
	 * truncated towards zero. The parameter type is kept as float64_t for
	 * compatibility with existing callers.
	 *
	 * @param t t value
	 */
	void inline set_t(float64_t t)
	{
		m_t = static_cast<int32_t>(t);
	}

	/** getter for t parameter
	 * @return t value
	 */
	int32_t inline get_t() const
	{
		return m_t;
	}

	/** get name */
	virtual inline const char* get_name() const { return "DiffusionMaps"; }

	/** get type */
	virtual inline EPreprocessorType get_type() const { return P_DIFFUSIONMAPS; }

protected:

	/** default init */
	void init();

	/** number of steps */
	int32_t m_t;

};
}

#endif /* HAVE_LAPACK */
#endif /* DIFFUSIONMAPS_H_ */
3 changes: 2 additions & 1 deletion src/shogun/preprocessor/Preprocessor.h
Expand Up @@ -54,7 +54,8 @@ enum EPreprocessorType
P_HESSIANLOCALLYLINEAREMBEDDING=200,
P_LOCALTANGENTSPACEALIGNMENT=210,
P_LAPLACIANEIGENMAPS=220,
P_KERNELLOCALLYLINEAREMBEDDING=230
P_KERNELLOCALLYLINEAREMBEDDING=230,
P_DIFFUSIONMAPS=240
};

/** @brief Class Preprocessor defines a preprocessor interface.
Expand Down

0 comments on commit 3308301

Please sign in to comment.