Skip to content

Commit

Permalink
Added support for reducing target dimensionality via a negative target …
Browse files Browse the repository at this point in the history
…dimensionality, and fixed several issues in the NLDR preprocessors
  • Loading branch information
lisitsyn committed Sep 29, 2011
1 parent 4889edc commit 95df0a5
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 60 deletions.
20 changes: 11 additions & 9 deletions src/shogun/preprocessor/DiffusionMaps.cpp
Expand Up @@ -66,9 +66,12 @@ SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)

// get dimensionality and number of vectors of data
int32_t dim = simple_features->get_num_features();
if (m_target_dim>dim)
int32_t target_dim = calculate_effective_target_dim(dim);
if (target_dim==-1)
SG_ERROR("Trying to decrease dimensionality to negative value, not possible.\n");
if (target_dim>dim)
SG_ERROR("Cannot increase dimensionality: target dimensionality is %d while given features dimensionality is %d.\n",
m_target_dim, dim);
target_dim, dim);
int32_t N = simple_features->get_num_vectors();

// loop variables
Expand Down Expand Up @@ -142,27 +145,26 @@ SGMatrix<float64_t> CDiffusionMaps::apply_to_feature_matrix(CFeatures* features)

int32_t info = 0;

wrap_dsyevr('V','U',N,kkt_matrix,N,N-m_target_dim,N,s_values,kernel_matrix.matrix,&info);
wrap_dsyevr('V','U',N,kkt_matrix,N,N-target_dim,N,s_values,kernel_matrix.matrix,&info);
if (info)
SG_ERROR("DGESVD failed with %d code", info);

SG_REALLOC(float64_t, kkt_matrix, N*m_target_dim);

SG_FREE(s_values);
/*
int32_t info = 0;
wrap_dgesvd('O','N',N,N,kernel_matrix.matrix,N,s_values,NULL,1,NULL,1,&info);
*/

float64_t* new_feature_matrix = kkt_matrix;//SG_MALLOC(float64_t, N*m_target_dim);
float64_t* new_feature_matrix = kkt_matrix;//SG_MALLOC(float64_t, N*target_dim);

for (i=0; i<m_target_dim; i++)
for (i=0; i<target_dim; i++)
{
for (j=0; j<N; j++)
new_feature_matrix[j*m_target_dim+i] = kernel_matrix.matrix[(m_target_dim-i-1)*N+j]/kernel_matrix.matrix[(m_target_dim)*N+j];
new_feature_matrix[j*target_dim+i] = kernel_matrix.matrix[(target_dim-i-1)*N+j]/kernel_matrix.matrix[(target_dim)*N+j];
}
kernel_matrix.destroy_matrix();

simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,N));
simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,target_dim,N));
SG_UNREF(features);
return simple_features->get_feature_matrix();
}
Expand Down
16 changes: 15 additions & 1 deletion src/shogun/preprocessor/DimensionReductionPreprocessor.cpp
Expand Up @@ -50,7 +50,6 @@ EPreprocessorType CDimensionReductionPreprocessor::get_type() const { return P_D

/** Sets the requested target dimensionality.
 * A positive value requests that exact output dimensionality; negative
 * values are resolved later against the data dimensionality (see
 * calculate_effective_target_dim), so no range check is done here.
 * NOTE(review): the diff context ("1 deletion") suggests the ASSERT below
 * is being removed by this commit to permit negative values — confirm
 * against the applied revision.
 */
void CDimensionReductionPreprocessor::set_target_dim(int32_t dim)
{
ASSERT(dim>0 || dim==AUTO_TARGET_DIM);
m_target_dim = dim;
}

Expand All @@ -59,6 +58,21 @@ int32_t CDimensionReductionPreprocessor::get_target_dim() const
return m_target_dim;
}

/** Resolves the user-requested target dimensionality against the actual
 * data dimensionality.
 *
 * A non-negative m_target_dim is returned as-is. A negative m_target_dim
 * means "reduce by that many dimensions", i.e. the effective target is
 * dim+m_target_dim.
 *
 * @param dim dimensionality of the given feature space
 * @return effective target dimensionality, or -1 when the requested
 *         reduction would leave no dimensions (callers such as
 *         DiffusionMaps/LaplacianEigenmaps treat -1 as an error)
 */
int32_t CDimensionReductionPreprocessor::calculate_effective_target_dim(int32_t dim)
{
	if (m_target_dim<0)
	{
		// An effective dimensionality of 1 is a valid embedding target;
		// only non-positive results are invalid (was ">1", which wrongly
		// rejected reducing down to exactly one dimension — the callers'
		// error message only claims to reject non-positive values).
		if (dim+m_target_dim>0)
		{
			return dim+m_target_dim;
		}
		else
			return -1;
	}
	else
		return m_target_dim;
}

void CDimensionReductionPreprocessor::set_distance(CDistance* distance)
{
SG_UNREF(m_distance);
Expand Down
19 changes: 12 additions & 7 deletions src/shogun/preprocessor/DimensionReductionPreprocessor.h
Expand Up @@ -94,18 +94,23 @@ class CDimensionReductionPreprocessor: public CSimplePreprocessor<float64_t>
*/
CKernel* get_kernel() const;

public:

/** const indicating target dimensionality should be determined automagically */
static const int32_t AUTO_TARGET_DIM = -1;

protected:

/** detect dimensionality from distance matrix
/** detect dimensionality from distance matrix
* NOT YET IMPLEMENTED
* @param distance_matrix distance matrix to be used
* @return detected dimensionality
*/
virtual int32_t detect_dim(SGMatrix<float64_t> distance_matrix);
int32_t detect_dim(SGMatrix<float64_t> distance_matrix);

protected:

/** calculates effective target dimensionality
* according to set m_target_dim
* @param dim dimensionality of the given feature space
* @return effective target dimensionality
*/
int32_t calculate_effective_target_dim(int32_t dim);

/** default init */
void init();
Expand Down
37 changes: 19 additions & 18 deletions src/shogun/preprocessor/HessianLocallyLinearEmbedding.cpp
Expand Up @@ -42,7 +42,7 @@ struct HESSIANESTIMATION_THREAD_PARAM
/// dp
int32_t dp;
/// target dimensionality
int32_t m_target_dim;
int32_t target_dim;
/// matrix containing indexes of neighbors of ith vector in ith column
const int32_t* neighborhood_matrix;
/// feature matrix
Expand Down Expand Up @@ -95,8 +95,9 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
{
int32_t N = simple_features->get_num_vectors();
int32_t dim = simple_features->get_num_features();
int32_t dp = m_target_dim*(m_target_dim+1)/2;
ASSERT(m_k>=(1+m_target_dim+dp));
int32_t target_dim = calculate_effective_target_dim(dim);
int32_t dp = target_dim*(target_dim+1)/2;
ASSERT(m_k>=(1+target_dim+dp));
int32_t t;
#ifdef HAVE_PTHREAD
int32_t num_threads = parallel->get_num_threads();
Expand All @@ -112,12 +113,12 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
// init matrices to be used
float64_t* local_feature_matrix = SG_MALLOC(float64_t, m_k*dim*num_threads);
float64_t* s_values_vector = SG_MALLOC(float64_t, dim*num_threads);
int32_t tau_len = CMath::min((1+m_target_dim+dp), m_k);
int32_t tau_len = CMath::min((1+target_dim+dp), m_k);
float64_t* tau = SG_MALLOC(float64_t, tau_len*num_threads);
float64_t* mean_vector = SG_MALLOC(float64_t, dim*num_threads);
float64_t* q_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
float64_t* w_sum_vector = SG_MALLOC(float64_t, dp*num_threads);
float64_t* Yi_matrix = SG_MALLOC(float64_t, m_k*(1+m_target_dim+dp)*num_threads);
float64_t* Yi_matrix = SG_MALLOC(float64_t, m_k*(1+target_dim+dp)*num_threads);
// get feature matrix
SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();

Expand All @@ -135,13 +136,13 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
parameters[t].idx_stop = N;
parameters[t].m_k = m_k;
parameters[t].dim = dim;
parameters[t].m_target_dim = m_target_dim;
parameters[t].target_dim = target_dim;
parameters[t].N = N;
parameters[t].dp = dp;
parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
parameters[t].feature_matrix = feature_matrix.matrix;
parameters[t].local_feature_matrix = local_feature_matrix + (m_k*dim)*t;
parameters[t].Yi_matrix = Yi_matrix + (m_k*(1+m_target_dim+dp))*t;
parameters[t].Yi_matrix = Yi_matrix + (m_k*(1+target_dim+dp))*t;
parameters[t].mean_vector = mean_vector + dim*t;
parameters[t].s_values_vector = s_values_vector + dim*t;
parameters[t].tau = tau+tau_len*t;
Expand All @@ -164,7 +165,7 @@ SGMatrix<float64_t> CHessianLocallyLinearEmbedding::construct_weight_matrix(CSim
single_thread_param.idx_stop = N;
single_thread_param.m_k = m_k;
single_thread_param.dim = dim;
single_thread_param.m_target_dim = m_target_dim;
single_thread_param.target_dim = target_dim;
single_thread_param.N = N;
single_thread_param.dp = dp;
single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
Expand Down Expand Up @@ -203,7 +204,7 @@ void* CHessianLocallyLinearEmbedding::run_hessianestimation_thread(void* p)
int32_t dim = parameters->dim;
int32_t N = parameters->N;
int32_t dp = parameters->dp;
int32_t m_target_dim = parameters->m_target_dim;
int32_t target_dim = parameters->target_dim;
const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
const float64_t* feature_matrix = parameters->feature_matrix;
float64_t* local_feature_matrix = parameters->local_feature_matrix;
Expand Down Expand Up @@ -253,8 +254,8 @@ void* CHessianLocallyLinearEmbedding::run_hessianestimation_thread(void* p)
NULL,1, NULL,1, &info);
ASSERT(info==0);

// Yi(0:m_k,1:1+m_target_dim) = Vh(0:m_k, 0:target_dim)
for (j=0; j<m_target_dim; j++)
// Yi(0:m_k,1:1+target_dim) = Vh(0:m_k, 0:target_dim)
for (j=0; j<target_dim; j++)
{
for (k=0; k<m_k; k++)
Yi_matrix[(j+1)*m_k+k] = local_feature_matrix[k*dim+j];
Expand All @@ -263,25 +264,25 @@ void* CHessianLocallyLinearEmbedding::run_hessianestimation_thread(void* p)
int32_t ct = 0;

// construct 2nd order hessian approx
for (j=0; j<m_target_dim; j++)
for (j=0; j<target_dim; j++)
{
for (k=0; k<m_target_dim-j; k++)
for (k=0; k<target_dim-j; k++)
{
for (l=0; l<m_k; l++)
{
Yi_matrix[(ct+k+1+m_target_dim)*m_k+l] = Yi_matrix[(j+1)*m_k+l]*Yi_matrix[(j+k+1)*m_k+l];
Yi_matrix[(ct+k+1+target_dim)*m_k+l] = Yi_matrix[(j+1)*m_k+l]*Yi_matrix[(j+k+1)*m_k+l];
}
}
ct += ct + m_target_dim - j;
ct += ct + target_dim - j;
}

// perform QR factorization
wrap_dgeqrf(m_k,(1+m_target_dim+dp),Yi_matrix,m_k,tau,&info);
wrap_dgeqrf(m_k,(1+target_dim+dp),Yi_matrix,m_k,tau,&info);
ASSERT(info==0);
wrap_dorgqr(m_k,(1+m_target_dim+dp),tau_len,Yi_matrix,m_k,tau,&info);
wrap_dorgqr(m_k,(1+target_dim+dp),tau_len,Yi_matrix,m_k,tau,&info);
ASSERT(info==0);

float64_t* Pii = (Yi_matrix+m_k*(1+m_target_dim));
float64_t* Pii = (Yi_matrix+m_k*(1+target_dim));

for (j=0; j<dp; j++)
{
Expand Down
1 change: 1 addition & 0 deletions src/shogun/preprocessor/Isomap.cpp
Expand Up @@ -112,6 +112,7 @@ SGMatrix<float64_t> CIsomap::apply_to_feature_matrix(CFeatures* features)
{
CSimpleFeatures<float64_t>* simple_features =
(CSimpleFeatures<float64_t>*) features;
ASSERT(m_target_dim>0);
SG_REF(features);
ASSERT(m_distance);
m_distance->init(simple_features, simple_features);
Expand Down
19 changes: 11 additions & 8 deletions src/shogun/preprocessor/LaplacianEigenmaps.cpp
Expand Up @@ -59,7 +59,10 @@ SGMatrix<float64_t> CLaplacianEigenmaps::apply_to_feature_matrix(CFeatures* feat

// get dimensionality and number of vectors of data
int32_t dim = simple_features->get_num_features();
ASSERT(m_target_dim<=dim);
int32_t target_dim = calculate_effective_target_dim(dim);
if (target_dim==-1)
SG_ERROR("Trying to decrease dimensionality to negative value, not possible.\n");
ASSERT(target_dim<=dim);
int32_t N = simple_features->get_num_vectors();
ASSERT(m_k<N);

Expand Down Expand Up @@ -148,8 +151,8 @@ SGMatrix<float64_t> CLaplacianEigenmaps::apply_to_feature_matrix(CFeatures* feat
#ifdef HAVE_ARPACK
// using ARPACK DS{E,A}UPD
int eigenproblem_status = 0;
float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1);
arpack_dsaeupd_wrap(W_matrix,D_diag_vector,N,m_target_dim+1,"LA",3,false,-1e-9,0.0,
float64_t* eigenvalues_vector = SG_MALLOC(float64_t,target_dim+1);
arpack_dsaeupd_wrap(W_matrix,D_diag_vector,N,target_dim+1,"LA",3,false,-1e-9,0.0,
eigenvalues_vector,W_matrix,eigenproblem_status);
ASSERT(eigenproblem_status==0);
SG_FREE(eigenvalues_vector);
Expand All @@ -162,24 +165,24 @@ SGMatrix<float64_t> CLaplacianEigenmaps::apply_to_feature_matrix(CFeatures* feat
// fill rhs with diag (for safety reasons zeros will be replaced with 1e-3)
for (i=0; i<N; i++)
rhs[i*N+i] = D_diag_vector[i];
wrap_dsygvx(1,'V','U',N,W_matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix,&eigenproblem_status);
wrap_dsygvx(1,'V','U',N,W_matrix,N,rhs,N,1,target_dim+2,eigenvalues_vector,W_matrix,&eigenproblem_status);
if (eigenproblem_status)
SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status);
SG_FREE(rhs);
SG_FREE(eigenvalues_vector);
#endif /* HAVE_ARPACK */
SG_FREE(D_diag_vector);

SGMatrix<float64_t> new_features = SGMatrix<float64_t>(m_target_dim,N);
SGMatrix<float64_t> new_features = SGMatrix<float64_t>(target_dim,N);
// fill features according to used solver
for (i=0; i<m_target_dim; i++)
for (i=0; i<target_dim; i++)
{
for (j=0; j<N; j++)
{
#ifdef HAVE_ARPACK
new_features[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1];
new_features[j*target_dim+i] = W_matrix[j*(target_dim+1)+i+1];
#else
new_features[j*m_target_dim+i] = W_matrix[(i+1)*N+j];
new_features[j*target_dim+i] = W_matrix[(i+1)*N+j];
#endif
}
}
Expand Down
17 changes: 9 additions & 8 deletions src/shogun/preprocessor/LocalTangentSpaceAlignment.cpp
Expand Up @@ -37,7 +37,7 @@ struct LTSA_THREAD_PARAM
/// number of neighbors
int32_t m_k;
/// target dimension
int32_t m_target_dim;
int32_t target_dim;
/// current dimension
int32_t dim;
/// number of objects
Expand Down Expand Up @@ -88,6 +88,7 @@ SGMatrix<float64_t> CLocalTangentSpaceAlignment::construct_weight_matrix(CSimple
{
int32_t N = simple_features->get_num_vectors();
int32_t dim = simple_features->get_num_features();
int32_t target_dim = calculate_effective_target_dim(dim);
int32_t t;
#ifdef HAVE_PTHREAD
int32_t num_threads = parallel->get_num_threads();
Expand All @@ -104,7 +105,7 @@ SGMatrix<float64_t> CLocalTangentSpaceAlignment::construct_weight_matrix(CSimple
float64_t* mean_vector = SG_MALLOC(float64_t, dim*num_threads);
float64_t* q_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads);
float64_t* s_values_vector = SG_MALLOC(float64_t, dim*num_threads);
float64_t* G_matrix = SG_MALLOC(float64_t, m_k*(1+m_target_dim)*num_threads);
float64_t* G_matrix = SG_MALLOC(float64_t, m_k*(1+target_dim)*num_threads);

// get feature matrix
SGMatrix<float64_t> feature_matrix = simple_features->get_feature_matrix();
Expand All @@ -122,11 +123,11 @@ SGMatrix<float64_t> CLocalTangentSpaceAlignment::construct_weight_matrix(CSimple
parameters[t].idx_step = num_threads;
parameters[t].idx_stop = N;
parameters[t].m_k = m_k;
parameters[t].m_target_dim = m_target_dim;
parameters[t].target_dim = target_dim;
parameters[t].dim = dim;
parameters[t].N = N;
parameters[t].neighborhood_matrix = neighborhood_matrix.matrix;
parameters[t].G_matrix = G_matrix + (m_k*(1+m_target_dim))*t;
parameters[t].G_matrix = G_matrix + (m_k*(1+target_dim))*t;
parameters[t].mean_vector = mean_vector + dim*t;
parameters[t].local_feature_matrix = local_feature_matrix + (m_k*dim)*t;
parameters[t].feature_matrix = feature_matrix.matrix;
Expand All @@ -147,7 +148,7 @@ SGMatrix<float64_t> CLocalTangentSpaceAlignment::construct_weight_matrix(CSimple
single_thread_param.idx_step = 1;
single_thread_param.idx_stop = N;
single_thread_param.m_k = m_k;
single_thread_param.m_target_dim = m_target_dim;
single_thread_param.target_dim = target_dim;
single_thread_param.dim = dim;
single_thread_param.N = N;
single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix;
Expand Down Expand Up @@ -184,7 +185,7 @@ void* CLocalTangentSpaceAlignment::run_ltsa_thread(void* p)
int32_t idx_step = parameters->idx_step;
int32_t idx_stop = parameters->idx_stop;
int32_t m_k = parameters->m_k;
int32_t m_target_dim = parameters->m_target_dim;
int32_t target_dim = parameters->target_dim;
int32_t dim = parameters->dim;
int32_t N = parameters->N;
const int32_t* neighborhood_matrix = parameters->neighborhood_matrix;
Expand Down Expand Up @@ -232,15 +233,15 @@ void* CLocalTangentSpaceAlignment::run_ltsa_thread(void* p)
NULL,1, NULL,1, &info);
ASSERT(info==0);

for (j=0; j<m_target_dim; j++)
for (j=0; j<target_dim; j++)
{
for (k=0; k<m_k; k++)
G_matrix[(j+1)*m_k+k] = local_feature_matrix[k*dim+j];
}

// compute GG'
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,
m_k,m_k,1+m_target_dim,
m_k,m_k,1+target_dim,
1.0,G_matrix,m_k,
G_matrix,m_k,
0.0,q_matrix,m_k);
Expand Down

0 comments on commit 95df0a5

Please sign in to comment.