Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
WIP: use SGMatrix inside of SimpleFeatures
  • Loading branch information
Soeren Sonnenburg committed May 8, 2012
1 parent 81a30d8 commit 56661f3
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 118 deletions.
119 changes: 38 additions & 81 deletions src/shogun/features/DenseFeatures.cpp
Expand Up @@ -16,9 +16,7 @@ template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatur
template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
CDotFeatures(orig)
{
copy_feature_matrix(SGMatrix<ST>(orig.feature_matrix,
orig.num_features,
orig.num_vectors));
set_feature_matrix(orig.feature_matrix);
initialize_cache();
init();

Expand All @@ -32,18 +30,20 @@ template<class ST> CDenseFeatures<ST>::CDenseFeatures(SGMatrix<ST> matrix) :
init();
set_feature_matrix(matrix);
}

template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
CDotFeatures()
{
init();
set_feature_matrix(src, num_feat, num_vec);
set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
}
/** Constructor that fills the features from a file.
 *
 * Passes the loader on to the CDotFeatures base constructor, brings the
 * members into their default state via init() and then reads the feature
 * matrix through load().
 *
 * @param loader file object the feature matrix is read from
 */
template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
CDotFeatures(loader)
{
/* init() has to run before load(): load() ends up replacing the feature
 * matrix, so the members must already hold their default values */
init();
load(loader);
}

template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
{
return new CDenseFeatures<ST>(*this);
Expand All @@ -64,10 +64,7 @@ template<class ST> void CDenseFeatures<ST>::free_features()
template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
{
m_subset_stack->remove_all_subsets();
SG_FREE(feature_matrix);
feature_matrix = NULL;
feature_matrix_num_features = num_features;
feature_matrix_num_vectors = num_vectors;
feature_matrix.unref();
num_vectors = 0;
num_features = 0;
}
Expand Down Expand Up @@ -142,14 +139,14 @@ template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vect
"requested %d)\n", get_num_vectors(), num);
}

if (!feature_matrix)
if (!feature_matrix.matrix)
SG_ERROR("Requires a in-memory feature matrix\n");

if (vector.vlen != num_features)
SG_ERROR(
"Vector not of length %d (has %d)\n", num_features, vector.vlen);

memcpy(&feature_matrix[real_num * int64_t(num_features)], vector.vector,
memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
int64_t(num_features) * sizeof(ST));
}

Expand Down Expand Up @@ -246,62 +243,40 @@ template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t
}
}

template<class ST> void CDenseFeatures<ST>::get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec)
template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
{
ASSERT(feature_matrix);
if (!m_subset_stack->has_subsets())
return feature_matrix;

int64_t num = int64_t(num_features) * get_num_vectors();
*num_feat = num_features;
*num_vec = get_num_vectors();
*dst = SG_MALLOC(ST, num);
SGMatrix<ST> submatrix(num_features, get_num_vectors());

/* copying depends on whether a subset is used */
if (m_subset_stack->has_subsets())
/* copy a subset vector wise */
for (int32_t i=0; i<submatrix.num_cols; ++i)
{
/* copy vector wise */
for (int32_t i = 0; i < *num_vec; ++i)
{
int32_t real_i = m_subset_stack->subset_idx_conversion(i);
memcpy(*dst, &feature_matrix[real_i * int64_t(num_features)],
num_features * sizeof(ST));
}
}
else
{
/* copy complete matrix */
memcpy(*dst, feature_matrix, num * sizeof(ST));
int32_t real_i = m_subset_stack->subset_idx_conversion(i);
memcpy(&submatrix.matrix[i*int64_t(num_features)],
&feature_matrix.matrix[real_i * int64_t(num_features)],
num_features * sizeof(ST));
}
}

/** Getter for the feature matrix.
 *
 * Wraps the stored feature_matrix together with num_features and
 * num_vectors into an SGMatrix. The subset stack is not consulted here.
 * NOTE(review): presumably no data is copied and the returned SGMatrix
 * aliases the internal buffer - confirm against the SGMatrix constructor.
 *
 * @return matrix feature matrix
 */
template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
{
return SGMatrix<ST>(feature_matrix, num_features, num_vectors);
}

template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix()
{
SGMatrix<ST> st_feature_matrix(feature_matrix, num_features, num_vectors);
SGMatrix<ST> st_feature_matrix=feature_matrix;
m_subset_stack->remove_all_subsets();
SG_UNREF(feature_cache);
clean_preprocessors();

feature_matrix = NULL;
feature_matrix_num_vectors = 0;
feature_matrix_num_features = 0;
num_features = 0;
num_vectors = 0;
free_feature_matrix();
return st_feature_matrix;
}

template<class ST> void CDenseFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix)
{
m_subset_stack->remove_all_subsets();
free_feature_matrix();
feature_matrix = matrix.matrix;
feature_matrix = matrix;
num_features = matrix.num_rows;
num_vectors = matrix.num_cols;
feature_matrix_num_vectors = num_vectors;
feature_matrix_num_features = num_features;
}

template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
Expand Down Expand Up @@ -342,21 +317,6 @@ template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int
return fm;
}

/** Set the feature matrix from a raw pointer.
 *
 * Raises an error via SG_ERROR if a subset is currently set. Otherwise the
 * previously held matrix is released with free_feature_matrix(), the given
 * pointer and dimensions are stored, and the cache is (re)initialized.
 * NOTE(review): the pointer is stored as-is, not copied - presumably the
 * object takes over ownership of fm; confirm with callers.
 *
 * @param fm feature matrix to set
 * @param num_feat number of features in matrix
 * @param num_vec number of vectors in matrix
 */
template<class ST> void CDenseFeatures<ST>::set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec)
{
if (m_subset_stack->has_subsets())
SG_ERROR("A subset is set, cannot call set_feature_matrix\n");

/* release the old matrix before the members are overwritten below */
free_feature_matrix();
feature_matrix = fm;
feature_matrix_num_features = num_feat;
feature_matrix_num_vectors = num_vec;

/* keep the public counts in sync with the stored matrix dimensions */
num_features = num_feat;
num_vectors = num_vec;
initialize_cache();
}

template<class ST> void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src)
{
if (m_subset_stack->has_subsets())
Expand Down Expand Up @@ -620,19 +580,15 @@ template<class ST> void CDenseFeatures<ST>::init()
num_vectors = 0;
num_features = 0;

feature_matrix = NULL;
feature_matrix_num_vectors = 0;
feature_matrix_num_features = 0;

feature_matrix = SGMatrix();
feature_cache = NULL;

set_generic<ST>();

/* not store number of vectors in subset */
m_parameters->add(&num_vectors, "num_vectors",
"Number of vectors.");
m_parameters->add(&num_features, "num_features", "Number of features.");
m_parameters->add_matrix(&feature_matrix, &feature_matrix_num_features,
&feature_matrix_num_vectors, "feature_matrix",
SG_ADD(&num_vectors, "num_vectors", "Number of vectors.");
SG_ADD(&num_features, "num_features", "Number of features.");
SG_ADD(&feature_matrix, "feature_matrix",
"Matrix of feature vectors / 1 vector per column.");
}

Expand Down Expand Up @@ -929,36 +885,36 @@ template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
vec1 = get_feature_vector(i, v1len, v1free);
vec2 = rhs->get_feature_vector(i, v2len, v2free);

if ( v1len != v2len )
if (v1len!=v2len)
stop = true;

for (int32_t j = 0; j < v1len; j++)
for (int32_t j=0; j<v1len; j++)
{
if ( vec1[j] != vec2[j] )
if (vec1[j]!=vec2[j])
stop = true;
}

free_feature_vector(vec1, i, v1free);
free_feature_vector(vec2, i, v2free);

if ( stop )
if (stop)
return false;
}

return true;
}

#define LOAD(f_load, sg_type) \
template<> void CDenseFeatures<sg_type>::load(CFile* loader) \
template<> void CDenseFeatures<sg_type>::load(CFile* loader) \
{ \
SG_SET_LOCALE_C; \
SG_SET_LOCALE_C; \
ASSERT(loader); \
sg_type* matrix; \
int32_t num_feat; \
int32_t num_vec; \
loader->f_load(matrix, num_feat, num_vec); \
set_feature_matrix(matrix, num_feat, num_vec); \
SG_RESET_LOCALE; \
set_feature_matrix(SGMatrix<sg_type>(matrix, num_feat, num_vec)); \
SG_RESET_LOCALE; \
}

LOAD(get_matrix, bool)
Expand All @@ -977,12 +933,13 @@ LOAD(get_longreal_matrix, floatmax_t)
#undef LOAD

#define SAVE(f_write, sg_type) \
template<> void CDenseFeatures<sg_type>::save(CFile* writer) \
template<> void CDenseFeatures<sg_type>::save(CFile* writer) \
{ \
SG_SET_LOCALE_C; \
SG_SET_LOCALE_C; \
ASSERT(writer); \
writer->f_write(feature_matrix, num_features, num_vectors); \
SG_RESET_LOCALE; \
writer->f_write(feature_matrix.matrix, feature_matrix.num_rows, \
feature_matrix.num_cols); \
SG_RESET_LOCALE; \
}

SAVE(set_matrix, bool)
Expand Down
49 changes: 12 additions & 37 deletions src/shogun/features/DenseFeatures.h
Expand Up @@ -194,20 +194,10 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
void feature_subset(int32_t* idx, int32_t idx_len);

/** get a copy of the feature matrix
* num_feat,num_vectors are returned by reference
*
* possible with subset
*
* @param dst destination to store matrix in
* @param num_feat number of features (rows of matrix)
* @param num_vec number of vectors (columns of matrix)
*/
void get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec);

/** Getter for feature matrix
/** Getter for the feature matrix
*
* subset is ignored
* in-place without subset
* a copy with subset
*
* @return matrix feature matrix
*/
Expand All @@ -225,7 +215,12 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*
* any subset is removed
*
* num_cols is number of feature vectors
* num_rows is number of dims of vectors
* see below for definition of feature_matrix
*
* @param matrix feature matrix to set
*
*/
void set_feature_matrix(SGMatrix<ST> matrix);

Expand Down Expand Up @@ -261,20 +256,6 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
ST* get_transposed(int32_t &num_feat, int32_t &num_vec);

/** set feature matrix
* necessary to set feature_matrix, num_features,
* num_vectors, where num_features is the column offset,
* and columns are linear in memory
* see below for definition of feature_matrix
*
* not possible with subset
*
* @param fm feature matrix to set
* @param num_feat number of features in matrix
* @param num_vec number of vectors in matrix
*/
virtual void set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec);

/** copy feature matrix
* store copy of feature_matrix, where num_features is the
* column offset, and columns are linear in memory
Expand Down Expand Up @@ -430,7 +411,7 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
virtual void save(CFile* saver);

#ifndef DOXYGEN_SHOULD_SKIP_THIS
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** iterator for dense features */
struct dense_feature_iterator
{
Expand All @@ -446,7 +427,7 @@ template<class ST> class CDenseFeatures: public CDotFeatures
/** feature index */
int32_t index;
};
#endif
#endif

/** iterate over the non-zero features
*
Expand Down Expand Up @@ -531,15 +512,9 @@ template<class ST> class CDenseFeatures: public CDotFeatures

/** Feature matrix and its associated number of
* vectors and features. Note that num_vectors / num_features
* above have the same sizes if feature_matrix != NULL
* above match matrix sizes if feature_matrix.matrix != NULL
* */
ST* feature_matrix;

/** number of vectors in feature matrix */
int32_t feature_matrix_num_vectors;

/** number of features in feature matrix */
int32_t feature_matrix_num_features;
SGMatrix<ST> feature_matrix;

/** feature cache */
CCache<ST>* feature_cache;
Expand Down

0 comments on commit 56661f3

Please sign in to comment.