Skip to content

Commit

Permalink
drop vec_index from SGSparseVector
Browse files Browse the repository at this point in the history
- drop it from file formats
- typemaps
- parameters
  • Loading branch information
Soeren Sonnenburg committed May 16, 2012
1 parent a60c648 commit ed95843
Show file tree
Hide file tree
Showing 26 changed files with 84 additions and 170 deletions.
1 change: 0 additions & 1 deletion src/interfaces/octave_modular/swig_typemaps.i
Expand Up @@ -331,7 +331,6 @@ TYPEMAP_STRINGFEATURES_OUT(float64_t, Matrix)
for (int32_t i=0; i<num_vec; i++)
{
int32_t len=sm.cidx(i+1)-sm.cidx(i);
matrix[i].vec_index=i;
matrix[i].num_feat_entries=len;

if (len>0)
Expand Down
18 changes: 7 additions & 11 deletions src/interfaces/python_modular/swig_typemaps.i
Expand Up @@ -602,7 +602,6 @@ static bool spmatrix_from_numpy(SGSparseMatrix<type>& sg_matrix, PyObject* obj,

for (int32_t i=0; i<num_vec; i++)
{
sfm[i].vec_index = i;
sfm[i].num_feat_entries = 0;
sfm[i].features = NULL;
}
Expand Down Expand Up @@ -685,18 +684,15 @@ static bool spmatrix_to_numpy(PyObject* &obj, SGSparseMatrix<type> sg_matrix, in
for (int32_t i=0; i<num_vec; i++)
{
indptr[i+1]=indptr[i];
if (sfm[i].vec_index==i)
{
indptr[i+1]+=sfm[i].num_feat_entries;
indptr[i+1]+=sfm[i].num_feat_entries;

for (int32_t j=0; j<sfm[i].num_feat_entries; j++)
{
*i_ptr=sfm[i].features[j].feat_index;
*d_ptr=sfm[i].features[j].entry;
for (int32_t j=0; j<sfm[i].num_feat_entries; j++)
{
*i_ptr=sfm[i].features[j].feat_index;
*d_ptr=sfm[i].features[j].entry;

i_ptr++;
d_ptr++;
}
i_ptr++;
d_ptr++;
}
}

Expand Down
16 changes: 7 additions & 9 deletions src/shogun/base/Parameter.cpp
Expand Up @@ -2102,8 +2102,7 @@ TParameter::save_stype(CSerializableFile* file, const void* param,
len_real = 0;
}
if (!file->write_sparse_begin(
&m_datatype, m_name, prefix, spr_ptr->vec_index,
len_real)) return false;
&m_datatype, m_name, prefix, len_real)) return false;
for (index_t i=0; i<len_real; i++) {
SGSparseVectorEntry<char>* cur = (SGSparseVectorEntry<char>*)
((char*) spr_ptr->features + i *TSGDataType
Expand All @@ -2119,8 +2118,7 @@ TParameter::save_stype(CSerializableFile* file, const void* param,
cur->feat_index, i)) return false;
}
if (!file->write_sparse_end(
&m_datatype, m_name, prefix, spr_ptr->vec_index,
len_real)) return false;
&m_datatype, m_name, prefix, len_real)) return false;
break;
}

Expand Down Expand Up @@ -2161,8 +2159,7 @@ TParameter::load_stype(CSerializableFile* file, void* param,
break;
case ST_SPARSE:
if (!file->read_sparse_begin(
&m_datatype, m_name, prefix, &spr_ptr->vec_index,
&len_real)) return false;
&m_datatype, m_name, prefix, &len_real)) return false;
spr_ptr->features = len_real > 0? (SGSparseVectorEntry<char>*)
SG_MALLOC(char, len_real *TSGDataType::sizeof_sparseentry(
m_datatype.m_ptype)): NULL;
Expand All @@ -2180,9 +2177,10 @@ TParameter::load_stype(CSerializableFile* file, void* param,
&m_datatype, m_name, prefix, spr_ptr->features,
&cur->feat_index, i)) return false;
}
if (!file->read_sparse_end(
&m_datatype, m_name, prefix, &spr_ptr->vec_index,
len_real)) return false;

if (!file->read_sparse_end(&m_datatype, m_name, prefix, len_real))
return false;

spr_ptr->num_feat_entries = len_real;
break;
}
Expand Down
16 changes: 7 additions & 9 deletions src/shogun/features/SparseFeatures.cpp
Expand Up @@ -251,7 +251,6 @@ template<class ST> SGSparseVector<ST> CSparseFeatures<ST>::get_sparse_feature_ve
result.num_feat_entries=tmp_len ;
SG_DEBUG( "len: %d len2: %d\n", result.num_feat_entries, num_features);
}
result.vec_index=num;
return result ;
}
}
Expand Down Expand Up @@ -432,7 +431,6 @@ template<class ST> SGSparseVector<ST>* CSparseFeatures<ST>::get_transposed(int32
{
sfm[v].features= SG_MALLOC(SGSparseVectorEntry<ST>, hist[v]);
sfm[v].num_feat_entries=hist[v];
sfm[v].vec_index=v;
}

// fill future feature vectors with content
Expand Down Expand Up @@ -480,12 +478,12 @@ template<class ST> SGMatrix<ST> CSparseFeatures<ST>::get_full_feature_matrix()

for (int32_t v=0; v<full.num_cols; v++)
{
SGSparseVector<ST> current=
sparse_feature_matrix[m_subset_stack->subset_idx_conversion(v)];
int32_t idx=m_subset_stack->subset_idx_conversion(v);
SGSparseVector<ST> current=sparse_feature_matrix[idx];

for (int32_t f=0; f<current.num_feat_entries; f++)
{
int64_t offs=(current.vec_index*num_features)
// Widen idx before multiplying: a product evaluated in the operands'
// narrower integer type could overflow before it is stored in the
// 64-bit offset.
// NOTE(review): the offset uses the subset-converted index idx rather
// than the loop counter v -- confirm full.matrix is sized for that when
// a subset is active.
int64_t offs=(static_cast<int64_t>(idx)*num_features)
+current.features[f].feat_index;

full.matrix[offs]=current.features[f].entry;
Expand Down Expand Up @@ -534,7 +532,6 @@ template<class ST> bool CSparseFeatures<ST>::set_full_feature_matrix(SGMatrix<ST
{
for (int32_t i=0; i< num_vec; i++)
{
sparse_feature_matrix[i].vec_index=i;
sparse_feature_matrix[i].num_feat_entries=0;
sparse_feature_matrix[i].features= NULL;

Expand Down Expand Up @@ -888,7 +885,6 @@ template<class ST> CLabels* CSparseFeatures<ST>::load_svmlight_file(char* fname,
}
}

sparse_feature_matrix[lines].vec_index=lines;
sparse_feature_matrix[lines].num_feat_entries=dims;
sparse_feature_matrix[lines].features=feat;

Expand Down Expand Up @@ -1053,6 +1049,7 @@ template<class ST> void* CSparseFeatures<ST>::get_feature_iterator(int32_t vecto
sparse_feature_iterator* it=SG_MALLOC(sparse_feature_iterator, 1);
it->sv=get_sparse_feature_vector(vector_index);
it->index=0;
it->vector_index=vector_index;

return it;
}
Expand All @@ -1077,7 +1074,7 @@ template<class ST> void CSparseFeatures<ST>::free_feature_iterator(void* iterato
return;

sparse_feature_iterator* it=(sparse_feature_iterator*) iterator;
free_sparse_feature_vector(it->sv, it->sv.vec_index);
free_sparse_feature_vector(it->sv, it->vector_index);
SG_FREE(it);
}

Expand All @@ -1090,11 +1087,12 @@ template<class ST> CFeatures* CSparseFeatures<ST>::copy_subset(SGVector<index_t>
{
/* index to copy */
index_t index=indices.vector[i];
index_t real_index=m_subset_stack->subset_idx_conversion(index);

/* copy sparse vector */
SGSparseVector<ST> current=get_sparse_feature_vector(index);
matrix_copy.sparse_matrix[i]=SGSparseVector<ST>(
current.num_feat_entries, current.vec_index);
current.num_feat_entries, real_index);

/* copy entries */
memcpy(matrix_copy.sparse_matrix[i].features, current.features,
Expand Down
11 changes: 7 additions & 4 deletions src/shogun/features/SparseFeatures.h
Expand Up @@ -37,8 +37,8 @@ template <class ST> class SGSparseMatrix;

/** @brief Template class SparseFeatures implements sparse matrices.
*
* Features are an array of SGSparseVector, sorted w.r.t. vec_index (increasing) and
* withing same vec_index w.r.t. feat_index (increasing);
* Features are stored as an array of SGSparseVector. Within each vector, the
* entries are sorted by increasing feat_index.
*
* Sparse feature vectors can be accessed via get_sparse_feature_vector() and
* should be freed (this operation is a NOP in most cases) via
Expand Down Expand Up @@ -474,14 +474,17 @@ template <class ST> class CSparseFeatures : public CDotFeatures
/** feature vector */
SGSparseVector<ST> sv;

/** index */
/** vector index */
int32_t vector_index;

/** feature index */
int32_t index;

/** print details of iterator (for debugging purposes)*/
void print_info()
{
SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
sv.features, sv.vec_index, sv.num_feat_entries, index);
sv.features, vector_index, sv.num_feat_entries, index);
}
};
#endif
Expand Down
1 change: 0 additions & 1 deletion src/shogun/features/StreamingSparseFeatures.cpp
Expand Up @@ -414,7 +414,6 @@ SGSparseVector<T> CStreamingSparseFeatures<T>::get_vector()
{
current_sgvector.features=current_vector;
current_sgvector.num_feat_entries=current_length;
current_sgvector.vec_index=current_vec_index;

return current_sgvector;
}
Expand Down
1 change: 0 additions & 1 deletion src/shogun/io/AsciiFile.cpp
Expand Up @@ -421,7 +421,6 @@ void CAsciiFile::fname(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int3
} \
} \
\
matrix[lines].vec_index=lines; \
matrix[lines].num_feat_entries=dims; \
matrix[lines].features=feat; \
\
Expand Down
8 changes: 3 additions & 5 deletions src/shogun/io/SerializableAsciiFile.cpp
Expand Up @@ -235,19 +235,17 @@ CSerializableAsciiFile::write_stringentry_end_wrapped(

bool
CSerializableAsciiFile::write_sparse_begin_wrapped(
const TSGDataType* type, index_t vec_index,
index_t length)
const TSGDataType* type, index_t length)
{
if (fprintf(m_fstream, "%"PRIi32" %"PRIi32" %c", vec_index, length,
if (fprintf(m_fstream, "%"PRIi32" %c", length,
CHAR_SPARSE_BEGIN) <= 0) return false;

return true;
}

bool
CSerializableAsciiFile::write_sparse_end_wrapped(
const TSGDataType* type, index_t vec_index,
index_t length)
const TSGDataType* type, index_t length)
{
if (fprintf(m_fstream, "%c", CHAR_SPARSE_END) <= 0) return false;

Expand Down
6 changes: 2 additions & 4 deletions src/shogun/io/SerializableAsciiFile.h
Expand Up @@ -71,11 +71,9 @@ class CSerializableAsciiFile :public CSerializableFile
const TSGDataType* type, index_t y);

virtual bool write_sparse_begin_wrapped(
const TSGDataType* type, index_t vec_index,
index_t length);
const TSGDataType* type, index_t length);
virtual bool write_sparse_end_wrapped(
const TSGDataType* type, index_t vec_index,
index_t length);
const TSGDataType* type, index_t length);

virtual bool write_sparseentry_begin_wrapped(
const TSGDataType* type, const SGSparseVectorEntry<char>* first_entry,
Expand Down
9 changes: 3 additions & 6 deletions src/shogun/io/SerializableAsciiReader00.cpp
Expand Up @@ -170,11 +170,9 @@ SerializableAsciiReader00::read_stringentry_end_wrapped(

bool
SerializableAsciiReader00::read_sparse_begin_wrapped(
const TSGDataType* type, index_t* vec_index,
index_t* length)
const TSGDataType* type, index_t* length)
{
if (fscanf(m_file->m_fstream, "%"PRIi32" %"PRIi32, vec_index,
length) != 2) return false;
// Only the length is read now (vec_index was dropped from the format),
// so a successful fscanf reports exactly one converted item.
if (fscanf(m_file->m_fstream, "%"PRIi32, length) != 1) return false;
if (fgetc(m_file->m_fstream) != ' ') return false;
if (fgetc(m_file->m_fstream) != CHAR_SPARSE_BEGIN) return false;

Expand All @@ -183,8 +181,7 @@ SerializableAsciiReader00::read_sparse_begin_wrapped(

bool
SerializableAsciiReader00::read_sparse_end_wrapped(
const TSGDataType* type, index_t* vec_index,
index_t length)
const TSGDataType* type, index_t length)
{
if (fgetc(m_file->m_fstream) != CHAR_SPARSE_END) return false;

Expand Down
6 changes: 2 additions & 4 deletions src/shogun/io/SerializableAsciiReader00.h
Expand Up @@ -56,11 +56,9 @@ class SerializableAsciiReader00
const TSGDataType* type, index_t y);

virtual bool read_sparse_begin_wrapped(
const TSGDataType* type, index_t* vec_index,
index_t* length);
const TSGDataType* type, index_t* length);
virtual bool read_sparse_end_wrapped(
const TSGDataType* type, index_t* vec_index,
index_t length);
const TSGDataType* type, index_t length);

virtual bool read_sparseentry_begin_wrapped(
const TSGDataType* type, SGSparseVectorEntry<char>* first_entry,
Expand Down
16 changes: 8 additions & 8 deletions src/shogun/io/SerializableFile.cpp
Expand Up @@ -309,11 +309,11 @@ CSerializableFile::read_stringentry_end(
bool
CSerializableFile::write_sparse_begin(
const TSGDataType* type, const char* name, const char* prefix,
index_t vec_index, index_t length)
index_t length)
{
if (!is_task_warn('w', name, prefix)) return false;

if (!write_sparse_begin_wrapped(type, vec_index, length))
if (!write_sparse_begin_wrapped(type, length))
return false_warn(prefix, name);

return true;
Expand All @@ -322,11 +322,11 @@ CSerializableFile::write_sparse_begin(
bool
CSerializableFile::read_sparse_begin(
const TSGDataType* type, const char* name, const char* prefix,
index_t* vec_index, index_t* length)
index_t* length)
{
if (!is_task_warn('r', name, prefix)) return false;

if (!m_reader->read_sparse_begin_wrapped(type, vec_index, length))
if (!m_reader->read_sparse_begin_wrapped(type, length))
return false_warn(prefix, name);

return true;
Expand All @@ -335,11 +335,11 @@ CSerializableFile::read_sparse_begin(
bool
CSerializableFile::write_sparse_end(
const TSGDataType* type, const char* name, const char* prefix,
index_t vec_index, index_t length)
index_t length)
{
if (!is_task_warn('w', name, prefix)) return false;

if (!write_sparse_end_wrapped(type, vec_index, length))
if (!write_sparse_end_wrapped(type, length))
return false_warn(prefix, name);

return true;
Expand All @@ -348,11 +348,11 @@ CSerializableFile::write_sparse_end(
bool
CSerializableFile::read_sparse_end(
const TSGDataType* type, const char* name, const char* prefix,
index_t* vec_index, index_t length)
index_t length)
{
if (!is_task_warn('r', name, prefix)) return false;

if (!m_reader->read_sparse_end_wrapped(type, vec_index, length))
if (!m_reader->read_sparse_end_wrapped(type, length))
return false_warn(prefix, name);

return true;
Expand Down
20 changes: 8 additions & 12 deletions src/shogun/io/SerializableFile.h
Expand Up @@ -53,11 +53,9 @@ class CSerializableFile :public CSGObject
const TSGDataType* type, index_t y) = 0;

virtual bool read_sparse_begin_wrapped(
const TSGDataType* type, index_t* vec_index,
index_t* length) = 0;
const TSGDataType* type, index_t* length) = 0;
virtual bool read_sparse_end_wrapped(
const TSGDataType* type, index_t* vec_index,
index_t length) = 0;
const TSGDataType* type, index_t length) = 0;

virtual bool read_sparseentry_begin_wrapped(
const TSGDataType* type, SGSparseVectorEntry<char>* first_entry,
Expand Down Expand Up @@ -141,11 +139,9 @@ class CSerializableFile :public CSGObject
const TSGDataType* type, index_t y) = 0;

virtual bool write_sparse_begin_wrapped(
const TSGDataType* type, index_t vec_index,
index_t length) = 0;
const TSGDataType* type, index_t length) = 0;
virtual bool write_sparse_end_wrapped(
const TSGDataType* type, index_t vec_index,
index_t length) = 0;
const TSGDataType* type, index_t length) = 0;

virtual bool write_sparseentry_begin_wrapped(
const TSGDataType* type, const SGSparseVectorEntry<char>* first_entry,
Expand Down Expand Up @@ -259,17 +255,17 @@ class CSerializableFile :public CSGObject

virtual bool write_sparse_begin(
const TSGDataType* type, const char* name, const char* prefix,
index_t vec_index, index_t length);
index_t length);
virtual bool read_sparse_begin(
const TSGDataType* type, const char* name, const char* prefix,
index_t* vec_index, index_t* length);
index_t* length);

virtual bool write_sparse_end(
const TSGDataType* type, const char* name, const char* prefix,
index_t vec_index, index_t length);
index_t length);
virtual bool read_sparse_end(
const TSGDataType* type, const char* name, const char* prefix,
index_t* vec_index, index_t length);
index_t length);

virtual bool write_sparseentry_begin(
const TSGDataType* type, const char* name, const char* prefix,
Expand Down

0 comments on commit ed95843

Please sign in to comment.