Skip to content

Commit

Permalink
add BinnedDotFeatures
Browse files Browse the repository at this point in the history
  • Loading branch information
Soeren Sonnenburg committed Jan 27, 2012
1 parent 02a1cfd commit b0ddf6c
Show file tree
Hide file tree
Showing 4 changed files with 388 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/interfaces/modular/Features.i
Expand Up @@ -25,6 +25,7 @@
%rename(Features) CFeatures;
%rename(StreamingFeatures) CStreamingFeatures;
%rename(DotFeatures) CDotFeatures;
%rename(BinnedDotFeatures) CBinnedDotFeatures;
%rename(StreamingDotFeatures) CStreamingDotFeatures;
%rename(StreamingVwFeatures) CStreamingVwFeatures;
%rename(DummyFeatures) CDummyFeatures;
Expand All @@ -51,6 +52,7 @@
%include <shogun/lib/Compressor.h>
%include <shogun/features/Features.h>
%include <shogun/features/DotFeatures.h>
%include <shogun/features/BinnedDotFeatures.h>
%include <shogun/features/StreamingFeatures.h>
%include <shogun/features/StreamingDotFeatures.h>
%include <shogun/features/StreamingVwFeatures.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Features_includes.i
Expand Up @@ -7,6 +7,7 @@
#include <shogun/features/StreamingStringFeatures.h>
#include <shogun/features/StringFileFeatures.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/features/BinnedDotFeatures.h>
#include <shogun/features/StreamingDotFeatures.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/StreamingSparseFeatures.h>
Expand Down
226 changes: 226 additions & 0 deletions src/shogun/features/BinnedDotFeatures.cpp
@@ -0,0 +1,226 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Copyright (C) 2012 Soeren Sonnenburg
*/

#include <shogun/features/BinnedDotFeatures.h>
#include <shogun/base/Parameter.h>

using namespace shogun;

/** Construct an empty object.
 *
 * @param size forwarded unchanged to the CDotFeatures base constructor
 */
CBinnedDotFeatures::CBinnedDotFeatures(int32_t size) : CDotFeatures(size)
{
	// put the flags into their default state
	init();
}


/** Copy constructor.
 *
 * Copies the bin matrix handle, both flags and the wrapped simple features
 * (sharing them and taking an additional reference).
 *
 * NOTE(review): m_bins is copied via SGMatrix's copy constructor while the
 * destructor calls m_bins.destroy_matrix(); whether original and copy may
 * both destroy the same matrix depends on SGMatrix semantics - confirm.
 *
 * @param orig object to copy from
 */
CBinnedDotFeatures::CBinnedDotFeatures(const CBinnedDotFeatures & orig)
: CDotFeatures(orig), m_bins(orig.m_bins), m_fill(orig.m_fill),
	m_norm_one(orig.m_norm_one)
{
	init();

	// init() assigns defaults to the flags, so restore the copied values
	m_fill=orig.m_fill;
	m_norm_one=orig.m_norm_one;

	// share the underlying features; set_simple_features() takes the reference
	set_simple_features(orig.m_features);
}

/** Construct from simple features and a bin matrix.
 *
 * @param sf simple features to wrap (a reference is taken by
 *        set_simple_features())
 * @param bins bin matrix, stored via set_bins()
 */
CBinnedDotFeatures::CBinnedDotFeatures(CSimpleFeatures<float64_t>* sf, SGMatrix<float64_t> bins)
{
	// defaults first, then attach the data
	init();
	set_simple_features(sf);
	set_bins(bins);
}

/** Destructor: drops the reference on the wrapped features and destroys the
 * bin matrix.
 */
CBinnedDotFeatures::~CBinnedDotFeatures()
{
	// release our reference on the wrapped simple features
	SG_UNREF(m_features);

	// dispose of the bin matrix held by this object
	m_bins.destroy_matrix();
}

/** Dimensionality of the binned feature space.
 *
 * @return number of entries of the bin matrix (columns times rows)
 */
int32_t CBinnedDotFeatures::get_dim_feature_space() const
{
	return m_bins.num_cols*m_bins.num_rows;
}

/** Dot product between a vector of this object and a vector of another
 * CDotFeatures object. Not implemented.
 *
 * @param vec_idx1 index of the vector in this object (unused)
 * @param df other feature object (unused)
 * @param vec_idx2 index of the vector in df (unused)
 * @return 0 if SG_NOTIMPLEMENTED returns control
 */
float64_t CBinnedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
{
	SG_NOTIMPLEMENTED;

	return 0;
}

/** Dot product between the binned representation of one feature vector and
 * a dense vector.
 *
 * Column i of the bin matrix holds the bin boundaries for feature i. The
 * dense vector is indexed column by column (entry i*num_rows+j belongs to
 * boundary j of feature i). An entry contributes to the result when its
 * boundary is less than or equal to the feature value - the same test that
 * add_to_dense_vec() applies, so both functions use one feature map.
 *
 * The m_fill and m_norm_one flags are not consulted here.
 *
 * @param vec_idx1 index of the feature vector in the wrapped features
 * @param vec2 dense vector
 * @param vec2_len length of vec2; checked by assert_shape()
 * @return sum of the selected entries of vec2
 */
float64_t CBinnedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
{
	// assert_shape() tolerates missing features, but they are dereferenced below
	ASSERT(m_features);
	assert_shape(vec2_len);

	float64_t result=0;

	SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);

	// running position in vec2, advanced once per bin matrix entry
	int32_t idx=0;

	for (int32_t i=0; i<m_bins.num_cols; i++)
	{
		float64_t v=vec1.vector[i];
		float64_t* col=m_bins.get_column_vector(i);

		for (int32_t j=0; j<m_bins.num_rows; j++)
		{
			// was "v<=col[j]", which disagreed with add_to_dense_vec()
			if (col[j]<=v)
				result+=vec2[idx];

			idx++;
		}
	}
	m_features->free_feature_vector(vec1, vec_idx1);

	return result;
}

/** Add alpha times the binned representation of one feature vector to a
 * dense vector.
 *
 * Column i of the bin matrix holds the bin boundaries for feature i. Entry
 * i*num_rows+j of vec2 is incremented by alpha when boundary j of feature i
 * is less than or equal to the feature value.
 *
 * The m_fill and m_norm_one flags are not consulted here.
 *
 * @param alpha value added to every selected entry
 * @param vec_idx1 index of the feature vector in the wrapped features
 * @param vec2 dense vector that is modified in place
 * @param vec2_len length of vec2; checked by assert_shape()
 * @param abs_val not used by this implementation
 */
void CBinnedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
{
	// assert_shape() tolerates missing features, but they are dereferenced below
	ASSERT(m_features);
	assert_shape(vec2_len);
	SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1);

	// running position in vec2, advanced once per bin matrix entry
	int32_t idx=0;

	for (int32_t i=0; i<m_bins.num_cols; i++)
	{
		float64_t v=vec1.vector[i];
		float64_t* col=m_bins.get_column_vector(i);

		for (int32_t j=0; j<m_bins.num_rows; j++)
		{
			if (col[j]<=v)
				vec2[idx]+=alpha;

			idx++;
		}
	}
	m_features->free_feature_vector(vec1, vec_idx1);
}

/** Check that the bin matrix is compatible with a dense vector length and,
 * if features are set, with their number of features.
 *
 * Reports a mismatch through SG_ERROR.
 *
 * @param vec2_len length of the dense vector to check against
 */
void CBinnedDotFeatures::assert_shape(int32_t vec2_len)
{
	// the dense vector must have one entry per bin matrix entry
	if (vec2_len != m_bins.num_cols*m_bins.num_rows)
	{
		SG_ERROR("Bin matrix has shape (%d,%d) = %d entries, not matching vector"
				" length %d\n", m_bins.num_cols,m_bins.num_rows,
				m_bins.num_cols*m_bins.num_rows,vec2_len);
	}

	// one bin column per input feature; skipped while no features are set
	if (m_features!=NULL && m_features->get_num_features() != m_bins.num_cols)
	{
		SG_ERROR("Number of colums (%d) doesn't match number of features "
				"(%d)\n", m_bins.num_cols, m_features->get_num_features());
	}
}

/** Number of non-zero features reported for a vector.
 *
 * @param num vector index (not used by this implementation)
 * @return number of rows of the bin matrix when fill is enabled, 1 otherwise
 */
int32_t CBinnedDotFeatures::get_nnz_features_for_vector(int32_t num)
{
	if (!m_fill)
		return 1;

	return m_bins.num_rows;
}

/** Create a feature iterator for one vector. Not implemented.
 *
 * @param vector_index index of the vector to iterate over (unused)
 * @return NULL if SG_NOTIMPLEMENTED returns control
 */
void* CBinnedDotFeatures::get_feature_iterator(int32_t vector_index)
{
	SG_NOTIMPLEMENTED;

	return NULL;
}

/** Advance a feature iterator. Not implemented.
 *
 * @param index output feature index (left untouched)
 * @param value output feature value (left untouched)
 * @param iterator iterator handle (unused)
 * @return false if SG_NOTIMPLEMENTED returns control
 */
bool CBinnedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
{
	SG_NOTIMPLEMENTED;

	return false;
}

/** Release a feature iterator. Not implemented.
 *
 * @param iterator iterator handle (unused)
 */
void CBinnedDotFeatures::free_feature_iterator(void* iterator)
{
	SG_NOTIMPLEMENTED;
}

/** Query the fill flag.
 *
 * @return current value of m_fill
 */
bool CBinnedDotFeatures::get_fill()
{
	return m_fill;
}

/** Set the fill flag.
 *
 * @param fill new value stored in m_fill
 */
void CBinnedDotFeatures::set_fill(bool fill)
{
	m_fill=fill;
}

bool CBinnedDotFeatures::get_norm_one()
{
return m_fill;
}

/** Set the norm-one flag.
 *
 * @param norm_one new value stored in m_norm_one
 */
void CBinnedDotFeatures::set_norm_one(bool norm_one)
{
	m_norm_one=norm_one;
}

/** Replace the bin matrix.
 *
 * NOTE(review): the previously held matrix is simply overwritten by the
 * assignment; whether it needs to be released first depends on SGMatrix
 * semantics - confirm.
 *
 * @param bins matrix assigned to m_bins
 */
void CBinnedDotFeatures::set_bins(SGMatrix<float64_t> bins)
{
	m_bins=bins;
}

/** Access the bin matrix.
 *
 * @return m_bins, returned by value
 */
SGMatrix<float64_t> CBinnedDotFeatures::get_bins()
{
	return m_bins;
}

/** Attach the simple features to operate on and take a reference on them.
 *
 * NOTE(review): a previously attached feature object is not unreferenced
 * here before being replaced.
 *
 * @param features simple features to wrap
 */
void CBinnedDotFeatures::set_simple_features(CSimpleFeatures<float64_t>* features)
{
	// store first, then take the reference through the member
	m_features=features;
	SG_REF(m_features);
}

/** Access the wrapped simple features.
 *
 * A reference is taken via SG_REF before the pointer is handed out.
 *
 * @return pointer stored in m_features
 */
CSimpleFeatures<float64_t>* CBinnedDotFeatures::get_simple_features()
{
	CSimpleFeatures<float64_t>* result=m_features;
	SG_REF(result);

	return result;
}

void CBinnedDotFeatures::init()
{
m_fill=true;
m_norm_one=false;
}

/** Name of this class.
 *
 * @return the string "BinnedDotFeatures"
 */
const char* CBinnedDotFeatures::get_name() const
{
	return "BinnedDotFeatures";
}

/** Create a copy of this object through the copy constructor.
 *
 * @return newly allocated copy
 */
CFeatures* CBinnedDotFeatures::duplicate() const
{
	CBinnedDotFeatures* copy=new CBinnedDotFeatures(*this);

	return copy;
}

/** Feature type of this object.
 *
 * @return F_UNKNOWN
 */
EFeatureType CBinnedDotFeatures::get_feature_type()
{
	return F_UNKNOWN;
}


/** Feature class of this object.
 *
 * @return C_UNKNOWN
 */
EFeatureClass CBinnedDotFeatures::get_feature_class()
{
	return C_UNKNOWN;
}

/** Number of vectors, taken from the wrapped simple features.
 *
 * Asserts that features have been set.
 *
 * @return number of vectors reported by m_features
 */
int32_t CBinnedDotFeatures::get_num_vectors() const
{
	// the count is delegated, so features must be attached
	ASSERT(m_features);

	return m_features->get_num_vectors();
}

/** Size value reported for this feature object.
 *
 * @return sizeof(float64_t)
 */
int32_t CBinnedDotFeatures::get_size()
{
	return sizeof(float64_t);
}

0 comments on commit b0ddf6c

Please sign in to comment.