Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
Merge pull request #313 from frx/streaming_vw
Conversion of vw's size_t to typedef'd uint32_t
  • Loading branch information
Soeren Sonnenburg committed Aug 26, 2011
2 parents 2226301 + 54bc14c commit 653e96c
Show file tree
Hide file tree
Showing 21 changed files with 157 additions and 169 deletions.
22 changes: 11 additions & 11 deletions src/shogun/classifier/vw/VowpalWabbit.cpp
Expand Up @@ -115,7 +115,7 @@ bool CVowpalWabbit::train_machine(CFeatures* feat)
set_learner();

VwExample* example = NULL;
size_t current_pass = 0;
vw_size_t current_pass = 0;

const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n";

Expand Down Expand Up @@ -163,7 +163,7 @@ bool CVowpalWabbit::train_machine(CFeatures* feat)
if (env->l1_regularization > 0.)
{
uint32_t length = 1 << env->num_bits;
size_t stride = env->stride;
vw_size_t stride = env->stride;
float32_t gravity = env->l1_regularization * env->update_sum;
for (uint32_t i = 0; i < length; i++)
reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity);
Expand Down Expand Up @@ -243,12 +243,12 @@ void CVowpalWabbit::set_learner()

float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex)
{
size_t thread_num = 0;
vw_size_t thread_num = 0;

float32_t prediction = ex->ld->get_initial();

float32_t* weights = reg->weight_vectors[thread_num];
size_t thread_mask = env->thread_mask;
vw_size_t thread_mask = env->thread_mask;

prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum);

Expand All @@ -270,11 +270,11 @@ float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex)

float32_t CVowpalWabbit::inline_predict(VwExample* &ex)
{
size_t thread_num = 0;
vw_size_t thread_num = 0;
float32_t prediction = ex->ld->initial;

float32_t* weights = reg->weight_vectors[thread_num];
size_t thread_mask = env->thread_mask;
vw_size_t thread_mask = env->thread_mask;
prediction += features->dense_dot(weights, 0);

for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
Expand Down Expand Up @@ -372,16 +372,16 @@ void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight
float32_t CVowpalWabbit::compute_exact_norm(VwExample* &ex, float32_t& sum_abs_x)
{
// We must traverse the features in _precisely_ the same order as during training.
size_t thread_mask = env->thread_mask;
size_t thread_num = 0;
vw_size_t thread_mask = env->thread_mask;
vw_size_t thread_num = 0;

float32_t g = reg->loss->get_square_grad(ex->final_prediction, ex->ld->label) * ex->ld->weight;
if (g == 0) return 0.;

float32_t xGx = 0.;

float32_t* weights = reg->weight_vectors[thread_num];
for (size_t* i = ex->indices.begin; i != ex->indices.end; i++)
for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
{
for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
{
Expand All @@ -407,9 +407,9 @@ float32_t CVowpalWabbit::compute_exact_norm(VwExample* &ex, float32_t& sum_abs_x
}

float32_t CVowpalWabbit::compute_exact_norm_quad(float32_t* weights, VwFeature& page_feature, v_array<VwFeature> &offer_features,
size_t mask, float32_t g, float32_t& sum_abs_x)
vw_size_t mask, float32_t g, float32_t& sum_abs_x)
{
size_t halfhash = quadratic_constant * page_feature.weight_index;
vw_size_t halfhash = quadratic_constant * page_feature.weight_index;
float32_t xGx = 0.;
float32_t update2 = g * page_feature.x * page_feature.x;
for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++)
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/classifier/vw/VowpalWabbit.h
Expand Up @@ -166,7 +166,7 @@ class CVowpalWabbit: public COnlineLinearMachine
* @return norm
*/
float32_t compute_exact_norm_quad(float32_t* weights, VwFeature& page_feature, v_array<VwFeature> &offer_features,
size_t mask, float32_t g, float32_t& sum_abs_x);
vw_size_t mask, float32_t g, float32_t& sum_abs_x);

/**
* Get the environment
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/classifier/vw/VwEnvironment.cpp
Expand Up @@ -62,13 +62,13 @@ void CVwEnvironment::init()

ignore_some = false;

index_t len = ((index_t) 1) << num_bits;
vw_size_t len = ((vw_size_t) 1) << num_bits;
thread_mask = (stride * (len >> thread_bits)) - 1;
}

void CVwEnvironment::set_stride(index_t new_stride)
void CVwEnvironment::set_stride(vw_size_t new_stride)
{
stride = new_stride;
index_t len = ((index_t) 1) << num_bits;
vw_size_t len = ((vw_size_t) 1) << num_bits;
thread_mask = (stride * (len >> thread_bits)) - 1;
}
37 changes: 19 additions & 18 deletions src/shogun/classifier/vw/VwEnvironment.h
Expand Up @@ -20,6 +20,7 @@
#include <shogun/lib/DataType.h>
#include <shogun/lib/common.h>
#include <shogun/lib/v_array.h>
#include <shogun/classifier/vw/vw_constants.h>

namespace shogun
{
Expand Down Expand Up @@ -53,25 +54,25 @@ class CVwEnvironment: public CSGObject
* Set number of bits used for the weight vector
* @param bits number of bits
*/
inline void set_num_bits(index_t bits) { num_bits = bits; }
inline void set_num_bits(vw_size_t bits) { num_bits = bits; }

/**
* Return number of bits used for weight vector
* @return number of bits
*/
inline index_t get_num_bits() { return num_bits; }
inline vw_size_t get_num_bits() { return num_bits; }

/**
* Set mask used while accessing features
* @param m mask
*/
inline void set_mask(index_t m) { mask = m; }
inline void set_mask(vw_size_t m) { mask = m; }

/**
* Return the mask used
* @return mask
*/
inline index_t get_mask() { return mask; }
inline vw_size_t get_mask() { return mask; }

/**
* Return minimum label encountered
Expand All @@ -89,21 +90,21 @@ class CVwEnvironment: public CSGObject
* Return number of threads used for learning
* @return number of threads
*/
inline index_t num_threads() { return 1 << thread_bits; }
inline vw_size_t num_threads() { return 1 << thread_bits; }

/**
* Return length of weight vector
* @return length of weight vector
*/
inline index_t length() { return 1 << num_bits; }
inline vw_size_t length() { return 1 << num_bits; }

/**
* Set a new stride value.
* Also changes thread_mask.
*
* @param new_stride new value of stride
*/
void set_stride(index_t new_stride);
void set_stride(vw_size_t new_stride);

/**
* Return the name of the object
Expand All @@ -120,15 +121,15 @@ class CVwEnvironment: public CSGObject

public:
/// log_2 of the number of features
index_t num_bits;
vw_size_t num_bits;
/// log_2 of the number of threads
index_t thread_bits;
vw_size_t thread_bits;
/// Mask used for hashing
index_t mask;
vw_size_t mask;
/// Mask used by regressor for learning
index_t thread_mask;
vw_size_t thread_mask;
/// Number of elements in weight vector per feature
index_t stride;
vw_size_t stride;

/// Smallest label seen
float64_t min_label;
Expand Down Expand Up @@ -171,18 +172,18 @@ class CVwEnvironment: public CSGObject
/// Weighted labels
float64_t weighted_labels;
/// Total number of features
index_t total_features;
vw_size_t total_features;
/// Sum of losses
float64_t sum_loss;
/// Number of passes complete
index_t passes_complete;
vw_size_t passes_complete;
/// Number of passes
index_t num_passes;
vw_size_t num_passes;

/// ngrams to generate
size_t ngram;
vw_size_t ngram;
/// Skips in ngrams
size_t skips;
vw_size_t skips;

/// Whether some namespaces are ignored
bool ignore_some;
Expand All @@ -195,7 +196,7 @@ class CVwEnvironment: public CSGObject
/// VW version
const char* vw_version;
/// Length of version string
size_t v_length;
vw_size_t v_length;
};

}
Expand Down
24 changes: 12 additions & 12 deletions src/shogun/classifier/vw/VwParser.cpp
Expand Up @@ -93,7 +93,7 @@ int32_t CVwParser::read_features(CIOBuffer* buf, VwExample*& ae)
set_minmax(ae->ld->label);
}

size_t mask = env->mask;
vw_size_t mask = env->mask;

/* Now parse the individual channels, i.e., namespaces */
for (substring* i = feature_start; i != channels.end; i++)
Expand All @@ -106,12 +106,12 @@ int32_t CVwParser::read_features(CIOBuffer* buf, VwExample*& ae)

/* Set default scale value for channel */
float32_t channel_v = 1.;
size_t channel_hash;
vw_size_t channel_hash;

/* Index by which to refer to the namespace */
size_t index = 0;
vw_size_t index = 0;
bool new_index = false;
size_t feature_offset = 0;
vw_size_t feature_offset = 0;

if (channel.start[0] != ' ')
{
Expand Down Expand Up @@ -150,7 +150,7 @@ int32_t CVwParser::read_features(CIOBuffer* buf, VwExample*& ae)
v *= channel_v;

/* Hash feature */
size_t word_hash = (hasher(name[0], channel_hash)) & mask;
vw_size_t word_hash = (hasher(name[0], channel_hash)) & mask;
VwFeature f = {v,word_hash};
ae->sum_feat_sq[index] += v*v;
ae->atomics[index].push(f);
Expand Down Expand Up @@ -178,7 +178,7 @@ int32_t CVwParser::read_svmlight_features(CIOBuffer* buf, VwExample*& ae)
/* Mark begin and end of example in the buffer */
substring example_string = {line, line + num_chars};

size_t mask = env->mask;
vw_size_t mask = env->mask;
tokenize(' ', example_string, words);

ae->ld->label = float_of_substring(words[0]);
Expand All @@ -188,8 +188,8 @@ int32_t CVwParser::read_svmlight_features(CIOBuffer* buf, VwExample*& ae)

substring* feature_start = &words[1];

size_t index = (unsigned char)' '; // Any default namespace is ok
size_t channel_hash = 0;
vw_size_t index = (unsigned char)' '; // Any default namespace is ok
vw_size_t channel_hash = 0;
ae->sum_feat_sq[index] = 0;
ae->indices.push(index);
/* Now parse the individual features */
Expand All @@ -198,7 +198,7 @@ int32_t CVwParser::read_svmlight_features(CIOBuffer* buf, VwExample*& ae)
float32_t v;
feature_value(*i, name, v);

size_t word_hash = (hasher(name[0], channel_hash)) & mask;
vw_size_t word_hash = (hasher(name[0], channel_hash)) & mask;
VwFeature f = {v,word_hash};
ae->sum_feat_sq[index] += v*v;
ae->atomics[index].push(f);
Expand All @@ -220,7 +220,7 @@ int32_t CVwParser::read_dense_features(CIOBuffer* buf, VwExample*& ae)
// Mark begin and end of example in the buffer
substring example_string = {line, line + num_chars};

size_t mask = env->mask;
vw_size_t mask = env->mask;
tokenize(' ', example_string, words);

ae->ld->label = float_of_substring(words[0]);
Expand All @@ -230,7 +230,7 @@ int32_t CVwParser::read_dense_features(CIOBuffer* buf, VwExample*& ae)

substring* feature_start = &words[1];

size_t index = (unsigned char)' ';
vw_size_t index = (unsigned char)' ';

ae->sum_feat_sq[index] = 0;
ae->indices.push(index);
Expand All @@ -239,7 +239,7 @@ int32_t CVwParser::read_dense_features(CIOBuffer* buf, VwExample*& ae)
for (substring* i = feature_start; i != words.end; i++)
{
float32_t v = float_of_substring(*i);
size_t word_hash = j & mask;
vw_size_t word_hash = j & mask;
VwFeature f = {v,word_hash};
ae->sum_feat_sq[index] += v*v;
ae->atomics[index].push(f);
Expand Down

0 comments on commit 653e96c

Please sign in to comment.