Skip to content

Commit

Permalink
Merge pull request #480 from karlnapf/master
Browse files Browse the repository at this point in the history
Implement the new SubsetStack system
  • Loading branch information
Soeren Sonnenburg committed Apr 22, 2012
2 parents e4ab8a1 + 1144ae8 commit 4328d97
Show file tree
Hide file tree
Showing 33 changed files with 677 additions and 459 deletions.
1 change: 1 addition & 0 deletions examples/undocumented/libshogun/Makefile
Expand Up @@ -40,6 +40,7 @@ TARGETS = basic_minimal \
modelselection_grid_search_linear features_subset_labels \
modelselection_grid_search_kernel \
modelselection_grid_search_string_kernel \
features_subset_stack \
features_subset_simple_features \
features_subset_sparse_features \
features_copy_subset_simple_features \
Expand Down
Expand Up @@ -104,7 +104,7 @@ void test_cross_validation()
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);

cross->set_num_runs(100);
cross->set_num_runs(10);
cross->set_conf_int_alpha(0.05);

/* actual evaluation */
Expand All @@ -122,6 +122,8 @@ int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);

sg_io->set_loglevel(MSG_DEBUG);

test_cross_validation();

exit_shogun();
Expand Down
Expand Up @@ -5,7 +5,6 @@
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/

#include <shogun/base/init.h>
Expand All @@ -29,8 +28,8 @@ void print_message(FILE* target, const char* str)
void test_cross_validation()
{
/* data matrix dimensions */
index_t num_vectors=500;
index_t num_features=50;
index_t num_vectors=50;
index_t num_features=5;

/* data means -1, 1 in all components, std deviation of sigma */
SGVector<float64_t> mean_1(num_features);
Expand Down Expand Up @@ -93,19 +92,19 @@ void test_cross_validation()
ASSERT(eval_result<2);

/* splitting strategy */
index_t n_folds=5;
index_t n_folds=3;
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, n_folds);

/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);

cross->set_num_runs(10);
cross->set_num_runs(5);
cross->set_conf_int_alpha(0.05);

/* no locking */
index_t repetitions=3;
index_t repetitions=5;
SG_SPRINT("unlocked x-val\n");
kernel->init(features, features);
cross->set_autolock(false);
Expand Down
Expand Up @@ -4,7 +4,7 @@
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

Expand Down Expand Up @@ -36,7 +36,7 @@ int main(int argc, char **argv)
CMath::display_vector(feature_subset.vector, feature_subset.vlen,
"feature subset");

f->add_subset(new CSubset(feature_subset));
f->add_subset(feature_subset);
SG_SPRINT("feature vectors after setting subset on original data:\n");
for (index_t i=0; i<f->get_num_vectors(); ++i)
{
Expand Down Expand Up @@ -70,7 +70,8 @@ int main(int argc, char **argv)

SG_UNREF(f);
SG_UNREF(subset_copy);
SG_FREE(feature_copy_subset.vector);
feature_copy_subset.destroy_vector();
feature_subset.destroy_vector();

SG_SPRINT("\nEND\n");
exit_shogun();
Expand Down
Expand Up @@ -4,7 +4,7 @@
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

Expand Down Expand Up @@ -64,7 +64,7 @@ int main(int argc, char **argv)
"feature subset");

/* set subset and print data */
f->add_subset(new CSubset(feature_subset));
f->add_subset(feature_subset);
SG_SPRINT("feature vectors after setting subset on original data:\n");
for (index_t i=0; i<f->get_num_vectors(); ++i)
{
Expand Down Expand Up @@ -125,6 +125,7 @@ int main(int argc, char **argv)
SG_UNREF(f);
SG_UNREF(subset_copy);
feature_copy_subset.destroy_vector();
feature_subset.destroy_vector();

exit_shogun();

Expand Down
Expand Up @@ -4,7 +4,7 @@
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

Expand Down Expand Up @@ -63,7 +63,7 @@ int main(int argc, char **argv)
CMath::display_vector(feature_subset.vector, feature_subset.vlen,
"feature subset");

f->add_subset(new CSubset(feature_subset));
f->add_subset(feature_subset);
SG_SPRINT("feature vectors after setting subset on original data:\n");
for (index_t i=0; i<f->get_num_vectors(); ++i)
{
Expand Down Expand Up @@ -117,7 +117,8 @@ int main(int argc, char **argv)

SG_UNREF(f);
SG_UNREF(subset_copy);
SG_FREE(feature_copy_subset.vector);
feature_copy_subset.destroy_vector();
feature_subset.destroy_vector();

exit_shogun();

Expand Down
10 changes: 6 additions & 4 deletions examples/undocumented/libshogun/features_subset_labels.cpp
Expand Up @@ -4,7 +4,7 @@
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

Expand Down Expand Up @@ -49,7 +49,7 @@ int main(int argc, char **argv)
SG_SPRINT("\n\n-------------------\n"
"applying subset to features\n"
"-------------------\n");
labels->set_subset(new CSubset(subset_idx));
labels->add_subset(subset_idx);

/* do some stuff to check and output */
ASSERT(labels->get_num_labels()==num_subset_idx);
Expand All @@ -59,13 +59,13 @@ int main(int argc, char **argv)
{
float64_t label=labels->get_label(i);
SG_SPRINT("label %f:\n", label);
ASSERT(label==labels_data.vector[labels->subset_idx_conversion(i)]);
ASSERT(label==labels_data.vector[subset_idx.vector[i]]);
}

/* remove features subset */SG_SPRINT("\n\n-------------------\n"
"removing subset from features\n"
"-------------------\n");
labels->remove_subset();
labels->remove_all_subsets();

ASSERT(labels->get_num_labels()==num_labels);
SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());
Expand All @@ -77,9 +77,11 @@ int main(int argc, char **argv)
ASSERT(label==labels_data.vector[i]);
}
SG_UNREF(labels);
subset_idx.destroy_vector();

SG_SPRINT("\nEND\n");
exit_shogun();

return 0;
}

Expand Up @@ -4,7 +4,7 @@
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

Expand Down Expand Up @@ -80,7 +80,7 @@ int main(int argc, char **argv)
SG_SPRINT("\n\n-------------------\n"
"applying subset to features\n"
"-------------------\n");
features->add_subset(new CSubset(subset_idx));
features->add_subset(subset_idx);

/* do some stuff to check and output */
ASSERT(features->get_num_vectors()==num_subset_idx);
Expand All @@ -99,8 +99,7 @@ int main(int argc, char **argv)
CMath::display_vector(vec.vector, vec.vlen);

for (index_t j=0; j<dim_features; ++j)
ASSERT(vec.vector[j]==data.matrix[features->subset_idx_conversion(
i)*num_vectors+j]);
ASSERT(vec.vector[j]==data.matrix[subset_idx.vector[i]*num_vectors+j]);

/* not necessary since feature matrix is in memory. for documentation */
features->free_feature_vector(vec, i);
Expand Down Expand Up @@ -128,17 +127,16 @@ int main(int argc, char **argv)
CMath::display_vector(vec.vector, vec.vlen);

for (index_t j=0; j<dim_features; ++j)
ASSERT(vec.vector[j]==data.matrix[features->subset_idx_conversion(i)
*num_vectors+j]);
ASSERT(vec.vector[j]==data.matrix[i*num_vectors+j]);

/* not necessary since feature matrix is in memory. for documentation */
features->free_feature_vector(vec, i);
}


SG_UNREF(features);
subset_idx.destroy_vector();

SG_SPRINT("\nEND\n");
exit_shogun();

return 0;
Expand Down
Expand Up @@ -4,7 +4,7 @@
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

Expand Down Expand Up @@ -75,7 +75,7 @@ int main(int argc, char **argv)
SG_SPRINT("\n-------------------\n"
"applying subset to features\n"
"-------------------\n");
features->add_subset(new CSubset(subset_idx));
features->add_subset(subset_idx);

/* do some stuff to check and output */
ASSERT(features->get_num_vectors()==num_subset_idx);
Expand All @@ -102,7 +102,7 @@ int main(int argc, char **argv)
for (index_t j=0; j<vec.num_feat_entries; ++j)
{
int32_t a=vec.features[j].entry;
index_t ind=features->subset_idx_conversion(i)*num_vectors+j;
index_t ind=subset_idx.vector[i]*num_vectors+j;
int32_t b=data.matrix[ind];
ASSERT(a==b);
}
Expand Down Expand Up @@ -145,7 +145,9 @@ int main(int argc, char **argv)
}

SG_UNREF(features);
SG_FREE(data.matrix);
data.destroy_matrix();
subset_idx.destroy_vector();


exit_shogun();

Expand Down
65 changes: 65 additions & 0 deletions examples/undocumented/libshogun/features_subset_stack.cpp
@@ -0,0 +1,65 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/base/init.h>
#include <shogun/features/SubsetStack.h>

using namespace shogun;

/* Output callback handed to init_shogun() in main(): writes the given
 * message to the given stream without modifying it.
 *
 * @param target stream the message is written to
 * @param str zero-terminated message text
 */
void print_message(FILE* target, const char* str)
{
	/* the text is emitted verbatim; no format specifiers in str are expanded */
	fputs(str, target);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* the stack under test; this example keeps its own reference until the
	 * clean up at the end */
	CSubsetStack* stack=new CSubsetStack();
	SG_REF(stack);

	/* two index sets of length 10 and 4, both filled via range_fill(1); the
	 * check further down relies on each set shifting an index by one */
	SGVector<index_t> subset_a(10);
	subset_a.range_fill(1);
	SGVector<index_t> subset_b(4);
	subset_b.range_fill(1);

	/* push and pop one subset at a time */
	stack->add_subset(subset_a);
	stack->remove_subset();
	stack->add_subset(subset_b);
	stack->remove_subset();

	/* push both subsets before popping both again */
	stack->add_subset(subset_a);
	stack->add_subset(subset_b);
	stack->remove_subset();
	stack->remove_subset();

	/* stack both subsets and verify the combined index mapping */
	stack->add_subset(subset_a);
	stack->add_subset(subset_b);

	/* with two stacked subsets, each adding an offset of one, index i is
	 * expected to be converted to i+2 */
	index_t i=0;
	while (i<subset_b.vlen)
	{
		ASSERT(stack->subset_idx_conversion(i)==i+2);
		++i;
	}

	stack->remove_subset();
	stack->remove_subset();

	/* release the stack and the index vectors created above */
	SG_UNREF(stack);
	subset_a.destroy_vector();
	subset_b.destroy_vector();

	exit_shogun();

	return 0;
}

9 changes: 9 additions & 0 deletions src/shogun/base/DynArray.h
Expand Up @@ -104,6 +104,15 @@ template <class T> class DynArray
return array[index];
}

/** returns a copy of the element stored at position last_element_idx
 *
 * (no bounds checking is done here; NOTE(review): behaviour for an array
 * without any element depends on the value of last_element_idx in that
 * case - confirm against the rest of the class)
 *
 * @return array element at last index
 */
inline T get_last_element() const
{
	const T& last=array[last_element_idx];
	return last;
}

/** get array element at index as pointer
*
* (does NOT do bounds checking)
Expand Down

0 comments on commit 4328d97

Please sign in to comment.