Skip to content

Commit

Permalink
Merge pull request #311 from frx/streaming_vw
Browse files Browse the repository at this point in the history
StreamingStringFeatures improvements
  • Loading branch information
Soeren Sonnenburg committed Aug 26, 2011
2 parents 4a4d755 + 1d81f89 commit 58b557f
Show file tree
Hide file tree
Showing 4 changed files with 354 additions and 288 deletions.
52 changes: 52 additions & 0 deletions examples/undocumented/libshogun/streaming_stringfeatures.cpp
@@ -0,0 +1,52 @@
// This example demonstrates how StreamingStringFeatures is used and how it works.

#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingStringFeatures.h>

using namespace shogun;

/**
 * Print a single string vector to stdout.
 *
 * Writes a banner, the length stored in vec.slen, and then the first
 * vec.slen characters of vec.string, terminated by a newline.
 *
 * @param vec string to display
 */
void display_vector(const SGString<char> &vec)
{
	printf("\nNew Vector\n------------------\n");
	printf("Length=%d.\n", vec.slen);

	// Emit the characters one at a time; slen bounds the output.
	int32_t pos=0;
	while (pos<vec.slen)
	{
		printf("%c", vec.string[pos]);
		++pos;
	}

	printf("\n");
}

int main(int argc, char **argv)
{
init_shogun_with_defaults();

// Create a StreamingAsciiFile from our input file
CStreamingAsciiFile* file = new CStreamingAsciiFile("../data/fm_train_dna.dat");

// This file contains unlabelled data, so the second arg is `false'.
CStreamingStringFeatures<char>* feat = new CStreamingStringFeatures<char>(file, false, 1024);
// Alphabet to use is DNA
feat->use_alphabet(DNA);

// Loop over all examples and simply display each example
feat->start_parser();
while (feat->get_next_example())
{
SGString<char> vec = feat->get_vector();
display_vector(vec);
feat->release_example();
}
feat->end_parser();

// Get the alphabet and display the histogram
CAlphabet* alpha = feat->get_alphabet();
printf("\nThe histogram is:\n");
alpha->print_histogram();
SG_UNREF(alpha);

SG_UNREF(feat);
SG_UNREF(file);

exit_shogun();
return 0;
}
10 changes: 8 additions & 2 deletions src/shogun/features/StreamingStringFeatures.h
Expand Up @@ -69,7 +69,7 @@ template <class T> class CStreamingStringFeatures : public CStreamingFeatures
/**
 * Destructor.
 *
 * Ends the parser, then releases current_string and the alphabet.
 */
~CStreamingStringFeatures()
{
// End the parser before any member it may still be using is released.
parser.end_parser();
// NOTE(review): init(file, ...) in this same change calls
// parser.set_free_vectors_on_destruct(false); confirm that current_string
// is owned by this object at this point, so freeing it here cannot
// release a buffer the parser still manages.
SG_FREE(current_string);
// Release the reference held on the alphabet.
SG_UNREF(alphabet);
}

/**
Expand Down Expand Up @@ -102,6 +102,8 @@ template <class T> class CStreamingStringFeatures : public CStreamingFeatures
*/
void use_alphabet(EAlphabet alpha)
{
SG_UNREF(alphabet);

alphabet=new CAlphabet(alpha);
SG_REF(alphabet);
num_symbols=alphabet->get_num_symbols();
Expand All @@ -115,6 +117,8 @@ template <class T> class CStreamingStringFeatures : public CStreamingFeatures
*/
void use_alphabet(CAlphabet* alpha)
{
SG_UNREF(alphabet);

alphabet=new CAlphabet(alpha);
SG_REF(alphabet);
num_symbols=alphabet->get_num_symbols();
Expand Down Expand Up @@ -375,7 +379,7 @@ void CStreamingStringFeatures<T>::init()
{
working_file=NULL;
alphabet=new CAlphabet();

current_string=NULL;
current_length=-1;
current_sgstring.string=current_string;
Expand All @@ -391,6 +395,8 @@ void CStreamingStringFeatures<T>::init(CStreamingFile* file,
has_labels=is_labelled;
working_file=file;
parser.init(file, is_labelled, size);
parser.set_free_vector_after_release(false);
parser.set_free_vectors_on_destruct(false);
}

template <class T>
Expand Down

0 comments on commit 58b557f

Please sign in to comment.