Skip to content

Commit

Permalink
Merge pull request #668 from karlnapf/master
Browse files Browse the repository at this point in the history
new data generator class to generate example data
  • Loading branch information
karlnapf committed Jul 22, 2012
2 parents 9bef2a5 + 1b70388 commit 8e101b7
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 22 deletions.
31 changes: 9 additions & 22 deletions examples/undocumented/libshogun/statistics_quadratic_time_mmd.cpp
Expand Up @@ -12,25 +12,10 @@
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/features/DataGenerator.h>

using namespace shogun;


/** Fills the given matrix with samples from P (first half of the columns)
 * and Q (remaining columns). Every element is drawn via
 * CMath::randn_double(); elements of Q that lie in the first row
 * additionally get the given difference added.
 *
 * @param target matrix whose elements are overwritten
 * @param difference value added to the first row of the Q columns
 */
void create_mean_data(SGMatrix<float64_t> target, float64_t difference)
{
	/* column index at which Q starts */
	const index_t half=target.num_cols/2;

	for (index_t row=0; row<target.num_rows; ++row)
	{
		/* only the first row carries the mean difference */
		const float64_t offset=(row==0 ? difference : 0);

		/* columns of P */
		index_t col=0;
		for (; col<half; ++col)
			target(row,col)=CMath::randn_double();

		/* columns of Q, continuing where the P columns ended */
		for (; col<target.num_cols; ++col)
			target(row,col)=CMath::randn_double()+offset;
	}
}

/** tests the quadratic mmd statistic for a single data case and ensures
* equality with matlab implementation */
void test_quadratic_mmd_fixed()
Expand Down Expand Up @@ -73,8 +58,8 @@ void test_quadratic_mmd_bootstrap()
index_t num_iterations=1000;
num_iterations=10; //speed up

SGMatrix<float64_t> data(dimension, 2*m);
create_mean_data(data, difference);
SGMatrix<float64_t> data=CDataGenerator::generate_mean_data(m, dimension,
difference);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

/* shoguns kernel width is different */
Expand Down Expand Up @@ -112,8 +97,8 @@ void test_quadratic_mmd_spectrum()
float64_t difference=0.5;
float64_t sigma=2;

SGMatrix<float64_t> data(dimension, 2*m);
create_mean_data(data, difference);
SGMatrix<float64_t> data=CDataGenerator::generate_mean_data(m, dimension,
difference);

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

Expand Down Expand Up @@ -185,8 +170,8 @@ void test_quadratic_mmd_random()
num_runs=10; //speed up
SGVector<float64_t> mmds(num_runs);

/* pre-allocate data matrix and features, just change elements later */
SGMatrix<float64_t> data(dimension, 2*m);

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

/* shoguns kernel width is different */
Expand All @@ -195,7 +180,9 @@ void test_quadratic_mmd_random()
mmd->set_statistic_type(UNBIASED);
for (index_t i=0; i<num_runs; ++i)
{
create_mean_data(data, difference);
/* use pre-allocated space for data generation */
CDataGenerator::generate_mean_data(m, dimension,
difference, data.matrix);
kernel->init(features, features);
mmds[i]=mmd->compute_statistic();
}
Expand Down
2 changes: 2 additions & 0 deletions src/interfaces/modular/Features.i
Expand Up @@ -63,6 +63,7 @@
%rename(LBPPyrDotFeatures) CLBPPyrDotFeatures;
%rename(ExplicitSpecFeatures) CExplicitSpecFeatures;
%rename(ImplicitWeightedSpecFeatures) CImplicitWeightedSpecFeatures;
%rename(DataGenerator) CDataGenerator;

/* Include Class Headers to make them visible from within the target language */
%include <shogun/features/FeatureTypes.h>
Expand All @@ -75,6 +76,7 @@
%include <shogun/features/StreamingFeatures.h>
%include <shogun/features/StreamingDotFeatures.h>
%include <shogun/features/StreamingVwFeatures.h>
%include <shogun/features/DataGenerator.h>

/* Templated Class StringFeatures */
%include <shogun/features/StringFeatures.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Features_includes.i
Expand Up @@ -40,4 +40,5 @@
#include <shogun/features/LBPPyrDotFeatures.h>
#include <shogun/features/ExplicitSpecFeatures.h>
#include <shogun/features/ImplicitWeightedSpecFeatures.h>
#include <shogun/features/DataGenerator.h>
%}
56 changes: 56 additions & 0 deletions src/shogun/features/DataGenerator.cpp
@@ -0,0 +1,56 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#include <shogun/features/DataGenerator.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Default constructor: constructs the CSGObject base and then runs init(). */
CDataGenerator::CDataGenerator()
	: CSGObject()
{
	init();
}

/* Destructor: the body is empty, no cleanup is performed here. */
CDataGenerator::~CDataGenerator()
{
}

/* Called from the constructor. The body is currently empty, i.e. no
 * parameters are registered and no members are initialized here. */
void CDataGenerator::init()
{
}

/* Generates 2*m samples of dimension dim, stored as columns of the result.
 * Every element is drawn via CMath::randn_double(); for the last m columns,
 * mean_shift is additionally added to the element in row 0.
 *
 * @param m number of samples per half (the result has 2*m columns)
 * @param dim number of rows of the result
 * @param mean_shift value added to row 0 of columns m..2*m-1
 * @param target_data if non-NULL, used as storage for the result instead of
 * allocating a new matrix. Its size is not checked here, so the caller has
 * to provide room for dim*2*m elements
 * @return matrix with dim rows and 2*m columns
 */
SGMatrix<float64_t> CDataGenerator::generate_mean_data(index_t m,
		index_t dim, float64_t mean_shift, float64_t* target_data)
{
	/* use pre-allocated space if the caller passed some */
	SGMatrix<float64_t> result;

	if (target_data)
	{
		result.matrix=target_data;
		result.num_rows=dim;
		result.num_cols=2*m;
	}
	else
		result=SGMatrix<float64_t>(dim, 2*m);

	/* fill matrix with normal data, one column (sample) at a time */
	for (index_t i=0; i<2*m; ++i)
	{
		for (index_t j=0; j<dim; ++j)
			result(j,i)=CMath::randn_double();

		/* mean shift for second half. Without any rows there is no first
		 * dimension to shift, so skip the write instead of touching memory
		 * outside the matrix */
		if (i>=m && dim>0)
			result(0,i)+=mean_shift;
	}

	return result;
}
53 changes: 53 additions & 0 deletions src/shogun/features/DataGenerator.h
@@ -0,0 +1,53 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/

#ifndef __DATAGENERATOR_H_
#define __DATAGENERATOR_H_

#include <shogun/base/SGObject.h>
#include <shogun/lib/SGMatrix.h>

namespace shogun
{

/** @brief Class that is able to generate various data samples, which may be
* used for examples in SHOGUN.
*/
class CDataGenerator: public CSGObject
{
public:
	/** default constructor */
	CDataGenerator();

	/** destructor */
	virtual ~CDataGenerator();

	/** Draws m samples from each of two distributions p and q. All elements
	 * are standard normally distributed, except for the first dimension of
	 * q, whose mean is shifted by the specified value.
	 *
	 * @param m number of samples to generate from each distribution
	 * @param dim dimension of generated samples
	 * @param mean_shift value added to the mean of the first dimension of q
	 * @param target_data if non-NULL, this is used as storage for the matrix
	 * data. The caller has to make sure that its dimensions fit
	 * @return matrix with concatenated samples, first p then q
	 */
	static SGMatrix<float64_t> generate_mean_data(index_t m, index_t dim,
			float64_t mean_shift, float64_t* target_data=NULL);

	/** @return name of this class, "DataGenerator" */
	virtual const char* get_name() const { return "DataGenerator"; }

private:
	/** registers all parameters and initializes variables with defaults */
	void init();
};

}

#endif /* __DATAGENERATOR_H_ */

0 comments on commit 8e101b7

Please sign in to comment.