Skip to content

Commit

Permalink
Merge pull request #672 from karlnapf/master
Browse files Browse the repository at this point in the history
new data generation method and helper methods
  • Loading branch information
karlnapf committed Jul 24, 2012
2 parents df82997 + a3cbd47 commit 497cd01
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 23 deletions.
Expand Up @@ -66,8 +66,7 @@ void test_linear_mmd_random()

for (index_t i=0; i<num_runs; ++i)
{
CDataGenerator::generate_mean_data(m, dimension, difference,
data.matrix);
CDataGenerator::generate_mean_data(m, dimension, difference, data);
mmds[i]=mmd->compute_statistic();
}

Expand Down Expand Up @@ -108,8 +107,7 @@ void test_linear_mmd_variance_estimate()

for (index_t i=0; i<num_runs; ++i)
{
CDataGenerator::generate_mean_data(m, dimension, difference,
data.matrix);
CDataGenerator::generate_mean_data(m, dimension, difference, data);
vars[i]=mmd->compute_variance_estimate();
}

Expand Down Expand Up @@ -188,8 +186,7 @@ void test_linear_mmd_type2_error()

for (index_t i=0; i<num_runs; ++i)
{
CDataGenerator::generate_mean_data(m, dimension, difference,
data.matrix);
CDataGenerator::generate_mean_data(m, dimension, difference, data);

/* technically, this leads to a wrong result since training (statistic)
* and testing (p-value) have to happen on different data, but this
Expand Down
Expand Up @@ -181,8 +181,7 @@ void test_quadratic_mmd_random()
for (index_t i=0; i<num_runs; ++i)
{
/* use pre-allocated space for data generation */
CDataGenerator::generate_mean_data(m, dimension,
difference, data.matrix);
CDataGenerator::generate_mean_data(m, dimension, difference, data);
kernel->init(features, features);
mmds[i]=mmd->compute_statistic();
}
Expand Down
46 changes: 34 additions & 12 deletions src/shogun/features/DataGenerator.cpp
Expand Up @@ -27,19 +27,12 @@ void CDataGenerator::init()
}

SGMatrix<float64_t> CDataGenerator::generate_mean_data(index_t m,
index_t dim, float64_t mean_shift, float64_t* target_data)
index_t dim, float64_t mean_shift,
SGMatrix<float64_t> target)
{
/* evtl use pre-allocated space */
SGMatrix<float64_t> result;

if (target_data)
{
result.matrix=target_data;
result.num_rows=dim;
result.num_cols=2*m;
}
else
result=SGMatrix<float64_t>(dim, 2*m);
/* evtl. allocate space */
SGMatrix<float64_t> result=SGMatrix<float64_t>::get_allocated_matrix(
dim, 2*m, target);

/* fill matrix with normal data */
for (index_t i=0; i<2*m; ++i)
Expand All @@ -54,3 +47,32 @@ SGMatrix<float64_t> CDataGenerator::generate_mean_data(index_t m,

return result;
}

/* Generates m two-dimensional samples (one per column of a 2 x m matrix).
 * Each coordinate is drawn from an equal mixture of two unit-variance
 * Gaussians centred at +d and -d; the point cloud is then rotated around the
 * origin by `angle`, which is handed directly to cos/sin (i.e. radians).
 *
 * The rotation is applied column by column directly into `result`. The former
 * implementation assigned the freshly allocated output of
 * SGMatrix::matrix_multiply to `result`, so a caller-provided `target` was
 * left holding the *unrotated* samples whenever angle was non-zero.
 *
 * @param m number of samples (columns) to generate
 * @param d distance of the Gaussian means from the origin, per dimension
 * @param angle rotation angle; 0 disables the rotation
 * @param target optional pre-allocated 2 x m matrix to write into; dimensions
 * are checked by SGMatrix::get_allocated_matrix
 * @return 2 x m matrix holding the (rotated) samples
 */
SGMatrix<float64_t> CDataGenerator::generate_sym_mix_gauss(index_t m,
		float64_t d, float64_t angle, SGMatrix<float64_t> target)
{
	/* evtl. allocate space */
	SGMatrix<float64_t> result=SGMatrix<float64_t>::get_allocated_matrix(
			2, m, target);

	/* entries of the rotation matrix [c -s; s c], computed once */
	const bool rotate=(angle!=0);
	const float64_t c=CMath::cos(angle);
	const float64_t s=CMath::sin(angle);

	for (index_t i=0; i<m; ++i)
	{
		/* signal in each dimension is an equal mixture of two Gaussians;
		 * the random draws happen in the same order as before */
		const float64_t x=CMath::randn_double() + (CMath::random(0, 1) ? d : -d);
		const float64_t y=CMath::randn_double() + (CMath::random(0, 1) ? d : -d);

		if (rotate)
		{
			/* in-place rotation of this sample: rot * (x, y)^T */
			result(0,i)=c*x - s*y;
			result(1,i)=s*x + c*y;
		}
		else
		{
			result(0,i)=x;
			result(1,i)=y;
		}
	}

	return result;
}
34 changes: 31 additions & 3 deletions src/shogun/features/DataGenerator.h
Expand Up @@ -30,15 +30,43 @@ class CDataGenerator: public CSGObject
* is standard normally distributed, except for the first dimension of q,
* where the mean is shifted by a specified value.
*
* May be used for a two-sample test.
*
* @param m number of samples to generate
* @param dim dimension of generated samples
* @param mean_shift is added to mean of first dimension
* @target_data if non-NULL then this is used as matrix data storage. Make
* sure that its dimensions fit
* @param target if non-empty then this is used as pre-allocated matrix.
* Make sure that its dimensions fit
* @return matrix with concatenated samples,first p then q
*/
static SGMatrix<float64_t> generate_mean_data(index_t m, index_t dim,
float64_t mean_shift, float64_t* target_data=NULL);
float64_t mean_shift,
SGMatrix<float64_t> target=SGMatrix<float64_t>());

/** Produces samples as in source (g) from Table 3 in [1].
* Namely, produces an equal mixture of two independent Gaussians per
* dimension, of which there are two. The resulting 4 Gaussian blobs
* are then optionally rotated by the provided angle.
* Distance of means from origin (dimension-wise) can be controlled via
* parameter d.
*
* May be used in an independence test to detect dependence in rotation.
* First dimension can be used as one-dimensional p, second as q.
*
* [1]: Gretton, A., Herbrich, R., Smola, A., Bousquet, O., & Schölkopf, B.
* (2005). Kernel Methods for Measuring Independence.
* Journal of Machine Learning Research, 6, 2075-2129.
*
* @param m number of samples to generate (number of columns of the result)
* @param d distance of Gaussian means to origin (dimension-wise)
* @param angle angle that the data is rotated by. The implementation passes
* this value directly to cos/sin, i.e. it is interpreted in radians, not as
* a fraction of \f$\pi\f$; a value of 0 disables the rotation
* @param target if non-empty then this is used as pre-allocated matrix.
* Make sure that its dimensions (2 x m) fit
* @return matrix with 2 rows and m columns, one generated sample per column
*/
static SGMatrix<float64_t> generate_sym_mix_gauss(index_t m,
float64_t d, float64_t angle,
SGMatrix<float64_t> target=SGMatrix<float64_t>());

inline virtual const char* get_name() const { return "DataGenerator"; }

Expand Down
30 changes: 30 additions & 0 deletions src/shogun/lib/SGMatrix.cpp
Expand Up @@ -635,6 +635,36 @@ SGMatrix<float64_t> SGMatrix<T>::matrix_multiply(
return C;
}

/* Returns a matrix of the requested size: either the caller-supplied
 * pre_allocated one (after checking that its dimensions agree) or a newly
 * constructed num_rows x num_cols matrix when pre_allocated has no data.
 * A dimension mismatch is reported through SG_SERROR.
 *
 * NOTE(review): this definition is located before `#endif //HAVE_LAPACK`;
 * confirm that it is really meant to be available only in LAPACK builds.
 */
template<class T>
SGMatrix<T> SGMatrix<T>::get_allocated_matrix(index_t num_rows,
		index_t num_cols, SGMatrix<T> pre_allocated)
{
	SGMatrix<T> result;

	/* evtl use pre-allocated space */
	if (pre_allocated.matrix)
	{
		result=pre_allocated;

		/* check dimension consistency */
		if (pre_allocated.num_rows!=num_rows ||
				pre_allocated.num_cols!=num_cols)
		{
			/* adjacent literals are concatenated verbatim, so the trailing
			 * space after "target" is required ("targetmatrix" otherwise) */
			SG_SERROR("SGMatrix<T>::get_allocated_matrix(). Provided target "
					"matrix dimensions (%dx%d) do not match passed data "
					"dimensions (%dx%d)!\n", pre_allocated.num_rows,
					pre_allocated.num_cols, num_rows, num_cols);
		}
	}
	else
	{
		/* otherwise, allocate space */
		result=SGMatrix<T>(num_rows, num_cols);
	}

	return result;
}

#endif //HAVE_LAPACK

template class SGMatrix<bool>;
Expand Down
14 changes: 14 additions & 0 deletions src/shogun/lib/SGMatrix.h
Expand Up @@ -279,6 +279,20 @@ template<class T> class SGMatrix : public SGReferencedData
const SGMatrix<T> matrix, const char* name="matrix",
const char* prefix="");

/** Simple helper method that returns a matrix with allocated memory
* for a given size. A pre-allocated one can optionally be specified
* in order to use that instead of allocating a new one.
* Basically just for having the dimension check encapsulated: if a
* pre-allocated matrix is given (its data pointer is non-NULL) and its
* dimensions differ from the requested ones, an error is reported via
* SG_SERROR.
*
* @param num_rows rows of returned matrix
* @param num_cols columns of returned matrix
* @param pre_allocated optional matrix that is returned instead of a new
* matrix. Its dimensions have to be num_rows x num_cols. The default
* (empty) matrix causes a new one to be allocated
* @return matrix with allocated memory of specified size
*/
static SGMatrix<T> get_allocated_matrix(index_t num_rows,
index_t num_cols, SGMatrix<T> pre_allocated=SGMatrix<T>());

protected:
/** needs to be overridden to copy data */
virtual void copy_data(const SGReferencedData &orig)
Expand Down

0 comments on commit 497cd01

Please sign in to comment.