Skip to content

Commit

Permalink
Merge pull request #608 from karlnapf/master
Browse files Browse the repository at this point in the history
convenience methods for two-sample tests
  • Loading branch information
karlnapf committed Jun 28, 2012
2 parents b283832 + 3152cc8 commit d1ee078
Show file tree
Hide file tree
Showing 14 changed files with 289 additions and 3 deletions.
3 changes: 2 additions & 1 deletion examples/undocumented/libshogun/Makefile
Expand Up @@ -48,6 +48,7 @@ TARGETS = basic_minimal \
features_copy_subset_simple_features \
features_copy_subset_string_features \
features_copy_subset_sparse_features \
features_create_merged_copy \
mathematics_confidence_intervals \
clustering_kmeans base_parameter_map \
base_load_file_parameters \
Expand Down Expand Up @@ -82,7 +83,7 @@ TARGETS = basic_minimal \
statistics \
statistics_quadratic_time_mmd \
statistics_linear_time_mmd \
transfer_multitasklsregression
transfer_multitasklsregression \

all: $(TARGETS)

Expand Down
72 changes: 72 additions & 0 deletions examples/undocumented/libshogun/features_create_merged_copy.cpp
@@ -0,0 +1,72 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>

using namespace shogun;

void test_dense_features()
{
/* create two matrices, feature objects for them, call create_merged_copy,
* and check if it worked */

index_t n_1=3;
index_t n_2=4;
index_t dim=2;

SGMatrix<float64_t> data_1(dim,n_1);
for (index_t i=0; i<dim*n_1; ++i)
data_1.matrix[i]=i;

data_1.display_matrix("data_1");

SGMatrix<float64_t> data_2(dim,n_2);
for (index_t i=0; i<dim*n_2; ++i)
data_2.matrix[i]=CMath::randn_double();

data_1.display_matrix("data_2");

CDenseFeatures<float64_t>* features_1=new CDenseFeatures<float64_t>(data_1);
CDenseFeatures<float64_t>* features_2=new CDenseFeatures<float64_t>(data_2);


CFeatures* concatenation=features_1->create_merged_copy(features_2);

SGMatrix<float64_t> concat_data=
((CDenseFeatures<float64_t>*)concatenation)->get_feature_matrix();
concat_data.display_matrix("concat_data");

/* check for equality with data_1 */
for (index_t i=0; i<dim*n_1; ++i)
ASSERT(data_1.matrix[i]==concat_data.matrix[i]);

/* check for equality with data_2 */
for (index_t i=0; i<dim*n_2; ++i)
ASSERT(data_2.matrix[i]==concat_data.matrix[n_1*dim+i]);

SG_UNREF(concatenation);
SG_UNREF(features_1);
SG_UNREF(features_2);
}

/** Program entry point: initialises shogun with default settings, switches
 * the log level to MSG_DEBUG, runs the dense feature merge test and shuts
 * shogun down again.
 *
 * @param argc number of command line arguments (unused)
 * @param argv command line arguments (unused)
 * @return always 0
 */
int main(int argc, char **argv)
{
	/* set up the library and make debug messages visible */
	init_shogun_with_defaults();
	sg_io->set_loglevel(MSG_DEBUG);

	/* run the actual example */
	test_dense_features();

	/* tear the library down again */
	exit_shogun();
	return 0;
}

1 change: 1 addition & 0 deletions src/interfaces/modular/Features.i
Expand Up @@ -19,6 +19,7 @@

/* These functions return new Objects */
%newobject get_transposed();
%newobject create_merged_copy(CFeatures* other);

#ifdef USE_SWIG_DIRECTORS
%feature("director") shogun::CDirectorDotFeatures;
Expand Down
47 changes: 47 additions & 0 deletions src/shogun/features/DenseFeatures.cpp
Expand Up @@ -897,6 +897,53 @@ template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
return true;
}

/** Creates a new dense feature object whose feature matrix is a copy of this
 * instance's matrix followed by a copy of the matrix of the provided
 * instance.
 *
 * The provided object has to have the same feature type, feature class and
 * name as this one, has to be castable to CDenseFeatures<ST>, and has to have
 * the same number of features (dimension). Otherwise SG_ERROR is called.
 *
 * NOTE(review): the raw feature_matrix, num_features and num_vectors members
 * are used directly; whether an active subset should be respected cannot be
 * seen from here - confirm.
 *
 * @param other feature object whose data is appended
 * @return newly created feature object holding copies of both matrices
 */
template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
		CFeatures* other)
{
	SG_DEBUG("entering %s::create_merged_copy()\n", get_name());

	/* other is dereferenced below, so reject NULL explicitly */
	if (!other)
	{
		SG_ERROR("%s::create_merged_copy(): Provided feature object is "
				"NULL!\n", get_name());
		return NULL;
	}

	if (get_feature_type()!=other->get_feature_type() ||
			get_feature_class()!=other->get_feature_class() ||
			strcmp(get_name(), other->get_name()))
	{
		SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
				get_name());
	}

	CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);

	if (!casted)
	{
		SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
				"same type as %s\n",get_name(), other->get_name(), get_name());
	}

	if (num_features!=casted->num_features)
	{
		/* the format string contains a %s, so the name has to be passed */
		SG_ERROR("%s::create_merged_copy(): Provided feature object has "
				"different dimension than this one\n", get_name());
	}

	/* create new feature matrix and copy both instances data into it */
	SGMatrix<ST> data(num_features, num_vectors+casted->get_num_vectors());

	/* element counts are computed in size_t so that the products cannot
	 * overflow the (signed) index type before being scaled by sizeof(ST) */
	const size_t len_this=static_cast<size_t>(num_features)*num_vectors;
	const size_t len_other=
			static_cast<size_t>(casted->num_features)*casted->num_vectors;

	/* copy data of this instance */
	SG_DEBUG("copying matrix of this instance\n");
	memcpy(data.matrix, feature_matrix.matrix, len_this*sizeof(ST));

	/* copy data of provided instance directly behind the data of this one */
	SG_DEBUG("copying matrix of provided instance\n");
	memcpy(&data.matrix[len_this], casted->feature_matrix.matrix,
			len_other*sizeof(ST));

	/* create new instance and return */
	CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);

	SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
	return result;
}

#define LOAD(f_load, sg_type) \
template<> void CDenseFeatures<sg_type>::load(CFile* loader) \
{ \
Expand Down
10 changes: 10 additions & 0 deletions src/shogun/features/DenseFeatures.h
Expand Up @@ -482,6 +482,16 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
virtual bool is_equal(CDenseFeatures* rhs);

/** Takes another feature instance and returns a new instance which is
 * a concatenation of a copy of this instance's data and a copy of the
 * given instance's data. The data of this instance comes first.
 *
 * The given instance has to be of the same feature type, feature class
 * and name, and has to have the same number of features (dimension);
 * otherwise an error is raised via SG_ERROR.
 *
 * @param other feature object to append
 * @return new feature object which contains copy of data of this
 * instance and of given one
 */
CFeatures* create_merged_copy(CFeatures* other);

/** @return object name */
inline virtual const char* get_name() const { return "DenseFeatures"; }

Expand Down
16 changes: 16 additions & 0 deletions src/shogun/features/Features.h
Expand Up @@ -221,6 +221,22 @@ class CFeatures : public CSGObject
*/
void unset_property(EFeatureProperty p);

/** Takes another feature instance and returns a new instance which is
 * a concatenation of a copy of this instance's data and the given
 * instance's data. Note that the feature types have to be equal.
 *
 * This base class version is NOT IMPLEMENTED: it only reports an error
 * via SG_ERROR. The method is virtual so that a call through a CFeatures
 * pointer reaches the implementation of the concrete feature class.
 *
 * @param other feature object to append
 * @return new feature object which contains copy of data of this
 * instance and of given one; this base class version returns NULL
 */
virtual CFeatures* create_merged_copy(CFeatures* other)
{
	/* the format string contains a %s, so the name has to be passed */
	SG_ERROR("%s::create_merged_copy() is not yet implemented!\n",
			get_name());
	return NULL;
}

/** adds a subset of indices on top of the current subsets (possibly
* subset o subset. Calls subset_changed_post() afterwards
*
Expand Down
9 changes: 9 additions & 0 deletions src/shogun/statistics/KernelTwoSampleTestStatistic.cpp
Expand Up @@ -29,6 +29,15 @@ CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,
SG_REF(kernel);
}

/* Convenience constructor taking the samples of p and q as separate feature
 * objects. Both are handed to the CTwoSampleTestStatistic(p, q) base class
 * constructor; this class only stores and references the kernel. */
CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,
		CFeatures* p, CFeatures* q) : CTwoSampleTestStatistic(p, q)
{
	init();

	/* take a reference on the kernel and keep it */
	SG_REF(kernel);
	m_kernel=kernel;
}

CKernelTwoSampleTestStatistic::~CKernelTwoSampleTestStatistic()
{
SG_UNREF(m_kernel);
Expand Down
25 changes: 25 additions & 0 deletions src/shogun/statistics/KernelTwoSampleTestStatistic.h
Expand Up @@ -25,9 +25,34 @@ class CKernelTwoSampleTestStatistic : public CTwoSampleTestStatistic
{
public:
CKernelTwoSampleTestStatistic();

/** Constructor
 *
 * @param kernel kernel to use
 * @param p_and_q feature data. Is assumed to contain samples from both
 * p and q, appended: first all samples from p, then, starting at index
 * q_start, all samples from q
 * @param q_start index of first sample of q
 */
CKernelTwoSampleTestStatistic(CKernel* kernel, CFeatures* p_and_q,
index_t q_start);

/** Constructor.
 * This is a convenience constructor which takes the samples from p and q
 * as two separate feature objects. Both are passed on to the base class,
 * which merges copies of them into one feature object. Since the data is
 * copied, memory for a second copy of all samples is needed.
 *
 * @param kernel kernel for MMD
 * @param p samples from distribution p, will be copied and NOT
 * SG_REF'ed
 * @param q samples from distribution q, will be copied and NOT
 * SG_REF'ed
 */
CKernelTwoSampleTestStatistic(CKernel* kernel, CFeatures* p,
CFeatures* q);

virtual ~CKernelTwoSampleTestStatistic();

inline virtual const char* get_name() const=0;
Expand Down
12 changes: 12 additions & 0 deletions src/shogun/statistics/LinearTimeMMD.cpp
Expand Up @@ -32,6 +32,18 @@ CLinearTimeMMD::CLinearTimeMMD(CKernel* kernel, CFeatures* p_and_q,
}
}

/* Convenience constructor taking the samples of p and q as separate feature
 * objects. Reports an error if both do not contain the same number of
 * vectors. */
CLinearTimeMMD::CLinearTimeMMD(CKernel* kernel, CFeatures* p, CFeatures* q) :
		CKernelTwoSampleTestStatistic(kernel, p, q)
{
	init();

	/* sample counts of both distributions have to match */
	const index_t num_p=p->get_num_vectors();
	const index_t num_q=q->get_num_vectors();
	if (num_p==num_q)
		return;

	SG_ERROR("CLinearTimeMMD: Only features with equal number of vectors "
			"are currently possible\n");
}

CLinearTimeMMD::~CLinearTimeMMD()
{

Expand Down
24 changes: 24 additions & 0 deletions src/shogun/statistics/LinearTimeMMD.h
Expand Up @@ -33,8 +33,32 @@ class CLinearTimeMMD: public CKernelTwoSampleTestStatistic
{
public:
CLinearTimeMMD();

/** Constructor
 *
 * @param kernel kernel to use
 * @param p_and_q feature data. Is assumed to contain samples from both
 * p and q, appended: first all samples from p, then, starting at index
 * q_start, all samples from q
 * @param q_start index of first sample of q
 */
CLinearTimeMMD(CKernel* kernel, CFeatures* p_and_q, index_t q_start);

/** Constructor.
 * This is a convenience constructor which takes the samples from p and q
 * as two separate feature objects and passes them on to the base class
 * constructor. Since the data is copied, memory for a second copy of all
 * samples is needed. Both feature objects have to contain the same number
 * of vectors, otherwise an error is raised.
 *
 * @param kernel kernel for MMD
 * @param p samples from distribution p, will be copied and NOT
 * SG_REF'ed
 * @param q samples from distribution q, will be copied and NOT
 * SG_REF'ed
 */
CLinearTimeMMD(CKernel* kernel, CFeatures* p, CFeatures* q);

virtual ~CLinearTimeMMD();

/** Computes the squared linear time MMD for the current data. This is an
Expand Down
12 changes: 12 additions & 0 deletions src/shogun/statistics/QuadraticTimeMMD.cpp
Expand Up @@ -31,6 +31,18 @@ CQuadraticTimeMMD::CQuadraticTimeMMD(CKernel* kernel, CFeatures* p_and_q,
}
}

/* Convenience constructor taking the samples of p and q as separate feature
 * objects. Reports an error if both do not contain the same number of
 * vectors. */
CQuadraticTimeMMD::CQuadraticTimeMMD(CKernel* kernel, CFeatures* p,
		CFeatures* q) : CKernelTwoSampleTestStatistic(kernel, p, q)
{
	init();

	/* sample counts of both distributions have to match */
	const bool same_size=(p->get_num_vectors()==q->get_num_vectors());
	if (!same_size)
	{
		SG_ERROR("CQuadraticTimeMMD: Only features with equal number of vectors "
				"are currently possible\n");
	}
}

CQuadraticTimeMMD::~CQuadraticTimeMMD()
{

Expand Down
24 changes: 24 additions & 0 deletions src/shogun/statistics/QuadraticTimeMMD.h
Expand Up @@ -60,8 +60,32 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTestStatistic
{
public:
CQuadraticTimeMMD();

/** Constructor
 *
 * @param kernel kernel to use
 * @param p_and_q feature data. Is assumed to contain samples from both
 * p and q, appended: first all samples from p, then, starting at index
 * q_start, all samples from q
 * @param q_start index of first sample of q
 */
CQuadraticTimeMMD(CKernel* kernel, CFeatures* p_and_q, index_t q_start);

/** Constructor.
 * This is a convenience constructor which takes the samples from p and q
 * as two separate feature objects and passes them on to the base class
 * constructor. Since the data is copied, memory for a second copy of all
 * samples is needed. Both feature objects have to contain the same number
 * of vectors, otherwise an error is raised.
 *
 * @param kernel kernel for MMD
 * @param p samples from distribution p, will be copied and NOT
 * SG_REF'ed
 * @param q samples from distribution q, will be copied and NOT
 * SG_REF'ed
 */
CQuadraticTimeMMD(CKernel* kernel, CFeatures* p, CFeatures* q);

virtual ~CQuadraticTimeMMD();

/** Computes the squared quadratic time MMD for the current data. Note
Expand Down
14 changes: 12 additions & 2 deletions src/shogun/statistics/TwoSampleTestStatistic.cpp
Expand Up @@ -18,8 +18,7 @@ CTwoSampleTestStatistic::CTwoSampleTestStatistic() : CTestStatistic()
}

CTwoSampleTestStatistic::CTwoSampleTestStatistic(CFeatures* p_and_q,
index_t q_start) :
CTestStatistic()
index_t q_start) : CTestStatistic()
{
init();

Expand All @@ -29,6 +28,17 @@ CTwoSampleTestStatistic::CTwoSampleTestStatistic(CFeatures* p_and_q,
m_q_start=q_start;
}

/* Convenience constructor taking the samples of p and q as separate feature
 * objects. A merged copy of both (p first, then q) is created, referenced
 * and stored; the samples of q start at index p->get_num_vectors(). */
CTwoSampleTestStatistic::CTwoSampleTestStatistic(CFeatures* p, CFeatures* q) :
		CTestStatistic()
{
	init();

	/* build the merged feature object and keep a reference to it */
	CFeatures* merged=p->create_merged_copy(q);
	m_p_and_q=merged;
	SG_REF(m_p_and_q);

	/* all vectors of p come first, so q starts right behind them */
	m_q_start=p->get_num_vectors();
}

CTwoSampleTestStatistic::~CTwoSampleTestStatistic()
{
SG_UNREF(m_p_and_q);
Expand Down

0 comments on commit d1ee078

Please sign in to comment.