Skip to content

Commit

Permalink
Merge pull request #377 from iglesias/master
Browse files Browse the repository at this point in the history
Mahalanobis distance
  • Loading branch information
Soeren Sonnenburg committed Feb 27, 2012
2 parents db9a699 + a5e9781 commit 22a5739
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/interfaces/modular/Distance.i
Expand Up @@ -32,6 +32,7 @@
%rename(ChiSquareDistance) CChiSquareDistance;
%rename(CosineDistance) CCosineDistance;
%rename(TanimotoDistance) CTanimotoDistance;
%rename(MahalanobisDistance) CMahalanobisDistance;

/* Include Class Headers to make them visible from within the target language */
%include <shogun/distance/Distance.h>
Expand Down Expand Up @@ -112,3 +113,4 @@ namespace shogun
%include <shogun/distance/ChiSquareDistance.h>
%include <shogun/distance/CosineDistance.h>
%include <shogun/distance/TanimotoDistance.h>
%include <shogun/distance/MahalanobisDistance.h>
1 change: 1 addition & 0 deletions src/interfaces/modular/Distance_includes.i
Expand Up @@ -22,5 +22,6 @@
#include <shogun/distance/ChiSquareDistance.h>
#include <shogun/distance/CosineDistance.h>
#include <shogun/distance/TanimotoDistance.h>
#include <shogun/distance/MahalanobisDistance.h>
%}

3 changes: 2 additions & 1 deletion src/shogun/distance/Distance.h
Expand Up @@ -47,7 +47,8 @@ enum EDistanceType
D_COSINE = 140,
D_BRAYCURTIS = 150,
D_CUSTOM = 160,
D_ATTENUATEDEUCLIDIAN = 170
D_ATTENUATEDEUCLIDIAN = 170,
D_MAHALANOBIS = 180
};


Expand Down
92 changes: 92 additions & 0 deletions src/shogun/distance/MahalanobisDistance.cpp
@@ -0,0 +1,92 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Fernando José Iglesias García
* Copyright (C) 2012 Fernando José Iglesias García
*/

#ifdef HAVE_LAPACK

#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/distance/MahalanobisDistance.h>
#include <shogun/features/Features.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/lapack.h>

using namespace shogun;

/** Default constructor: only sets up defaults and registers parameters;
 * no features are attached, so init(l, r) has to be called before use.
 */
CMahalanobisDistance::CMahalanobisDistance()
: CRealDistance()
{
	this->init();
}

/** Construct the distance and immediately attach features.
 *
 * @param l features of left-hand side (source of mean and covariance)
 * @param r features of right-hand side
 */
CMahalanobisDistance::CMahalanobisDistance(
	CSimpleFeatures<float64_t>* l, CSimpleFeatures<float64_t>* r)
: CRealDistance()
{
	// defaults and parameter registration first, then the feature setup
	this->init();
	this->init(l, r);
}

/** Destructor: delegates to cleanup(). */
CMahalanobisDistance::~CMahalanobisDistance()
{
	this->cleanup();
}

/** Attach features and estimate the distribution used by compute().
 *
 * The mean and the covariance are taken from the left-hand side features;
 * the covariance is then inverted and kept as icov.
 *
 * @param l features of left-hand side (cast to CSimpleFeatures<float64_t>)
 * @param r features of right-hand side
 * @return false if the base class initialisation failed, true otherwise
 */
bool CMahalanobisDistance::init(CFeatures* l, CFeatures* r)
{
	// do not estimate anything on top of a failed base initialisation
	if (!CRealDistance::init(l, r))
		return false;

	CSimpleFeatures<float64_t>* lfeats = (CSimpleFeatures<float64_t>*) l;

	mean = lfeats->get_mean();
	icov = lfeats->get_cov();

	// icov holds the covariance up to here; replace it by its inverse
	CMath::inverse(icov);

	return true;
}

/** Cleanup hook, invoked from the destructor. Currently does nothing.
 *
 * NOTE(review): mean and icov obtained in init(l, r) are not released
 * here - confirm whether SGVector/SGMatrix need explicit destruction.
 */
void CMahalanobisDistance::cleanup()
{
}

/** Compute the Mahalanobis distance between feature vector idx_b of rhs and
 * the distribution (mean, inverse covariance) estimated in init(l, r).
 *
 * @param idx_a not used; present because of the inherited interface
 * @param idx_b index of the feature vector in rhs
 * @return sqrt((x-mean)' icov (x-mean)), or the value without the square
 *         root if disable_sqrt is set
 */
float64_t CMahalanobisDistance::compute(int32_t idx_a, int32_t idx_b)
{
	CSimpleFeatures<float64_t>* rfeats = (CSimpleFeatures<float64_t>*) rhs;

	int32_t blen;
	bool bfree;
	float64_t* bvec = rfeats->get_feature_vector(idx_b, blen, bfree);

	ASSERT(blen == mean.vlen);

	// Center a private copy instead of bvec itself: wrapping bvec in an
	// SGVector does not copy it, so subtracting the mean in place would
	// modify the data handed out by the features object (presumably its
	// own storage when bfree is false) and change the result of later calls.
	SGVector<float64_t> raw(bvec, blen);
	SGVector<float64_t> diff = raw.clone();

	// bvec is no longer needed; release it through the object it came from
	rfeats->free_feature_vector(bvec, idx_b, bfree);

	for (int32_t i = 0 ; i<blen ; i++)
		diff[i] -= mean[i];

	// v = icov * diff; the clone only provides a buffer of the right size
	// (beta is 0.0). The leading dimension of a column-major matrix is
	// its number of rows.
	SGVector<float64_t> v = diff.clone();
	cblas_dgemv(CblasColMajor, CblasNoTrans,
		icov.num_rows, icov.num_cols, 1.0, icov.matrix,
		icov.num_rows, diff.vector, 1, 0.0, v.vector, 1);

	// result = diff' * icov * diff
	float64_t result = cblas_ddot(v.vlen, v.vector, 1, diff.vector, 1);

	v.destroy_vector();
	diff.destroy_vector();

	if (disable_sqrt)
		return result;
	else
		return CMath::sqrt(result);
}

void CMahalanobisDistance::init()
{
disable_sqrt = false;

m_parameters->add(&disable_sqrt, "disable_sqrt", "If sqrt shall not be applied.");
}

#endif /* HAVE_LAPACK */
121 changes: 121 additions & 0 deletions src/shogun/distance/MahalanobisDistance.h
@@ -0,0 +1,121 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Fernando José Iglesias García
* Copyright (C) 2012 Fernando José Iglesias García
*/

#ifndef _MAHALANOBISDISTANCE_H__
#define _MAHALANOBISDISTANCE_H__

#ifdef HAVE_LAPACK

#include <shogun/lib/common.h>
#include <shogun/distance/RealDistance.h>
#include <shogun/features/SimpleFeatures.h>

namespace shogun
{
/** @brief Mahalanobis distance for real valued features.
 *
 * Measures how far a feature vector lies from a distribution of features
 * that is described by its mean \f$\mu\f$ and covariance \f$\Sigma\f$:
 *
 * \f[\displaystyle
 *  D = \sqrt{ (x_i - \mu)' \Sigma^{-1} (x_i - \mu) }
 * \f]
 *
 * With the square root disabled, the squared Mahalanobis distance is
 * returned instead:
 *
 * \f[\displaystyle
 *  D = (x_i - \mu)' \Sigma^{-1} (x_i - \mu)
 * \f]
 *
 * @see <a href="http://en.wikipedia.org/wiki/Mahalanobis_distance">
 * Wikipedia: Mahalanobis Distance</a>
 */
class CMahalanobisDistance: public CRealDistance
{
	public:
		/** default constructor */
		CMahalanobisDistance();

		/** constructor
		 *
		 * @param l features of left-hand side
		 * @param r features of right-hand side
		 */
		CMahalanobisDistance(CSimpleFeatures<float64_t>* l, CSimpleFeatures<float64_t>* r);

		/** destructor */
		virtual ~CMahalanobisDistance();

		/** initialise the distance with features
		 *
		 * @param l features of left-hand side
		 * @param r features of right-hand side
		 * @return whether the initialisation succeeded
		 */
		virtual bool init(CFeatures* l, CFeatures* r);

		/** clean up the distance */
		virtual void cleanup();

		/** type of this distance
		 *
		 * @return distance type D_MAHALANOBIS
		 */
		virtual EDistanceType get_distance_type()
		{
			return D_MAHALANOBIS;
		}

		/** feature type this distance works on
		 *
		 * @return feature type F_DREAL
		 */
		virtual EFeatureType get_feature_type()
		{
			return F_DREAL;
		}

		/** name of the distance
		 *
		 * @return name MahalanobisDistance
		 */
		virtual const char* get_name() const
		{
			return "MahalanobisDistance";
		}

		/** query whether the square root is skipped in compute()
		 *
		 * @return true if application of sqrt is disabled
		 */
		virtual bool get_disable_sqrt()
		{
			return disable_sqrt;
		}

		/** choose whether the square root is skipped in compute(),
		 * i.e. whether the squared distance is returned
		 *
		 * @param state new value of disable_sqrt
		 */
		virtual void set_disable_sqrt(bool state)
		{
			disable_sqrt=state;
		}

	protected:
		/** compute the Mahalanobis distance of one rhs feature vector
		 * to the distribution given by mean and icov
		 *
		 * @param idx_a not used, only present because of inheritance
		 * @param idx_b index of the feature vector in the rhs features
		 * @return the computed distance
		 */
		virtual float64_t compute(int32_t idx_a, int32_t idx_b);

	private:
		/** set defaults and register parameters */
		void init();

	protected:
		/** whether application of sqrt in compute() is disabled */
		bool disable_sqrt;

		/** mean vector of the lhs feature vectors */
		SGVector<float64_t> mean;

		/** inverse of the covariance matrix of the lhs feature vectors */
		SGMatrix<float64_t> icov;
};

} // namespace shogun
#endif /* HAVE_LAPACK */
#endif /* _MAHALANOBISDISTANCE_H__ */

0 comments on commit 22a5739

Please sign in to comment.