Skip to content

Commit

Permalink
Merge pull request #524 from pluskid/multiclass-ecoc
Browse files Browse the repository at this point in the history
Multiclass ecoc
  • Loading branch information
Soeren Sonnenburg committed May 15, 2012
2 parents a71d99e + fcc831c commit c54f3e6
Show file tree
Hide file tree
Showing 19 changed files with 403 additions and 73 deletions.
@@ -1,23 +1,6 @@
from os.path import exists
from tools.load import LoadMatrix
lm=LoadMatrix()

if exists('../data/../mldata/uci-20070111-optdigits.mat'):
from scipy.io import loadmat

mat = loadmat('../data/../mldata/uci-20070111-optdigits.mat')['int0'].astype(float)
X = mat[:-1,:]
Y = mat[-1,:]
isplit = X.shape[1]/2
traindat = X[:,:isplit]
label_traindat = Y[:isplit]
testdat = X[:, isplit:]
label_testdat = Y[isplit:]
else:
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
label_testdat = None
import classifier_multiclass_shared

[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

Expand Down
@@ -0,0 +1,23 @@
def prepare_data(use_toy=True):
    """Load the train/test data shared by the multiclass classifier examples.

    If ``use_toy`` is false and the optdigits ``.mat`` file exists, that
    data set is loaded and split column-wise into two halves (the first
    half is used for training, the rest for testing). Otherwise the toy
    files under ``../data`` are loaded.

    Returns:
        ``[traindat, label_traindat, testdat, label_testdat]``.
        ``label_testdat`` is ``None`` when the toy data is used.
    """
    from os.path import exists
    from tools.load import LoadMatrix
    lm = LoadMatrix()

    mldata_path = '../data/../mldata/uci-20070111-optdigits.mat'

    if not use_toy and exists(mldata_path):
        from scipy.io import loadmat

        mat = loadmat(mldata_path)['int0'].astype(float)
        # The last row holds the labels; all rows above it are the features.
        X = mat[:-1, :]
        Y = mat[-1, :]
        # Floor division keeps the split point an integer: true division
        # (Python 3) would yield a float, which is not a valid slice index.
        isplit = X.shape[1] // 2
        traindat = X[:, :isplit]
        label_traindat = Y[:isplit]
        testdat = X[:, isplit:]
        label_testdat = Y[isplit:]
    else:
        traindat = lm.load_numbers('../data/fm_train_real.dat')
        testdat = lm.load_numbers('../data/fm_test_real.dat')
        label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
        label_testdat = None

    return [traindat, label_traindat, testdat, label_testdat]
@@ -1,9 +1,6 @@
from tools.load import LoadMatrix
lm=LoadMatrix()
import classifier_multiclass_shared

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

Expand Down
@@ -1,9 +1,6 @@
from tools.load import LoadMatrix
lm=LoadMatrix()
import classifier_multiclass_shared

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

Expand Down
@@ -1,9 +1,6 @@
from tools.load import LoadMatrix
lm=LoadMatrix()
import classifier_multiclass_shared

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

Expand Down
@@ -1,9 +1,6 @@
from tools.load import LoadMatrix
lm=LoadMatrix()
import classifier_multiclass_shared

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

Expand Down
@@ -1,9 +1,6 @@
from tools.load import LoadMatrix
lm=LoadMatrix()
import classifier_multiclass_shared

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

Expand Down
7 changes: 7 additions & 0 deletions src/interfaces/modular/Multiclass.i
Expand Up @@ -30,6 +30,9 @@
%rename(ECOCRandomDenseEncoder) CECOCRandomDenseEncoder;
%rename(ECOCDiscriminantEncoder) CECOCDiscriminantEncoder;
%rename(ECOCHDDecoder) CECOCHDDecoder;
%rename(ECOCIHDDecoder) CECOCIHDDecoder;
%rename(ECOCEDDecoder) CECOCEDDecoder;
%rename(ECOCAEDDecoder) CECOCAEDDecoder;

%rename(MulticlassLibLinear) CMulticlassLibLinear;
%rename(MulticlassOCAS) CMulticlassOCAS;
Expand Down Expand Up @@ -59,7 +62,11 @@
%include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
%include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
%include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
%include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
%include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
%include <shogun/multiclass/ecoc/ECOCIHDDecoder.h>
%include <shogun/multiclass/ecoc/ECOCEDDecoder.h>
%include <shogun/multiclass/ecoc/ECOCAEDDecoder.h>
%include <shogun/multiclass/ecoc/ECOCStrategy.h>

%include <shogun/multiclass/MulticlassLibLinear.h>
Expand Down
4 changes: 4 additions & 0 deletions src/interfaces/modular/Multiclass_includes.i
Expand Up @@ -17,7 +17,11 @@
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCDecoder.h>
#include <shogun/multiclass/ecoc/ECOCOVREncoder.h>
#include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/multiclass/ecoc/ECOCIHDDecoder.h>
#include <shogun/multiclass/ecoc/ECOCEDDecoder.h>
#include <shogun/multiclass/ecoc/ECOCAEDDecoder.h>

#include <shogun/multiclass/MulticlassLibLinear.h>
#include <shogun/multiclass/MulticlassOCAS.h>
Expand Down
60 changes: 60 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCAEDDecoder.h
@@ -0,0 +1,60 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#ifndef ECOCAEDDECODER_H__
#define ECOCAEDDECODER_H__


#include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
#include <shogun/mathematics/Math.h>

namespace shogun
{

/** Attenuated Euclidean Distance Decoder.
*
* \f[
* AED(q, b_i) = \sqrt{\sum_{j=1}^n (q^j-b_i^j)^2 |b_i^j|}
* \f]
*/
class CECOCAEDDecoder: public CECOCSimpleDecoder
{
public:
	/** default constructor; this class declares no members of its own */
	CECOCAEDDecoder() {}

	/** virtual destructor; nothing to release here */
	virtual ~CECOCAEDDecoder() {}

	/** @return the name of this object, "ECOCAEDDecoder" */
	virtual const char* get_name() const
	{
		return "ECOCAEDDecoder";
	}

protected:
	/** whether the outputs should be turned into binary before decoding
	 *
	 * @return always false for this decoder
	 */
	virtual bool binary_decoding()
	{
		return false;
	}

	/** compute the attenuated Euclidean distance between the outputs
	 * and a code word
	 *
	 * @param outputs vector of outputs; its length determines how many
	 *        entries of code are read
	 * @param code code word, must provide at least outputs.vlen entries
	 * @return square root of the sum of squared differences, each
	 *         weighted by the absolute value of the code entry
	 */
	virtual float64_t compute_distance(SGVector<float64_t> outputs, const int32_t *code)
	{
		float64_t weighted_sq_sum = 0;
		for (int32_t j=0; j < outputs.vlen; ++j)
		{
			const float64_t diff = outputs[j]-code[j];
			/* the |code[j]| weight makes zero code entries contribute nothing */
			weighted_sq_sum += diff*diff * CMath::abs(code[j]);
		}
		return CMath::sqrt(weighted_sq_sum);
	}
};

} /* shogun */


#endif /* end of include guard: ECOCAEDDECODER_H__ */
27 changes: 9 additions & 18 deletions src/shogun/multiclass/ecoc/ECOCDecoder.cpp
Expand Up @@ -13,26 +13,17 @@

using namespace shogun;

int32_t CECOCDecoder::decide_label(const SGVector<float64_t> outputs, const SGMatrix<int32_t> codebook)
SGVector<float64_t> CECOCDecoder::binarize(const SGVector<float64_t> query)
{
SGVector<float64_t> query=outputs;

if (binary_decoding())
SGVector<float64_t> bquery(query.vlen);
for (int32_t i=0; i < query.vlen; ++i)
{
query = SGVector<float64_t>(outputs.vlen);
for (int32_t i=0; i < outputs.vlen; ++i)
{
if (outputs.vector[i] >= 0)
query.vector[i] = +1.0;
else
query.vector[i] = -1.0;
}
if (query.vector[i] >= 0)
bquery.vector[i] = +1.0;
else
bquery.vector[i] = -1.0;
}

SGVector<float64_t> distances(codebook.num_cols);
for (int32_t i=0; i < distances.vlen; ++i)
distances[i] = compute_distance(query, codebook.get_column_vector(i));

int32_t result = CMath::arg_min(distances.vector, 1, distances.vlen);
return result;
return bquery;
}

12 changes: 5 additions & 7 deletions src/shogun/multiclass/ecoc/ECOCDecoder.h
Expand Up @@ -12,7 +12,8 @@
#define ECOCDECODER_H__

#include <shogun/base/SGObject.h>
#include <shogun/lib/DataType.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>

namespace shogun
{
Expand All @@ -37,14 +38,11 @@ class CECOCDecoder: public CSGObject
* @param outputs outputs by classifiers
* @param codebook ECOC codebook
*/
virtual int32_t decide_label(const SGVector<float64_t> outputs, const SGMatrix<int32_t> codebook);
virtual int32_t decide_label(const SGVector<float64_t> outputs, const SGMatrix<int32_t> codebook)=0;

protected:
/** whether to turn the output into binary before decoding */
virtual bool binary_decoding()=0;

/** compute distance */
virtual float64_t compute_distance(SGVector<float64_t> outputs, const int32_t *code)=0;
/** binarize a query vector: entries >= 0 become +1, negative entries become -1 */
SGVector<float64_t> binarize(const SGVector<float64_t> query);
};

}
Expand Down
59 changes: 59 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCEDDecoder.h
@@ -0,0 +1,59 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#ifndef ECOCEDDECODER_H__
#define ECOCEDDECODER_H__

#include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
#include <shogun/mathematics/Math.h>

namespace shogun
{

/** Euclidean Distance Decoder.
*
* \f[
* ED(q, b_i) = \sqrt{\sum_{j=1}^n (q^j-b_i^j)^2}
* \f]
*/
class CECOCEDDecoder: public CECOCSimpleDecoder
{
public:
	/** default constructor; this class declares no members of its own */
	CECOCEDDecoder() {}

	/** virtual destructor; nothing to release here */
	virtual ~CECOCEDDecoder() {}

	/** @return the name of this object, "ECOCEDDecoder" */
	virtual const char* get_name() const
	{
		return "ECOCEDDecoder";
	}

protected:
	/** whether the outputs should be turned into binary before decoding
	 *
	 * @return always false for this decoder
	 */
	virtual bool binary_decoding()
	{
		return false;
	}

	/** compute the Euclidean distance between the outputs and a code word
	 *
	 * @param outputs vector of outputs; its length determines how many
	 *        entries of code are read
	 * @param code code word, must provide at least outputs.vlen entries
	 * @return square root of the sum of squared differences
	 */
	virtual float64_t compute_distance(SGVector<float64_t> outputs, const int32_t *code)
	{
		float64_t sq_sum = 0;
		for (int32_t j=0; j < outputs.vlen; ++j)
		{
			const float64_t diff = outputs[j]-code[j];
			sq_sum += diff*diff;
		}
		return CMath::sqrt(sq_sum);
	}
};

} /* shogun */

#endif /* end of include guard: ECOCEDDECODER_H__ */

9 changes: 7 additions & 2 deletions src/shogun/multiclass/ecoc/ECOCHDDecoder.h
Expand Up @@ -8,14 +8,17 @@
* Copyright (C) 2012 Chiyuan Zhang
*/

#include <shogun/multiclass/ecoc/ECOCDecoder.h>
#ifndef ECOCHDDECODER_H__
#define ECOCHDDECODER_H__

#include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
#include <shogun/multiclass/ecoc/ECOCUtil.h>

namespace shogun
{

/** Hamming Distance Decoder */
class CECOCHDDecoder: public CECOCDecoder
class CECOCHDDecoder: public CECOCSimpleDecoder
{
public:
/** constructor */
Expand Down Expand Up @@ -45,3 +48,5 @@ class CECOCHDDecoder: public CECOCDecoder
};

}

#endif /* end of include guard: ECOCHDDECODER_H__ */

0 comments on commit c54f3e6

Please sign in to comment.