Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #527 from pluskid/multiclass-ecoc
Multiclass ecoc
  • Loading branch information
Soeren Sonnenburg committed May 16, 2012
2 parents f4b1c9e + 68cf2c5 commit a60c648
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 23 deletions.
72 changes: 72 additions & 0 deletions examples/undocumented/python_modular/classifier_multiclass_ecoc.py
@@ -0,0 +1,72 @@
import classifier_multiclass_shared
# Default run: load the toy data set.
traindat, label_traindat, testdat, label_testdat = \
    classifier_multiclass_shared.prepare_data()
# Alternative run on opt-digits (if available): use the call below instead.
# traindat, label_traindat, testdat, label_testdat = \
#     classifier_multiclass_shared.prepare_data(False)


import shogun.Classifier as Classifier
from shogun.Classifier import ECOCStrategy
from shogun.Features import RealFeatures, Labels
from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
from shogun.Evaluation import MulticlassAccuracy

import re
# Collect the names of all ECOC<Something>Encoder / ECOC<Something>Decoder
# attributes exported by the Classifier module.
#
# re.match() only anchors at the start of the string, so the patterns are
# terminated with '$'. Without it, any attribute that merely *begins* with
# such a name would be selected as well; run_ecoc() would then try to
# instantiate it, and the report strips a fixed 7-character suffix
# ('Encoder' / 'Decoder') from every name.
# '.+' requires at least one character between 'ECOC' and the suffix, so
# the bare names 'ECOCEncoder' and 'ECOCDecoder' are not matched.
encoders = [name for name in dir(Classifier)
            if re.match(r'ECOC.+Encoder$', name)]
decoders = [name for name in dir(Classifier)
            if re.match(r'ECOC.+Decoder$', name)]

# Wrap the raw training/test data in feature and label objects.
fea_train, fea_test = RealFeatures(traindat), RealFeatures(testdat)
gnd_train = Labels(label_traindat)
# Test labels are optional; without them no accuracy is computed later on.
gnd_test = Labels(label_testdat) if label_testdat is not None else None

# Base classifier handed to the multiclass machine in run_ecoc().
base_classifier = LibLinear(L2R_L2LOSS_SVC)
base_classifier.set_bias_enabled(True)

print('Testing with %d encoders and %d decoders' % (len(encoders), len(decoders)))
print('-' * 70)
# Two-stage template: the first '%' substitution chooses the conversion type
# of the last three columns, the second one fills in the actual row values.
format_str = '%%15s + %%-10s %%-10%s %%-10%s %%-10%s'
header_fmt = format_str % ('s', 's', 's')
print(header_fmt % ('encoder', 'decoder', 'codelen', 'time', 'accuracy'))

def run_ecoc(ier, idr):
    """Train and evaluate one encoder/decoder combination.

    ier and idr are indices into the module-level `encoders` and
    `decoders` name lists. Both classes are looked up on the Classifier
    module and instantiated without arguments.

    Returns a tuple (num_machines, accuracy): num_machines is what the
    trained multiclass machine reports via get_num_machines(), and
    accuracy is None when no test labels (gnd_test) are available.
    """
    encoder_cls = getattr(Classifier, encoders[ier])
    encoder = encoder_cls()
    decoder_cls = getattr(Classifier, decoders[idr])
    decoder = decoder_cls()

    # Data-dependent encoders expose set_labels(); those are given the
    # training labels and features before the codebook is built.
    if hasattr(encoder, 'set_labels'):
        encoder.set_labels(gnd_train)
        encoder.set_features(fea_train)

    ecoc = ECOCStrategy(encoder, decoder)
    machine = LinearMulticlassMachine(ecoc, fea_train, base_classifier, gnd_train)
    machine.train()
    predicted = machine.apply(fea_test)

    # Accuracy can only be measured when ground-truth test labels exist.
    accuracy = None
    if gnd_test is not None:
        scorer = MulticlassAccuracy()
        accuracy = scorer.evaluate(predicted, gnd_test)

    return (machine.get_num_machines(), accuracy)


import time

# time.clock() was deprecated in Python 3.3 and removed in Python 3.8, where
# calling it raises AttributeError. Use time.perf_counter when the
# interpreter provides it and fall back to time.clock on older ones.
timer = getattr(time, 'perf_counter', None) or time.clock

# Run every encoder/decoder combination and print one report row for each.
for ier in range(len(encoders)):
    for idr in range(len(decoders)):
        t_begin = timer()
        (codelen, acc) = run_ecoc(ier, idr)
        t_elapse = timer() - t_begin

        # Without test labels there is no accuracy; print a placeholder
        # string instead of a float.
        if acc is None:
            acc_fmt = 's'
            acc = 'N/A'
        else:
            acc_fmt = '.4f'

        # [4:-7] strips the 'ECOC' prefix and the 7-character
        # 'Encoder' / 'Decoder' suffix from the class names.
        print((format_str % ('d', '.3f', acc_fmt)) %
              (encoders[ier][4:-7], decoders[idr][4:-7], codelen, t_elapse, acc))

2 changes: 2 additions & 0 deletions src/interfaces/modular/Multiclass.i
Expand Up @@ -29,6 +29,7 @@
%rename(ECOCRandomSparseEncoder) CECOCRandomSparseEncoder;
%rename(ECOCRandomDenseEncoder) CECOCRandomDenseEncoder;
%rename(ECOCDiscriminantEncoder) CECOCDiscriminantEncoder;
%rename(ECOCForestEncoder) CECOCForestEncoder;
%rename(ECOCHDDecoder) CECOCHDDecoder;
%rename(ECOCIHDDecoder) CECOCIHDDecoder;
%rename(ECOCEDDecoder) CECOCEDDecoder;
Expand Down Expand Up @@ -62,6 +63,7 @@
%include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
%include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
%include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
%include <shogun/multiclass/ecoc/ECOCForestEncoder.h>
%include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
%include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
%include <shogun/multiclass/ecoc/ECOCIHDDecoder.h>
Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Multiclass_includes.i
Expand Up @@ -15,6 +15,7 @@
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCForestEncoder.h>
#include <shogun/multiclass/ecoc/ECOCDecoder.h>
#include <shogun/multiclass/ecoc/ECOCOVREncoder.h>
#include <shogun/multiclass/ecoc/ECOCSimpleDecoder.h>
Expand Down
19 changes: 12 additions & 7 deletions src/shogun/multiclass/ecoc/ECOCDiscriminantEncoder.cpp
Expand Up @@ -30,14 +30,16 @@ CECOCDiscriminantEncoder::~CECOCDiscriminantEncoder()
void CECOCDiscriminantEncoder::init()
{
// default parameters
m_iterations = 5;
m_iterations = 25;
m_num_trees = 1;

// init values
m_features = NULL;
m_labels = NULL;

// parameters
m_parameters->add(&m_iterations, "iterations", "number of iterations in SFFS");

SG_ADD(&m_iterations, "iterations", "number of iterations in SFFS", MS_NOT_AVAILABLE);
}

void CECOCDiscriminantEncoder::set_features(CDenseFeatures<float64_t> *features)
Expand All @@ -60,15 +62,18 @@ SGMatrix<int32_t> CECOCDiscriminantEncoder::create_codebook(int32_t num_classes)
SG_ERROR("Need features and labels to learn the codebook");

m_feats = m_features->get_feature_matrix();
m_codebook = SGMatrix<int32_t>(num_classes-1, num_classes);
m_codebook = SGMatrix<int32_t>(m_num_trees * (num_classes-1), num_classes);
m_codebook.zero();
m_code_idx = 0;

vector<int32_t> classes(num_classes);
for (int32_t i=0; i < num_classes; ++i)
classes[i] = i;
for (int32_t itree = 0; itree < m_num_trees; ++itree)
{
vector<int32_t> classes(num_classes);
for (int32_t i=0; i < num_classes; ++i)
classes[i] = i;

binary_partition(classes);
binary_partition(classes);
}

m_feats = SGMatrix<float64_t>(); // release memory
return m_codebook;
Expand Down
3 changes: 2 additions & 1 deletion src/shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h
Expand Up @@ -62,7 +62,7 @@ class CECOCDiscriminantEncoder: public CECOCEncoder
*/
virtual SGMatrix<int32_t> create_codebook(int32_t num_classes);

private:
protected:
void init();


Expand All @@ -75,6 +75,7 @@ class CECOCDiscriminantEncoder: public CECOCEncoder
const std::set<int32_t>& idata, int32_t *hist);

int32_t m_iterations;
int32_t m_num_trees;

SGMatrix<int32_t> m_codebook;
int32_t m_code_idx;
Expand Down
26 changes: 26 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCForestEncoder.cpp
@@ -0,0 +1,26 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#include <shogun/multiclass/ecoc/ECOCForestEncoder.h>

using namespace shogun;

CECOCForestEncoder::CECOCForestEncoder()
{
	// A forest encoder starts out with three trees.
	const int32_t default_num_trees = 3;
	m_num_trees = default_num_trees;

	// Expose the tree count as the named parameter "num_trees".
	SG_ADD(&m_num_trees, "num_trees", "number of trees", MS_NOT_AVAILABLE);
}

void CECOCForestEncoder::set_num_trees(int32_t num_trees)
{
	// At least one tree is required; smaller values are reported through
	// SG_ERROR before the member is assigned.
	const bool too_few = !(num_trees >= 1);
	if (too_few)
	{
		SG_ERROR("number of trees (%d) should be >= 1", num_trees);
	}

	m_num_trees = num_trees;
}
51 changes: 51 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCForestEncoder.h
@@ -0,0 +1,51 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#ifndef ECOCFORESTENCODER_H__
#define ECOCFORESTENCODER_H__

#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>

namespace shogun
{

/** Forest ECOC Encoder.
*
* A data-dependent ECOC coding scheme that learns a tree-style codebook. See the
* following paper for details
*
* Sergio Escalera, Oriol Pujol, Petia Radeva. Boosted Landmarks of
* Contextual Descriptors and Forest-ECOC: A novel framework to detect and
* classify objects in cluttered scenes. Pattern Recognition Letters, 2007.
*
* The class derives from CECOCDiscriminantEncoder and declares no codebook
* logic of its own: it only adds a constructor plus an accessor pair for the
* inherited tree count (m_num_trees).
*/
class CECOCForestEncoder: public CECOCDiscriminantEncoder
{
public:
/** constructor
*
* The definition in ECOCForestEncoder.cpp sets the number of trees to 3
* and registers it as the parameter "num_trees".
*/
CECOCForestEncoder();

/** destructor (empty; this class adds no members of its own) */
virtual ~CECOCForestEncoder() {}

/** get name
*
* @return the string "ECOCForestEncoder"
*/
virtual const char* get_name() const { return "ECOCForestEncoder"; }

/** get number of trees
*
* @return current value of m_num_trees
*/
int32_t get_num_trees() const { return m_num_trees; }

/** set number of trees
*
* @param num_trees new number of trees; the definition in
* ECOCForestEncoder.cpp reports values smaller than 1 through SG_ERROR
*/
void set_num_trees(int32_t num_trees);
};

} /* shogun */

#endif /* end of include guard: ECOCFORESTENCODER_H__ */

2 changes: 1 addition & 1 deletion src/shogun/multiclass/ecoc/ECOCOVOEncoder.cpp
Expand Up @@ -19,7 +19,7 @@ SGMatrix<int32_t> CECOCOVOEncoder::create_codebook(int32_t num_classes)
int32_t k=0;
for (int32_t i=0; i < num_classes; ++i)
{
for (int32_t j=0; j < num_classes; ++j)
for (int32_t j=i+1; j < num_classes; ++j)
{
code_book(k, i) = 1;
code_book(k, j) = -1;
Expand Down
49 changes: 35 additions & 14 deletions src/shogun/multiclass/ecoc/ECOCRandomDenseEncoder.cpp
Expand Up @@ -68,29 +68,50 @@ SGMatrix<int32_t> CECOCRandomDenseEncoder::create_codebook(int32_t num_classes)
}
}

// see if this is a better codebook
// compute the minimum pairwise code distance
int32_t min_dist = std::numeric_limits<int32_t>::max();
for (int32_t i=0; i < num_classes; ++i)
bool valid = true;
for (int32_t i=0; i < codelen; ++i)
{
for (int32_t j=i+1; j < num_classes; ++j)
bool p1_occur = false, n1_occur = false;
for (int32_t j=0; j < num_classes; ++j)
if (codebook(i, j) == 1)
p1_occur = true;
else if (codebook(i, j) == -1)
n1_occur = true;

if (!p1_occur || !n1_occur)
{
int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
codebook.get_column_vector(j), codelen);
if (dist < min_dist)
min_dist = dist;
valid = false;
break;
}
}

if (min_dist > best_dist)
if (valid)
{
best_dist = min_dist;
std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
best_codebook.matrix);
// see if this is a better codebook
// compute the minimum pairwise code distance
int32_t min_dist = std::numeric_limits<int32_t>::max();
for (int32_t i=0; i < num_classes; ++i)
{
for (int32_t j=i+1; j < num_classes; ++j)
{
int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
codebook.get_column_vector(j), codelen);
if (dist < min_dist)
min_dist = dist;
}
}

if (min_dist > best_dist)
{
best_dist = min_dist;
std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
best_codebook.matrix);
}
}

if (++n_iter >= m_maxiter)
break;
if (best_dist > 0) // already obtained a good codebook
break;
}

return best_codebook;
Expand Down

0 comments on commit a60c648

Please sign in to comment.