Skip to content

Commit

Permalink
ECOC Random Dense/Sparse Coding.
Browse files Browse the repository at this point in the history
  • Loading branch information
pluskid committed Apr 30, 2012
1 parent a5e054b commit 2b13d07
Show file tree
Hide file tree
Showing 10 changed files with 408 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/shogun/multiclass/ecoc/ECOCEncoder.h
Expand Up @@ -11,6 +11,7 @@
#ifndef ECOCENCODER_H__
#define ECOCENCODER_H__

#include <shogun/base/Parameter.h>
#include <shogun/base/SGObject.h>
#include <shogun/lib/DataType.h>

Expand Down
6 changes: 2 additions & 4 deletions src/shogun/multiclass/ecoc/ECOCHDDecoder.h
Expand Up @@ -9,6 +9,7 @@
*/

#include <shogun/multiclass/ecoc/ECOCDecoder.h>
#include <shogun/multiclass/ecoc/ECOCUtil.h>

namespace shogun
{
Expand Down Expand Up @@ -39,10 +40,7 @@ class CECOCHDDecoder: public CECOCDecoder
/** compute distance */
virtual float64_t compute_distance(const SGVector<float64_t> &outputs, const int32_t *code)
{
float64_t dist=0;
for (int32_t i=0; i < outputs.vlen; ++i)
dist += CMath::abs(code[i]-outputs[i]);
return dist;
return CECOCUtil::hamming_distance(outputs.vector, code, outputs.vlen);
}
};

Expand Down
32 changes: 32 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCOVOEncoder.cpp
@@ -0,0 +1,32 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#include <shogun/multiclass/ecoc/ECOCOVOEncoder.h>

using namespace shogun;

SGMatrix<int32_t> CECOCOVOEncoder::create_codebook(int32_t num_classes)
{
SGMatrix<int32_t> code_book(num_classes*(num_classes-1)/2, num_classes);
code_book.zero();
int32_t k=0;
for (int32_t i=0; i < num_classes; ++i)
{
for (int32_t j=0; j < num_classes; ++j)
{
code_book(k, i) = 1;
code_book(k, j) = -1;
k++;
}
}

return code_book;
}

43 changes: 43 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCOVOEncoder.h
@@ -0,0 +1,43 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#ifndef ECOCOVOENCODER_H__
#define ECOCOVOENCODER_H__

#include <shogun/multiclass/ecoc/ECOCEncoder.h>

namespace shogun
{

/** One-vs-One Encoder */
class CECOCOVOEncoder: public CECOCEncoder
{
public:
/** constructor */
CECOCOVOEncoder() {}

/** destructor */
virtual ~CECOCOVOEncoder() {}

/** get name */
virtual const char* get_name() const
{
return "ECOCOVOEncoder";
}

/** init codebook.
* @param num_classes number of classes in this problem
*/
virtual SGMatrix<int32_t> create_codebook(int32_t num_classes);
};

}

#endif /* end of include guard: ECOCOVOENCODER_H__ */
1 change: 1 addition & 0 deletions src/shogun/multiclass/ecoc/ECOCOVREncoder.h
Expand Up @@ -16,6 +16,7 @@
namespace shogun
{

/** One-vs-Rest Encoder */
class CECOCOVREncoder: public CECOCEncoder
{
public:
Expand Down
68 changes: 68 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h
@@ -0,0 +1,68 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#ifndef ECOCRANDOMDENSEENCODER_H__
#define ECOCRANDOMDENSEENCODER_H__

#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>

namespace shogun
{

class CECOCRandomDenseEncoder: public CECOCRandomSparseEncoder
{
public:
/** constructor
* @param maxiter max number of iterations
* @param codelen code length, if set to zero, will be computed automatically via get_default_code_length
* @param pposone probability of +1
* @param pnegone probability of -1
*
* @see get_default_code_length
*/
CECOCRandomDenseEncoder(int32_t maxiter=1000, int32_t codelen=0, float64_t pposone=0.5, float64_t pnegone=0.5)
:CECOCRandomSparseEncoder(maxiter, codelen, 0, pposone, pnegone)
{
}

/** destructor */
virtual ~CECOCRandomDenseEncoder() {}

/** set probability
* @param pposone probability of +1
* @param pnegone probability of -1
*/
void set_probability(float64_t pposone, float64_t pnegone)
{
CECOCRandomSparseEncoder::set_probability(0, pposone, pnegone);
}

/** get name */
virtual const char* get_name() const { return "ECOCRandomDenseEncoder"; }

/** get default code length
* @param num_classes number of classes
*
* In Dense Random Coding, 10 * log(num_classes) is suggested as code length.
* See
*
* E. Allwein, R. Schapire, and Y. Singer. Reducing multiclass to binary: A unifying approach
* for margin classifiers. Journal of Machine Learning Research, 1:113-141, 2002.
*/
virtual int32_t get_default_code_length(int32_t num_classes) const
{
return static_cast<int32_t>(CMath::round(10 * CMath::log(num_classes)));
}
};

} // namespace shogun

#endif /* end of include guard: ECOCRANDOMDENSEENCODER_H__ */

126 changes: 126 additions & 0 deletions src/shogun/multiclass/ecoc/ECOCRandomSparseEncoder.cpp
@@ -0,0 +1,126 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/

#include <limits>
#include <algorithm>

#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCUtil.h>

using namespace shogun;

CECOCRandomSparseEncoder::CECOCRandomSparseEncoder(int32_t maxiter, int32_t codelen,
float64_t pzero, float64_t pposone, float64_t pnegone)
:m_maxiter(maxiter), m_codelen(codelen), m_pzero(pzero), m_pposone(pposone), m_pnegone(pnegone)
{
if (!check_probability(pzero, pposone, pnegone))
SG_ERROR("probability of 0, +1 and -1 must sum to one");

init();
}

void CECOCRandomSparseEncoder::init()
{
SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
SG_ADD(&m_pzero, "pzero", "probability of 0", MS_NOT_AVAILABLE);
SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
SG_ADD(&m_pnegone, "pnegone", "probability of -1", MS_NOT_AVAILABLE);
}

void CECOCRandomSparseEncoder::set_probability(float64_t pzero, float64_t pposone, float64_t pnegone)
{
if (!check_probability(pzero, pposone, pnegone))
SG_ERROR("probability of 0, +1 and -1 must sum to one");

m_pzero = pzero;
m_pposone = pposone;
m_pnegone = pnegone;
}

SGMatrix<int32_t> CECOCRandomSparseEncoder::create_codebook(int32_t num_classes)
{
int32_t codelen = m_codelen;
if (codelen <= 0)
codelen = get_default_code_length(num_classes);

SGMatrix<int32_t> best_codebook(codelen, num_classes);
int32_t best_dist = 0;

SGMatrix<int32_t> codebook(codelen, num_classes);
int32_t n_iter = 0;
while (true)
{
// fill codebook
codebook.zero();
for (int32_t i=0; i < codelen; ++i)
{
for (int32_t j=0; j < num_classes; ++j)
{
float64_t randval = CMath::random(0.0, 1.0);
if (randval > m_pzero)
{
if (randval > m_pzero+m_pposone)
codebook(i, j) = -1;
else
codebook(i, j) = +1;
}
}
}

bool valid = true;
for (int32_t i=0; i < codelen; ++i)
{
bool p1_occur = false, n1_occur = false;
for (int32_t j=0; j < num_classes; ++j)
if (codebook(i, j) == 1)
p1_occur = true;
else if (codebook(i, j) == -1)
n1_occur = true;

if (!p1_occur || !n1_occur)
{
valid = false;
break;
}
}

if (valid)
{
// see if this is a better codebook
// compute the minimum pairwise code distance
int32_t min_dist = std::numeric_limits<int32_t>::max();
for (int32_t i=0; i < num_classes; ++i)
{
for (int32_t j=i+1; j < num_classes; ++j)
{
int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
codebook.get_column_vector(j), codelen);
if (dist < min_dist)
min_dist = dist;
}
}

if (min_dist > best_dist)
{
best_dist = min_dist;
std::copy(best_codebook.matrix, best_codebook.matrix + codelen*num_classes,
codebook.matrix);
}
}

n_iter++;
if (best_dist > 0 && n_iter >= m_maxiter)
break;
}

codebook.destroy_matrix();
return best_codebook;
}

0 comments on commit 2b13d07

Please sign in to comment.