Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
408 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Chiyuan Zhang | ||
* Copyright (C) 2012 Chiyuan Zhang | ||
*/ | ||
|
||
#include <shogun/multiclass/ecoc/ECOCOVOEncoder.h> | ||
|
||
using namespace shogun; | ||
|
||
SGMatrix<int32_t> CECOCOVOEncoder::create_codebook(int32_t num_classes) | ||
{ | ||
SGMatrix<int32_t> code_book(num_classes*(num_classes-1)/2, num_classes); | ||
code_book.zero(); | ||
int32_t k=0; | ||
for (int32_t i=0; i < num_classes; ++i) | ||
{ | ||
for (int32_t j=0; j < num_classes; ++j) | ||
{ | ||
code_book(k, i) = 1; | ||
code_book(k, j) = -1; | ||
k++; | ||
} | ||
} | ||
|
||
return code_book; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Chiyuan Zhang | ||
* Copyright (C) 2012 Chiyuan Zhang | ||
*/ | ||
|
||
#ifndef ECOCOVOENCODER_H__ | ||
#define ECOCOVOENCODER_H__ | ||
|
||
#include <shogun/multiclass/ecoc/ECOCEncoder.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
/** One-vs-One Encoder */ | ||
class CECOCOVOEncoder: public CECOCEncoder | ||
{ | ||
public: | ||
/** constructor */ | ||
CECOCOVOEncoder() {} | ||
|
||
/** destructor */ | ||
virtual ~CECOCOVOEncoder() {} | ||
|
||
/** get name */ | ||
virtual const char* get_name() const | ||
{ | ||
return "ECOCOVOEncoder"; | ||
} | ||
|
||
/** init codebook. | ||
* @param num_classes number of classes in this problem | ||
*/ | ||
virtual SGMatrix<int32_t> create_codebook(int32_t num_classes); | ||
}; | ||
|
||
} | ||
|
||
#endif /* end of include guard: ECOCOVOENCODER_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
namespace shogun | ||
{ | ||
|
||
/** One-vs-Rest Encoder */ | ||
class CECOCOVREncoder: public CECOCEncoder | ||
{ | ||
public: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Chiyuan Zhang | ||
* Copyright (C) 2012 Chiyuan Zhang | ||
*/ | ||
|
||
#ifndef ECOCRANDOMDENSEENCODER_H__ | ||
#define ECOCRANDOMDENSEENCODER_H__ | ||
|
||
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
class CECOCRandomDenseEncoder: public CECOCRandomSparseEncoder | ||
{ | ||
public: | ||
/** constructor | ||
* @param maxiter max number of iterations | ||
* @param codelen code length, if set to zero, will be computed automatically via get_default_code_length | ||
* @param pposone probability of +1 | ||
* @param pnegone probability of -1 | ||
* | ||
* @see get_default_code_length | ||
*/ | ||
CECOCRandomDenseEncoder(int32_t maxiter=1000, int32_t codelen=0, float64_t pposone=0.5, float64_t pnegone=0.5) | ||
:CECOCRandomSparseEncoder(maxiter, codelen, 0, pposone, pnegone) | ||
{ | ||
} | ||
|
||
/** destructor */ | ||
virtual ~CECOCRandomDenseEncoder() {} | ||
|
||
/** set probability | ||
* @param pposone probability of +1 | ||
* @param pnegone probability of -1 | ||
*/ | ||
void set_probability(float64_t pposone, float64_t pnegone) | ||
{ | ||
CECOCRandomSparseEncoder::set_probability(0, pposone, pnegone); | ||
} | ||
|
||
/** get name */ | ||
virtual const char* get_name() const { return "ECOCRandomDenseEncoder"; } | ||
|
||
/** get default code length | ||
* @param num_classes number of classes | ||
* | ||
* In Dense Random Coding, 10 * log(num_classes) is suggested as code length. | ||
* See | ||
* | ||
* E. Allwein, R. Schapire, and Y. Singer. Reducing multiclass to binary: A unifying approach | ||
* for margin classifiers. Journal of Machine Learning Research, 1:113-141, 2002. | ||
*/ | ||
virtual int32_t get_default_code_length(int32_t num_classes) const | ||
{ | ||
return static_cast<int32_t>(CMath::round(10 * CMath::log(num_classes))); | ||
} | ||
}; | ||
|
||
} // namespace shogun | ||
|
||
#endif /* end of include guard: ECOCRANDOMDENSEENCODER_H__ */ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Chiyuan Zhang | ||
* Copyright (C) 2012 Chiyuan Zhang | ||
*/ | ||
|
||
#include <limits> | ||
#include <algorithm> | ||
|
||
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h> | ||
#include <shogun/multiclass/ecoc/ECOCUtil.h> | ||
|
||
using namespace shogun; | ||
|
||
CECOCRandomSparseEncoder::CECOCRandomSparseEncoder(int32_t maxiter, int32_t codelen, | ||
float64_t pzero, float64_t pposone, float64_t pnegone) | ||
:m_maxiter(maxiter), m_codelen(codelen), m_pzero(pzero), m_pposone(pposone), m_pnegone(pnegone) | ||
{ | ||
if (!check_probability(pzero, pposone, pnegone)) | ||
SG_ERROR("probability of 0, +1 and -1 must sum to one"); | ||
|
||
init(); | ||
} | ||
|
||
void CECOCRandomSparseEncoder::init() | ||
{ | ||
SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE); | ||
SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE); | ||
SG_ADD(&m_pzero, "pzero", "probability of 0", MS_NOT_AVAILABLE); | ||
SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE); | ||
SG_ADD(&m_pnegone, "pnegone", "probability of -1", MS_NOT_AVAILABLE); | ||
} | ||
|
||
void CECOCRandomSparseEncoder::set_probability(float64_t pzero, float64_t pposone, float64_t pnegone) | ||
{ | ||
if (!check_probability(pzero, pposone, pnegone)) | ||
SG_ERROR("probability of 0, +1 and -1 must sum to one"); | ||
|
||
m_pzero = pzero; | ||
m_pposone = pposone; | ||
m_pnegone = pnegone; | ||
} | ||
|
||
SGMatrix<int32_t> CECOCRandomSparseEncoder::create_codebook(int32_t num_classes) | ||
{ | ||
int32_t codelen = m_codelen; | ||
if (codelen <= 0) | ||
codelen = get_default_code_length(num_classes); | ||
|
||
SGMatrix<int32_t> best_codebook(codelen, num_classes); | ||
int32_t best_dist = 0; | ||
|
||
SGMatrix<int32_t> codebook(codelen, num_classes); | ||
int32_t n_iter = 0; | ||
while (true) | ||
{ | ||
// fill codebook | ||
codebook.zero(); | ||
for (int32_t i=0; i < codelen; ++i) | ||
{ | ||
for (int32_t j=0; j < num_classes; ++j) | ||
{ | ||
float64_t randval = CMath::random(0.0, 1.0); | ||
if (randval > m_pzero) | ||
{ | ||
if (randval > m_pzero+m_pposone) | ||
codebook(i, j) = -1; | ||
else | ||
codebook(i, j) = +1; | ||
} | ||
} | ||
} | ||
|
||
bool valid = true; | ||
for (int32_t i=0; i < codelen; ++i) | ||
{ | ||
bool p1_occur = false, n1_occur = false; | ||
for (int32_t j=0; j < num_classes; ++j) | ||
if (codebook(i, j) == 1) | ||
p1_occur = true; | ||
else if (codebook(i, j) == -1) | ||
n1_occur = true; | ||
|
||
if (!p1_occur || !n1_occur) | ||
{ | ||
valid = false; | ||
break; | ||
} | ||
} | ||
|
||
if (valid) | ||
{ | ||
// see if this is a better codebook | ||
// compute the minimum pairwise code distance | ||
int32_t min_dist = std::numeric_limits<int32_t>::max(); | ||
for (int32_t i=0; i < num_classes; ++i) | ||
{ | ||
for (int32_t j=i+1; j < num_classes; ++j) | ||
{ | ||
int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i), | ||
codebook.get_column_vector(j), codelen); | ||
if (dist < min_dist) | ||
min_dist = dist; | ||
} | ||
} | ||
|
||
if (min_dist > best_dist) | ||
{ | ||
best_dist = min_dist; | ||
std::copy(best_codebook.matrix, best_codebook.matrix + codelen*num_classes, | ||
codebook.matrix); | ||
} | ||
} | ||
|
||
n_iter++; | ||
if (best_dist > 0 && n_iter >= m_maxiter) | ||
break; | ||
} | ||
|
||
codebook.destroy_matrix(); | ||
return best_codebook; | ||
} |
Oops, something went wrong.