Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
324 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Copyright (C) 2012 Sergey Lisitsyn | ||
*/ | ||
|
||
#include <shogun/classifier/svm/PegasosSVM.h> | ||
#include <shogun/optimization/pegasos/pegasos_optimize.h> | ||
|
||
using namespace shogun; | ||
|
||
/* Default constructor: registers parameters and sets default
 * hyper-parameters via init(); no features/labels are attached. */
CPegasosSVM::CPegasosSVM()
: CLinearMachine()
{
	init();
}
|
||
/* Constructor setting the regularization constant and the training data.
 *
 * @param lambda regularization constant (overrides the default from init())
 * @param traindat training features
 * @param trainlab training labels
 */
CPegasosSVM::CPegasosSVM(
	float64_t lambda, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	m_lambda = lambda;

	set_features(traindat);
	set_labels(trainlab);
}
|
||
/* Sets default hyper-parameters and registers them with the parameter
 * framework: lambda participates in model selection, the iteration
 * limit does not. */
void CPegasosSVM::init()
{
	m_lambda = 0.01; // default regularization constant
	set_max_iterations(); // default iteration limit (1000, see header)

	SG_ADD(&m_lambda, "lambda", "lambda regularization constant", MS_AVAILABLE);
	SG_ADD(&m_max_iterations, "max_iterations", "max number of iterations",
		MS_NOT_AVAILABLE);
}
|
||
/* Destructor: nothing to free here; no resources are owned directly by
 * this class. */
CPegasosSVM::~CPegasosSVM()
{
}
|
||
/* Trains the linear SVM with the Pegasos stochastic sub-gradient solver.
 *
 * @param data optional training features; when given, replaces the
 *             currently set features (must be dot features)
 * @return true on completion
 */
bool CPegasosSVM::train_machine(CFeatures* data)
{
	if (data)
		set_features((CDotFeatures*)data);

	// solver diagnostics; filled by CPegasos::Learn but not used here
	double objective = 0.0;
	double solution_norm = 0.0;
	double hinge_loss = 0.0;

	// solver configuration: default Pegasos learning-rate rule (type 0)
	// and the Pegasos ball-projection rule (type 0)
	int rate_rule = 0;
	int rate_constant = 0;
	int proj_rule = 0;
	double proj_constant = 0;

	// one example per iteration, average over the last 100 iterates;
	// bias stays fixed at zero (Pegasos learns an unbiased hyperplane)
	w = CPegasos::Learn(features, ((CBinaryLabels*)m_labels)->get_labels(),
			features->get_dim_feature_space(), m_lambda,
			m_max_iterations, 1, 100,
			objective, solution_norm, hinge_loss,
			rate_rule, rate_constant, proj_rule, proj_constant);
	set_bias(0.0);

	return true;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
 * Copyright (C) 2012 Sergey Lisitsyn
*/ | ||
|
||
#ifndef PEGASOS_SVM_H_ | ||
#define PEGASOS_SVM_H_ | ||
|
||
#include <shogun/lib/config.h> | ||
|
||
#include <shogun/lib/common.h> | ||
#include <shogun/base/Parameter.h> | ||
#include <shogun/machine/LinearMachine.h> | ||
#include <shogun/optimization/pegasos/pegasos_optimize.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
/** @brief */ | ||
/** @brief Linear binary SVM trained with the Pegasos stochastic
 * sub-gradient solver (Shalev-Shwartz et al.). Training is delegated to
 * CPegasos::Learn; the model is a weight vector with zero bias.
 */
class CPegasosSVM : public CLinearMachine
{
	public:
		/* binary classification problem */
		MACHINE_PROBLEM_TYPE(PT_BINARY);

		/** default constructor */
		CPegasosSVM();

		/** constructor
		 *
		 * @param lambda constant lambda (regularization strength)
		 * @param traindat training features
		 * @param trainlab training labels
		 */
		CPegasosSVM(
			float64_t lambda, CDotFeatures* traindat,
			CLabels* trainlab);

		/** destructor */
		virtual ~CPegasosSVM();

		/** set lambda
		 *
		 * @param lambda regularization constant used by the solver
		 */
		inline void set_lambda(float64_t lambda) { m_lambda = lambda; }

		/** get lambda
		 *
		 * @return current regularization constant
		 */
		inline float64_t get_lambda() { return m_lambda; }

		/** @return object name */
		inline virtual const char* get_name() const { return "PegasosSVM"; }

		/** get the maximum number of iterations solver is allowed to do */
		inline int32_t get_max_iterations()
		{
			return m_max_iterations;
		}

		/** set the maximum number of iterations solver is allowed to do
		 *
		 * @param max_iter iteration limit (defaults to 1000)
		 */
		inline void set_max_iterations(int32_t max_iter=1000)
		{
			m_max_iterations=max_iter;
		}

	private:

		/** registers parameters and sets default hyper-parameters */
		void init();

	protected:
		/** train linear SVM classifier
		 *
		 * @param data training data (parameter can be avoided if distance or
		 * kernel-based classifiers are used and distance/kernels are
		 * initialized with train data)
		 *
		 * @return whether training was successful
		 */
		virtual bool train_machine(CFeatures* data=NULL);

	protected:
		/** lambda regularization constant */
		float64_t m_lambda;
		/** maximum number of solver iterations */
		int32_t m_max_iterations;
};
|
||
} /* namespace shogun */ | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
// Distributed under GNU General Public License (see license.txt for details). | ||
// | ||
// Copyright (c) 2007 Shai Shalev-Shwartz. | ||
// All Rights Reserved. | ||
|
||
#include <shogun/optimization/pegasos/pegasos_optimize.h> | ||
|
||
using namespace shogun; | ||
|
||
// ------------------------------------------------------------// | ||
// ---------------- OPTIMIZING --------------------------------// | ||
// ------------------------------------------------------------// | ||
/** Pegasos stochastic sub-gradient solver for a linear SVM
 * (Shalev-Shwartz, Singer, Srebro: "Pegasos: Primal Estimated
 * sub-GrAdient SOlver for SVM", ICML 2007).
 *
 * @param features training features
 * @param labels training labels (hinge loss assumes values in {-1,+1})
 * @param dimension feature-space dimension
 * @param lambda regularization constant
 * @param max_iter number of stochastic iterations
 * @param exam_per_iter examples sampled per iteration
 * @param num_iter_to_avg how many of the final iterates to average
 * @param obj_value [out] primal objective lambda/2*||w||^2 + mean hinge loss
 * @param norm_value [out] two-norm of the averaged solution
 * @param loss_value [out] mean hinge loss of the averaged solution
 * @param eta_rule_type 0: Pegasos rule 1/(lambda*t), 1: Norma rule
 *        eta_constant/sqrt(t), otherwise constant eta_constant
 * @param eta_constant learning-rate constant for non-Pegasos rules
 * @param projection_rule 0: Pegasos projection onto ||w||^2 <= 1/lambda,
 *        1: projection onto ||w|| <= projection_constant, otherwise none
 * @param projection_constant radius used by projection_rule 1
 * @return the averaged weight vector
 */
SGVector<float64_t> CPegasos::Learn(// Input variables
	CDotFeatures* features,
	SGVector<float64_t> labels,
	int dimension,
	double lambda,int max_iter,int exam_per_iter,int num_iter_to_avg,
	// Output variables
	double& obj_value, double& norm_value,double& loss_value,
	// additional parameters
	int eta_rule_type , double eta_constant ,
	int projection_rule, double projection_constant)
{
	int num_examples = features->get_num_vectors();

	// Initialization of classification vector.
	// BUGFIX: SGVector does not zero its freshly allocated buffer, so the
	// working vector W and the running average AvgW must be cleared
	// explicitly; otherwise the solver dots against (and averages into)
	// uninitialized memory.
	SGVector<float64_t> W(dimension);
	SGVector<float64_t> AvgW(dimension);
	W.zero();
	AvgW.zero();
	double avgScale = (num_iter_to_avg > max_iter) ? max_iter : num_iter_to_avg;

	// ---------------- Main Loop -------------------
	for (int i = 0; i < max_iter; ++i) {

		// learning rate for this iteration
		double eta;
		if (eta_rule_type == 0) { // Pegasos eta rule
			eta = 1 / (lambda * (i+2));
		} else if (eta_rule_type == 1) { // Norma rule
			eta = eta_constant / sqrt(i+2);
		} else {
			eta = eta_constant;
		}

		// gradient indices and losses
		std::vector<uint> grad_index;
		std::vector<double> grad_weights;

		// calc sub-gradients
		for (int j=0; j < exam_per_iter; ++j) {

			// choose random example
			uint r = ((int)rand()) % num_examples;

			// calculate prediction
			double prediction = features->dense_dot_sgvec(r, W);

			// calculate hinge loss
			double cur_loss = 1 - labels[r]*prediction;
			if (cur_loss < 0.0) cur_loss = 0.0;

			// and add to the gradient
			if (cur_loss > 0.0) {
				grad_index.push_back(r);
				grad_weights.push_back(eta*labels[r]/exam_per_iter);
			}
		}

		// scale w by the regularization shrinkage factor
		double scaling = 1.0 - eta*lambda;
		if (scaling==0)
			W.zero();
		else
		{
			SGVector<float64_t>::scale_vector(scaling, W.vector, W.vlen);
		}

		// and add sub-gradients
		for (uint j=0; j<grad_index.size(); ++j) {
			features->add_to_dense_vec(grad_weights[j],grad_index[j],W.vector,W.vlen);
		}

		// Project if needed.
		// BUGFIX: the reference implementation compares the SQUARED norm of
		// W against the ball radius; twonorm returns ||W||, so it must be
		// squared before the comparison (the original port plugged the
		// plain norm into the squared-norm formulas).
		if (projection_rule == 0) { // Pegasos projection onto ||W||^2 <= 1/lambda
			double norm = SGVector<float64_t>::twonorm(W.vector, W.vlen);
			double sq_norm = norm*norm;
			if (sq_norm > 1.0/lambda) {
				SGVector<float64_t>::scale_vector(sqrt(1.0/(lambda*sq_norm)), W.vector, W.vlen);
			}
		} else if (projection_rule == 1) { // projection onto ||W|| <= projection_constant
			double norm = SGVector<float64_t>::twonorm(W.vector, W.vlen);
			if (norm*norm > (projection_constant*projection_constant)) {
				SGVector<float64_t>::scale_vector(projection_constant/norm, W.vector, W.vlen);
			}
		} // else -- no projection

		// accumulate the last num_iter_to_avg iterates into the average
		if (max_iter <= num_iter_to_avg + i)
			for (int j=0; j<dimension; j++)
				AvgW[j] += W[j]/avgScale;
	}

	// Calculate objective value: lambda/2*||w||^2 + mean hinge loss.
	// BUGFIX: the regularization term uses the SQUARED norm; norm_value
	// itself keeps reporting the plain two-norm as before.
	norm_value = SGVector<float64_t>::twonorm(AvgW.vector, AvgW.vlen);
	obj_value = norm_value * norm_value * lambda / 2.0;
	loss_value = 0.0;
	for (int i=0; i < num_examples; ++i) {
		double cur_loss = 1 - labels[i]*features->dense_dot_sgvec(i,AvgW);
		if (cur_loss < 0.0) cur_loss = 0.0;
		loss_value += cur_loss/num_examples;
		obj_value += cur_loss/num_examples;
	}
	return AvgW;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Distributed under GNU General Public License (see license.txt for details). | ||
// | ||
// Copyright (c) 2007 Shai Shalev-Shwartz. | ||
// All Rights Reserved. | ||
//============================================================================= | ||
// File Name: pegasos_optimize.h | ||
// header for the main optimization function of pegasos | ||
//============================================================================= | ||
|
||
#ifndef _SHAI_PEGASOS_OPTIMIZE_H | ||
#define _SHAI_PEGASOS_OPTIMIZE_H | ||
|
||
#include <shogun/features/DotFeatures.h> | ||
#include <shogun/lib/SGVector.h> | ||
|
||
//***************************************************************************** | ||
// Included Files | ||
//***************************************************************************** | ||
#include <cstdio> | ||
#include <cstdlib> | ||
#include <vector> | ||
#include <ctime> | ||
#include <cmath> | ||
|
||
namespace shogun | ||
{ | ||
/** @brief Port of the Pegasos (Primal Estimated sub-GrAdient SOlver for
 * SVM) optimizer by Shai Shalev-Shwartz, adapted to shogun's dot-feature
 * interface. */
class CPegasos
{
public:
	/** Runs the Pegasos stochastic solver and returns the learned
	 * (averaged) weight vector.
	 *
	 * @param features training features
	 * @param labels training labels (presumably in {-1,+1} as required by
	 *        the hinge loss -- confirm with callers)
	 * @param dimension feature-space dimension
	 * @param lambda regularization constant
	 * @param max_iter number of stochastic iterations
	 * @param exam_per_iter examples sampled per iteration
	 * @param num_iter_to_avg number of final iterates to average
	 * @param obj_value [out] primal objective value
	 * @param norm_value [out] norm of the solution
	 * @param loss_value [out] mean hinge loss
	 * @param eta_rule_type learning-rate rule selector (0 = Pegasos rule)
	 * @param eta_constant learning-rate constant for non-Pegasos rules
	 * @param projection_rule projection rule selector (0 = Pegasos rule)
	 * @param projection_constant radius for projection rule 1
	 * @return averaged weight vector
	 */
	static SGVector<float64_t> Learn(// Input variables
		CDotFeatures* features,
		SGVector<float64_t> labels,
		int dimension,
		double lambda,int max_iter,int exam_per_iter,int num_iter_to_avg,
		// Output variables
		double& obj_value, double& norm_value,double& loss_value,
		// additional parameters
		int eta_rule_type, double eta_constant,
		int projection_rule, double projection_constant);
};
} | ||
#endif |