Skip to content

Commit

Permalink
Merge branch 'multiclass-ecoc' of https://github.com/pluskid/shogun
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn committed May 13, 2012
2 parents afe403b + 0f8f746 commit 4851ff0
Show file tree
Hide file tree
Showing 10 changed files with 557 additions and 20 deletions.
Expand Up @@ -55,9 +55,10 @@ int main(int argc, char** argv)
stream_features->release_example();
}
stream_features->end_parser();
mat.num_cols = num_vectors;

// Create features with the useful values from mat
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(mat.matrix, num_feats, num_vectors);
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(mat);

CLabels* labels = new CLabels(num_vectors);
SG_REF(features);
Expand Down Expand Up @@ -101,8 +102,8 @@ int main(int argc, char** argv)
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
//SG_UNREF(ffeats_train);
//SG_UNREF(flabels_train);
SG_UNREF(ffeats_train);
SG_UNREF(flabels_train);
SG_UNREF(stream_features);
SG_UNREF(stream_labels);
exit_shogun();
Expand Down
@@ -0,0 +1,117 @@
#include <shogun/features/Labels.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/StreamingDenseFeatures.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

int main(int argc, char** argv)
{
int32_t num_vectors = 0;
int32_t num_feats = 2;

init_shogun_with_defaults();

// Prepare to read a file for the training data
char fname_feats[] = "../data/fm_train_real.dat";
char fname_labels[] = "../data/label_train_multiclass.dat";
CStreamingAsciiFile* ffeats_train = new CStreamingAsciiFile(fname_feats);
CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
SG_REF(ffeats_train);
SG_REF(flabels_train);

CStreamingDenseFeatures< float64_t >* stream_features =
new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);

CStreamingDenseFeatures< float64_t >* stream_labels =
new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);

SG_REF(stream_features);
SG_REF(stream_labels);

// Create a matrix with enough space to read all the feature vectors
SGMatrix< float64_t > mat = SGMatrix< float64_t >(num_feats, 1000);

// Read the values from the file and store them in mat
SGVector< float64_t > vec;
stream_features->start_parser();
while ( stream_features->get_next_example() )
{
vec = stream_features->get_vector();

for ( int32_t i = 0 ; i < num_feats ; ++i )
mat[num_vectors*num_feats + i] = vec[i];

num_vectors++;
stream_features->release_example();
}
stream_features->end_parser();
mat.num_cols = num_vectors;

// Create features with the useful values from mat
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(mat);

CLabels* labels = new CLabels(num_vectors);
SG_REF(features);
SG_REF(labels);

// Read the labels from the file
int32_t idx = 0;
stream_labels->start_parser();
while ( stream_labels->get_next_example() )
{
labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
stream_labels->release_example();
}
stream_labels->end_parser();

// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
SG_REF(svm);

// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(true);

CECOCDiscriminantEncoder *encoder = new CECOCDiscriminantEncoder();
encoder->set_features(features);
encoder->set_labels(labels);

// Create a multiclass svm classifier that consists of several of the previous one
CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
new CECOCStrategy(encoder, new CECOCHDDecoder()), (CDotFeatures*) features, svm, labels);
SG_REF(mc_svm);

// Train the multiclass machine using the data passed in the constructor
mc_svm->train();

// Classify the training examples and show the results
CLabels* output = mc_svm->apply();

SGVector< int32_t > out_labels = output->get_int_labels();
CMath::display_vector(out_labels.vector, out_labels.vlen);

// Free resources
SG_UNREF(mc_svm);
SG_UNREF(svm);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
SG_UNREF(ffeats_train);
SG_UNREF(flabels_train);
SG_UNREF(stream_features);
SG_UNREF(stream_labels);
exit_shogun();

return 0;
}

Expand Up @@ -56,9 +56,10 @@ int main(int argc, char** argv)
stream_features->release_example();
}
stream_features->end_parser();
mat.num_cols = num_vectors;

// Create features with the useful values from mat
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(mat.matrix, num_feats, num_vectors);
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(mat);

CLabels* labels = new CLabels(num_vectors);
SG_REF(features);
Expand Down Expand Up @@ -102,8 +103,8 @@ int main(int argc, char** argv)
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
//SG_UNREF(ffeats_train);
//SG_UNREF(flabels_train);
SG_UNREF(ffeats_train);
SG_UNREF(flabels_train);
SG_UNREF(stream_features);
SG_UNREF(stream_labels);
exit_shogun();
Expand Down
@@ -0,0 +1,62 @@
from os.path import exists
from tools.load import LoadMatrix
lm=LoadMatrix()

# Use the optdigits .mat file when it exists; otherwise load the ASCII files
# under ../data, for which no test labels are loaded.
if exists('../data/../mldata/uci-20070111-optdigits.mat'):
    from scipy.io import loadmat

    mat = loadmat('../data/../mldata/uci-20070111-optdigits.mat')['int0'].astype(float)
    # All rows but the last are used as features, the last row as labels.
    X = mat[:-1,:]
    Y = mat[-1,:]
    # Floor division keeps the split point an integer on Python 2 and 3;
    # plain '/' yields a float on Python 3, which is not a valid slice index.
    isplit = X.shape[1]//2
    traindat = X[:,:isplit]
    label_traindat = Y[:isplit]
    testdat = X[:, isplit:]
    label_testdat = Y[isplit:]
else:
    traindat = lm.load_numbers('../data/fm_train_real.dat')
    testdat = lm.load_numbers('../data/fm_test_real.dat')
    label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
    label_testdat = None

# Argument sets for classifier_multiclasslinearmachine_modular, in positional order.
parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    """Train an ECOC linear multiclass machine and predict on the test data.

    A LibLinear (L2R_L2LOSS_SVC) base classifier is wrapped in a
    LinearMulticlassMachine that uses an ECOCStrategy built from an
    ECOCDiscriminantEncoder and an ECOCHDDecoder.

    Parameters:
        fm_train_real: training feature matrix, passed to RealFeatures.
        fm_test_real: test feature matrix, passed to RealFeatures.
        label_train_multiclass: training labels, passed to Labels.
        label_test_multiclass: test labels, or None to skip the accuracy report.
        lawidth: not used by this function.
        C: not used by this function.
        epsilon: value handed to the base classifier's set_epsilon.

    Returns:
        The result of get_labels() on the predictions for the test features.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import (LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine,
                                   ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder)

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    # Base classifier that the multiclass machine is built on
    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # The discriminant encoder receives the training data before training
    disc_encoder = ECOCDiscriminantEncoder()
    disc_encoder.set_features(train_features)
    disc_encoder.set_labels(train_labels)
    disc_encoder.set_sffs_iterations(50)

    ecoc = ECOCStrategy(disc_encoder, ECOCHDDecoder())

    machine = LinearMulticlassMachine(ecoc, train_features, base_svm, train_labels)
    machine.train()

    predictions = machine.apply(test_features)
    predicted = predictions.get_labels()

    # Without ground-truth test labels there is nothing to evaluate
    if label_test_multiclass is None:
        return predicted

    from shogun.Evaluation import MulticlassAccuracy
    truth = Labels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predictions, truth)
    print('Accuracy = %.4f' % accuracy)

    return predicted

# Script entry point: run the example once with the first parameter set.
if __name__ == '__main__':
    print('MulticlassMachine')
    classifier_multiclasslinearmachine_modular(
        *parameter_list[0]
    )

@@ -1,13 +1,27 @@
from os.path import exists
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
if exists('../data/../mldata/uci-20070111-optdigits.mat'):
from scipy.io import loadmat

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]
mat = loadmat('../data/../mldata/uci-20070111-optdigits.mat')['int0'].astype(float)
X = mat[:-1,:]
Y = mat[-1,:]
isplit = X.shape[1]/2
traindat = X[:,:isplit]
label_traindat = Y[:isplit]
testdat = X[:, isplit:]
label_testdat = Y[isplit:]
else:
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
label_testdat = None

def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
from shogun.Features import RealFeatures, Labels
from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCHDDecoder, MulticlassOneVsRestStrategy
Expand All @@ -23,21 +37,33 @@ def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_r

mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), feats_train, classifier, labels)
mc_classifier.train()
out_mc = mc_classifier.apply(feats_test).get_labels()
label_mc = mc_classifier.apply(feats_test)
out_mc = label_mc.get_labels()

ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCHDDecoder())
ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train, classifier, labels)
ecoc_classifier.train()
out_ecoc = ecoc_classifier.apply(feats_test).get_labels()
label_ecoc = ecoc_classifier.apply(feats_test)
out_ecoc = label_ecoc.get_labels()

n_diff = (out_mc != out_ecoc).sum()
if n_diff == 0:
print("Same results for OvR and ECOCOvR")
else:
print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (n_diff, len(out_mc)))

if label_test_multiclass is not None:
from shogun.Evaluation import MulticlassAccuracy
labels_test = Labels(label_test_multiclass)
evaluator = MulticlassAccuracy()
acc_mc = evaluator.evaluate(label_mc, labels_test)
acc_ecoc = evaluator.evaluate(label_ecoc, labels_test)
print('Normal OVR Accuracy = %.4f' % acc_mc)
print('ECOC OVR Accuracy = %.4f' % acc_ecoc)

return out_ecoc, out_mc

if __name__=='__main__':
print('MulticlassMachine')
classifier_multiclasslinearmachine_modular(*parameter_list[0])

@@ -1,13 +1,27 @@
from os.path import exists
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
if exists('../data/../mldata/uci-20070111-optdigits.mat'):
from scipy.io import loadmat

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]
mat = loadmat('../data/../mldata/uci-20070111-optdigits.mat')['int0'].astype(float)
X = mat[:-1,:]
Y = mat[-1,:]
isplit = X.shape[1]/2
traindat = X[:,:isplit]
label_traindat = Y[:isplit]
testdat = X[:, isplit:]
label_testdat = Y[isplit:]
else:
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
label_testdat = None

def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
from shogun.Features import RealFeatures, Labels
from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder
Expand All @@ -26,11 +40,22 @@ def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_r

dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train, classifier, labels)
dense_classifier.train()
out_dense = dense_classifier.apply(feats_test).get_labels()
label_dense = dense_classifier.apply(feats_test)
out_dense = label_dense.get_labels()

sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy, feats_train, classifier, labels)
sparse_classifier.train()
out_sparse = sparse_classifier.apply(feats_test).get_labels()
label_sparse = sparse_classifier.apply(feats_test)
out_sparse = label_sparse.get_labels()

if label_test_multiclass is not None:
from shogun.Evaluation import MulticlassAccuracy
labels_test = Labels(label_test_multiclass)
evaluator = MulticlassAccuracy()
acc_dense = evaluator.evaluate(label_dense, labels_test)
acc_sparse = evaluator.evaluate(label_sparse, labels_test)
print('Random Dense Accuracy = %.4f' % acc_dense)
print('Random Sparse Accuracy = %.4f' % acc_sparse)

return out_sparse, out_dense

Expand Down
2 changes: 2 additions & 0 deletions src/interfaces/modular/Multiclass.i
Expand Up @@ -28,6 +28,7 @@
%rename(ECOCOVOEncoder) CECOCOVOEncoder;
%rename(ECOCRandomSparseEncoder) CECOCRandomSparseEncoder;
%rename(ECOCRandomDenseEncoder) CECOCRandomDenseEncoder;
%rename(ECOCDiscriminantEncoder) CECOCDiscriminantEncoder;
%rename(ECOCHDDecoder) CECOCHDDecoder;

%rename(MulticlassLibLinear) CMulticlassLibLinear;
Expand Down Expand Up @@ -57,6 +58,7 @@
%include <shogun/multiclass/ecoc/ECOCOVOEncoder.h>
%include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
%include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
%include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
%include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
%include <shogun/multiclass/ecoc/ECOCStrategy.h>

Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Multiclass_includes.i
Expand Up @@ -14,6 +14,7 @@
#include <shogun/multiclass/ecoc/ECOCOVOEncoder.h>
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCDecoder.h>
#include <shogun/multiclass/ecoc/ECOCOVREncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
Expand Down

0 comments on commit 4851ff0

Please sign in to comment.