Skip to content

Commit

Permalink
Introduced classifier based on conjugate index
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn committed Jan 5, 2012
1 parent a690f9d commit ecc2543
Show file tree
Hide file tree
Showing 8 changed files with 444 additions and 1 deletion.
1 change: 1 addition & 0 deletions examples/undocumented/libshogun/Makefile
Expand Up @@ -52,6 +52,7 @@ TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
converter_linearlocaltangentspacealignment \
converter_localitypreservingprojections \
serialization_basic_tests \
classifier_conjugateindex

all: $(TARGETS)

Expand Down
42 changes: 42 additions & 0 deletions examples/undocumented/libshogun/classifier_conjugateindex.cpp
@@ -0,0 +1,42 @@
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/ConjugateIndex.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

int main(int argc, char** argv)
{
init_shogun_with_defaults();

// create some data
float64_t* matrix = SG_MALLOC(float64_t, 6);
for (int32_t i=0; i<6; i++)
matrix[i]=i;

// create three 2-dimensional vectors
// shogun will now own the matrix created
CSimpleFeatures<float64_t>* features= new CSimpleFeatures<float64_t>();
features->set_feature_matrix(matrix, 2, 3);

// create three labels
CLabels* labels=new CLabels(3);
labels->set_label(0, 0);
labels->set_label(1, +1);
labels->set_label(2, 0);

CConjugateIndex* ci = new CConjugateIndex(features,labels);
ci->train();

// classify on training examples
for (int32_t i=0; i<3; i++)
SG_SPRINT("output[%d]=%f\n", i, ci->apply(i));

// free up memory
SG_UNREF(ci);

exit_shogun();
return 0;
}
@@ -0,0 +1,27 @@
from tools.load import LoadMatrix
# Loader object from the project-local tools.load module, used to read the
# example data files below.
lm=LoadMatrix()

# Training / test inputs and the training labels, read from the shared
# example data directory (paths are relative to the working directory).
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

# Argument sets for classifier_conjugateindex_modular; both entries are
# currently identical.
parameter_list = [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]]

def classifier_conjugateindex_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat):
    """Train a ConjugateIndex classifier and apply it to the test data.

    fm_train_real          -- training data handed to RealFeatures
    fm_test_real           -- test data handed to RealFeatures
    label_train_multiclass -- training labels handed to Labels

    Returns a tuple (trained classifier, labels predicted for the test data).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import ConjugateIndex

    # wrap the raw inputs in shogun feature / label objects
    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    classifier = ConjugateIndex(train_features, train_labels)
    classifier.train()

    # predict on the test set and pull the labels out of the result object
    predicted = classifier.apply(test_features)
    predictions = predicted.get_labels()
    return classifier, predictions

# Script entry point: run the example once with the first parameter set.
if __name__=='__main__':
    # A single-argument print(...) call is valid on both Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on Python 3.
    print('ConjugateIndex')
    classifier_conjugateindex_modular(*parameter_list[0])
2 changes: 2 additions & 0 deletions src/interfaces/modular/Classifier.i
Expand Up @@ -55,6 +55,7 @@
%rename(MKLOneClass) CMKLOneClass;
%rename(MKLMultiClass) CMKLMultiClass;
%rename(VowpalWabbit) CVowpalWabbit;
%rename(ConjugateIndex) CConjugateIndex;
#ifdef USE_SVMLIGHT
%rename(SVMLight) CSVMLight;
%rename(DomainAdaptationSVM) CDomainAdaptationSVM;
Expand Down Expand Up @@ -108,6 +109,7 @@
%include <shogun/classifier/mkl/MKLMultiClass.h>
%include <shogun/classifier/vw/VowpalWabbit.h>
%include <shogun/classifier/svm/DomainAdaptationSVMLinear.h>
%include <shogun/classifier/ConjugateIndex.h>

#ifdef USE_SVMLIGHT

Expand Down
1 change: 1 addition & 0 deletions src/interfaces/modular/Classifier_includes.i
Expand Up @@ -38,6 +38,7 @@
#include <shogun/classifier/mkl/MKLOneClass.h>
#include <shogun/classifier/mkl/MKLMultiClass.h>
#include <shogun/classifier/vw/VowpalWabbit.h>
#include <shogun/classifier/ConjugateIndex.h>
#ifdef USE_SVMLIGHT
#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/classifier/svm/SVMLightOneClass.h>
Expand Down
242 changes: 242 additions & 0 deletions src/shogun/classifier/ConjugateIndex.cpp
@@ -0,0 +1,242 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Sergey Lisitsyn
*/

#include <shogun/classifier/ConjugateIndex.h>
#include <shogun/machine/Machine.h>
#include <shogun/features/Features.h>
#include <shogun/features/Labels.h>
#include <shogun/mathematics/lapack.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Signal.h>

using namespace shogun;

/** Default constructor: creates an untrained machine with no features set. */
CConjugateIndex::CConjugateIndex() : CMachine()
{
	m_classes = NULL;
	m_features = NULL;
	// give the class count a defined value before train() assigns it
	m_num_classes = 0;
}

/**
 * Constructor that attaches training data; the machine is still untrained
 * afterwards.
 *
 * @param train_features features to train on, passed to set_features()
 * @param train_labels labels to train on, passed to set_labels()
 */
CConjugateIndex::CConjugateIndex(CFeatures* train_features, CLabels* train_labels) : CMachine()
{
	// Put every member into a defined state before any setter runs:
	// set_features() unrefs the previous m_features, so it must be NULL here.
	m_classes = NULL;
	m_num_classes = 0;
	m_features = NULL;

	set_features(train_features);
	set_labels(train_labels);
}

/** Destructor: releases the feature reference and all model storage. */
CConjugateIndex::~CConjugateIndex()
{
	// give up our reference to the attached features
	SG_UNREF(m_features);

	// free the scratch vector and the per-class matrices
	m_feature_vector.destroy_vector();
	clean_classes();
}

/**
 * Replace the features this machine operates on.
 *
 * @param features new features; must be non-NULL simple (dense) features
 *        holding float64_t values, otherwise an assertion fails
 */
void CConjugateIndex::set_features(CFeatures* features)
{
	ASSERT(features);
	ASSERT(features->get_feature_class()==C_SIMPLE);
	// the pointer is stored as CSimpleFeatures<float64_t>*, so the element
	// type has to be checked as well as the feature class
	ASSERT(features->get_feature_type()==F_DREAL);

	// take the new reference before dropping the old one, so that setting
	// the object that is already attached cannot release it in between
	SG_REF(features);
	SG_UNREF(m_features);
	m_features = static_cast<CSimpleFeatures<float64_t>*>(features);
}

/**
 * Get the currently attached features.
 *
 * @return the attached features with their reference count increased
 *         through SG_REF
 */
CSimpleFeatures<float64_t>* CConjugateIndex::get_features()
{
	CSimpleFeatures<float64_t>* attached = m_features;
	SG_REF(attached);
	return attached;
}

void CConjugateIndex::clean_classes()
{
if (m_classes)
{
for (int32_t i=0; i<m_num_classes; i++)
m_classes[i].destroy_matrix();

delete[] m_classes;
}
}

/**
 * Train the classifier. For every class label the matrix
 *   X * (X^T * X)^-1 * X^T
 * is computed, where the columns of X are the training vectors carrying that
 * label, and stored in m_classes[label] (num_features x num_features).
 *
 * @param train_features optional training features; when non-NULL they
 *        replace the currently attached features
 * @return true; violated preconditions are reported through ASSERT
 */
bool CConjugateIndex::train(CFeatures* train_features)
{
	if (train_features)
		set_features(train_features);

	ASSERT(m_features);
	ASSERT(labels);

	const int32_t num_classes = labels->get_num_classes();
	ASSERT(num_classes>=2);

	int32_t num_vectors;
	int32_t num_features;
	float64_t* feature_matrix = m_features->get_feature_matrix(num_features,num_vectors);
	ASSERT(feature_matrix);
	ASSERT(labels->get_num_labels()==num_vectors);

	// Release the previous model while m_num_classes still holds the size it
	// was allocated with; only afterwards switch to the new class count.
	clean_classes();
	m_classes = NULL;
	m_feature_vector.destroy_vector();
	m_num_classes = num_classes;

	m_classes = new SGMatrix<float64_t>[m_num_classes];
	for (int32_t i=0; i<m_num_classes; i++)
		m_classes[i] = SGMatrix<float64_t>(num_features,num_features);

	// scratch vector used by conjugate_index()
	m_feature_vector = SGVector<float64_t>(num_features);

	// work buffers, sized for the largest possible class
	const int32_t max_dim = CMath::max(num_features,num_vectors);
	SGMatrix<float64_t> matrix(max_dim,max_dim);
	SGMatrix<float64_t> class_feature_matrix(num_features,max_dim);
	SGMatrix<float64_t> helper_matrix(max_dim,num_features);

	SG_PROGRESS(0,0,m_num_classes);
	for (int32_t label=0; label<m_num_classes; label++)
	{
		// gather the vectors of the current class as columns of X
		int32_t count = 0;
		for (int32_t i=0; i<num_vectors; i++)
		{
			if (labels->get_label(i) == label)
			{
				memcpy(class_feature_matrix.matrix+count*num_features,
				       feature_matrix+i*num_features,
				       sizeof(float64_t)*num_features);
				count++;
			}
		}

		if (count==0)
		{
			// A class without examples would hand a zero leading dimension
			// to BLAS; store an all-zero matrix for it instead.
			for (int32_t j=0; j<num_features*num_features; j++)
				m_classes[label].matrix[j] = 0.0;

			SG_PROGRESS(label+1,0,m_num_classes);
			continue;
		}

		// matrix = X^T * X (count x count)
		// NOTE(review): this Gram matrix has rank at most num_features, so it
		// looks singular whenever a class has more examples than features -
		// confirm that the inversion below is intended for that case.
		cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,
		            count,count,num_features,
		            1.0,class_feature_matrix.matrix,num_features,
		            class_feature_matrix.matrix,num_features,
		            0.0,matrix.matrix,count);

		// matrix = (X^T * X)^-1
		CMath::inverse(SGMatrix<float64_t>(matrix.matrix,count,count));

		// helper_matrix = (X^T * X)^-1 * X^T (count x num_features)
		cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,
		            count,num_features,count,
		            1.0,matrix.matrix,count,
		            class_feature_matrix.matrix,num_features,
		            0.0,helper_matrix.matrix,count);

		// m_classes[label] = X * helper_matrix (num_features x num_features)
		cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,
		            num_features,num_features,count,
		            1.0,class_feature_matrix.matrix,num_features,
		            helper_matrix.matrix,count,
		            0.0,m_classes[label].matrix,num_features);

		SG_PROGRESS(label+1,0,m_num_classes);
	}
	helper_matrix.destroy_matrix();
	class_feature_matrix.destroy_matrix();
	matrix.destroy_matrix();
	SG_DONE();

	return true;
}

/**
 * Classify all vectors of the given features.
 *
 * @param test_features features to classify; when non-NULL they replace the
 *        currently attached features (same convention as train()), when NULL
 *        the currently attached features are classified
 * @return newly allocated labels, one predicted class index per vector
 */
CLabels* CConjugateIndex::apply(CFeatures* test_features)
{
	// set_features() dereferences its argument, so guard against NULL here
	if (test_features)
		set_features(test_features);

	CLabels* predicted_labels = apply();

	return predicted_labels;
}

/**
 * Classify every vector of the currently attached features.
 * Requires a trained model whose dimensionality matches the features.
 *
 * @return newly allocated labels holding one predicted class index per vector
 */
CLabels* CConjugateIndex::apply()
{
	ASSERT(m_classes);
	ASSERT(m_num_classes>1);
	ASSERT(m_features->get_num_features()==m_feature_vector.vlen);

	const int32_t total = m_features->get_num_vectors();
	CLabels* result = new CLabels(total);

	int32_t idx = 0;
	while (idx<total)
	{
		SG_PROGRESS(idx,0,total-1);
		// classify a single example and store its class index
		const float64_t predicted = apply(idx);
		result->set_label(idx,predicted);
		++idx;
	}
	SG_DONE();

	return result;
}

/**
 * Compute the conjugate index of a vector x with respect to one class:
 *   (x^T * M * x) / (x^T * x),   M = m_classes[label].
 * The product M * x is written to the member scratch vector m_feature_vector.
 *
 * @param feature_vector vector x; its length is used as the dimension of M
 * @param label class index selecting the matrix in m_classes
 * @return the conjugate index, or 0 when x is the all-zero vector
 */
float64_t CConjugateIndex::conjugate_index(SGVector<float64_t> feature_vector, int32_t label)
{
	int32_t num_features = feature_vector.vlen;
	float64_t norm = cblas_ddot(num_features,feature_vector.vector,1,
	                            feature_vector.vector,1);

	// x^T*x == 0 would make the quotient 0/0 = NaN; report 0 instead
	if (norm==0.0)
		return 0.0;

	// m_feature_vector = M * x
	cblas_dgemv(CblasColMajor,CblasNoTrans,
	            num_features,num_features,
	            1.0,m_classes[label].matrix,num_features,
	            feature_vector.vector,1,
	            0.0,m_feature_vector.vector,1);

	// x^T * (M * x)
	float64_t product = cblas_ddot(num_features,feature_vector.vector,1,
	                               m_feature_vector.vector,1);
	return product/norm;
}

/**
 * Classify a single vector of the currently attached features.
 *
 * @param index index of the vector to classify
 * @return index of the class with the largest conjugate index; class 0 when
 *         no class scores strictly above 0
 */
float64_t CConjugateIndex::apply(int32_t index)
{
	SGVector<float64_t> example = m_features->get_feature_vector(index);

	int32_t best_class = 0;
	float64_t best_score = 0.0;

	for (int32_t c=0; c<m_num_classes; ++c)
	{
		const float64_t score = conjugate_index(example,c);

		// strict comparison: ties keep the earlier class
		if (score > best_score)
		{
			best_class = c;
			best_score = score;
		}
	}
	example.free_vector();

	return best_class;
}



0 comments on commit ecc2543

Please sign in to comment.