Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Refactoring of apply() methods
- apply() of CMachine now dispatches to
  the appropriate apply_binary(), apply_regression(), or apply_multiclass() method
- apply(int32_t) renamed to apply_one(int32_t)
  • Loading branch information
lisitsyn committed May 21, 2012
1 parent 6630435 commit c3e643d
Show file tree
Hide file tree
Showing 19 changed files with 199 additions and 103 deletions.
2 changes: 1 addition & 1 deletion src/shogun/classifier/AveragedPerceptron.cpp
Expand Up @@ -66,7 +66,7 @@ bool CAveragedPerceptron::train(CFeatures* data)
converged=true;
for (int32_t i=0; i<num_vec; i++)
{
output[i]=apply(i);
output[i]=apply_one(i);

if (CMath::sign<float64_t>(output[i]) != train_labels.vector[i])
{
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/classifier/Perceptron.cpp
Expand Up @@ -65,7 +65,7 @@ bool CPerceptron::train_machine(CFeatures* data)
converged=true;
for (int32_t i=0; i<num_vec; i++)
{
output[i]=apply(i);
output[i]=apply_one(i);

if (CMath::sign<float64_t>(output[i]) != train_labels.vector[i])
{
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/classifier/svm/SVM.cpp
Expand Up @@ -289,7 +289,7 @@ float64_t CSVM::compute_svm_primal_objective()
regularizer-=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
}

loss-=(C1*(-((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 + C2_tmp*(((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 )*CMath::max(0.0, 1.0-((CBinaryLabels*) m_labels)->get_label(ii)*apply(ii));
loss-=(C1*(-((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 + C2_tmp*(((CBinaryLabels*) m_labels)->get_label(ii)+1)/2.0 )*CMath::max(0.0, 1.0-((CBinaryLabels*) m_labels)->get_label(ii)*apply_one(ii));
}

}
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/machine/DistanceMachine.cpp
Expand Up @@ -204,7 +204,7 @@ void* CDistanceMachine::run_distance_thread_rhs(void* p)
return NULL;
}

CLabels* CDistanceMachine::apply(CFeatures* data)
CMulticlassLabels* CDistanceMachine::apply_multiclass(CFeatures* data)
{
if (data)
{
Expand All @@ -214,7 +214,7 @@ CLabels* CDistanceMachine::apply(CFeatures* data)
SG_UNREF(lhs);

/* build result labels and classify all elements of procedure */
CRealLabels* result=new CRealLabels(data->get_num_vectors());
CMulticlassLabels* result=new CMulticlassLabels(data->get_num_vectors());
for (index_t i=0; i<data->get_num_vectors(); ++i)
result->set_label(i, apply(i));
return result;
Expand All @@ -223,7 +223,7 @@ CLabels* CDistanceMachine::apply(CFeatures* data)
{
/* call apply on complete right hand side */
CFeatures* all=distance->get_rhs();
CLabels* result=apply(all);
CMulticlassLabels* result = apply_multiclass(all);
SG_UNREF(all);
return result;
}
Expand Down
14 changes: 13 additions & 1 deletion src/shogun/machine/DistanceMachine.h
Expand Up @@ -91,7 +91,19 @@ class CDistanceMachine : public CMachine
* @param data (test)data to be classified
* @return classified labels
*/
virtual CLabels* apply(CFeatures* data=NULL);
virtual CMulticlassLabels* apply_multiclass(CFeatures* data=NULL);

virtual CRealLabels* apply_regression(CFeatures* data=NULL)
{
SG_ERROR("Not supported");
return NULL;
}

virtual CBinaryLabels* apply_binary(CFeatures* data=NULL)
{
SG_ERROR("Not supported");
return NULL;
}

/** Apply machine to one example.
* Cluster index with smallest distance to to be classified element is
Expand Down
58 changes: 33 additions & 25 deletions src/shogun/machine/KernelMachine.cpp
Expand Up @@ -21,7 +21,7 @@ using namespace shogun;
struct S_THREAD_PARAM
{
CKernelMachine* kernel_machine;
CRealLabels* result;
float64_t* result;
int32_t start;
int32_t end;

Expand Down Expand Up @@ -236,7 +236,19 @@ bool CKernelMachine::init_kernel_optimization()
return false;
}

CLabels* CKernelMachine::apply(CFeatures* data)
CRealLabels* CKernelMachine::apply_regression(CFeatures* data)
{
SGVector<float64_t> outputs = apply_get_outputs(data);
return new CRealLabels(outputs);
}

CBinaryLabels* CKernelMachine::apply_binary(CFeatures* data)
{
SGVector<float64_t> outputs = apply_get_outputs(data);
return new CBinaryLabels(outputs);
}

SGVector<float64_t> CKernelMachine::apply_get_outputs(CFeatures* data)
{
if (is_data_locked())
{
Expand Down Expand Up @@ -264,13 +276,13 @@ CLabels* CKernelMachine::apply(CFeatures* data)

SG_UNREF(lhs);

CRealLabels* lab=NULL;
int32_t num_vectors=kernel->get_num_vec_rhs();

SGVector<float64_t> output(num_vectors);

if (kernel->get_num_vec_rhs()>0)
{
int32_t num_vectors=kernel->get_num_vec_rhs();

lab=new CRealLabels(num_vectors);
SG_DEBUG( "computing output on %d test examples\n", num_vectors);

CSignal::clear_cancel();
Expand All @@ -283,9 +295,6 @@ CLabels* CKernelMachine::apply(CFeatures* data)
if (kernel->has_property(KP_BATCHEVALUATION) &&
get_batch_computation_enabled())
{
float64_t* output=SG_MALLOC(float64_t, num_vectors);
memset(output, 0, sizeof(float64_t)*num_vectors);

if (get_num_support_vectors()>0)
{
int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
Expand All @@ -303,16 +312,15 @@ CLabels* CKernelMachine::apply(CFeatures* data)
}

kernel->compute_batch(num_vectors, idx,
output, get_num_support_vectors(), sv_idx, sv_weight);
output.vector, get_num_support_vectors(), sv_idx, sv_weight);
SG_FREE(sv_idx);
SG_FREE(sv_weight);
SG_FREE(idx);
}

for (int32_t i=0; i<num_vectors; i++)
lab->set_label(i, get_bias()+output[i]);
output[i] = get_bias() + output[i];

SG_FREE(output);
}
else
{
Expand All @@ -323,7 +331,7 @@ CLabels* CKernelMachine::apply(CFeatures* data)
{
S_THREAD_PARAM params;
params.kernel_machine=this;
params.result=lab;
params.result = output.vector;
params.start=0;
params.end=num_vectors;
params.verbose=true;
Expand All @@ -343,7 +351,7 @@ CLabels* CKernelMachine::apply(CFeatures* data)
for (t=0; t<num_threads-1; t++)
{
params[t].kernel_machine = this;
params[t].result = lab;
params[t].result = output.vector;
params[t].start = t*step;
params[t].end = (t+1)*step;
params[t].verbose = false;
Expand All @@ -354,7 +362,7 @@ CLabels* CKernelMachine::apply(CFeatures* data)
}

params[t].kernel_machine = this;
params[t].result = lab;
params[t].result = output.vector;
params[t].start = t*step;
params[t].end = num_vectors;
params[t].verbose = true;
Expand All @@ -379,10 +387,10 @@ CLabels* CKernelMachine::apply(CFeatures* data)
SG_DONE();
}

return lab;
return output;
}

float64_t CKernelMachine::apply(int32_t num)
float64_t CKernelMachine::apply_one(int32_t num)
{
ASSERT(kernel);

Expand All @@ -403,9 +411,9 @@ float64_t CKernelMachine::apply(int32_t num)

void* CKernelMachine::apply_helper(void* p)
{
S_THREAD_PARAM* params= (S_THREAD_PARAM*) p;
CRealLabels* result=params->result;
CKernelMachine* kernel_machine=params->kernel_machine;
S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
float64_t* result = params->result;
CKernelMachine* kernel_machine = params->kernel_machine;

#ifdef WIN32
for (int32_t vec=params->start; vec<params->end; vec++)
Expand All @@ -424,7 +432,7 @@ void* CKernelMachine::apply_helper(void* p)

/* eventually use index mapping if exists */
index_t idx=params->indices ? params->indices[vec] : vec;
result->set_label(vec, kernel_machine->apply(idx));
result[vec] = kernel_machine->apply_one(idx);
}

return NULL;
Expand Down Expand Up @@ -499,7 +507,7 @@ CLabels* CKernelMachine::apply_locked(SGVector<index_t> indices)
ASSERT(m_custom_kernel==kernel);

int32_t num_inds=indices.vlen;
CRealLabels* lab=new CRealLabels(num_inds);
SGVector<float64_t> output(num_inds);

CSignal::clear_cancel();

Expand All @@ -516,7 +524,7 @@ CLabels* CKernelMachine::apply_locked(SGVector<index_t> indices)
{
S_THREAD_PARAM params;
params.kernel_machine=this;
params.result=lab;
params.result=output.vector;

/* use the parameter index vector */
params.start=0;
Expand All @@ -538,7 +546,7 @@ CLabels* CKernelMachine::apply_locked(SGVector<index_t> indices)
for (t=0; t<num_threads-1; t++)
{
params[t].kernel_machine=this;
params[t].result=lab;
params[t].result=output.vector;

/* use the parameter index vector */
params[t].start=t*step;
Expand All @@ -552,7 +560,7 @@ CLabels* CKernelMachine::apply_locked(SGVector<index_t> indices)
}

params[t].kernel_machine=this;
params[t].result=lab;
params[t].result=output.vector;

/* use the parameter index vector */
params[t].start=t*step;
Expand All @@ -578,7 +586,7 @@ CLabels* CKernelMachine::apply_locked(SGVector<index_t> indices)
#endif
SG_DONE();

return lab;
return new CRealLabels(output);
}

void CKernelMachine::data_lock(CLabels* labs, CFeatures* features)
Expand Down
23 changes: 21 additions & 2 deletions src/shogun/machine/KernelMachine.h
Expand Up @@ -205,18 +205,33 @@ class CKernelMachine : public CMachine
bool init_kernel_optimization();

/** apply kernel machine to data
* for regression task
*
* @param data (test)data to be classified
* @return classified labels
*/
virtual CLabels* apply(CFeatures* data=NULL);
virtual CRealLabels* apply_regression(CFeatures* data=NULL);

/** apply kernel machine to data
* for binary classification task
*
* @param data (test)data to be classified
* @return classified labels
*/
virtual CBinaryLabels* apply_binary(CFeatures* data=NULL);

virtual CMulticlassLabels* apply_multiclass(CFeatures* data=NULL)
{
SG_ERROR("Not supported");
return NULL;
}

/** apply kernel machine to one example
*
* @param num which example to apply to
* @return classified value
*/
virtual float64_t apply(int32_t num);
virtual float64_t apply_one(int32_t num);

/** apply example helper, used in threads
*
Expand Down Expand Up @@ -256,7 +271,11 @@ class CKernelMachine : public CMachine
/** @return whether machine supports locking */
virtual bool supports_locking() const { return true; }


protected:

SGVector<float64_t> apply_get_outputs(CFeatures* data);

/** Stores feature data of the SV indices and sets it to the lhs of the
* underlying kernel. Then, all SV indices are set to identity.
*
Expand Down
19 changes: 15 additions & 4 deletions src/shogun/machine/LinearMachine.cpp
Expand Up @@ -43,7 +43,19 @@ CLinearMachine::~CLinearMachine()
SG_UNREF(features);
}

CLabels* CLinearMachine::apply(CFeatures* data)
CRealLabels* CLinearMachine::apply_regression(CFeatures* data)
{
SGVector<float64_t> outputs = apply_get_outputs(data);
return new CRealLabels(outputs);
}

CBinaryLabels* CLinearMachine::apply_binary(CFeatures* data)
{
SGVector<float64_t> outputs = apply_get_outputs(data);
return new CBinaryLabels(outputs);
}

SGVector<float64_t> CLinearMachine::apply_get_outputs(CFeatures* data)
{
if (data)
{
Expand All @@ -54,14 +66,13 @@ CLabels* CLinearMachine::apply(CFeatures* data)
}

if (!features)
return NULL;
return SGVector<float64_t>();

int32_t num=features->get_num_vectors();
ASSERT(num>0);
ASSERT(w.vlen==features->get_dim_feature_space());

float64_t* out=SG_MALLOC(float64_t, num);
features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);

return new CRealLabels(SGVector<float64_t>(out,num));
return SGVector<float64_t>(out,num);
}
23 changes: 20 additions & 3 deletions src/shogun/machine/LinearMachine.h
Expand Up @@ -116,20 +116,34 @@ class CLinearMachine : public CMachine
SG_REF(feat);
features=feat;
}

/** apply linear machine to data
* for binary classification problem
*
* @param data (test)data to be classified
* @return classified labels
*/
virtual CBinaryLabels* apply_binary(CFeatures* data=NULL);

/** apply linear machine to data
* for regression problem
*
* @param data (test)data to be classified
* @return classified labels
*/
virtual CLabels* apply(CFeatures* data=NULL);
virtual CRealLabels* apply_regression(CFeatures* data=NULL);

/// get output for example "vec_idx"
virtual float64_t apply(int32_t vec_idx)
virtual float64_t apply_one(int32_t vec_idx)
{
return features->dense_dot(vec_idx, w.vector, w.vlen) + bias;
}

virtual CMulticlassLabels* apply_multiclass(CFeatures* data=NULL)
{
SG_ERROR("Use LinearMulticlassMachine");
return NULL;
}

/** get features
*
* @return features
Expand All @@ -144,6 +158,9 @@ class CLinearMachine : public CMachine
virtual const char* get_name() const { return "LinearMachine"; }

protected:

SGVector<float64_t> apply_get_outputs(CFeatures* data);

/** Stores feature data of underlying model. Does nothing because
* Linear machines store the normal vector of the separating hyperplane
* and therefore the model anyway
Expand Down

0 comments on commit c3e643d

Please sign in to comment.