Skip to content

Commit

Permalink
Introduced random search model selection
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn committed Jul 21, 2012
1 parent 6765c4d commit 6eea837
Show file tree
Hide file tree
Showing 7 changed files with 261 additions and 27 deletions.
5 changes: 3 additions & 2 deletions src/interfaces/modular/ModelSelection.i
Expand Up @@ -11,8 +11,7 @@
SERIALIZABLE_DUMMY(shogun::CrossValidationResult);

/* These functions return new Objects */
%newobject CGridSearchModelSelection::select_model();
%newobject CGradientModelSelection::select_model();
%newobject *::select_model();
%newobject CParameterCombination::copy_tree();
%newobject CParameterCombination::leaf_sets_multiplication();
%newobject CModelSelectionParameters::get_combinations();
Expand All @@ -22,6 +21,7 @@ SERIALIZABLE_DUMMY(shogun::CrossValidationResult);

/* Remove C Prefix */
%rename(GridSearchModelSelection) CGridSearchModelSelection;
%rename(RandomSearchModelSelection) CRandomSearchModelSelection;
%rename(GradientModelSelection) CGradientModelSelection;
%rename(ModelSelectionBase) CModelSelection;
%rename(ModelSelectionOutput) CModelSelectionOutput;
Expand All @@ -31,6 +31,7 @@ SERIALIZABLE_DUMMY(shogun::CrossValidationResult);
%include <shogun/modelselection/ModelSelectionOutput.h>
%include <shogun/modelselection/ModelSelection.h>
%include <shogun/modelselection/GridSearchModelSelection.h>
%include <shogun/modelselection/RandomSearchModelSelection.h>
%include <shogun/modelselection/ParameterCombination.h>
%include <shogun/modelselection/ModelSelectionParameters.h>
%include <shogun/modelselection/GradientModelSelection.h>
1 change: 1 addition & 0 deletions src/interfaces/modular/ModelSelection_includes.i
Expand Up @@ -3,6 +3,7 @@
#include <shogun/modelselection/ModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/RandomSearchModelSelection.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ParameterCombination.h>
%}
27 changes: 2 additions & 25 deletions src/shogun/converter/MultidimensionalScaling.cpp
Expand Up @@ -16,6 +16,7 @@
#include <shogun/distance/CustomDistance.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/io/SGIO.h>
#include <shogun/distance/EuclidianDistance.h>

Expand Down Expand Up @@ -273,7 +274,7 @@ SGMatrix<float64_t> CMultidimensionalScaling::landmark_embedding(SGMatrix<float6
}

// get landmark indexes with random permutation
SGVector<int32_t> lmk_idxs = shuffle(lmk_N,total_N);
SGVector<int32_t> lmk_idxs = CStatistics::sample_indices(lmk_N,total_N);
// compute distances between landmarks
float64_t* lmk_dist_matrix = SG_MALLOC(float64_t, lmk_N*lmk_N);
for (i=0; i<lmk_N; i++)
Expand Down Expand Up @@ -427,28 +428,4 @@ void* CMultidimensionalScaling::run_triangulation_thread(void* p)
return NULL;
}


SGVector<int32_t> CMultidimensionalScaling::shuffle(int32_t count, int32_t total_count)
{
int32_t* idxs = SG_MALLOC(int32_t, total_count);
int32_t i,rnd;
int32_t* permuted_idxs = SG_MALLOC(int32_t, count);

// reservoir sampling
for (i=0; i<total_count; i++)
idxs[i] = i;
for (i=0; i<count; i++)
permuted_idxs[i] = idxs[i];
for (i=count; i<total_count; i++)
{
rnd = CMath::random(1,i);
if (rnd<count)
permuted_idxs[rnd] = idxs[i];
}
SG_FREE(idxs);

CMath::qsort(permuted_idxs,count);
return SGVector<int32_t>(permuted_idxs, count);
}

#endif /* HAVE_LAPACK */
26 changes: 26 additions & 0 deletions src/shogun/mathematics/Statistics.cpp
Expand Up @@ -1490,3 +1490,29 @@ float64_t CStatistics::entropy(float64_t* p, int32_t len)

return (float64_t) e;
}

SGVector<int32_t> CStatistics::sample_indices(int32_t sample_size, int32_t N)
{
REQUIRE(sample_size<N, "sample size should be less than number of indices\n");
int32_t* idxs = SG_MALLOC(int32_t,N);
int32_t i,rnd;
int32_t* permuted_idxs = SG_MALLOC(int32_t,sample_size);

// reservoir sampling
for (i=0; i<N; i++)
idxs[i] = i;
for (i=0; i<sample_size; i++)
permuted_idxs[i] = idxs[i];
for (i=sample_size; i<N; i++)
{
rnd = CMath::random(1,i);
if (rnd<sample_size)
permuted_idxs[rnd] = idxs[i];
}
SG_FREE(idxs);

CMath::qsort(permuted_idxs,sample_size);
return SGVector<int32_t>(permuted_idxs,sample_size);
}


6 changes: 6 additions & 0 deletions src/shogun/mathematics/Statistics.h
Expand Up @@ -320,6 +320,12 @@ class CStatistics: public CSGObject
*/
static float64_t fishers_exact_test_for_2x3_table(SGMatrix<float64_t> table);

/** sample indices
* @param sample_size size of sample to pick
* @param N total number of indices
*/
static SGVector<int32_t> sample_indices(int32_t sample_size, int32_t N);

/** @return object name */
inline virtual const char* get_name() const
{
Expand Down
152 changes: 152 additions & 0 deletions src/shogun/modelselection/RandomSearchModelSelection.cpp
@@ -0,0 +1,152 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Copyright (C) 2011 Heiko Strathmann
* Copyright (C) 2012 Sergey Lisitsyn
*/

#include <shogun/modelselection/RandomSearchModelSelection.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/machine/Machine.h>

using namespace shogun;

CRandomSearchModelSelection::CRandomSearchModelSelection() :
CModelSelection(NULL, NULL)
{
set_ratio(0.5);
}

CRandomSearchModelSelection::CRandomSearchModelSelection(
CModelSelectionParameters* model_parameters,
CMachineEvaluation* machine_eval, float64_t ratio) :
CModelSelection(model_parameters, machine_eval)
{
set_ratio(ratio);
}

CRandomSearchModelSelection::~CRandomSearchModelSelection()
{
}

CParameterCombination* CRandomSearchModelSelection::select_model(bool print_state)
{
if (print_state)
SG_PRINT("Generating parameter combinations\n");

/* Retrieve all possible parameter combinations */
CDynamicObjectArray* all_combinations=
(CDynamicObjectArray*)m_model_parameters->get_combinations();

int32_t n_all_combinations = all_combinations->get_num_elements();
SGVector<index_t> combinations_indices = CStatistics::sample_indices(n_all_combinations*m_ratio, n_all_combinations);

CDynamicObjectArray* combinations = new CDynamicObjectArray();

for (int32_t i=0; i<combinations_indices.vlen; i++)
combinations->append_element(all_combinations->get_element(i));

CrossValidationResult* best_result = new CrossValidationResult();

CParameterCombination* best_combination=NULL;
if (m_machine_eval->get_evaluation_direction()==ED_MAXIMIZE)
{
if (print_state) SG_PRINT("Direction is maximize\n");
best_result->mean=CMath::ALMOST_NEG_INFTY;
}
else
{
if (print_state) SG_PRINT("Direction is maximize\n");
best_result->mean=CMath::ALMOST_INFTY;
}

/* underlying learning machine */
CMachine* machine=m_machine_eval->get_machine();

/* apply all combinations and search for best one */
for (index_t i=0; i<combinations->get_num_elements(); ++i)
{
CParameterCombination* current_combination=(CParameterCombination*)
combinations->get_element(i);

/* eventually print */
if (print_state)
{
SG_PRINT("trying combination:\n");
current_combination->print_tree();
}

current_combination->apply_to_modsel_parameter(
machine->m_model_selection_parameters);

/* note that this may implicitly lock and unlockthe machine */
CrossValidationResult* result =
(CrossValidationResult*)(m_machine_eval->evaluate(m_ms_output));

if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_ERROR("Evaluation result is not of type CrossValidationResult!");

if (print_state)
result->print_result();

/* check if current result is better, delete old combinations */
if (m_machine_eval->get_evaluation_direction()==ED_MAXIMIZE)
{
if (result->mean>best_result->mean)
{
if (best_combination)
SG_UNREF(best_combination);

best_combination=(CParameterCombination*)
combinations->get_element(i);

SG_UNREF(best_result);
SG_REF(result);
best_result = result;
}
else
{
CParameterCombination* combination=(CParameterCombination*)
combinations->get_element(i);
SG_UNREF(combination);
}
}
else
{
if (result->mean<best_result->mean)
{
if (best_combination)
SG_UNREF(best_combination);

best_combination=(CParameterCombination*)
combinations->get_element(i);

SG_UNREF(best_result);
SG_REF(result);
best_result = result;
}
else
{
CParameterCombination* combination=(CParameterCombination*)
combinations->get_element(i);
SG_UNREF(combination);
}
}

SG_UNREF(result);
SG_UNREF(current_combination);
}

SG_UNREF(best_result);
SG_UNREF(machine);
SG_UNREF(combinations);

return best_combination;
}

71 changes: 71 additions & 0 deletions src/shogun/modelselection/RandomSearchModelSelection.h
@@ -0,0 +1,71 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Copyright (C) 2011 Heiko Strathmann
* Copyright (C) 2012 Sergey Lisitsyn
*/

#ifndef RANDOMSEARCHMODELSELECTION_H_
#define RANDOMSEARCHMODELSELECTION_H_

#include <shogun/modelselection/ModelSelection.h>
#include <shogun/modelselection/ModelSelectionOutput.h>
#include <shogun/base/DynArray.h>

namespace shogun
{

class CModelSelectionParameters;

/**
* @brief Model selection class which searches for the best model by a random
* search. See CModelSelection for details.
*/
class CRandomSearchModelSelection: public CModelSelection
{
public:
/** constructor */
CRandomSearchModelSelection();

/** constructor
* @param model_parameters
* @param cross_validation
* @param ratio
*/
CRandomSearchModelSelection(CModelSelectionParameters* model_parameters,
CMachineEvaluation* machine_eval, float64_t ratio);

/** destructor */
virtual ~CRandomSearchModelSelection();

/** get ratio */
float64_t get_ratio() const { return m_ratio; };
/** set ratio */
void set_ratio(float64_t ratio) { REQUIRE(ratio>0.0 && ratio<1.0, "Ratio should be in ]0,1[ range"); m_ratio = ratio; };

/**
* method to select model via grid search
*
* @param print_state if true, the current combination is printed
* @return best combination of model parameters
*/
virtual CParameterCombination* select_model(bool print_state=false);

/** @return name of the SGSerializable */
inline virtual const char* get_name() const
{
return "RandomSearchModelSelection";
}

protected:
/** ratio */
float64_t m_ratio;

};

}

#endif /* RANDOMSEARCHMODELSELECTION_H_ */

0 comments on commit 6eea837

Please sign in to comment.