Skip to content

Commit

Permalink
Fixed FeatureBlockLogisticRegression solver and added Task and TaskTree
Browse files Browse the repository at this point in the history
  • Loading branch information
lisitsyn committed Jul 5, 2012
1 parent b20223c commit 2cf3002
Show file tree
Hide file tree
Showing 11 changed files with 331 additions and 97 deletions.
4 changes: 4 additions & 0 deletions src/interfaces/modular/Transfer.i
Expand Up @@ -16,6 +16,8 @@
%rename(MultitaskKernelMaskPairNormalizer) CMultitaskKernelMaskPairNormalizer;
%rename(MultitaskKernelPlifNormalizer) CMultitaskKernelPlifNormalizer;

%rename(Task) CTask;
%rename(TaskTree) CTaskTree;
%rename(MultitaskLSRegression) CMultitaskLSRegression;
%rename(MultitaskLogisticRegression) CMultitaskLogisticRegression;

Expand All @@ -36,6 +38,8 @@
%include <shogun/transfer/multitask/MultitaskKernelMaskPairNormalizer.h>
%include <shogun/transfer/multitask/MultitaskKernelPlifNormalizer.h>

%include <shogun/transfer/multitask/Task.h>
%include <shogun/transfer/multitask/TaskTree.h>
%include <shogun/transfer/multitask/MultitaskLSRegression.h>
%include <shogun/transfer/multitask/MultitaskLogisticRegression.h>

Expand Down
2 changes: 2 additions & 0 deletions src/interfaces/modular/Transfer_includes.i
Expand Up @@ -7,6 +7,8 @@
#include <shogun/transfer/multitask/MultitaskKernelPlifNormalizer.h>

#include <shogun/transfer/multitask/LibLinearMTL.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/MultitaskLSRegression.h>
#include <shogun/transfer/multitask/MultitaskLogisticRegression.h>

Expand Down
12 changes: 7 additions & 5 deletions src/shogun/classifier/FeatureBlockLogisticRegression.cpp
Expand Up @@ -95,23 +95,23 @@ bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
new_w[i] = result.w[i];

set_bias(result.c[0]);

w = new_w;
}
break;
case TREE:
{
CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation;

CIndexBlock* root_block = feature_tree->get_root_block();
if (root_block->get_max_index() > features->get_num_vectors())
if (root_block->get_max_index() > features->get_dim_feature_space())
SG_ERROR("Root block covers more vectors than available\n");
SG_UNREF(root_block);

SGVector<index_t> ind = feature_tree->get_SLEP_ind();
SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
options.ind = ind.vector;
options.ind_t = ind_t.vector;
options.n_feature_blocks = ind.vlen-1;
options.n_nodes = ind_t.vlen / 3;
options.n_nodes = ind_t.vlen/3;
options.n_feature_blocks = ind_t.vlen/3;
options.mode = FEATURE_TREE;

slep_result_t result = slep_logistic(features, y.vector, m_z, options);
Expand All @@ -122,6 +122,8 @@ bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
new_w[i] = result.w[i];

set_bias(result.c[0]);

w = new_w;
}
break;
default:
Expand Down
2 changes: 0 additions & 2 deletions src/shogun/lib/IndexBlock.h
Expand Up @@ -59,8 +59,6 @@ class CIndexBlock : public CSGObject
/** get num subtasks */
int32_t get_num_sub_blocks();

protected:

/** adds sub-block
* @param sub_block subtask to add
*/
Expand Down
92 changes: 9 additions & 83 deletions src/shogun/lib/IndexBlockTree.cpp
Expand Up @@ -25,76 +25,25 @@ struct block_tree_node_t
float64_t weight;
};

int32_t count_leaf_blocks_recursive(CIndexBlock* subtree_root_block)
void collect_tree_nodes_recursive(CIndexBlock* subtree_root_block, vector<block_tree_node_t>* tree_nodes)
{
CList* sub_blocks = subtree_root_block->get_sub_blocks();
int32_t n_sub_blocks = sub_blocks->get_num_elements();
if (n_sub_blocks==0)
{
SG_UNREF(sub_blocks);
return 1;
}
else
{
int32_t sum = 0;
CIndexBlock* iterator = (CIndexBlock*)sub_blocks->get_first_element();
do
{
sum += count_leaf_blocks_recursive(iterator);
}
while ((iterator = (CIndexBlock*)sub_blocks->get_next_element()) != NULL);

SG_UNREF(sub_blocks);
return sum;
}
}

void collect_tree_nodes_recursive(CIndexBlock* subtree_root_block, vector<block_tree_node_t>* tree_nodes, int low)
{
int32_t lower = low;
CList* sub_blocks = subtree_root_block->get_sub_blocks();
if (sub_blocks->get_num_elements()>0)
{
CIndexBlock* iterator = (CIndexBlock*)sub_blocks->get_first_element();
do
{
SG_SDEBUG("Block [%d %d] \n",iterator->get_min_index(), iterator->get_max_index());
tree_nodes->push_back(block_tree_node_t(iterator->get_min_index(),iterator->get_max_index(),iterator->get_weight()));
if (iterator->get_num_sub_blocks()>0)
{
int32_t n_leaves = count_leaf_blocks_recursive(iterator);
SG_SDEBUG("Block [%d %d] has %d leaf childs \n",iterator->get_min_index(), iterator->get_max_index(), n_leaves);
tree_nodes->push_back(block_tree_node_t(lower,lower+n_leaves-1,iterator->get_weight()));
collect_tree_nodes_recursive(iterator, tree_nodes, lower);
lower = lower + n_leaves;
}
else
lower++;
collect_tree_nodes_recursive(iterator, tree_nodes);
SG_UNREF(iterator);
}
while ((iterator = (CIndexBlock*)sub_blocks->get_next_element()) != NULL);
}
SG_UNREF(sub_blocks);
}

void collect_leaf_blocks_recursive(CIndexBlock* subtree_root_block, CList* list)
{
CList* sub_blocks = subtree_root_block->get_sub_blocks();
if (sub_blocks->get_num_elements() == 0)
{
list->append_element(subtree_root_block);
}
else
{
CIndexBlock* iterator = (CIndexBlock*)sub_blocks->get_first_element();
do
{
collect_leaf_blocks_recursive(iterator, list);
SG_UNREF(iterator);
}
while ((iterator = (CIndexBlock*)sub_blocks->get_next_element()) != NULL);
}
SG_UNREF(sub_blocks);
}

CIndexBlockTree::CIndexBlockTree() : CIndexBlockRelation(), m_root_block(NULL)
{

Expand Down Expand Up @@ -126,50 +75,27 @@ void CIndexBlockTree::set_root_block(CIndexBlock* root_block)

SGVector<index_t> CIndexBlockTree::get_SLEP_ind()
{
CList* blocks = new CList(true);
collect_leaf_blocks_recursive(m_root_block, blocks);
SG_DEBUG("Collected %d leaf blocks\n", blocks->get_num_elements());
check_blocks_list(blocks);


SGVector<index_t> ind(blocks->get_num_elements()+1);

int t_i = 0;
ind[0] = 0;
CIndexBlock* iterator = (CIndexBlock*)blocks->get_first_element();
do
{
ind[t_i+1] = iterator->get_max_index();
SG_DEBUG("Blocks = [%d,%d]\n", iterator->get_min_index(), iterator->get_max_index());
SG_UNREF(iterator);
t_i++;
}
while ((iterator = (CIndexBlock*)blocks->get_next_element()) != NULL);

SG_UNREF(blocks);

return ind;
SG_SNOTIMPLEMENTED;
return SGVector<index_t>();
}

SGVector<float64_t> CIndexBlockTree::get_SLEP_ind_t()
{
CList* blocks = new CList(true);
int n_blocks = get_SLEP_ind().vlen;
SG_DEBUG("Number of blocks = %d \n", n_blocks);

vector<block_tree_node_t> tree_nodes = vector<block_tree_node_t>();

collect_tree_nodes_recursive(m_root_block, &tree_nodes,1);
collect_tree_nodes_recursive(m_root_block, &tree_nodes);

SGVector<float64_t> ind_t(3+3*tree_nodes.size());
// supernode
ind_t[0] = -1;
ind_t[1] = -1;
ind_t[2] = 1.0;

for (int32_t i=0; i<tree_nodes.size(); i++)
for (int32_t i=0; i<(int32_t)tree_nodes.size(); i++)
{
ind_t[3+i*3] = tree_nodes[i].t_min_index;
ind_t[3+i*3] = tree_nodes[i].t_min_index + 1;
ind_t[3+i*3+1] = tree_nodes[i].t_max_index;
ind_t[3+i*3+2] = tree_nodes[i].weight;
}
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/lib/IndexBlockTree.h
Expand Up @@ -40,19 +40,19 @@ class CIndexBlockTree : public CIndexBlockRelation
/** returns information about blocks in
* SLEP "ind" format
*/
SGVector<index_t> get_SLEP_ind();
virtual SGVector<index_t> get_SLEP_ind();

/** returns information about blocks relations
* in SLEP "ind_t" format
*/
SGVector<float64_t> get_SLEP_ind_t();
virtual SGVector<float64_t> get_SLEP_ind_t();

virtual EIndexBlockRelationType get_relation_type() const { return TREE; }

/** get name */
const char* get_name() const { return "IndexBlockTree"; };

private:
protected:

/** root block */
CIndexBlock* m_root_block;
Expand Down
22 changes: 18 additions & 4 deletions src/shogun/lib/slep/slep_logistic.cpp
Expand Up @@ -241,13 +241,13 @@ double compute_lambda_logistic(
for (int i=0; i<n_vecs; i++)
{
if (y[i]>0)
b = m2;
b = double(m2) / (n_vecs*n_vecs);
else
b = -m1;
b = -double(m1) / (n_vecs*n_vecs);

features->add_to_dense_vec(b,i,ATb,n_feats);
}

if (options.general)
lambda_max = general_findLambdaMax(ATb, n_feats, options.G, options.ind_t, options.n_nodes);
else
Expand Down Expand Up @@ -304,6 +304,20 @@ slep_result_t slep_logistic(
SGVector<double> c(n_blocks);
c.zero();

if (options.mode == FEATURE_TREE)
{
int m1=0;
int m2=0;
for (i=0; i<n_vecs; i++)
{
if (y[i]>0)
m1++;
else
m2++;
}
c[0] = CMath::log(double(m1)/m2);
}

double* s = SG_CALLOC(double, n_feats*n_tasks);
double* sc = SG_CALLOC(double, n_tasks);
double* g = SG_CALLOC(double, n_feats*n_tasks);
Expand Down Expand Up @@ -512,7 +526,7 @@ slep_result_t slep_logistic(
double aa = -y[i]*(Aw[i]+c[0]);
double bb = CMath::max(aa,0.0);

fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs;
}
}
break;
Expand Down
26 changes: 26 additions & 0 deletions src/shogun/transfer/multitask/Task.cpp
@@ -0,0 +1,26 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Copyright (C) 2012 Sergey Lisitsyn
*/

#include <shogun/transfer/multitask/Task.h>

using namespace shogun;

/** Default constructor.
 *
 * Delegates to the default CIndexBlock constructor and performs no
 * further initialization of its own.
 */
CTask::CTask() :
	CIndexBlock()
{
}

/** Constructor.
 *
 * Passes all four arguments, unchanged and in the same order, to the
 * CIndexBlock base constructor; the body itself does nothing else.
 *
 * @param min_index smallest index of the task
 * @param max_index largest index of the task
 * @param weight weight of the task
 * @param name name of the task
 */
CTask::CTask(index_t min_index, index_t max_index, float64_t weight, const char* name)
	: CIndexBlock(min_index, max_index, weight, name)
{
}

/** Destructor; the body is intentionally empty. */
CTask::~CTask() {}
47 changes: 47 additions & 0 deletions src/shogun/transfer/multitask/Task.h
@@ -0,0 +1,47 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Copyright (C) 2012 Sergey Lisitsyn
*/

#ifndef TASK_H_
#define TASK_H_

#include <shogun/lib/SGVector.h>
#include <shogun/lib/List.h>
#include <shogun/lib/IndexBlock.h>

namespace shogun
{

/** @brief Represents a single task in multitask learning.
 *
 * CTask is a CIndexBlock that reports the object name "Task". It declares
 * no data members; everything apart from the constructors, the destructor
 * and get_name() comes from CIndexBlock.
 */
class CTask : public CIndexBlock
{
public:

	/** default constructor */
	CTask();

	/** constructor
	 *
	 * @param min_index smallest index of the task
	 * @param max_index largest index of the task
	 * @param weight weight of the task (optional, defaults to 1.0)
	 * @param name name of the task (optional, defaults to "task")
	 */
	CTask(index_t min_index, index_t max_index,
	      float64_t weight=1.0, const char* name="task");

	/** destructor */
	~CTask();

	/** get name
	 *
	 * @return the constant string "Task"
	 */
	virtual const char* get_name() const
	{
		return "Task";
	}

};

}
#endif

0 comments on commit 2cf3002

Please sign in to comment.