Skip to content

Commit

Permalink
Use the built-in get_unique_labels instead of writing a new function.
Browse files Browse the repository at this point in the history
  • Loading branch information
pluskid committed Apr 5, 2012
1 parent 49b6e2e commit 580d1f8
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 31 deletions.
33 changes: 13 additions & 20 deletions src/shogun/evaluation/ClusteringEvaluation.cpp
Expand Up @@ -19,16 +19,6 @@
using namespace shogun;
using namespace std;

vector<int32_t> CClusteringEvaluation::unique_labels(CLabels* labels)
{
	// A std::set keeps exactly one copy of every integer label it is given
	// and iterates over them in ascending order.
	std::set<int32_t> distinct;

	// Visit the labels from the last index down to index 0.
	int32_t remaining=labels->get_num_labels();
	while (remaining-- > 0)
		distinct.insert(labels->get_int_label(remaining));

	// Copy the de-duplicated labels into a vector for the caller.
	return std::vector<int32_t>(distinct.begin(), distinct.end());
}

int32_t CClusteringEvaluation::find_match_count(const SGVector<int32_t>& l1, int32_t m1, const SGVector<int32_t>& l2, int32_t m2)
{
int32_t match_count=0;
Expand All @@ -49,36 +39,37 @@ int32_t CClusteringEvaluation::find_mismatch_count(const SGVector<int32_t>& l1,
void CClusteringEvaluation::best_map(CLabels* predicted, CLabels* ground_truth)
{
ASSERT(predicted->get_num_labels() == ground_truth->get_num_labels());
std::vector<int32_t> label_p=unique_labels(predicted);
std::vector<int32_t> label_g=unique_labels(ground_truth);
SGVector<float64_t> label_p=predicted->get_unique_labels();
SGVector<float64_t> label_g=ground_truth->get_unique_labels();

SGVector<int32_t> predicted_ilabels=predicted->get_int_labels();
SGVector<int32_t> groundtruth_ilabels=ground_truth->get_int_labels();

int32_t n_class=max(label_p.size(), label_g.size());
int32_t n_class=max(label_p.vlen, label_g.vlen);
SGMatrix<float64_t> G(n_class, n_class);
G.zero();

for (size_t i=0; i < label_g.size(); ++i)
for (size_t i=0; i < label_g.vlen; ++i)
{
for (size_t j=0; j < label_p.size(); ++j)
for (size_t j=0; j < label_p.vlen; ++j)
{
G(i, j)=find_mismatch_count(groundtruth_ilabels, label_g[i],
predicted_ilabels, label_p[j]);
G(i, j)=find_mismatch_count(groundtruth_ilabels, static_cast<int32_t>(label_g[i]),
predicted_ilabels, static_cast<int32_t>(label_p[j]));
}
}

Munkres munkres_solver(G);
munkres_solver.solve();

std::map<int32_t, int32_t> label_map;
for (size_t i=0; i < label_p.size(); ++i)
for (size_t i=0; i < label_p.vlen; ++i)
{
for (size_t j=0; j < label_g.size(); ++j)
for (size_t j=0; j < label_g.vlen; ++j)
{
if (G(j, i) == 0)
{
label_map.insert(make_pair(label_p[i], label_g[j]));
label_map.insert(make_pair(static_cast<int32_t>(label_p[i]),
static_cast<int32_t>(label_g[j])));
break;
}
}
Expand All @@ -87,5 +78,7 @@ void CClusteringEvaluation::best_map(CLabels* predicted, CLabels* ground_truth)
for (int32_t i= 0; i < predicted_ilabels.vlen; ++i)
predicted->set_int_label(i, label_map[predicted_ilabels[i]]);

label_p.free_vector();
label_g.free_vector();
G.destroy_matrix();
}
7 changes: 0 additions & 7 deletions src/shogun/evaluation/ClusteringEvaluation.h
Expand Up @@ -46,13 +46,6 @@ class CClusteringEvaluation: public CEvaluation
*/
virtual float64_t evaluate(CLabels* predicted, CLabels* ground_truth) = 0;
protected:
/** get a vector of the unique labels that occur
*
* @param labels labels to be investigated
* @return a vector of unique labels
*/
std::vector<int32_t> unique_labels(CLabels* labels);

/** find number of matches in the two labels sequence.
*
* For each index i, if l1[i] == m1 and l2[i] == m2, then we get a match.
Expand Down
12 changes: 8 additions & 4 deletions src/shogun/evaluation/ClusteringMutualInformation.cpp
Expand Up @@ -18,12 +18,12 @@ using namespace shogun;

float64_t CClusteringMutualInformation::evaluate(CLabels* predicted, CLabels* ground_truth)
{
std::vector<int32_t> label_p=unique_labels(predicted);
std::vector<int32_t> label_g=unique_labels(ground_truth);
SGVector<float64_t> label_p=predicted->get_unique_labels();
SGVector<float64_t> label_g=ground_truth->get_unique_labels();

if (label_p.size() != label_g.size())
if (label_p.vlen != label_g.vlen)
SG_ERROR("Number of classes are different\n");
uint32_t n_class=label_p.size();
uint32_t n_class=label_p.vlen;
float64_t n_label=predicted->get_num_labels();

SGVector<int32_t> ilabels_p=predicted->get_int_labels();
Expand Down Expand Up @@ -65,5 +65,9 @@ float64_t CClusteringMutualInformation::evaluate(CLabels* predicted, CLabels* gr
entropy_p += -G_colsum[i] * log(G_colsum[i])/log(2.);
}

label_p.free_vector();
label_g.free_vector();
G.destroy_matrix();

return mutual_info / std::max(entropy_g, entropy_p);
}

0 comments on commit 580d1f8

Please sign in to comment.