Skip to content

Commit

Permalink
add a very basic web_like search endpoint
Browse files Browse the repository at this point in the history
This is related to metacpan-web's module-model's search method. This
then needs to be wrapped into search_expanded, search_collapsed etc.
  • Loading branch information
jberger committed Nov 19, 2016
1 parent 9514007 commit 14936ac
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 1 deletion.
2 changes: 1 addition & 1 deletion cpanfile
Expand Up @@ -82,7 +82,7 @@ requires 'LWP::UserAgent', '6.15';
requires 'LWP::UserAgent::Paranoid';
requires 'List::AllUtils', '0.09';
requires 'List::MoreUtils', '0.413';
requires 'List::Util', '1.43';
requires 'List::Util', '1.45';
requires 'Log::Contextual';
requires 'Log::Log4perl';
requires 'Log::Log4perl::Appender::ScreenColoredLevels';
Expand Down
22 changes: 22 additions & 0 deletions lib/MetaCPAN/Server/Controller/Search/WebLike.pm
@@ -0,0 +1,22 @@
package MetaCPAN::Server::Controller::Search::WebLike;

use strict;
use warnings;

use Moose;

BEGIN { extends 'MetaCPAN::Server::Controller' }

with 'MetaCPAN::Server::Role::JSONP';

# Endpoint chained off /search/index under the path part "web_like"; it takes
# no further path arguments.
#
# Reads the "q" request parameter, asks the Search model to turn it into an
# Elasticsearch query, runs that query and stashes whatever run_query returns.
#
# Catalyst supplies an array reference for a parameter that occurs more than
# once in a request (?q=foo&q=bar). build_query treats its argument as a plain
# string, so only the first occurrence is used in that case.
sub get : Chained('/search/index') : PathPart('web_like') : Args(0) {
    my ( $self, $c ) = @_;

    my $q = $c->req->params->{q};
    $q = $q->[0] if ref $q eq 'ARRAY';

    my $model = $c->model('Search');
    my $query = $model->build_query($q);

    $c->stash( $model->run_query($query) );
}

1;
185 changes: 185 additions & 0 deletions lib/MetaCPAN/Server/Model/Search.pm
@@ -0,0 +1,185 @@
package MetaCPAN::Server::Model::Search;

use strict;
use warnings;

use Moose;

extends 'MetaCPAN::Server::Model::CPAN';

use Hash::Merge qw( merge );
use List::Util qw(uniq);

# NOTE(review): not referenced anywhere else in this package as shown;
# presumably a page size carried over from metacpan-web -- confirm before
# relying on or removing it.
my $RESULTS_PER_RUN = 200;

# Distributions rejected by the filter that _not_rogue builds.
my @ROGUE_DISTRIBUTIONS = qw(
    kurila
    perl_debug
    perl_mlb
    perl-5.005_02+apache1.3.3+modperl
    pod2texi
    perlbench
    spodcxx
    Bundle-Everything
);

# Returns a filter hashref of the form
#   { not => { filter => { or => [ { term => { distribution => NAME } }, ... ] } } }
# with one term clause per entry of @ROGUE_DISTRIBUTIONS, in list order.
# A fresh structure is built on every call.
sub _not_rogue {
    my @excluded;
    for my $dist (@ROGUE_DISTRIBUTIONS) {
        push @excluded, { term => { distribution => $dist } };
    }

    return { not => { filter => { or => \@excluded } } };
}

# Assemble the Elasticsearch request body for a web-style module search.
# (This method was previously named "search".)
#
# Arguments:
#   $query  - the raw search string. It is used verbatim in the exact-match
#             term clauses and, with every "::" replaced by a space, in the
#             analyzed query_string clauses.
#   $params - optional hashref of additional request-body entries. It is
#             passed as the left-hand argument to Hash::Merge's merge()
#             together with the generated body. Defaults to {}.
#
# Returns the merged request body as a hashref, with its "fields" list
# de-duplicated.
sub build_query {
    my ( $self, $query, $params ) = @_;
    $params //= {};

    my $clean = $query;
    $clean =~ s/::/ /g;

    # exact matches result in a huge boost
    my @exact_matches = map {
        +{ term => { $_ => { value => $query, boost => 100 } } }
    } qw(documentation module.name);

    # Options common to both analyzed query_string clauses.
    my %query_string_common = (
        query                  => $clean,
        default_operator       => 'AND',
        allow_leading_wildcard => 0,
        use_dis_max            => 1,
    );

    my $name_match = {
        query_string => {
            %query_string_common,
            fields => [
                qw(documentation.analyzed^2 module.name.analyzed^2 distribution.analyzed),
                qw(documentation.camelcase module.name.camelcase distribution.camelcase),
            ],
            boost => 3,
        }
    };

    my $text_match = {
        query_string => {
            %query_string_common,
            fields => [qw(abstract.analyzed pod.analyzed)],
        }
    };

    my $positive = {
        bool => {
            should => [
                @exact_matches,

                # take the maximum score from the module name and the
                # abstract/pod
                { dis_max => { queries => [ $name_match, $text_match ] } },
            ]
        }
    };

    # Documents with this mime type match the negative side of the boosting
    # query below (negative_boost 0.5).
    my $negative
        = { term => { mime => { value => 'text/x-script.perl' } } };

    my $scored_query = {
        function_score => {

            # prefer shorter module names
            script_score => {
                script => {
                    lang => 'groovy',
                    file => 'prefer_shorter_module_names_400',
                },
            },
            query => {
                boosting => {
                    negative_boost => 0.5,
                    negative       => $negative,
                    positive       => $positive,
                }
            },
        }
    };

    # A document passes the last filter clause if it either has an indexed
    # module name or has documentation.
    my $has_indexed_module = {
        and => [
            { exists => { field            => 'module.name' } },
            { term   => { 'module.indexed' => 1 } },
        ]
    };
    my $has_documentation = { exists => { field => 'documentation' } };

    my $filter = {
        and => [
            $self->_not_rogue,
            { term => { status     => 'latest' } },
            { term => { authorized => 1 } },
            { term => { indexed    => 1 } },
            { or   => [ $has_indexed_module, $has_documentation ] },
        ]
    };

    my @wanted_fields = qw(
        documentation
        author
        abstract.analyzed
        release
        path
        status
        indexed
        authorized
        distribution
        date
        id
        pod_lines
    );

    my $generated = {
        query => {
            filtered => {
                query  => $scored_query,
                filter => $filter,
            }
        },
        _source => 'module',
        fields  => \@wanted_fields,
    };

    my $search = merge( $params, $generated );

    # Ensure our requested fields are unique so that Elasticsearch doesn't
    # return us the same value multiple times in an unexpected arrayref. This
    # can happen when $params supplies a field (for example distribution)
    # that is also listed in @wanted_fields above.
    $search->{fields} = [ uniq @{ $search->{fields} || [] } ];

    return $search;
}

# Send a prepared request body to Elasticsearch.
#
# Arguments:
#   $query - the request body, e.g. the hashref returned by build_query.
#
# Calls search() on $self->es with the named arguments "index" (taken from
# $self->index) and "body" ($query), and returns whatever that call returns.
sub run_query {
    my ( $self, $query ) = @_;

    my $es      = $self->es;
    my @request = (
        index => $self->index,
        body  => $query,
    );

    return $es->search(@request);
}

1;

0 comments on commit 14936ac

Please sign in to comment.