Skip to content

Commit

Permalink
Introducing 'contributor' type (release contributors info)
Browse files Browse the repository at this point in the history
New type and supporting code & test to fill/fetch the
release contributors info to the index.

* MetaCPAN::Script::Mapping::CPAN::Contributor
Added for type mapping for Elasticsearch

* Script::Contributor
Added for data filling of release contributors.
Script can run in different modes:
--all          # run for all releases in the 'release' type
--distribution # run for all releases matching a given
               # distribution
--release      # run for a single release
               # (format: PAUSEID/DISTRIBUTION-VERSION)

* Script::Release
Updated to store contributor info for processed releases

* Script::Role::Contributor
Added to support common functionality shared by the scripts
for handling contributors info fetching/updating.

* Document::Contributor
Model representation of the new type.

* Server::Controller::Contributor
Controller endpoints for the new type.
  • Loading branch information
mickeyn committed May 18, 2017
1 parent ef51fc2 commit 24d7b24
Show file tree
Hide file tree
Showing 9 changed files with 391 additions and 1 deletion.
70 changes: 70 additions & 0 deletions lib/MetaCPAN/Document/Contributor.pm
@@ -0,0 +1,70 @@
# Document class describing one contributor record: it links a contributor
# (pauseid) to a single release, identified by the release author and the
# release name, together with the distribution name.
# Every attribute below is a required, read-only string.
package MetaCPAN::Document::Contributor;

use MetaCPAN::Moose;

use ElasticSearchX::Model::Document;
use MetaCPAN::Types qw( Str );

# Name of the distribution the release belongs to.
has distribution => (
is => 'ro',
isa => Str,
required => 1,
);

# Author of the release this record refers to.
has release_author => (
is => 'ro',
isa => Str,
required => 1,
);

# Name of the release this record refers to.
has release_name => (
is => 'ro',
isa => Str,
required => 1,
);

# Identifier of the contributor (presumably a PAUSE id, as the name suggests).
has pauseid => (
is => 'ro',
isa => Str,
required => 1,
);

# Finalise the class definition; no attributes are added after this point.
__PACKAGE__->meta->make_immutable;

package MetaCPAN::Document::Contributor::Set;

use strict;
use warnings;

use Moose;

extends 'ElasticSearchX::Model::Document::Set';

# Look up the contributor documents recorded for a single release.
#
# Arguments:
#   $author - release author, matched with a "term" query on release_author
#   $name   - release name, matched with a "term" query on release_name
#
# Returns a hashref. When the search reports at least one hit it has the
# shape { contributors => [ <_source of every returned hit> ] }; when the
# search reports no hits it is an empty hashref. At most 999 documents are
# requested from the search.
sub find_release_contributors {
    my ( $self, $author, $name ) = @_;

    # Both conditions must hold for a document to match.
    my @conditions = (
        { term => { release_author => $author } },
        { term => { release_name   => $name } },
    );

    my $response = $self->es->search(
        index => $self->index->name,
        type  => 'contributor',
        body  => {
            query => { bool => { must => \@conditions } },
            size  => 999,
        },
    );

    return +{} unless $response->{hits}{total};

    my @sources;
    for my $hit ( @{ $response->{hits}{hits} } ) {
        push @sources, $hit->{_source};
    }

    return +{ contributors => \@sources };
}

1;
8 changes: 8 additions & 0 deletions lib/MetaCPAN/Document/Release.pm
Expand Up @@ -413,6 +413,14 @@ sub get_contributors {
}
$authors = [ grep { $_ ne 'unknown' } @$authors ];

# this check is against a failure in tests (because fake author)
return
unless $self->es->exists(
index => $self->index->name,
type => 'author',
id => $author_name,
);

my $author = $self->es->get(
index => $self->index->name,
type => 'author',
Expand Down
118 changes: 118 additions & 0 deletions lib/MetaCPAN/Script/Contributor.pm
@@ -0,0 +1,118 @@
package MetaCPAN::Script::Contributor;

use strict;
use warnings;

# Done inside BEGIN so the variable is already set when the modules "use"d
# further down are compiled. Presumably read by the JSON module to pick its
# backend (here JSON::XS) -- confirm against that module's documentation.
BEGIN {
$ENV{PERL_JSON_BACKEND} = 'JSON::XS';
}

use Moose;
use Ref::Util qw( is_arrayref );

use MetaCPAN::Types qw( Bool HashRef Str );

with 'MetaCPAN::Role::Script', 'MooseX::Getopt',
'MetaCPAN::Script::Role::Contributor';

# Mode switch: when true, run() selects every release (match_all query).
# Off by default.
has all => (
is => 'ro',
isa => Bool,
default => 0,
documentation => 'update contributors for *all* releases',
);

# Mode selector: distribution name; run() selects the releases whose
# 'distribution' field equals this value.
has distribution => (
is => 'ro',
isa => Str,
documentation =>
'update contributors for all releases matching distribution name',
);

# Mode selector: a single release given as "author/release_name".
# The raw string is split into its parts by the author_release attribute.
has release => (
is => 'ro',
isa => Str,
documentation =>
'update contributors for a single release (format: author/release_name)',
);

# Parsed form of 'release', computed on first access by
# _build_author_release (lazy builder).
has author_release => (
is => 'ro',
isa => HashRef,
lazy => 1,
builder => '_build_author_release',
);

# Builder for the 'author_release' attribute: parse the 'release' option.
#
# The option must have the form PAUSEID/DISTRIBUTION-VERSION, i.e. exactly
# two non-empty fields separated by a single slash.
#
# Returns a hashref { author => ..., release => ... }, or nothing when the
# 'release' option is not set.
# Dies when the option does not match the expected format. Input with more
# than two fields (e.g. "A/B/C") is rejected as well; previously the extra
# fields were silently discarded and "A/B" was used.
sub _build_author_release {
    my $self = shift;

    my $spec = $self->release;
    return unless $spec;

    my @fields = split m{/}, $spec;

    @fields == 2 && $fields[0] && $fields[1]
        or die
        "Error: invalid 'release' argument (format: PAUSEID/DISTRIBUTION-VERSION)";

    return +{
        author  => $fields[0],
        release => $fields[1],
    };
}

# Script entry point: select releases according to the chosen mode, gather
# the contributors that still need to be stored for each of them, and write
# them all in one go.
#
# Mode precedence is: 'all', then 'distribution', then 'release'. When none
# of them is set the method returns immediately without doing anything.
# The scroll / bulk timeout is '60m' for 'all' and '5m' otherwise.
sub run {
    my $self = shift;

    my $query;
    if ( $self->all ) {
        $query = { match_all => {} };
    }
    elsif ( $self->distribution ) {
        $query = { term => { distribution => $self->distribution } };
    }
    elsif ( $self->release ) {
        my @must = (
            { term => { author => $self->author_release->{author} } },
            { term => { name   => $self->author_release->{release} } },
        );
        $query = { bool => { must => \@must } };
    }
    else {
        # no mode requested
        return;
    }

    my $timeout = $self->all ? '60m' : '5m';

    my $releases = $self->es->scroll_helper(
        size   => 500,
        scroll => $timeout,
        index  => $self->index->name,
        type   => 'release',
        body   => { query => $query },
        fields => [qw( author distribution name )],
    );

    # Contributor records gathered across all scrolled releases.
    my @pending;

    while ( my $hit = $releases->next ) {

        # each requested field comes back as a list; take its first element
        my $found = $self->get_cpan_author_contributors(
            $hit->{fields}{author}[0],
            $hit->{fields}{name}[0],
            $hit->{fields}{distribution}[0],
        );
        push @pending, @{$found} if is_arrayref($found);
    }

    return $self->update_release_contirbutors( \@pending, $timeout );
}

__PACKAGE__->meta->make_immutable;
1;

__END__

=head1 SYNOPSIS

    # bin/metacpan contributor --all
    # bin/metacpan contributor --distribution Moose
    # bin/metacpan contributor --release ETHER/Moose-2.1806

=head1 DESCRIPTION

Update the list of contributors (CPAN authors only) of all/matching
releases in the 'contributor' type (index).

=cut
4 changes: 4 additions & 0 deletions lib/MetaCPAN/Script/Mapping.pm
Expand Up @@ -8,6 +8,7 @@ use IO::Interactive qw( is_interactive );
use IO::Prompt qw( prompt );
use Log::Contextual qw( :log );
use MetaCPAN::Script::Mapping::CPAN::Author ();
use MetaCPAN::Script::Mapping::CPAN::Contributor ();
use MetaCPAN::Script::Mapping::CPAN::Distribution ();
use MetaCPAN::Script::Mapping::CPAN::Favorite ();
use MetaCPAN::Script::Mapping::CPAN::File ();
Expand Down Expand Up @@ -402,6 +403,9 @@ sub deploy_mapping {
$cpan_index => {
author =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Author::mapping),
contributor =>
decode_json( MetaCPAN::Script::Mapping::CPAN::Contributor::mapping
),
distribution =>
decode_json( MetaCPAN::Script::Mapping::CPAN::Distribution::mapping
),
Expand Down
34 changes: 34 additions & 0 deletions lib/MetaCPAN/Script/Mapping/CPAN/Contributor.pm
@@ -0,0 +1,34 @@
package MetaCPAN::Script::Mapping::CPAN::Contributor;

use strict;
use warnings;

# Return the mapping for the 'contributor' type as a JSON string.
#
# The mapping is not dynamic and declares four string properties
# (release_author, release_name, distribution, pauseid), each one
# "not_analyzed" with "ignore_above" set to 2048.
sub mapping {
    my $json = <<'END_MAPPING';
{
"dynamic" : false,
"properties" : {
"release_author" : {
"ignore_above" : 2048,
"index" : "not_analyzed",
"type" : "string"
},
"release_name" : {
"ignore_above" : 2048,
"index" : "not_analyzed",
"type" : "string"
},
"distribution" : {
"ignore_above" : 2048,
"index" : "not_analyzed",
"type" : "string"
},
"pauseid" : {
"ignore_above" : 2048,
"index" : "not_analyzed",
"type" : "string"
}
}
}
END_MAPPING

    # the heredoc adds a final newline that the returned string must not have
    chomp $json;

    return $json;
}

1;
7 changes: 6 additions & 1 deletion lib/MetaCPAN/Script/Release.pm
Expand Up @@ -19,7 +19,8 @@ use Moose;
use PerlIO::gzip;
use Try::Tiny qw( catch try );

with 'MetaCPAN::Role::Script', 'MooseX::Getopt';
with 'MetaCPAN::Role::Script', 'MooseX::Getopt',
'MetaCPAN::Script::Role::Contributor';

has latest => (
is => 'ro',
Expand Down Expand Up @@ -287,6 +288,10 @@ sub import_archive {
local @ARGV = ( qw(latest --distribution), $document->distribution );
MetaCPAN::Script::Runner->run;
}

my $contrib_data = $self->get_cpan_author_contributors( $document->author,
$document->name, $document->distribution );
$self->update_release_contirbutors($contrib_data);
}

sub _build_cpan_files_list {
Expand Down
68 changes: 68 additions & 0 deletions lib/MetaCPAN/Script/Role/Contributor.pm
@@ -0,0 +1,68 @@
package MetaCPAN::Script::Role::Contributor;

use Moose::Role;

use MetaCPAN::Util qw< digest >;
use Ref::Util qw< is_arrayref >;

# Collect the contributors of one release that are not stored yet.
#
# Arguments:
#   $author       - author of the release
#   $release      - name of the release
#   $distribution - name of the distribution the release belongs to
#
# The contributors are fetched through the 'release' type's
# get_contributors( $author, $release ). Entries without a defined pauseid
# are skipped, as are entries for which a 'contributor' document with the id
# digest( pauseid, release name ) already exists.
#
# Returns an arrayref (possibly empty) of hashrefs: a copy of each remaining
# contributor entry with release_author, release_name and distribution
# added. The entries handed back by get_contributors are left untouched.
sub get_cpan_author_contributors {
    my ( $self, $author, $release, $distribution ) = @_;

    my $es         = $self->es;
    my $index_name = $self->index->name;

    my $data = $self->index->type('release')
        ->get_contributors( $author, $release );

    # get_contributors can come back empty; handle that explicitly instead
    # of relying on autovivification inside the loop below.
    my $contributors = ( $data && $data->{contributors} ) || [];

    my @ret;
    for my $d ( @{$contributors} ) {

        # an undefined pauseid cannot identify a contributor document
        next unless defined $d->{pauseid};

        # skip existing records
        my $id = digest( $d->{pauseid}, $release );
        next
            if $es->exists(
            index => $index_name,
            type  => 'contributor',
            id    => $id,
            );

        # copy, so the caller's data is not modified in place
        push @ret,
            {
            %{$d},
            release_author => $author,
            release_name   => $release,
            distribution   => $distribution,
            };
    }

    return \@ret;
}

# Upsert a list of contributor records into the 'contributor' type.
#
# Arguments:
#   $data    - arrayref of hashrefs carrying pauseid, release_name,
#              release_author and distribution
#   $timeout - optional bulk timeout; '5m' is used when it is false
#
# Returns nothing when $data is false or not an array reference. Otherwise
# every record is queued as an update with doc_as_upsert under the id
# digest( pauseid, release_name ), and the result of the final flush is
# returned.
sub update_release_contirbutors {
    my ( $self, $data, $timeout ) = @_;
    return if !$data || !is_arrayref($data);

    my $bulk = $self->es->bulk_helper(
        index   => $self->index->name,
        type    => 'contributor',
        timeout => $timeout || '5m',
    );

    # Only these fields are written to the stored document.
    my @stored_fields = qw( pauseid release_name release_author distribution );

    for my $record ( @{$data} ) {
        my $id = digest( $record->{pauseid}, $record->{release_name} );

        my %doc;
        @doc{@stored_fields} = @{$record}{@stored_fields};

        $bulk->update(
            {
                id            => $id,
                doc           => \%doc,
                doc_as_upsert => 1,
            }
        );
    }

    return $bulk->flush;
}

no Moose::Role;
1;
21 changes: 21 additions & 0 deletions lib/MetaCPAN/Server/Controller/Contributor.pm
@@ -0,0 +1,21 @@
package MetaCPAN::Server::Controller::Contributor;

use strict;
use warnings;

use Moose;
use MetaCPAN::Util qw( digest );

BEGIN { extends 'MetaCPAN::Server::Controller' }

with 'MetaCPAN::Server::Role::JSONP';

# Action taking exactly two path arguments: the release author and the
# release name. Asks the model's raw set for the contributors of that
# release and, when it gets a true value back, puts it on the stash.
sub get : Path('') : Args(2) {
    my ( $self, $c, $release_author, $release_name ) = @_;

    my $contributors = $self->model($c)
        ->raw->find_release_contributors( $release_author, $release_name );

    # nothing to stash
    return if !$contributors;

    return $c->stash($contributors);
}

1;

0 comments on commit 24d7b24

Please sign in to comment.