Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added new script and mapping for Debian package name mapping
We want to provide the distribution name mappings for Perl
packages to 3rd party sources like Debian.

This commit adds the 'external_package' mapping, and starts
with adding the entry for 'debian' + the script to populate
the data.
  • Loading branch information
mickeyn committed May 12, 2017
1 parent 896370a commit 3dfdfd7
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 0 deletions.
181 changes: 181 additions & 0 deletions lib/MetaCPAN/Script/Debian.pm
@@ -0,0 +1,181 @@
package MetaCPAN::Script::Debian;

use Moose;
use namespace::autoclean;

use CPAN::DistnameInfo;
use DBI ();
use Email::Sender::Simple ();
use Email::Simple ();
use List::MoreUtils qw( uniq );

use MetaCPAN::Types qw( HashRef Str );

with 'MetaCPAN::Role::Script', 'MooseX::Getopt';

has email_to => (
is => 'ro',
isa => Str,
required => 1,
);

has _host_regex => (
is => 'ro',
isa => Str,
lazy => 1,
builder => '_build_host_regex',
);

sub _build_host_regex {
my $self = shift;

my @cpan_hosts = qw<
backpan.cpan.org
backpan.perl.org
cpan.metacpan.org
cpan.noris.de
cpan.org
cpan.perl.org
search.cpan.org
www.cpan.org
www.perl.com
>;

return
'^(https?|ftp)://('
. join( '|', map {s/\./\\./r} @cpan_hosts ) . ')/';
}

sub run {
my $self = shift;

# connect to the database
my $dbh
= DBI->connect(
"dbi:Pg:host=public-udd-mirror.xvm.mit.edu;dbname=udd",
'public-udd-mirror', 'public-udd-mirror' );

# special cases
my %skip = ( 'libbssolv-perl' => 1 );

# multiple queries are needed
my @sql = (

# packages with upstream identified as CPAN
q{select u.source, u.upstream_url from upstream_metadata um join upstream u on um.source = u.source where um.key='Archive' and um.value='CPAN'},

# packages which upstream URL pointing to CPAN
qq{select source, upstream_url from upstream where upstream_url ~ '${\$self->_host_regex}'},
);

my %dist;
my @failures;

for my $sql (@sql) {
my $sth = $dbh->prepare($sql);
$sth->execute();

# map Debian source package to CPAN distro
while ( my ( $source, $url ) = $sth->fetchrow ) {
next if $skip{$source};
$self->dist_for( $source, $url );
if ( my $dist = $self->dist_for( $source, $url ) ) {
$dist{$dist} = $source;
}
else {
push @failures => [ $source, $url ];
}
}
}

if (@failures) {
my $email_body = join "\n" =>
map { sprintf "%s %s", $_->[0], $_->[1] // '<undef>' } @failures;

my $email = Email::Simple->create(
header => [
'Content-Type' => 'text/plain; charset=utf-8',
To => $self->email_to,
From => 'noreply@metacpan.org',
Subject => 'Debian package mapping failures report',
'MIME-Version' => '1.0',
],
body => $email_body,
);
Email::Sender::Simple->send($email);
}

my $bulk = $self->es->bulk_helper(
index => $self->index->name,
type => 'distribution',
);

for my $d ( keys %dist ) {
my $exists = $self->es->exists(
index => $self->index->name,
type => 'distribution',
id => $d,
);
next unless $exists;

$bulk->update(
{
id => $d,
doc => +{
'external_package' => {
debian => $dist{$d}
}
}
}
);
}

$bulk->flush;
}

sub dist_for {
my ( $self, $source, $url ) = @_;

my %alias = (
'datapager' => 'data-pager',
'html-format' => 'html-formatter',
);

my $dist = CPAN::DistnameInfo->new($url);
if ( $dist->dist ) {
return $dist->dist;
}
elsif ( $source =~ /^lib(.*)-perl$/ ) {
my $query
= { term => { 'distribution.lowercase' => $alias{$1} // $1 } };

my $res = $self->index->type('release')->filter($query)->raw->all;
return $res->{hits}{hits}[0]{_source}{distribution}
if exists $res->{hits}{hits} and @{ $res->{hits}{hits} } == 1;
}

return;
}

__PACKAGE__->meta->make_immutable;

1;

=pod
=head1 SYNOPSIS
# bin/metacpan river
=head1 DESCRIPTION
Retrieves the CPAN river data from its source and
updates our ES information.
This can then be accessed here:
http://api.metacpan.org/distribution/Moose
http://api.metacpan.org/distribution/HTTP-BrowserDetect
=cut

10 changes: 10 additions & 0 deletions lib/MetaCPAN/Script/Mapping/CPAN/Distribution.pm
Expand Up @@ -70,6 +70,16 @@ sub mapping {
"index" : "not_analyzed",
"type" : "string"
},
"external_package" : {
"dynamic" : true,
"properties" : {
"debian" : {
"ignore_above" : 2048,
"index" : "not_analyzed",
"type" : "string"
}
}
},
"river" : {
"dynamic" : true,
"properties" : {
Expand Down

0 comments on commit 3dfdfd7

Please sign in to comment.