Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #647 from metacpan/mickey/package_cleanup
Added cleanup mode for script/package
  • Loading branch information
oalders committed May 14, 2017
2 parents 4d5f6f7 + 0ec347e commit 94f053f
Showing 1 changed file with 45 additions and 5 deletions.
50 changes: 45 additions & 5 deletions lib/MetaCPAN/Script/Package.pm
Expand Up @@ -2,10 +2,11 @@ package MetaCPAN::Script::Package;

use Moose;

use CPAN::DistnameInfo ();
use IO::Uncompress::Gunzip ();
use Log::Contextual qw( :log );
use MetaCPAN::Document::Package ();
use IO::Uncompress::Gunzip ();
use CPAN::DistnameInfo ();
use MetaCPAN::Types qw( Bool );

with 'MooseX::Getopt', 'MetaCPAN::Role::Script';

Expand All @@ -15,6 +16,12 @@ Loads 02packages.details info into db.
=cut

# Opt-in switch for the cleanup pass. When true, index_packages calls
# run_cleanup after indexing so that package documents which were not seen
# in the current 02packages.details run are deleted. Defaults to off.
# The 'documentation' text is what MooseX::Getopt shows in the usage output.
has clean_up => (
    is            => 'ro',
    isa           => Bool,
    default       => 0,
    documentation =>
        'remove indexed packages that are no longer listed in 02packages.details',
);

sub run {
my $self = shift;
$self->index_packages;
Expand Down Expand Up @@ -44,11 +51,14 @@ sub index_packages {
}
log_debug {$meta};

my $bulk_helper = $self->es->bulk_helper(
my $bulk = $self->es->bulk_helper(
index => $self->index->name,
type => 'package',
);

my %seen;
log_debug {"adding data"};

# read the rest of the file line-by-line (too big to slurp)
while ( my $line = <$fh> ) {
next unless $line;
Expand All @@ -66,19 +76,49 @@ sub index_packages {
dist_version => $distinfo->version,
};

$bulk_helper->update(
$bulk->update(
{
id => $name,
doc => $doc,
doc_as_upsert => 1,
}
);

$seen{$name} = 1;
}
$bulk->flush;

$self->run_cleanup( $bulk, \%seen ) if $self->clean_up;

$bulk_helper->flush;
log_info {'finished indexing 02packages.details'};
}

# Delete 'package' documents whose id was not seen during the current
# indexing run.
#
# Arguments:
#   $self - the script object (provides ->es and ->index)
#   $bulk - bulk helper used to queue and flush the deletions
#   $seen - hashref whose keys are the package ids that were just indexed
#
# Returns nothing. Does no work when $seen is empty/undefined or when no
# stale ids are found.
sub run_cleanup {
    my ( $self, $bulk, $seen ) = @_;

    # Safety net: with an empty "seen" set every indexed document would look
    # stale and the whole package type would be wiped. Refuse to do that.
    unless ( $seen && %{$seen} ) {
        log_warn {"no packages were seen, skipping cleanup"};
        return;
    }

    log_debug {"checking package data to remove"};

    my $scroll = $self->es->scroll_helper(
        index  => $self->index->name,
        type   => 'package',
        scroll => '30m',
        body   => { query => { match_all => {} } },
    );

    my @remove;
    my $count = $scroll->total;
    while ( my $p = $scroll->next ) {
        my $id = $p->{_id};

        # Ids are only collected here; the actual deletion is issued in one
        # go after the scroll has been fully consumed.
        unless ( exists $seen->{$id} ) {
            push @remove, $id;
            log_debug {"removed $id"};
        }

        # Progress report every 10000 documents checked.
        log_debug { $count . " left to check" } if --$count % 10000 == 0;
    }

    # Nothing stale: avoid queueing an empty delete and a pointless flush.
    return unless @remove;

    $bulk->delete_ids(@remove);
    $bulk->flush;
    return;
}

__PACKAGE__->meta->make_immutable;
1;

Expand Down

0 comments on commit 94f053f

Please sign in to comment.