Skip to content

Commit

Permalink
Code cleanup for sitemaps.
Browse files Browse the repository at this point in the history
  • Loading branch information
oalders committed Mar 27, 2014
1 parent 46d05bf commit d05a564
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 70 deletions.
1 change: 1 addition & 0 deletions bin/generate_sitemap.pl
Expand Up @@ -7,6 +7,7 @@

use FindBin qw ($Bin);
use lib "$Bin/../lib";

use MetaCPAN::Sitemap;

my @parts = (
Expand Down
122 changes: 52 additions & 70 deletions t/sitemap.t
@@ -1,91 +1,73 @@
use lib './lib';
use strict;
use warnings;

use File::Temp qw/ tempdir /;
use MetaCPAN::Sitemap;
use Test::More;
use Try::Tiny;
use XML::Simple;

BEGIN { use_ok('MetaCPAN::Sitemap'); }
{
require_ok('MetaCPAN::Sitemap');

# A very simple check that calling the process routine with no arguments
# causes a croak result.

try {
my $sitemap = MetaCPAN::Sitemap::process();
}
catch {
ok( 1, "Called with no arguments, caught error: $_" );
};

# Test each of the three things that the production script is going to do,
# but limit the searches to a single chunk of 250 results to speed things
# along.

my @tests = (

{
inputs => {
object_type => 'author',
field_name => 'pauseid',
xml_file => '',
cpan_directory => 'author',
},
pattern => qr{https:.+/author/[a-z0-9A-Z-]+},
# Test each of the three things that the production script is going to do,
# but limit the searches to a single chunk of 250 results to speed things
# along.

my @tests = (
{
inputs => {
object_type => 'author',
field_name => 'pauseid',
xml_file => '',
cpan_directory => 'author',
},
pattern => qr{https:.+/author/[a-z0-9A-Z-]+},
},
{
inputs => {
object_type => 'release',
field_name => 'distribution',
xml_file => '',
cpan_directory => 'release',
filter => { status => 'latest' },
},
pattern => qr{https?:.+/release/[a-z0-9A-Z-]+},
}
);

{
inputs => {
object_type => 'release',
field_name => 'distribution',
xml_file => '',
cpan_directory => 'release',
filter => { status => 'latest' },
},
pattern => qr{https?:.+/release/[a-z0-9A-Z-]+},
}
);

my $search_size = 250;
my $temp_dir = tempdir( CLEANUP => 1 );
my $temp_dir = tempdir( CLEANUP => 1 );

foreach my $test (@tests) {
foreach my $test (@tests) {

# Generate the XML file into a file in a temporary directory, then
# check that the file exists, is valid XML, and has the right number
# of URLs.
# Generate the XML file into a file in a temporary directory, then
# check that the file exists, is valid XML, and has the right number
# of URLs.

my $args = $test->{'inputs'};
$args->{'size'} = $search_size;
$args->{'xml_file'} = File::Spec->catfile( $temp_dir,
"$test->{'inputs'}{'object_type'}.xml.gz" );
my $sitemap = MetaCPAN::Sitemap->new($args);
$sitemap->process();
my $args = $test->{inputs};
$args->{size} = 250;
$args->{xml_file} = File::Spec->catfile( $temp_dir,
"$test->{inputs}{object_type}.xml.gz" );
my $sitemap = MetaCPAN::Sitemap->new($args);
$sitemap->process();

ok( -e $args->{'xml_file'},
"XML output file for $args->{'object_type'} exists" );
ok( -e $args->{xml_file},
"XML output file for $args->{object_type} exists" );

open( my $xmlFH, '<:gzip', $args->{'xml_file'} );
open( my $xml_fh, '<:gzip', $args->{xml_file} );

my $xml = XMLin($xmlFH);
ok( defined $xml, "XML for $args->{'object_type'} checks out" );
my $xml = XMLin($xml_fh);
ok( defined $xml, "XML for $args->{object_type} checks out" );

ok( @{ $xml->{'url'} }, "We have some URLs to look at" );
is(
$sitemap->{'size'},
scalar @{ $xml->{'url'} },
"Number of URLs is correct"
);
ok( @{ $xml->{url} }, 'We have some URLs to look at' );
is(
$sitemap->{size},
scalar @{ $xml->{url} },
"Number of URLs is correct"
);

# Check that each of the urls has the right pattern.
# Check that each of the urls has the right pattern.

foreach my $url ( @{ $xml->{'url'} } ) {
like( $url, $test->{'pattern'}, "URL matches" );
}
foreach my $url ( @{ $xml->{url} } ) {
like( $url, $test->{pattern}, 'URL matches' );
}

done_testing;
}

done_testing();

0 comments on commit d05a564

Please sign in to comment.