Skip to content

Commit

Permalink
Merge pull request #29 from wkretzsch/master
Browse files Browse the repository at this point in the history
msout.pm now detects files with fewer haplotypes than expected
  • Loading branch information
Chris Fields committed Dec 9, 2011
2 parents 91b8fb1 + 5f7fd90 commit 0664739
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 13 deletions.
17 changes: 12 additions & 5 deletions Bio/SeqIO/msout.pm
Expand Up @@ -77,7 +77,7 @@ particular purpose.

package Bio::SeqIO::msout;
use version;
our $API_VERSION = qv('1.1.6');
our $API_VERSION = qv('1.1.7');

use strict;
use base qw(Bio::SeqIO); # This ISA Bio::SeqIO object
Expand Down Expand Up @@ -533,7 +533,7 @@ Note : This function is only included for convention. It calls get_next_seq(
=cut

sub next_seq {

    # Conventional SeqIO entry point: it exists only so callers can use the
    # usual next_seq() name, and simply forwards to get_next_seq().
    #
    # The invocant must be shifted off @_ exactly once. A second
    # "my $self = shift" would re-declare $self and overwrite it with the
    # next (normally absent) argument, so the delegation below would be
    # attempted on undef.
    my $self = shift;

    # Returns whatever get_next_seq() returns, unchanged.
    return $self->get_next_seq();
}

Expand Down Expand Up @@ -564,8 +564,15 @@ sub get_next_hap {
# Setting last_haps_run_num
$self->{LAST_HAPS_RUN_NUM} = $self->get_next_run_num;

my $last_read_hap = $self->get_last_read_hap_num;
my ($seqstring) =
$self->_get_next_clean_hap( $self->{_filehandle}, 1, $end_run );
if ( !defined $seqstring && $last_read_hap < $self->get_tot_haps ) {
$self->throw(
"msout file has only $last_read_hap hap(s), which is less than indicated in msinfo line ( "
. $self->get_tot_haps
. " )" );
}

return $seqstring;
}
Expand Down Expand Up @@ -739,10 +746,10 @@ sub _load_run_info {
my ( $self, $fh ) = @_;

my $data = <$fh>;

# getting rid of excess newlines
while (defined($data) && $data !~ /./){
$data = <$fh>;
while ( defined($data) && $data !~ /./ ) {
$data = <$fh>;
}

# In this case we are at EOF
Expand Down
82 changes: 74 additions & 8 deletions t/SeqIO/msout.t
@@ -1,6 +1,6 @@
#!/usr/bin/perl
use version;
our $API_VERSION = qv('1.1.6');
our $API_VERSION = qv('1.1.7');

use strict;
use File::Path qw(mkpath rmtree);
Expand All @@ -10,7 +10,7 @@ BEGIN {
use Bio::Root::Test;

test_begin(
-tests => 85,
-tests => 89,
-requires_modules => [q(Bio::SeqIO::msout)],
-requires_networking => 0
);
Expand All @@ -21,19 +21,28 @@ BEGIN {

# skip tests if the msout.pm module is too old.
my $api_version = $Bio::SeqIO::msout::API_VERSION;
cmp_ok( $api_version,
'>=', qv('1.1.5'), "Bio::SeqIO::msout is at least api version 1.1.5" );
cmp_ok( $api_version, '>=', qv('1.1.5'),
"Bio::SeqIO::msout is at least api version 1.1.5" );

test_file_1( 0, "msout/msout_infile1" );
test_file_2( 0, "msout/msout_infile2" );
test_file_3( 0, "msout/msout_infile3" );

# tests to run for api versions >= 1.1.6
SKIP: {
skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6) , 22 unless $api_version >= qv('1.1.6');
skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6), 22
unless $api_version >= qv('1.1.6');
test_file_4( 0, q(msout/msout_infile4) );
}

# tests to run for api versions >= 1.1.7
SKIP: {
skip q($Bio::SeqIO::msout::API_VERSION < 1.1.7), 4
unless $api_version >= qv('1.1.7');
bad_test_file_1( 0, q(msout/bad_msout_infile1) );
bad_test_file_2( 0, q(msout/bad_msout_infile2) );
}

sub create_dir {

my $dir = shift;
Expand Down Expand Up @@ -439,8 +448,8 @@ sub test_file_4 {
## Test file 4
##############################################################################

# All this does is test to see if Bio::SeqIO::msout can handle ms output files
# with multiple newline characters randomly interspersed in the file.
# All this does is test to see if Bio::SeqIO::msout can handle ms output files
# with multiple newline characters randomly interspersed in the file.

my $gzip = shift;
my $infile = shift;
Expand Down Expand Up @@ -794,7 +803,6 @@ END
close OUT;
}


sub print_to_file {
my ( $ra_in, $out ) = @_;
unless ( open OUT, ">$out" ) {
Expand All @@ -820,3 +828,61 @@ sub convert_bases_to_nums {
return @out_seqstrings;

}

sub bad_test_file_1 {
##############################################################################
## Bad Test file 1
##############################################################################

    # Checks that msout.pm throws when the msinfo line advertises MORE
    # haplotypes than the file actually contains. With the fixture this
    # sub is called with (msout/bad_msout_infile1) the msinfo line says 9
    # haps but only 2 haplotype lines follow, which is what the expected
    # error message below spells out.
    #
    # Arguments:
    #   $gzip   - true to read the input through a "gunzip -c" pipe
    #   $infile - input file name, mapped through test_input_file()
    #
    # Runs two tests: an isa_ok on the parser and a throws_ok on the
    # first get_next_run call.

    my ( $gzip, $infile ) = @_;
    my $path = test_input_file($infile);

    # Either the plain path, or a pipe-open spec that decompresses it.
    my $source = $gzip ? "gunzip -c <$path |" : $path;

    my $msout = Bio::SeqIO->new(
        -file   => "$source",
        -format => 'msout',
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    # Reading the first run must fail with the "too few haps" message.
    throws_ok { $msout->get_next_run }
    qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/,
        q(Caught error in bad msout file 1);

    return;
}

sub bad_test_file_2 {
##############################################################################
## Bad Test file 2
##############################################################################

    # Checks that msout.pm throws when the msinfo line advertises FEWER
    # haplotypes than the file actually contains. With the fixture this
    # sub is called with (msout/bad_msout_infile2) the msinfo line says 4
    # haps but 5 haplotype lines follow, so the parser does not find the
    # '//' run separator where it expects one.
    #
    # Arguments:
    #   $gzip   - true to read the input through a "gunzip -c" pipe
    #   $infile - input file name, mapped through test_input_file()
    #
    # Runs two tests: an isa_ok on the parser and a throws_ok on the
    # first get_next_run call.

    my ( $gzip, $infile ) = @_;
    my $path = test_input_file($infile);

    # Either the plain path, or a pipe-open spec that decompresses it.
    my $source = $gzip ? "gunzip -c <$path |" : $path;

    my $msout = Bio::SeqIO->new(
        -file   => "$source",
        -format => 'msout',
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    # Reading the first run must fail with the "too many haps" message.
    throws_ok { $msout->get_next_run }
    qr/\'\/\/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/,
        q(Caught error in bad msout file 2);

    return;
}
8 changes: 8 additions & 0 deletions t/data/msout/bad_msout_infile1
@@ -0,0 +1,8 @@
ms 9 1 -s 7 -I 3 4 4 1
1 1 1

//
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85.801
4140411
5040410
11 changes: 11 additions & 0 deletions t/data/msout/bad_msout_infile2
@@ -0,0 +1,11 @@
ms 4 1 -s 7 -I 3 4 4 1
1 1 1

//
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85.801
4140411
5040410
4140411
4140411
4140411

0 comments on commit 0664739

Please sign in to comment.