Skip to content

Commit

Permalink
Fix for correctly parsing phylip format with a newline between the header and the sequences.
Browse files Browse the repository at this point in the history
  • Loading branch information
daisieh committed Aug 13, 2012
1 parent f7b9e38 commit bcaeac2
Show file tree
Hide file tree
Showing 2 changed files with 284 additions and 10 deletions.
34 changes: 24 additions & 10 deletions Bio/AlignIO/phylip.pm
Expand Up @@ -175,7 +175,6 @@ sub next_aln {
@names,$seqname,$start,$end,$count,$seq);

my $aln = Bio::SimpleAlign->new(-source => 'phylip');

# skip blank lines until we see header line
# if we see a non-blank line that isn't the seqcount and residuecount line
# then bail out of next_aln (return)
Expand All @@ -193,10 +192,19 @@ sub next_aln {
my $idlen = $self->idlength;
$count = 0;
my $iter = 1;
my $interleaved = $self->interleaved;
#my $interleaved = $self->interleaved;
my $interleaved = 0;
while( $entry = $self->_readline) {
last if( $entry =~ /^\s?$/ && $interleaved );

if ($entry =~ /^\s?$/) {
if ($interleaved) {
if ($count <= $seqcount) {
my $msg = "Not a valid interleaved PHYLIP file! Expected " . $seqcount . " sequences, got " . ($count-1);
$self->throw($msg);
}
}
$count = 1;
next;
}
# we've hit the next entry.
if( $entry =~ /^\s+(\d+)\s+(\d+)\s*$/) {
$self->_pushback($entry);
Expand All @@ -217,14 +225,14 @@ sub next_aln {

push @names, $name;
$str =~ s/\s//g;
$count = scalar @names;
#$count = scalar @names;
$hash{$count} = $str;

} elsif( $entry =~ /^\s+(.+)$/ ) {
$interleaved = 0;
#$interleaved = 0;
$str = $1;
$str =~ s/\s//g;
$count = scalar @names;
#$count = scalar @names;
$hash{$count} .= $str;
} elsif( $entry =~ /^(.{$idlen})\s*(.*)\s$/ ||
$entry =~ /^(.{$idlen})(\S{$idlen}\s+.+)\s$/ # Handle weirdness when id is too long
Expand All @@ -236,8 +244,13 @@ sub next_aln {

push @names, $name;
$str =~ s/\s//g;
$count = scalar @names;
#$count = scalar @names;
$hash{$count} = $str;
if (length($str) == $residuecount) {
$interleaved = 0;
} else {
$interleaved = 1;
}
} elsif( $interleaved ) {
if( $entry =~ /^(\S+)\s+(.+)/ ||
$entry =~ /^(.{$idlen})(.*)\s$/ ) {
Expand All @@ -247,13 +260,13 @@ sub next_aln {
$name =~ s/_+$//; # remove any trailing _'s
push @names, $name;
$str =~ s/\s//g;
$count = scalar @names;
#$count = scalar @names;
$hash{$count} = $str;
} else {
$self->debug("unmatched line: $entry");
}
}
$self->throw("Not a valid interleaved PHYLIP file!") if $count > $seqcount;
$count++;
}

if( $interleaved ) {
Expand All @@ -265,6 +278,7 @@ sub next_aln {
$self->_pushback($entry);
last;
}

$count = 0, next if $entry =~ /^\s$/;
$entry =~ /\s*(.*)$/ && do {
$str = $1;
Expand Down

0 comments on commit bcaeac2

Please sign in to comment.