Skip to content

Commit

Permalink
Merge pull request #39 from daisieh/bug3331
Browse files Browse the repository at this point in the history
Merging in @daisieh commits
  • Loading branch information
cjfields committed Aug 24, 2012
2 parents 2134908 + f7b9e38 commit 87dbcf5
Show file tree
Hide file tree
Showing 5 changed files with 1,666 additions and 56 deletions.
113 changes: 73 additions & 40 deletions Bio/Tools/Phylo/PAML.pm
Expand Up @@ -81,11 +81,11 @@ baseml, basemlg, codemlsites and yn00
for my $node ($tree->get_nodes()) {
# inspect the tree: the "t" (time) parameter is available via
# $node->branch_length(); all other branch-specific parameters
# ("omega", "dN", etc.) are available via
# ("omega", "dN", etc.) are available via
# ($omega) = $node->get_tag_values('omega');
}
# if you are using model based Codeml then trees are stored in each
# if you are using model based Codeml then trees are stored in each
# modelresult object
for my $modelresult ( $result->get_NSSite_results ) {
# model M0, M1, etc
Expand All @@ -94,7 +94,7 @@ baseml, basemlg, codemlsites and yn00
for my $node ($tree->get_nodes()) {
# inspect the tree: the "t" (time) parameter is available via
# $node->branch_length(); all other branch-specific parameters
# ("omega", "dN", etc.) are available via
# ("omega", "dN", etc.) are available via
# ($omega) = $node->get_tag_values('omega');
}
}
Expand All @@ -121,8 +121,8 @@ Please report any problems to the mailing list (see FEEDBACK below).
=head1 TO DO
Implement get_posteriors(). For NSsites models, obtain arrayrefs of
posterior probabilities for membership in each class for every
Implement get_posteriors(). For NSsites models, obtain arrayrefs of
posterior probabilities for membership in each class for every
position; probabilities correspond to classes w0, w1, ... etc.
my @probs = $result->get_posteriors();
Expand All @@ -149,15 +149,15 @@ the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Support
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and
reponsive experts will be able look at the problem and quickly
address it. Please include a thorough description of the problem
rather than to the module maintainer directly. Many experienced and
reponsive experts will be able look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
Expand All @@ -182,7 +182,7 @@ Dave Messina dmessina@cpan.org
=head1 TODO
RST parsing -- done, Avilella contributions bug#1506, added by jason 1.29
-- still need to parse in joint probability and non-syn changes
-- still need to parse in joint probability and non-syn changes
at site table
=head1 APPENDIX
Expand Down Expand Up @@ -224,7 +224,7 @@ use Bio::Tools::Phylo::PAML::ModelResult;
Returns : Bio::Tools::Phylo::PAML
Args : Hash of options: -file, -fh, -dir
-file (or -fh) should contain the contents of the PAML
outfile;
outfile;
-dir is the (optional) name of the directory in
which the PAML program was run (and includes other
PAML-generated files from which we can try to gather data)
Expand Down Expand Up @@ -299,7 +299,7 @@ sub next_result {
elsif (
m/^Model\s+(\d+)/
|| ( ( !$has_model_line && m/^TREE/ )
&& $seqtype eq 'CODONML'
&& $seqtype eq 'CODONML'
&& ($self->{'_summary'}->{'version'} !~ /4/))
# last bit to keep PAML >= 4 from being caught here
# bug 2482. Not sure this is the right fix, but tests
Expand Down Expand Up @@ -485,7 +485,7 @@ sub _parse_summary {
{
@{ $self->{'_summary'} }{qw(seqtype version seqfile model)} =
( $1, $2, $3, $4 );

# in 4.3, the model is on its own line
if ( !defined $self->{'_summary'}->{'model'} ) {
my $model_line = $self->_readline;
Expand All @@ -494,13 +494,16 @@ sub _parse_summary {
$self->{'_summary'}->{'model'} = $model_line;
}
}

defined $self->{'_summary'}->{'model'}
&& $self->{'_summary'}->{'model'} =~ s/Model:\s+//;
$self->_pushback($_)
if $self->{'_summary'}->{'seqtype'} eq 'AAMODEL';
last;
}
elsif ((m/\s+?\d+?\s+?\d+?/) && ( $self->{'_already_parsed_seqs'} != 1 )) {
$self->_parse_seqs;
}
elsif (m/^Data set \d$/) {
$self->{'_summary'} = {};
$self->{'_summary'}->{'multidata'}++;
Expand Down Expand Up @@ -766,7 +769,7 @@ sub _parse_patterns {
my ($self) = @_;
my ( $patternct, @patterns, $ns, $ls );
return if exists $self->{'_summary'}->{'patterns'};

while ( defined( $_ = $self->_readline ) ) {
if ( /^Codon\s+(usage|position)/ || /Model/ ) {
$self->_pushback($_);
Expand Down Expand Up @@ -858,25 +861,26 @@ sub _parse_distmat {
my ($self) = @_;
my @results;
my $ver = 3.14;
my $firstseq, my $secondseq;

while ( defined( $_ = $self->_readline ) ) {
next if /^\s+$/;

# Bypass the reference information (4 lines)
# We need to get the names of the sequences if this is from YN00:
if (/^\(A\)\sNei-Gojobori\s\(1986\)\smethod/) {
$ver = 3.15;
$_ = $self->_readline;
$_ = $self->_readline;
$_ = $self->_readline;
$_ = $self->_readline;
while ( defined( $_ = $self->_readline ) ) {
if ($_ =~ m/.*\d+?\.\d+?\s*\(.*/) {
$secondseq = $_;
last;
}
$firstseq = $_;
}
}
last;
}

return unless (/^Nei\s*\&\s*Gojobori/);

# skip the next line is ver > 3.15
$self->_readline if ( $ver > 3.14 );
#return unless (/^Nei\s*\&\s*Gojobori/);

# skip the next 3 lines
if ( $self->{'_summary'}->{'seqtype'} eq 'CODONML' ) {
Expand All @@ -886,11 +890,23 @@ sub _parse_distmat {
}
my $seqct = 0;
my @seqs;
if ( $self->{'_summary'}->{'seqtype'} eq 'YN00' ) {
if ($firstseq) {
$firstseq =~ s/(.+?)\s+.*/$1/;
$secondseq =~ s/(.+?)\s+.*/$1/;
chomp $firstseq;
chomp $secondseq;
push @seqs, Bio::PrimarySeq->new( -display_id => $firstseq );
push @seqs, Bio::PrimarySeq->new( -display_id => $secondseq );
}
}
while ( defined( $_ = $self->_readline ) ) {
last if ( /^\s+$/ && exists $self->{'_summary'}->{'ngmatrix'} );
next if ( /^\s+$/ || /^NOTE:/i );
chomp;
my ( $seq, $rest ) = split( /\s+/, $_, 2 );
$_ =~ m/(.+?)\s*(-*\d+?\.\d+?.*)/;
my $seq = $1;
my $rest = $2;
$rest = '' unless defined $rest; # get rid of empty messages
my $j = 0;
if ( $self->{'_summary'}->{'seqtype'} eq 'YN00' ) {
Expand Down Expand Up @@ -918,7 +934,11 @@ sub _parse_distmat {
sub _parse_PairwiseCodon {
my ($self) = @_;
my @result;
my ( $a, $b, $log, $model, $t, $kappa, $omega );
my ( $a, $b, $log, $model, $t, $kappa, $omega, $fixedkappa );
# check to see if we have a fixed kappa:
if ( $self->{'_summary'}->{'model'} =~ /kappa = (\d+?\.\d+?) fixed/) {
$fixedkappa = $1;
}
while ( defined( $_ = $self->_readline ) ) {
if (/^pairwise comparison, codon frequencies\:\s*(\S+)\./) {
$model = $1;
Expand All @@ -937,30 +957,43 @@ sub _parse_PairwiseCodon {
# 0.19045 2.92330 0.10941
s/^\s+//;
( $t, $kappa, $omega ) = split;
# if there was a fixed kappa, there will only be two values here ($t, $omega) and $kappa = $fixedkappa.
if ($omega eq "") {
$omega = $kappa;
$kappa = $fixedkappa;
}
}
}
# 5th line of a pair block, e.g.
# t= 0.1904 S= 5.8 N= 135.2 dN/dS= 0.1094 dN= 0.0476 dS= 0.4353
# OR lines like (note last field; this includes a fix for bug #3040)
# t= 0.0439 S= 0.0 N= 141.0 dN/dS= 0.1626 dN= 0.0146 dS= nan
elsif (
m/^t\=\s*(\d+(\.\d+)?)\s+
S\=\s*(\d+(\.\d+)?)\s+
N\=\s*(\d+(\.\d+)?)\s+
dN\/dS\=\s*(\d+(\.\d+)?)\s+
dN\=\s*(\d+(\.\d+)?)\s+
dS\=\s*(\d+(\.\d+)?|nan)/ox
)
elsif (m/^t\=\s*(\d+(\.\d+)?)\s+/)
{
# Breaking out each piece individually so that you can see
# what each regexp actually looks for
my $parse_string = $_;
$parse_string =~ m/.*t\s*\=\s*(\d+?\.\d+?)\s/;
my $temp_t = $1;
$parse_string =~ m/\sS\s*\=\s*(\d+?\.\d+?)\s/;
my $temp_S = $1;
$parse_string =~ m/\sN\s*\=\s*(\d+?\.\d+?)\s/;
my $temp_N = $1;
$parse_string =~ m/\sdN\/dS\s*\=\s*(\d+?\.\d+?)\s/;
my $temp_omega = $1;
$parse_string =~ m/\sdN\s*\=\s*(\d+?\.\d+?)\s/;
my $temp_dN = $1;
$parse_string =~ m/\sdS\s*\=\s*(.+)\s/;
my $temp_dS = $1;
$result[ $b - 1 ]->[ $a - 1 ] = {
'lnL' => $log,
't' => defined $t && length($t) ? $t : $1,
'S' => $3,
'N' => $5,
't' => defined $t && length($t) ? $t : $temp_t,
'S' => $temp_S,
'N' => $temp_N,
'kappa' => $kappa,
'omega' => defined $omega && length($omega) ? $omega : $7,
'dN' => $9,
'dS' => $11
'omega' => defined $omega && length($omega) ? $omega : $temp_omega,
'dN' => $temp_dN,
'dS' => $temp_dS
};
}
# 4th line of a pair block (which is blank)
Expand Down

0 comments on commit 87dbcf5

Please sign in to comment.