Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/fgvieira/bioperl-live int…
Browse files Browse the repository at this point in the history
…o topic/pull_request_28

Fix conflict with SimpleAlign.t tests, use master branch code
  • Loading branch information
Chris Fields committed Dec 9, 2011
2 parents ea67fb7 + 0ace926 commit 91b8fb1
Show file tree
Hide file tree
Showing 12 changed files with 153 additions and 58 deletions.
3 changes: 2 additions & 1 deletion Bio/AlignIO.pm
Expand Up @@ -393,10 +393,11 @@ sub fh {

sub _initialize {
my($self,@args) = @_;
my ($flat,$alphabet) = $self->_rearrange([qw(DISPLAYNAME_FLAT ALPHABET)],
my ($flat,$alphabet,$width) = $self->_rearrange([qw(DISPLAYNAME_FLAT ALPHABET WIDTH)],
@args);
$self->force_displayname_flat($flat) if defined $flat;
$self->alphabet($alphabet);
$self->width($width) if defined $width;
$self->_initialize_io(@args);
1;
}
Expand Down
4 changes: 2 additions & 2 deletions Bio/AlignIO/fasta.pm
Expand Up @@ -227,8 +227,8 @@ sub write_aln {

sub _get_len {
my ($self,$seq) = @_;
my $chars = $Bio::LocatableSeq::RESIDUE_SYMBOLS;
$seq =~ s{[^$chars]+}{}gi;
my $chars = $Bio::LocatableSeq::GAP_SYMBOLS.$Bio::LocatableSeq::FRAMESHIFT_SYMBOLS;
$seq =~ s{[$chars]+}{}gi;
return CORE::length($seq);
}

Expand Down
23 changes: 22 additions & 1 deletion Bio/PopGen/Genotype.pm
Expand Up @@ -112,10 +112,12 @@ sub new {
my($class,@args) = @_;

my $self = $class->SUPER::new(@args);
my ($marker_name, $ind_id, $alleles) = $self->_rearrange([qw(MARKER_NAME
my ($marker_name, $marker_type, $ind_id, $alleles) = $self->_rearrange([qw(MARKER_NAME
MARKER_TYPE
INDIVIDUAL_ID
ALLELES)],@args);
defined $marker_name && $self->marker_name($marker_name);
defined $marker_type && $self->marker_type($marker_type);
defined $ind_id && $self->individual_id($ind_id);
if( defined $alleles ) {
if( ref($alleles) =~ /array/i ) {
Expand Down Expand Up @@ -145,6 +147,25 @@ sub marker_name{
return $self->{'_marker_name'};
}

=head2 marker_type
Title : marker_type
Usage : my $name = $genotype->marker_type();
Function: Get the marker type for a genotype result
Returns : M (microsatellite, or other multi-allelic
locus) or S (biallelic/SNP locus)
Args : [optional] marker type value to store
=cut

sub marker_type{
my ($self) = shift;
return $self->{'_marker_type'} = shift if @_;
return $self->{'_marker_type'};
}


=head2 individual_id
Title : individual_id
Expand Down
9 changes: 5 additions & 4 deletions Bio/PopGen/IO/hapmap.pm
Expand Up @@ -223,14 +223,15 @@ sub next_individual {
} else {
$markername = "Marker$i";
}

my @alleles = split($self->flag('allele_delimiter'), $m);
if( @alleles != 2 ) {
$self->warn("$m for $samp\n");
} else {
$m = Bio::PopGen::Genotype->new(-alleles => \@alleles,
-marker_name => $markername,
-individual_id=> $samp);
$m = Bio::PopGen::Genotype->new(-alleles => \@alleles,
-marker_name => $markername,
-marker_type => 'S', # Guess hapmap only has SNP data
-individual_id => $samp);
}
$i++;
}
Expand Down
82 changes: 48 additions & 34 deletions Bio/PopGen/IO/phase.pm
Expand Up @@ -90,7 +90,7 @@ package Bio::PopGen::IO::phase;
use vars qw($FieldDelim $AlleleDelim $NoHeader);
use strict;

($FieldDelim,$AlleleDelim,$NoHeader) =(' ', '\s+', 0);
($FieldDelim,$AlleleDelim,$NoHeader) =(' ', '\s+', 1);



Expand Down Expand Up @@ -178,6 +178,7 @@ sub next_individual {
$marker_positions,$micro_snp);

while( defined( $_ = $self->_readline) ) {
chomp;
next if( /^\s+$/ || ! length($_) );
last;
}
Expand Down Expand Up @@ -214,7 +215,6 @@ sub next_individual {
$self->{'_count'}++;
return $self->next_individual;
} else {
chomp $_;
if( $self->{'_row1'} ) {
# if we are looking at the 2nd row of alleles for this id

Expand Down Expand Up @@ -243,25 +243,35 @@ sub next_individual {
$m =~ s/^\s+//;
$m =~ s/\s+$//;
my $markername;
if( defined $self->{'_header'} ) {
$markername = $self->{'_header'}->[$i] || "Marker$i";
if( defined($self->flag('marker_positions')) ) {
$markername = (split($self->flag('field_delimiter'), $self->flag('marker_positions')))[$i];
} elsif( defined $self->{'_header'} ) {
$markername = $self->{'_header'}->[$i] || "$i";
} else {
$markername = "Marker$i";
$markername = "$i";
}

my $markertype;
if( defined($self->flag('marker_positions')) ) {
$markertype = (split('', $self->flag('micro_snp')))[$i-1];
} else {
$markertype = "S";
}

$self->debug( "markername is $markername alleles are $m\n");
my @alleles = split($self->flag('allele_delimiter'), $m);

$m = Bio::PopGen::Genotype->new(-alleles =>\@alleles,
-marker_name => $markername,
-individual_id=> $self->{'_sam'});
$m = Bio::PopGen::Genotype->new(-alleles =>\@alleles,
-marker_name => $markername,
-marker_type => $markertype,
-individual_id => $self->{'_sam'});
$i++;
}
return Bio::PopGen::Individual->new(-unique_id => $self->{'_sam'},
-genotypes =>\@{$self->{'_marker_results'}},
);

} else {
chomp;
$self->{'_header'} = [split($self->flag('field_delimiter'),$_)];
return $self->next_individual; # rerun loop again
}
Expand Down Expand Up @@ -304,28 +314,39 @@ sub write_individual {
my $fielddelim = $self->flag('field_delimiter');
my $alleledelim = $self->flag('allele_delimiter');

# For now capture print_header flag from @inds
my $header = 1;
$header = pop(@inds) if($inds[-1] =~ m/^[01]$/);

foreach my $ind ( @inds ) {
if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
$self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object ($ind)");
next;
}

# we'll go ahead and sort these until
# we have a better way to insure a consistent order
my @marker_names = sort {$a <=> $b} $ind->get_marker_names;
my $n_markers =scalar(@marker_names);
$self->_print( "1\n");
$self->_print( $n_markers, "\n");
if( ! $self->flag('no_header') &&
! $self->flag('header_written') ) {
$self->_print(join($fielddelim, ('P', @marker_names)), "\n");
$self->flag('header_written',1);
}
$self->_print('S'x$n_markers, "\n");

if ($header) {
my $n_markers = scalar(@marker_names);
$self->_print( "1\n");
$self->_print( $n_markers, "\n");
if( $self->flag('no_header') &&
! $self->flag('header_written') ) {
$self->_print(join($fielddelim, ('P', @marker_names)), "\n");
$self->flag('header_written',1);
}
foreach my $geno ($ind->get_Genotypes()) {
$self->_print($geno->marker_type);
}
$self->_print("\n");
}

my(@row1,@row2);
for (@marker_names){
my $geno = $ind->get_Genotypes(-marker => $_);
my @alleles = $geno->get_Alleles();
my @alleles = $geno->get_Alleles(1);
push(@row1,$alleles[0]);
push(@row2,$alleles[1]);
}
Expand Down Expand Up @@ -360,28 +381,21 @@ sub write_population {
# we'll go ahead and sort these until
# we have a better way to insure a consistent order
my @marker_names = sort {$a <=> $b} $pop->get_marker_names;
my $n_markers =scalar(@marker_names);
my $n_markers = scalar(@marker_names);
$self->_print( $pop->get_number_individuals, "\n");
$self->_print( $n_markers, "\n");
if( ! $self->flag('no_header') &&
if( $self->flag('no_header') &&
! $self->flag('header_written') ) {
$self->_print( join($fielddelim, ('P', @marker_names)), "\n");
$self->flag('header_written',1);
}
$self->_print('S'x$n_markers, "\n");

foreach my $ind ( $pop->get_Individuals ) {
my(@row1,@row2);
for (@marker_names){
my $geno = $ind->get_Genotypes(-marker => $_);
my @alleles = $geno->get_Alleles();
push (@row1,$alleles[0]);
push (@row2,$alleles[1]);
}
$self->_print("#",$ind->unique_id,"\n",
join($fielddelim,@row1),"\n",
join($fielddelim,@row2),"\n");

foreach (@marker_names) {
$self->_print(($pop->get_Genotypes($_))[0]->marker_type);
}
$self->_print("\n");

$self->write_individual( $pop->get_Individuals, 0 );
}
}

Expand Down
2 changes: 1 addition & 1 deletion Bio/PopGen/Individual.pm
Expand Up @@ -257,7 +257,7 @@ sub get_Genotypes{
unshift @args, '-marker' if( @args == 1 ); # deal with single args

my ($name) = $self->_rearrange([qw(MARKER)], @args);
if( ! $name ) {
if( ! defined($name) ) {
$self->warn("Only know how to process the -marker field currently");
return();
}
Expand Down
17 changes: 17 additions & 0 deletions Bio/PopGen/Marker.pm
Expand Up @@ -308,6 +308,23 @@ sub reset_alleles{
$self->{'_allele_freqs'} = {};
}

=head2 marker_coverage
Title : marker_coverage
Usage : $marker->marker_coverage();
Function: Get marker coverage, that is, the number of
individuals where the marker is present
excluding missing or ambiguous alleles
Returns : integer, representing marker coverage
Args :
=cut

sub marker_coverage{
my ($self) = @_;

return $self->{_marker_coverage};
}

1;
6 changes: 4 additions & 2 deletions Bio/PopGen/Population.pm
Expand Up @@ -437,11 +437,13 @@ sub get_Marker{
my %alleles;
my $count;
for my $al ( map { $_->get_Alleles} @genotypes ) {
$count++;
$alleles{$al}++
next if($al eq '?');
$count++;
$alleles{$al}++
}
foreach my $allele ( keys %alleles ) {
$marker->add_Allele_Frequency($allele, $alleles{$allele}/$count);
$marker->{_marker_coverage} = $count/2;
}
}
$self->{'_allele_freqs'}->{$markername} = $marker;
Expand Down
4 changes: 2 additions & 2 deletions Bio/PopGen/Statistics.pm
Expand Up @@ -1393,9 +1393,9 @@ sub mcdonald_kreitman {
my $outcount = 1;
for my $ind ( @{$vals{$t}} ) {
my @alleles = $ind->get_Genotypes($codon)->get_Alleles;
if( @alleles > 1 ) {
if( @alleles > 2 ) {
warn("Codon $codon saw ", scalar @alleles, " alleles for ind ", $ind->unique_id, "\n");
die;
# warn("$codon $codon saw ", scalar @alleles, " for ind ", $ind->unique_id, "\n");
} else {
my ($allele) = shift @alleles;
$all_alleles{$ind->unique_id} = $allele;
Expand Down
12 changes: 6 additions & 6 deletions Bio/SimpleAlign.pm
Expand Up @@ -1254,7 +1254,7 @@ sub remove_gaps {
# Do the matching to get the segments to remove
while ($gap_line =~ m/[$del_char]/g) {
my $start = pos($gap_line)-1;
$gap_line=~/\G[$del_char]+/gc;
$gap_line =~ m/\G[$del_char]+/gc;
my $end = pos($gap_line)-1;

#have to offset the start and end for subsequent removes
Expand Down Expand Up @@ -1691,12 +1691,12 @@ sub gap_line {
my %gap_hsh; # column gaps vector
foreach my $seq ( $self->each_seq ) {
my $i = 0;
map {$gap_hsh{$_->[0]} = undef} grep {$_->[1] eq $gapchar}
map {$gap_hsh{$_->[0]} = undef} grep {$_->[1] =~ m/[$gapchar]/}
map {[$i++, $_]} split(//, uc ($seq->seq));
}
my $gap_line;
foreach my $pos ( 0..$self->length-1 ) {
$gap_line .= (exists $gap_hsh{$pos}) ? $gapchar:'.';
$gap_line .= (exists $gap_hsh{$pos}) ? $self->gap_char:'.';
}
return $gap_line;
}
Expand All @@ -1719,14 +1719,14 @@ sub all_gap_line {
my @seqs = $self->each_seq;
foreach my $seq ( @seqs ) {
my $i = 0;
map {$gap_hsh{$_->[0]}++} grep {$_->[1] eq $gapchar}
map {$gap_hsh{$_->[0]}++} grep {$_->[1] =~ m/[$gapchar]/}
map {[$i++, $_]} split(//, uc ($seq->seq));
}
my $gap_line;
foreach my $pos ( 0..$self->length-1 ) {
if (exists $gap_hsh{$pos} && $gap_hsh{$pos} == scalar @seqs) {
# gaps column
$gap_line .= $gapchar;
$gap_line .= $self->gap_char;
} else {
$gap_line .= '.';
}
Expand Down Expand Up @@ -1759,7 +1759,7 @@ sub gap_col_matrix {
my $id = $seq->display_id;
while( $i < $len ) {
$ch = substr($str, $i, 1);
$cols[$i++]->{$id} = ($ch eq $gapchar);
$cols[$i++]->{$id} = ($ch =~ m/[$gapchar]/);
}
}
return \@cols;
Expand Down
6 changes: 4 additions & 2 deletions t/PopGen/MK.t
Expand Up @@ -28,7 +28,8 @@ my $alnio = Bio::AlignIO->new(-format => 'fasta',
my $aln = $alnio->next_aln;
isa_ok($aln,'Bio::SimpleAlign');
my $population = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln,
-site_model=> 'codon');
-site_model => 'codon',
-include_monomorphic => 1);
isa_ok($population,'Bio::PopGen::Population');

my @marker_names = $population->get_marker_names;
Expand Down Expand Up @@ -116,7 +117,8 @@ $alnio = Bio::AlignIO->new(-format => 'fasta',
$aln = $alnio->next_aln;
isa_ok($aln,'Bio::SimpleAlign');
$population = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln,
-site_model=> 'codon');
-site_model => 'codon',
-include_monomorphic => 1);
isa_ok($population,'Bio::PopGen::Population');

@marker_names = $population->get_marker_names;
Expand Down

0 comments on commit 91b8fb1

Please sign in to comment.