Skip to content

Commit

Permalink
Merge pull request #26 from fgvieira/master
Browse files Browse the repository at this point in the history
Fixed some bugs on PopGen, fixes bug #3306
  • Loading branch information
Chris Fields committed Oct 28, 2011
2 parents b141443 + 75d7930 commit 2b10511
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 26 deletions.
25 changes: 16 additions & 9 deletions Bio/PopGen/IO/phase.pm
Expand Up @@ -90,7 +90,7 @@ package Bio::PopGen::IO::phase;
use vars qw($FieldDelim $AlleleDelim $NoHeader);
use strict;

($FieldDelim,$AlleleDelim,$NoHeader) =( ',', '\s+',0);
($FieldDelim,$AlleleDelim,$NoHeader) =(' ', '\s+', 0);



Expand All @@ -108,7 +108,7 @@ use base qw(Bio::PopGen::IO);
Function: Builds a new Bio::PopGen::IO::hapmap object
Returns : an instance of Bio::PopGen::IO::hapmap
Args : [optional, these are the current defaults]
-field_delimiter => ','
-field_delimiter => ' '
-allele_delimiter=> '\s+'
-no_header => 0,
Expand Down Expand Up @@ -311,15 +311,18 @@ sub write_individual {
}
# we'll go ahead and sort these until
# we have a better way to insure a consistent order
my @marker_names = sort $ind->get_marker_names;
my @marker_names = sort {$a <=> $b} $ind->get_marker_names;
my $n_markers =scalar(@marker_names);
$self->_print( "1\n");
$self->_print( $n_markers, "\n");
if( ! $self->flag('no_header') &&
! $self->flag('header_written') ) {
$self->_print(join($fielddelim, ('SAM', @marker_names)), "\n");
$self->_print(join($fielddelim, ('P', @marker_names)), "\n");
$self->flag('header_written',1);
}
$self->_print('S'x$n_markers, "\n");

my(@row1,@row2);

for (@marker_names){
my $geno = $ind->get_Genotypes(-marker => $_);
my @alleles = $geno->get_Alleles();
Expand Down Expand Up @@ -356,13 +359,17 @@ sub write_population {
}
# we'll go ahead and sort these until
# we have a better way to insure a consistent order
my @marker_names = sort $pop->get_marker_names;
my @marker_names = sort {$a <=> $b} $pop->get_marker_names;
my $n_markers =scalar(@marker_names);
$self->_print( $pop->get_number_individuals, "\n");
$self->_print( $n_markers, "\n");
if( ! $self->flag('no_header') &&
! $self->flag('header_written') ) {
$self->_print( join($fielddelim, ('SAM', @marker_names)),
"\n");
$self->_print( join($fielddelim, ('P', @marker_names)), "\n");
$self->flag('header_written',1);
}
$self->_print('S'x$n_markers, "\n");

foreach my $ind ( $pop->get_Individuals ) {
my(@row1,@row2);
for (@marker_names){
Expand All @@ -374,7 +381,7 @@ sub write_population {
$self->_print("#",$ind->unique_id,"\n",
join($fielddelim,@row1),"\n",
join($fielddelim,@row2),"\n");
}
}
}
}

Expand Down
54 changes: 37 additions & 17 deletions Bio/PopGen/Utilities.pm
Expand Up @@ -149,6 +149,21 @@ sub aln_to_population{
INCLUDE_MONOMORPHIC
CHECKISA)],
@args);

my %ambig_code = ('?' => ['?','?'],
'N' => ['?','?'],
'-' => ['?','?'],
'G' => ['G','G'],
'A' => ['A','A'],
'T' => ['T','T'],
'C' => ['C','C'],
'R' => ['A','G'],
'Y' => ['C','T'],
'W' => ['T','A'],
'M' => ['C','A'],
'S' => ['C','G'],
'K' => ['G','T']);

if( ! defined $aln ) {
$self->warn("Must provide a valid Bio::SimpleAlign object to run aln_to_population");
return;
Expand All @@ -174,23 +189,22 @@ sub aln_to_population{
}

for( my $i = 0; $i < $alength; $i++ ) {
my $nm = "Site-$i";
my (@genotypes,%set);

# do we skip indels?
# slicing vertically
for my $seq ( @seqs ) {
my $site = substr($seq,$i,1);
my $site = uc(substr($seq,$i,1));
push @genotypes, $ambig_code{$site};
$set{$site}++;
push @genotypes, $site;
}
if( keys %set > 1 || $includefixed ) {
my $genoct = scalar @genotypes;
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => $nm,
(-marker_name => ($i+1),
-individual_id=> $inds[$j]->unique_id,
-alleles => [$genotypes[$j]]));
-alleles => $genotypes[$j]));
}
}
}
Expand All @@ -203,31 +217,37 @@ sub aln_to_population{
}
my $codonct = 0;
for( my $i = $phase; $i < $alength; $i += CodonLen ) {
my $nm = "Codon-$codonct-$i";
my (@genotypes,%set,$genoct);

for my $seq ( @seqs ) {
my $site = substr($seq,$i,CodonLen);
my @unambig_site;
my $site = uc(substr($seq,$i,CodonLen));
if( length($site) < CodonLen ) {
# at end of alignment and this is not in phase
$self->debug("phase was $phase, but got to end of alignment with overhang of $site");
next;
}
# do we check for gaps/indels here?
$set{$site}++;
push @genotypes, $site;
for (my $pos=0; $pos<CodonLen; $pos++)
{
$unambig_site[0] .= $ambig_code{substr($site, $pos, 1)}[0];
$unambig_site[1] .= $ambig_code{substr($site, $pos, 1)}[1];
}
push @genotypes, [@unambig_site];
$set{$site}++;
}
$genoct = scalar @genotypes;

# do we include fixed sites? yes I think so since this is
# typically being used by MK
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => $nm,
-individual_id=> $inds[$j]->unique_id,
-alleles => [$genotypes[$j]]));
# do we include fixed sites? I think we should leave it to the user.
if( keys %set > 1 || $includefixed ) {
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => ($i/CodonLen),
-individual_id=> $inds[$j]->unique_id,
-alleles => $genotypes[$j]));
}
$codonct++;
}
$codonct++;
}
} else {
$self->throw("Can only build sites based on all the data right now!");
Expand Down

0 comments on commit 2b10511

Please sign in to comment.