Skip to content

Commit

Permalink
Fixed reading sequence ambiguity
Browse files Browse the repository at this point in the history
  • Loading branch information
fgvieira committed Oct 25, 2011
1 parent 3b8ee2c commit 8cca79d
Showing 1 changed file with 23 additions and 10 deletions.
33 changes: 23 additions & 10 deletions Bio/PopGen/Utilities.pm
Expand Up @@ -149,6 +149,21 @@ sub aln_to_population{
INCLUDE_MONOMORPHIC
CHECKISA)],
@args);

my %ambig_code = ('?' => ['?','?'],
'N' => ['?','?'],
'-' => ['?','?'],
'G' => ['G','G'],
'A' => ['A','A'],
'T' => ['T','T'],
'C' => ['C','C'],
'R' => ['A','G'],
'Y' => ['C','T'],
'W' => ['T','A'],
'M' => ['C','A'],
'S' => ['C','G'],
'K' => ['G','T']);

if( ! defined $aln ) {
$self->warn("Must provide a valid Bio::SimpleAlign object to run aln_to_population");
return;
Expand All @@ -174,23 +189,22 @@ sub aln_to_population{
}

for( my $i = 0; $i < $alength; $i++ ) {
my $nm = "Site-$i";
my (@genotypes,%set);

# do we skip indels?
# slicing vertically
for my $seq ( @seqs ) {
my $site = substr($seq,$i,1);
my $site = uc(substr($seq,$i,1));
$set{$site}++;
push @genotypes, $site;
push @genotypes, $ambig_code{$site};
}
if( keys %set > 1 || $includefixed ) {
my $genoct = scalar @genotypes;
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => $nm,
(-marker_name => ($i+1),
-individual_id=> $inds[$j]->unique_id,
-alleles => [$genotypes[$j]]));
-alleles => $genotypes[$j]));
}
}
}
Expand All @@ -203,29 +217,28 @@ sub aln_to_population{
}
my $codonct = 0;
for( my $i = $phase; $i < $alength; $i += CodonLen ) {
my $nm = "Codon-$codonct-$i";
my (@genotypes,%set,$genoct);

for my $seq ( @seqs ) {
my $site = substr($seq,$i,CodonLen);
my $site = uc(substr($seq,$i,CodonLen));
if( length($site) < CodonLen ) {
# at end of alignment and this is not in phase
$self->debug("phase was $phase, but got to end of alignment with overhang of $site");
next;
}
# do we check for gaps/indels here?
$set{$site}++;
push @genotypes, $site;
push @genotypes, $ambig_code{$site};
}
$genoct = scalar @genotypes;

# do we include fixed sites? yes I think so since this is
# typically being used by MK
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => $nm,
(-marker_name => ($i+1),
-individual_id=> $inds[$j]->unique_id,
-alleles => [$genotypes[$j]]));
-alleles => $genotypes[$j]));
}
$codonct++;
}
Expand Down

0 comments on commit 8cca79d

Please sign in to comment.