Skip to content

Commit

Permalink
cleaned up a few semantic issues with layered locations, document beh…
Browse files Browse the repository at this point in the history
…avior next
  • Loading branch information
Chris Fields committed Feb 18, 2012
1 parent 3700b92 commit b3de5dc
Show file tree
Hide file tree
Showing 7 changed files with 216 additions and 166 deletions.
71 changes: 40 additions & 31 deletions lib/Biome/Factory/FTLocationFactory.pm
Expand Up @@ -18,6 +18,8 @@ $LOCREG = qr{
# make global for now, allow for abstraction later
our $SIMPLE_CLASS = 'Biome::Location::Simple';

my %OPS = map { $_ => 1 } qw(join order bond complement);

# TODO: refactor to a default attribute that lazily loads the class (I have code
# for that somewhere, let's see, where did I put that......)
sub BUILD {
Expand All @@ -26,7 +28,9 @@ sub BUILD {
}

sub from_string {
my ($self,$locstr,$op) = @_;
my ($self, $locstr, $op, $depth) = @_;
#
$depth ||= 0;
my $loc;

# run on first pass only
Expand All @@ -52,50 +56,56 @@ sub from_string {

SUBLOCS:
while (@sublocs) {
my $subloc = shift @sublocs;
my $oparg = lc(shift @sublocs);

# TODO: make hash lookup
my $oparg = ($subloc eq 'join' || $subloc eq 'bond' ||
$subloc eq 'order' || $subloc eq 'complement') ? $subloc : undef;
# has operator, requires further work (recurse)
if ($oparg) {
if (exists($OPS{$oparg})) {
my $sub = shift @sublocs;

# simple split operators (no recursive calls needed)
if ($sub !~ m{(?:join|order|bond)}) {
my @splitlocs = split(q(,), $sub);
my @splitlocs = split(/,/, $sub);
if (@splitlocs == 1) {
# this should be a complement only
#$self->throw("Getting nested joins is not supported") unless $oparg eq 'complement';
$loc_obj = $SIMPLE_CLASS->new(location_string => "complement($splitlocs[0])");
# this should be a single complement only
$loc_obj = $SIMPLE_CLASS->new(location_string => $splitlocs[0]);
$loc_obj->strand(-1);
} else {
$loc_obj = $SIMPLE_CLASS->new(-verbose => 1,
-location_type => uc $oparg);
while (my $splitloc = shift @splitlocs) {
next unless $splitloc;
my $sobj;
if ($splitloc =~ m{\(($LOCREG)\)}) {
my $comploc = $1;
$sobj = $SIMPLE_CLASS->new(location_string => $comploc);
} else {
$sobj = $SIMPLE_CLASS->new(location_string => $splitloc);
}
$loc_obj->add_sub_Location($sobj);
}
$loc_obj = $SIMPLE_CLASS->new(-location_type => uc $oparg);
my @loc_objs = map {
my $sobj;
if (m{\(($LOCREG)\)}) {
my $comploc = $1;
$sobj = $SIMPLE_CLASS->new(location_string => $comploc);
$sobj->strand(-1);
} else { # normal
$sobj = $SIMPLE_CLASS->new(location_string => $_);
}
$sobj;
} @splitlocs;
$loc_obj->add_sub_Locations(\@loc_objs);
# wrapped with complement(...)
#if ($splitloc =~ m{\(($LOCREG)\)}) {
# my $comploc = $1;
# $sobj = $SIMPLE_CLASS->new(location_string => $comploc);
# $sobj->strand(-1);
#} else { # normal
# $sobj = $SIMPLE_CLASS->new(location_string => $splitloc);
#}
#$loc_obj->add_sub_Location($sobj);

}
} else {
#$self->warn("Nesting operators is not supported yet")
# unless $oparg eq 'complement';
$loc_obj = $self->from_string($sub, $oparg);
$loc_obj = $self->from_string($sub, $oparg, ++$depth);
if ($oparg eq 'complement') {
$loc_obj->strand(-1)
$loc_obj->strand(-1);
} else {
$loc_obj->location_type(uc $oparg) ;
}
}
}
# no operator, simple or fuzzy
else {
$loc_obj = $self->from_string($subloc,1);
$loc_obj = $self->from_string($oparg, 1, ++$depth);
}
#$loc_obj->strand(-1) if ($op && $op eq 'complement');
push @loc_objs, $loc_obj;
Expand All @@ -108,11 +118,10 @@ sub from_string {
}
if ($ct > 1) {
$loc = $SIMPLE_CLASS->new();
$loc->add_sub_Location(shift @loc_objs) while (@loc_objs);
$loc->add_sub_Locations(\@loc_objs);
return $loc;
} else {
$loc = shift @loc_objs;
return $loc;
return $loc_objs[0];
}
} else { # simple location(s)
$loc = $SIMPLE_CLASS->new(location_string => $locstr);
Expand Down
2 changes: 1 addition & 1 deletion lib/Biome/Location/Simple.pm
Expand Up @@ -12,7 +12,7 @@ use namespace::clean -except => 'meta';
# class can override

with 'Biome::Role::Location::Simple';
#with 'Biome::Role::Location::Split';
with 'Biome::Role::Location::Split';
with 'Biome::Role::Location::Locatable';

sub BUILD {
Expand Down
21 changes: 10 additions & 11 deletions lib/Biome/Role/Location/Locatable.pm
Expand Up @@ -125,22 +125,20 @@ sub intersection {
}

sub union {
my ($self, $given, $so) = @_;
my ($self, $newlocs, $so) = @_;

if (ref $given ne 'ARRAY') {
$given = [$given];
}
my @given = ref $newlocs ne 'ARRAY' ? [$newlocs] : @$newlocs;

$self->_eval_ranges(@$given);
$self->_eval_ranges(@given);

unshift @$given, $self if blessed($self);
unshift @given, $self if blessed($self) && $self->start && $self->end;

# cannot give union if a contained location is remote
return if grep { $_->is_remote } (@$given);
return if grep { $_->is_remote } (@given);

my $union_strand = $self->strand; # Strand for the union range object.
my $union_strand = $given[0]->strand;

for my $r (@$given) {
for my $r (@given[1..$#given]) {
if(!defined $r->strand || $union_strand ne $r->strand) {
$union_strand = 0;
last;
Expand All @@ -149,10 +147,11 @@ sub union {

my ($five_prime, $three_prime);

for my $loc (@$given) {
for my $loc (@given) {
$five_prime = $loc if !$five_prime || $five_prime->start > $loc->start;
$three_prime = $loc if !$three_prime || $three_prime->end < $loc->end;
}

return unless $five_prime && $three_prime;

# TODO: do we assign offsets?
Expand Down Expand Up @@ -261,7 +260,7 @@ sub _eval_ranges {
#$self->throw("start is undefined in calling instance") if !defined $self->start;
#$self->throw("end is undefined in calling instance") if !defined $self->end;
for my $obj ($self, @ranges) {
$self->throw("Not an object") unless ref($obj);
$self->throw("Not an object, got $obj") unless ref($obj);
$self->throw("start is undefined in instance ".$obj->to_string) if !defined $obj->start;
$self->throw("end is undefined in instance ".$obj->to_string) if !defined $obj->end;
$self->throw('Rangeable equality or set methods not '.
Expand Down
171 changes: 103 additions & 68 deletions lib/Biome/Role/Location/LocationContainer.pm
@@ -1,105 +1,140 @@
package Biome::Role::Location::LocationContainer;
package Biome::Role::Location::Split;

use 5.010;
use Biome::Role;
use MooseX::Role::Parameterized;

# we make no assumptions about the implementation of the
# class, just that the class has a short name (Segment, Location,
# Feature, etc).

parameter class => (
isa => 'Str',
);

parameter short_name => (
isa => 'Str',
required => 1
);

# Don't assume the plural ends with 's', but default to it

parameter plural => (
isa => 'Str',
use Biome::Type::Location qw(Split_Location_Type ArrayRef_of_Locatable);
use Biome::Type::Sequence qw(Sequence_Strand);
use List::Util qw(reduce);
use namespace::clean -except => 'meta';

requires qw(start end strand start_pos_type end_pos_type);

# TODO: make this a parameterized role at some point? The
# attributes and methods could be named based on the consuming class...

has 'locations' => (
is => 'ro',
isa => ArrayRef_of_Locatable,
traits => ['Array'],
init_arg => undef,
writer => '_set_locations',
handles => {
# override this to allow for expansion of parent location
#push_sub_Location => 'push',
sub_Locations => 'elements',
remove_sub_Locations => 'clear',
get_sub_Location => 'get',
num_sub_Locations => 'count',
},
lazy => 1,
default => sub { shift->short_name.'s' }
default => sub { [] }
);

parameter layered => (
has 'auto_expand' => (
isa => 'Bool',
default => 0
is => 'ro',
default => 1
);

role {
my $p = shift;
has 'guide_strand' => (
isa => Sequence_Strand,
is => 'rw',
default => 0
);

my ($class, $singular, $plural) = ($p->class, $p->short_name, $p->plural);
sub add_sub_Location {
my ($self, $loc) = @_;

$class ||= 'Biome::Role::Location::Simple'; # any location consumer
my $locs = $self->locations;

if ($p->layered) {
$singular = "sub$singular";
$plural = "sub$plural";
if ($self->auto_expand && !$loc->is_remote) {
my $union_loc = @$locs ? $self->union($loc) : $loc;
# carry over data
for my $att (qw(start end start_pos_type end_pos_type)) {
$self->$att($union_loc->$att);
}
$self->strand($union_loc->strand);
$self->seq_id($loc->seq_id) if $loc->seq_id && @$locs;
}

has $singular => (
is => 'ro',
isa => "ArrayRef[$class]", # needs a subtype or role type
traits => ['Array'],
default => sub {[]},
handles => {
"get_$singular" => 'get',
"has_$singular" => 'count',
"all_$plural" => 'elements',
"remove_$plural" => 'clear'
}
);

# implementing class must provide this, is implementation-specific
requires "add_$singular";
};

no Biome::Role;
push @$locs, $loc;
1;
}

1;

__END__
=head1 NAME
Biome::Role::Location::LocationContainer - Parameterizable role for a Location
container.
Biome::Role::Location::Split - Role describing split locations.
=head1 SYNOPSIS
package Foo;
{
package Foo;
use Biome;
with 'Biome::Role::Location::LocationContainer' =>
{ class => 'Biome::SeqFeature::Generic',
abbrev => 'Feature'};
with 'Biome::Role::Location::Split';
other necessary roles...
# Foo now can contain an array of Biome::SeqFeature::Generic. Adding
# a new
}
=head1 DESCRIPTION
{
package Bar;
with 'Biome::Role::Location::Simple';
other necessary roles...
}
my $split = Foo->new(-start => 7, -end => 100, -strand => 1);
my $loc1 = Bar->new(-start => 1, -end => 50, -strand => -1);
my $loc2 = Bar->new(-start => 75, -end => 150); # no strandedness defined
$split->add_subLocation($loc1);
$split->add_subLocation($loc2);
Split locations autoexpand to whatever subLocations they contain by
default and the strand is defined by the subLocations. This is b/c this
implementation is just a simple top-level location that contains other
simple Locations, so the borders should match accordingly and the strand
be dictated by them. However, as this is a simple location, the strand
won't be affected.
Simple parameterizable role for anything that has one or more Locations
(Biome::Role::Location::Simple consumers). This requires the implementation
provide several things:
say $split->start; # 1
say $split->end; # 150
say $split->strand; # 0, strand for sublocations is different
=over3
If you want to explicitly change the top-level coordinate in some way,
then do so after one has finished adding subLocations.
=item *
$split->start(100);
$split->strand(1);
say $split->start; # 100
If you really don't want the split location coordinates set by
subLocations, set autoexpand to 0
$split = Foo->new(-start => 7, -end => 100, -strand => 1, -autoexpand => 0);
=item *
my $loc1 = Bar->new(-start => 1, -end => 50, -strand => -1);
my $loc2 = Bar->new(-start => 75, -end => 150); # no strandedness defined
A method to add new Locations; as this is implementation-specific,
this is required for anything consuming this class. For instance, a consumer
$split->add_subLocation($loc1);
$split->add_subLocation($loc2);
say $split->start; # 7
say $split->end; # 100
say $split->strand; # 1
The default internal behavior for storing sub-Locations is as they are
added (similar in behavior to a JOIN). One can change this by designating
the split_location_type to ORDER, which sorts internal locations by the
start.
=head1 DESCRIPTION
=back
This role describes
=head1 SUBROUTINES/METHODS
Expand Down

0 comments on commit b3de5dc

Please sign in to comment.