Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
grammar now parsing complex locations
  • Loading branch information
cjfields committed Sep 11, 2014
1 parent 49449ec commit dd324ba
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 61 deletions.
50 changes: 30 additions & 20 deletions lib/Bio/Tools/FTLocationParser.pm6
Expand Up @@ -2,60 +2,70 @@ use v6;

# Using a variation on the old NCBI FT BNF, but perl6-ized

grammar Bio::Grammar::Location {
#use Grammar::Tracer;

grammar Bio::Grammar::Location {
token TOP { <location> }

#location ::= <absolute_location> | <feature_name> | <functional_operator>(<location_list>)
token location { <absolute_location> | <feature_name> | <functional_operator>'('<location_list>')' }
token location {
<absolute_location> | <complex_location>
}

#absolute_location ::= <local_location> | <path> : <local_location>
token absolute_location { <local_location> | <path> ':' <local_location> }
token absolute_location { <local_location> | <remote_location> }

token complex_location { <functional_operator>'('<location_list>')' }

token remote_location { <path> ':' <local_location> }

#path ::= <database> :: <primary_accession> | <primary_accession>
token path { <database> '::' <primary_accession> | <primary_accession> }
token path { <primary_accession> | <database_accession> }

#feature_name ::= <path>:<feature_label> | <feature_label>
token feature_name { <path>':'<feature_label> | <feature_label> }

#feature_label ::= <symbol>
token feature_label { <symbol>+ }
token database_accession { <database> '::' <primary_accession> }

##feature_name ::= <path>:<feature_label> | <feature_label>
#token feature_name { <path>':'<feature_label> | <feature_label> }
#
##feature_label ::= <symbol>
#token feature_label { <symbol>+ }

#local_location ::= <base_position> | <between_position> | <base_range>
token local_location { <base_position> | <between_position> | <base_range> }
token local_location { <base_position> | <base_range> | <between_position> }

#location_list ::= <location> | <location_list>,<location>
token location_list { <location> | <location_list>','<location> }
token location_list { <location> [','<location_list> ]* }

#functional_operator ::= <symbol>
token functional_operator { <.symbol>+ }
token functional_operator { 'join' | 'order' | 'complement' }

#base_position ::= <integer> | <low_base_bound> | <high_base_bound> | <two_base_bound>
token base_position { <abs_base_position> | <low_base_bound> | <high_base_bound> | <two_base_bound> }

# my addition
token abs_base_position { \d+ }

#low_base_bound ::= > <integer>
# low_base_bound ::= > <integer>
token low_base_bound { '>' <abs_base_position> }
token high_base_bound { '<' <abs_base_position> }

#two_base_bound ::= <base_position>.<base_position>
# two_base_bound ::= <base_position>.<base_position>
token two_base_bound { '('? <abs_base_position>'.'<abs_base_position> ')'? }

#between_position ::= <base_position>^<base_position>
# between_position ::= <base_position>^<base_position>
token between_position { <abs_base_position>'^'<abs_base_position> }

#base_range ::= <base_position>..<base_position>
# base_range ::= <base_position>..<base_position>
token base_range { <base_position>'..'<base_position> }

#database ::= <symbol>
# database ::= <symbol>
token database { <.symbol>+ }

#primary_accession ::= <symbol>
# primary_accession ::= <symbol>
token primary_accession { <.symbol>+ }

token symbol { [<+alpha+digit+[_\-'*]>] }

# <up_case_letter> | <low_case_letter> |<digit> | _ | - | ' | *
token symbol { <.+alpha+digit+[_\-.'*]> }
}


Expand Down
81 changes: 40 additions & 41 deletions t/Tools/FTLocationParser.t
Expand Up @@ -34,7 +34,7 @@ my %testcases =
# these variants are not really allowed by the FT definition
# document but we want to be able to cope with them

# Not supported with grammar
# Not supported!!!
#"J00194:(100..202)" => ['J00194:100..202',
# 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 0, 1, 'J00194'],
#"((122.133)..(204.221))" => ['(122.133)..(204.221)',
Expand Down Expand Up @@ -71,47 +71,47 @@ my %testcases =

"join(AY016290.1:108..185,AY016291.1:1546..1599)"=> [0,
Nil, Nil, "EXACT", Nil, Nil, "EXACT", "JOIN", 2, 0, Nil],
#"complement(join(3207..4831,5834..5902,8881..8969,9276..9403,29535..29764))",
# [0, 3207, 3207, "EXACT", 29764, 29764, "EXACT", "JOIN", 5, -1, Nil],
#"join(complement(29535..29764),complement(9276..9403),complement(8881..8969),complement(5834..5902),complement(3207..4831))",
# ["complement(join(3207..4831,5834..5902,8881..8969,9276..9403,29535..29764))",
# 3207, 3207, "EXACT", 29764, 29764, "EXACT", "JOIN", 5, -1, Nil],
#"join(12..78,134..202)" => [0,
# 12, 12, "EXACT", 202, 202, "EXACT", "JOIN", 2, 1, Nil],
#"join(<12..78,134..202)" => [0,
# Nil, 12, "BEFORE", 202, 202, "EXACT", "JOIN", 2, 1, Nil],
#"complement(join(2691..4571,4918..5163))" => [0,
# 2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
#"complement(join(4918..5163,2691..4571))" => [0,
# 2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
#"join(complement(4918..5163),complement(2691..4571))" => [
# 'complement(join(2691..4571,4918..5163))',
# 2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
#"join(complement(2691..4571),complement(4918..5163))" => [
# 'complement(join(4918..5163,2691..4571))',
# 2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
#"complement(34..(122.126))" => [0,
# 34, 34, "EXACT", 122, 126, "WITHIN", "EXACT", 0, -1, Nil],
"complement(join(3207..4831,5834..5902,8881..8969,9276..9403,29535..29764))",
[0, 3207, 3207, "EXACT", 29764, 29764, "EXACT", "JOIN", 5, -1, Nil],
"join(complement(29535..29764),complement(9276..9403),complement(8881..8969),complement(5834..5902),complement(3207..4831))",
["complement(join(3207..4831,5834..5902,8881..8969,9276..9403,29535..29764))",
3207, 3207, "EXACT", 29764, 29764, "EXACT", "JOIN", 5, -1, Nil],
"join(12..78,134..202)" => [0,
12, 12, "EXACT", 202, 202, "EXACT", "JOIN", 2, 1, Nil],
"join(<12..78,134..202)" => [0,
Nil, 12, "BEFORE", 202, 202, "EXACT", "JOIN", 2, 1, Nil],
"complement(join(2691..4571,4918..5163))" => [0,
2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
"complement(join(4918..5163,2691..4571))" => [0,
2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
"join(complement(4918..5163),complement(2691..4571))" => [
'complement(join(2691..4571,4918..5163))',
2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
"join(complement(2691..4571),complement(4918..5163))" => [
'complement(join(4918..5163,2691..4571))',
2691, 2691, "EXACT", 5163, 5163, "EXACT", "JOIN", 2, -1, Nil],
"complement(34..(122.126))" => [0,
34, 34, "EXACT", 122, 126, "WITHIN", "EXACT", 0, -1, Nil],

# complex, technically not legal FT types but we handle and resolve these as needed

#'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))'
# => ['join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
# 11025,11025, 'EXACT', 315294, 315294, 'EXACT', 'JOIN', 2, 0, Nil],
#'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))'
# => [0, 11025,11025, 'EXACT', 315294, 315294, 'EXACT', 'JOIN', 2, 0, Nil],
#'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))'
# => [0, 20464,20464, 'EXACT', 314672, 314672, 'EXACT', 'JOIN', 3, 0, Nil],
#'join(20464..20694,21548..22763,join(complement(231520..231669),complement(232596..232990),complement(314652..314672)))'
# => ['join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
# 20464,20464, 'EXACT', 314672, 314672, 'EXACT', 'JOIN', 3, 0, Nil],
#
#'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)'
# => [0, 1000,1000,'EXACT', 10000, 10000, 'EXACT', 'JOIN', 3, 1, Nil],
#
## not passing completely yet, working out 'order' semantics
#'order(S67862.1:72..75,1..788,S67864.1:1..19)'
# => [0, Nil, Nil, 'EXACT', Nil, Nil, 'EXACT', 'ORDER', 3, 0, Nil],
'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))'
=> ['join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
11025,11025, 'EXACT', 315294, 315294, 'EXACT', 'JOIN', 2, 0, Nil],
'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))'
=> [0, 11025,11025, 'EXACT', 315294, 315294, 'EXACT', 'JOIN', 2, 0, Nil],
'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))'
=> [0, 20464,20464, 'EXACT', 314672, 314672, 'EXACT', 'JOIN', 3, 0, Nil],
'join(20464..20694,21548..22763,join(complement(231520..231669),complement(232596..232990),complement(314652..314672)))'
=> ['join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
20464,20464, 'EXACT', 314672, 314672, 'EXACT', 'JOIN', 3, 0, Nil],

'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)'
=> [0, 1000,1000,'EXACT', 10000, 10000, 'EXACT', 'JOIN', 3, 1, Nil],

# not passing completely yet, working out 'order' semantics
'order(S67862.1:72..75,1..788,S67864.1:1..19)'
=> [0, Nil, Nil, 'EXACT', Nil, Nil, 'EXACT', 'ORDER', 3, 0, Nil],
;

my $p = Bio::Tools::FTLocationParser.new();
Expand All @@ -121,10 +121,9 @@ ok($p ~~ Bio::Tools::FTLocationParser);
# NOTE: keys are iterated in hash order here; use %testcases.keys.sort to keep the order constant from one run to the next
for %testcases.keys -> $locstr {

print "$locstr:";

Bio::Grammar::Location.parse($locstr);

say "$locstr" if !$/.defined;
say $/.gist;

}
Expand Down

0 comments on commit dd324ba

Please sign in to comment.