Skip to content

Commit

Permalink
update grammar, now parses complete FlyBase GFF3
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Feb 8, 2015
1 parent df9285f commit 0d951f6
Showing 1 changed file with 24 additions and 28 deletions.
52 changes: 24 additions & 28 deletions lib/Bio/Grammar/GFF.pm6
Expand Up @@ -11,32 +11,34 @@ use v6;
grammar Bio::Grammar::GFF {

rule TOP {
[
<gff-line>
]+
<fasta>?
}

rule gff-line {
^^
[
| <feature-line>
| <directive-line>
| <comment>
]+
#<fasta>?
]
$$
}

token comment {
^^
'#'<-[#]> <-[\n]>+
$$
}

token directive-line {
^^
'##'
<directive-name>
<directive-data>?
$$
}

token resolution-line {
^^
'###'
$$
}

# TODO: break out into handling specific directives
Expand Down Expand Up @@ -86,21 +88,16 @@ grammar Bio::Grammar::GFF {
<-[\t]>+
}

# TODO: optimize this?
token strand {
| '-'1
| 0
| 1
| '-'
| '+'
| '.'
< -1 0 1 - + . >
}

token phase {
| <[012]>
| '.'
<[012\.]>
}

# TODO: expand into canonical vs custom, URI-encoding, etc.
# TODO: expand into canonical vs custom, URI-encoding, etc.?
# Matches one or more <tag-value> entries separated by ';'. The `%` modifier
# only permits the separator between entries, so a trailing ';' is not consumed.
token attributes {
<tag-value>+ % ';'
}
Expand All @@ -114,7 +111,7 @@ grammar Bio::Grammar::GFF {
}

token value {
<-[;=&,]>+
<-[\n;=&,]>+
}

# not sure if there is a way to use a Grammar within another grammar (yet)
Expand All @@ -127,26 +124,25 @@ grammar Bio::Grammar::GFF {
}

token description_line {
^^\> <id> [<.ws> <description>]? $$
^^\> <seq-id> [<.ws> <seq-description>]? $$
}
token id {
| <identifier>
| <generic_id>
token seq-id {
| <seq-identifier>
| <seq-generic-id>
}
token identifier {
# Assumes we are going to parse NCBI-specific IDs (reference numbers and GI numbers)

token seq-identifier {
\S+
}
token generic_id {
token seq-generic-id {
\S+
}

token description {
token seq-description {
\N+
}
# Matches one or more characters other than '>'. Newlines are not excluded,
# so a match may span several lines and stops just before the next '>'.
token sequence {
<-[>]>+
}

}

}

0 comments on commit 0d951f6

Please sign in to comment.