Add support for collecting grammar coverage.
whitequark committed Apr 24, 2015
1 parent fd7209a · commit a9fd0a0
Showing 7 changed files with 229 additions and 58 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -4,3 +4,5 @@ _build/
*.egg-info/
/build/
/dist/
/pyparser/coverage/parser.py
/doc/coverage/*
Empty file added doc/coverage/.gitkeep
121 changes: 121 additions & 0 deletions pyparser/coverage/__init__.py
@@ -0,0 +1,121 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from .. import source, lexer
import os

_buf = None
with open(os.path.join(os.path.dirname(__file__), '..', 'parser.py')) as f:
_buf = source.Buffer(f.read(), f.name)

# Annotate the grammar rules with their source locations, because Python's
# builtin tracebacks don't include column numbers.
# This would really be more elegant if it used the parser,
# but the parser doesn't work yet at the time of writing.
def instrument():
rewriter = source.Rewriter(_buf)
lex = lexer.Lexer(_buf, (3, 4))
in_grammar = False
stack = []
for token in lex:
if token.kind == 'from':
token = lex.next()
if token.kind == '.':
rewriter.replace(token.loc, "..")

if token.kind == 'class':
token = lex.next()
if token.kind == 'ident' and token.value == 'Parser':
in_grammar = True

if not in_grammar:
continue

if token.kind == 'ident' and \
token.value in ('action', 'Eps', 'Tok', 'Loc', 'Rule', 'Expect',
'Seq', 'SeqN', 'Alt', 'Opt', 'Star', 'Plus', 'List',
'Newline', 'Oper', 'BinOper', 'BeginEnd'):
lparen = lex.next()
if lparen.kind == '(':
rparen = lex.peek()
if rparen.kind == ')':
lex.next()
rewriter.insert_before(rparen.loc,
"loc=(%d,%d)" % (token.loc.begin_pos, token.loc.end_pos))
else:
stack.append(", loc=(%d,%d)" % (token.loc.begin_pos, token.loc.end_pos))

if token.kind == '(':
stack.append(None)

if token.kind == ')':
data = stack.pop()
if data is not None:
rewriter.insert_before(token.loc, data)

with open(os.path.join(os.path.dirname(__file__), 'parser.py'), 'w') as f:
f.write(rewriter.rewrite().source)

# Produce an HTML report for test coverage of parser rules.
def report(parser, name='parser'):
rewriter = source.Rewriter(_buf)
total_pts = 0
total_covered = 0
for rule in parser._all_rules:
pts = len(rule.covered)
covered = len(filter(lambda x: x, rule.covered))
if covered == 0:
klass = 'uncovered'
elif covered < pts:
klass = 'partial'
else:
klass = 'covered'

loc = source.Range(_buf, *rule.loc)
rewriter.insert_before(loc, r"<span class='%s'>" % klass)
rewriter.insert_after(loc, r"</span>")

total_pts += pts
total_covered += covered

print("GRAMMAR COVERAGE: %.2f" % (total_covered / total_pts))

content = rewriter.rewrite().source
content = '\n'.join(map(
lambda x: r"<span id='{0}' class='line'>{1}</span>".format(*x),
enumerate(content.split("\n"), 1)))

with open(os.path.join(os.path.dirname(__file__), '..', '..',
'doc', 'coverage', name + '.html'), 'w') as f:
f.write(r"""
<!DOCTYPE html>
<html>
<head>
<title>{percentage:.2f}%: {file} coverage report</title>
<style type="text/css">
.uncovered {{ background-color: #FFCAAD; }}
.partial {{ background-color: #FFFFB4; }}
.covered {{ background-color: #9CE4B7; }}
pre {{ counter-reset: line; }}
.line::before {{
display: inline-block;
width: 4ex;
padding-right: 1em;
text-align: right;
color: gray;
content: counter(line);
counter-increment: line;
}}
</style>
</head>
<body>
<h1>{percentage:.2f}% ({covered}/{pts}): {file} coverage report</h1>
<pre>{content}</pre>
</body>
</html>
""".format(percentage=total_covered / total_pts,
pts=total_pts, covered=total_covered,
file=os.path.basename(_buf.name),
content=content))

# Create the instrumented parser when `import pyparser.coverage.parser`
# is invoked. Not intended for any use except running the internal testsuite.
instrument()
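
Concretely, instrument() appends a loc= keyword to every combinator call inside the Parser class, recording the begin/end byte offsets of the combinator name itself so that report() can later wrap exactly that span in a coverage <span>; it also deepens the relative imports (from . to from ..) so the generated module still finds its sibling modules. A hypothetical before/after, with rule names and offsets made up:

    # in pyparser/parser.py, the pristine grammar:
    pass_stmt = Loc('pass')
    small_stmt = Alt(Rule('expr_stmt'), Rule('pass_stmt'))

    # in the generated pyparser/coverage/parser.py:
    pass_stmt = Loc('pass', loc=(1234, 1237))
    small_stmt = Alt(Rule('expr_stmt', loc=(1260, 1264)),
                     Rule('pass_stmt', loc=(1283, 1287)), loc=(1252, 1255))

The stack of pending ", loc=(...)" strings is what carries each annotation across nested parentheses to the closing parenthesis of its own combinator.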
10 changes: 9 additions & 1 deletion pyparser/lexer.py
@@ -212,6 +212,13 @@ def next(self, eof_token=False):

return self.queue.pop(0)

def peek(self, eof_token=False):
"""Same as :meth:`next`, except the token is not dequeued."""
if len(self.queue) == 0:
self._refill(eof_token)

return self.queue[-1]

def _refill(self, eof_token):
if self.offset == len(self.source_buffer.source):
if eof_token:
@@ -231,7 +238,8 @@ def _refill(self, eof_token):
raise diagnostic.DiagnosticException(diag)

# Should we emit indent/dedent?
if self.new_line:
if self.new_line and \
match.group(3) is None: # not a newline
whitespace = match.string[match.start(0):match.start(1)]
level = len(whitespace.expandtabs())
range = source.Range(self.source_buffer, match.start(1), match.start(1))
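
The new peek() provides the single token of lookahead that instrument() relies on to tell an empty argument list from a non-empty one without consuming input. A sketch of the pattern, mirroring its use in coverage/__init__.py above:

    rparen = lex.peek()     # look at the upcoming token without dequeuing it
    if rparen.kind == ')':
        lex.next()          # only now consume it

Note that peek() refills the queue only when it is empty and then returns queue[-1], while next() pops queue[0]; the two agree whenever _refill queues exactly one token. The added match.group(3) guard, meanwhile, stops _refill from emitting indent/dedent bookkeeping when the match is a bare newline; group 3 is presumably the newline group of the lexer's master regexp, which lies outside this diff.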
136 changes: 86 additions & 50 deletions pyparser/parser.py
@@ -1,14 +1,42 @@
"""
The :mod:`parser` module concerns itself with LL(1) parsing.
The :mod:`parser` module concerns itself with parsing Python source.
"""

from __future__ import absolute_import, division, print_function, unicode_literals
from . import source, diagnostic, lexer, ast

# Generic LL parsing combinators
unmatched = object()
class Unmatched:
def __repr__(self):
return "<can't parse>"
unmatched = Unmatched()

_all_rules = []

def llrule(loc, expected, cases=1):
if loc is None:
def decorator(rule):
rule.expected = expected
return rule
else:
def decorator(inner_rule):
if cases == 1:
def rule(*args, **kwargs):
result = inner_rule(*args, **kwargs)
if result is not unmatched:
rule.covered[0] = True
return result
else:
rule = inner_rule

rule.loc, rule.expected, rule.covered = \
loc, expected, [False] * cases
_all_rules.append(rule)

return rule
return decorator

def action(inner_rule):
def action(inner_rule, loc=None):
"""
A decorator returning a function that first runs ``inner_rule`` and then, if its
return value is not None, maps that value using ``mapper``.
@@ -18,50 +46,51 @@ def action(inner_rule):
Similar to attaching semantic actions to rules in traditional parser generators.
"""
def decorator(mapper):
@llrule(loc, inner_rule.expected)
def outer_rule(parser):
result = inner_rule(parser)
if isinstance(result, tuple):
result = mapper(parser, *result)
elif result is not unmatched:
result = mapper(parser, result)
return result
outer_rule.expected = inner_rule.expected
return outer_rule
return decorator

def Eps(value=None):
def Eps(value=None, loc=None):
"""A rule that accepts no tokens (epsilon) and returns ``value``."""
@llrule(loc, lambda parser: [])
def rule(parser):
return value
rule.expected = lambda parser: [[]]
return rule

def Tok(kind):
def Tok(kind, loc=None):
"""A rule that accepts a token of kind ``kind`` and returns it, or returns None."""
@llrule(loc, lambda parser: [kind])
def rule(parser):
return parser._accept(kind)
rule.expected = lambda parser: [[kind]]
return rule

def Loc(kind):
def Loc(kind, loc=None):
"""A rule that accepts a token of kind ``kind`` and returns its location, or returns None."""
@llrule(loc, lambda parser: [kind])
def rule(parser):
result = parser._accept(kind)
if result is not unmatched:
return result.loc
return unmatched
rule.expected = lambda parser: [[kind]]
return rule

def Rule(name):
def Rule(name, loc=None):
"""A proxy for a rule called ``name`` which may not be yet defined."""
@llrule(loc, lambda parser: getattr(parser, name).expected(parser))
def rule(parser):
return getattr(parser, name)()
rule.expected = lambda parser: getattr(parser, name).expected(parser)
return rule

def Expect(inner_rule):
def Expect(inner_rule, loc=None):
"""A rule that executes ``inner_rule`` and emits a diagnostic error if it returns None."""
@llrule(loc, inner_rule.expected)
def rule(parser):
result = inner_rule(parser)
if result is unmatched:
@@ -78,76 +107,84 @@ def rule(parser):
parser.token.loc)
raise diagnostic.DiagnosticException(error)
return result
rule.expected = inner_rule.expected
return rule

def Seq(first_rule, *rest_of_rules):
def Seq(first_rule, *rest_of_rules, **kwargs):
"""
A rule that accepts a sequence of tokens satisfying ``rules`` and returns a tuple
containing their return values, or None if the first rule was not satisfied.
"""
rest_of_rules = map(Expect, rest_of_rules)
@llrule(kwargs.get('loc', None), first_rule.expected)
def rule(parser):
first_result = first_rule(parser)
if first_result is not unmatched:
return tuple([first_result]) + tuple(map(lambda rule: rule(parser), rest_of_rules))
return unmatched
rule.expected = \
lambda parser: first_rule.expected(parser) + \
reduce(list.__add__, map(lambda rule: rule.expected(parser), rest_of_rules))
return rule

def SeqN(n, *inner_rules):
def SeqN(n, *inner_rules, **kwargs):
"""
A rule that accepts a sequence of tokens satisfying ``rules`` and returns
the value returned by rule number ``n``, or None if the first rule was not satisfied.
"""
@action(Seq(*inner_rules))
@action(Seq(*inner_rules), loc=kwargs.get('loc', None))
def rule(parser, *values):
return values[n]
return rule

def Alt(*inner_rules):
def Alt(*inner_rules, **kwargs):
"""
A rule that expects a sequence of tokens satisfying one of ``rules`` in sequence
(a rule is satisfied when it returns anything but None) and returns the return
value of that rule, or None if no rules were satisfied.
"""
def rule(parser):
# semantically reduce(), but faster.
for inner_rule in inner_rules:
result = inner_rule(parser)
if result is not unmatched:
return result
return unmatched
rule.expected = \
lambda parser: reduce(list.__add__, map(lambda x: x.expected(parser), inner_rules))
loc = kwargs.get('loc', None)
expected = lambda parser: reduce(list.__add__, map(lambda x: x.expected(parser), inner_rules))
if loc is not None:
@llrule(loc, expected, cases=len(inner_rules))
def rule(parser):
for idx, inner_rule in enumerate(inner_rules):
result = inner_rule(parser)
if result is not unmatched:
rule.covered[idx] = True
return result
return unmatched
else:
@llrule(loc, expected, cases=len(inner_rules))
def rule(parser):
for inner_rule in inner_rules:
result = inner_rule(parser)
if result is not unmatched:
return result
return unmatched
return rule

def Opt(inner_rule):
def Opt(inner_rule, loc=None):
"""Shorthand for ``Alt(inner_rule, Eps())``"""
return Alt(inner_rule, Eps())
return Alt(inner_rule, Eps(), loc=loc)

def Star(inner_rule):
def Star(inner_rule, loc=None):
"""
A rule that accepts a sequence of tokens satisfying ``inner_rule`` zero or more times,
and returns the returned values in a :class:`list`.
"""
@llrule(loc, lambda parser: [])
def rule(parser):
results = []
while True:
result = inner_rule(parser)
if result is unmatched:
return results
results.append(result)
rule.expected = lambda parser: []
return rule

def Plus(inner_rule):
def Plus(inner_rule, loc=None):
"""
A rule that accepts a sequence of tokens satisfying ``inner_rule`` one or more times,
and returns the returned values in a :class:`list`.
"""
@llrule(loc, inner_rule.expected)
def rule(parser):
result = inner_rule(parser)
if result is unmatched:
@@ -159,21 +196,21 @@ def rule(parser):
if result is unmatched:
return results
results.append(result)
rule.expected = inner_rule.expected
return rule

def List(inner_rule, separator_tok, trailing, leading=True):
def List(inner_rule, separator_tok, trailing, leading=True, loc=None):
if not trailing:
@action(Seq(inner_rule, Star(SeqN(1, Tok(separator_tok), inner_rule))))
def rule(parser, first, rest):
@action(Seq(inner_rule, Star(SeqN(1, Tok(separator_tok), inner_rule))), loc=loc)
def outer_rule(parser, first, rest):
return [first] + rest
return rule
return outer_rule
else:
# A rule like this: stmt (';' stmt)* [';']
# This doesn't lend itself to the combinators above, because disambiguating
# another iteration of the Kleene star and the trailing separator
# requires two lookahead tokens (naively).
separator_rule = Tok(separator_tok)
@llrule(loc, inner_rule.expected)
def rule(parser):
results = []

@@ -196,41 +233,40 @@ def rule(parser):
return results
else:
results.append(result)
rule.expected = inner_rule.expected
return rule

# Python AST specific parser combinators
def Newline():
def Newline(loc=None):
"""A rule that accepts token of kind ``newline`` and returns []."""
@llrule(loc, lambda parser: ['newline'])
def rule(parser):
if parser._accept('newline') is not unmatched:
return []
return unmatched
rule.expected = lambda parser: [['newline']]
return rule

def Oper(klass, *kinds):
def Oper(klass, *kinds, **kwargs):
"""
A rule that accepts a sequence of tokens of kinds ``kinds`` and returns
an instance of ``klass`` with ``loc`` encompassing the entire sequence
or None if the first token is not of ``kinds[0]``.
"""
@action(Seq(*map(Loc, kinds)))
@action(Seq(*map(Loc, kinds)), loc=kwargs.get('loc', None))
def rule(parser, *tokens):
return klass(loc=tokens[0].join(tokens[-1]))
return rule

def BinOper(expr_rulename, op_rule, node=ast.BinOp):
@action(Seq(Rule(expr_rulename), Star(Seq(op_rule, Rule(expr_rulename)))))
def BinOper(expr_rulename, op_rule, node=ast.BinOp, loc=None):
@action(Seq(Rule(expr_rulename), Star(Seq(op_rule, Rule(expr_rulename)))), loc=loc)
def rule(parser, lhs, trailers):
for (op, rhs) in trailers:
lhs = node(left=lhs, op=op, right=rhs,
loc=lhs.loc.join(rhs.loc))
return lhs
return rule

def BeginEnd(begin_tok, inner_rule, end_tok, empty=None):
@action(Seq(Loc(begin_tok), inner_rule, Loc(end_tok)))
def BeginEnd(begin_tok, inner_rule, end_tok, empty=None, loc=None):
@action(Seq(Loc(begin_tok), inner_rule, Loc(end_tok)), loc=loc)
def rule(parser, begin_loc, node, end_loc):
if node is None:
node = empty()
@@ -333,7 +369,7 @@ def varargslist_1(self, dstar_loc, kwarg_tok):
dstar_loc=dstar_loc, kwarg_loc=kwarg_tok.loc)

@action(Seq(Loc('*'), Tok('ident'),
Opt(Seq(Loc('**', Tok('ident'))))))
Opt(Seq(Loc('**'), Tok('ident')))))
def varargslist_2(self, star_loc, vararg_tok, kwarg_opt):
dstar_loc, kwarg, kwarg_loc = None, None, None
if kwarg_opt:
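
The division of labor established above: llrule gives every combinator a covered list with one flag per coverage point, a simple rule flips its single flag on any match, and an Alt built with a loc= flips the flag of whichever alternative matched. A self-contained miniature of that per-alternative bookkeeping (illustrative names, not pyparser's API):

    def alt(*alternatives):
        # Try each alternative in order; record which one matched.
        def rule(tokens):
            for idx, inner in enumerate(alternatives):
                result = inner(tokens)
                if result is not None:
                    rule.covered[idx] = True
                    return result
            return None
        rule.covered = [False] * len(alternatives)
        return rule

    def tok(kind):
        return lambda tokens: kind if tokens and tokens[0] == kind else None

    stmt = alt(tok('pass'), tok('break'), tok('continue'))
    stmt(['pass'])
    assert stmt.covered == [True, False, False]  # report() would class this rule as 'partial'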
6 changes: 3 additions & 3 deletions pyparser/source.py
@@ -201,15 +201,15 @@ def replace(self, range, replacement):

def remove(self, range):
"""Remove `range`."""
self.replace(self, range, "")
self.replace(range, "")

def insert_before(self, range, text):
"""Insert `text` before `range`."""
self.replace(self, range.begin(), "")
self.replace(range.begin(), text)

def insert_after(self, range, text):
"""Insert `text` after `range`."""
self.replace(self, range.end(), "")
self.replace(range.end(), text)

def rewrite(self):
"""Return the rewritten source. May raise :class:`RewriterConflict`."""
12 changes: 8 additions & 4 deletions pyparser/test/test_parser.py
@@ -1,9 +1,13 @@
# coding:utf-8

from __future__ import absolute_import, division, print_function, unicode_literals
from .. import source, lexer, diagnostic, parser
from .. import source, lexer, diagnostic, coverage
from ..coverage import parser
import unittest

def tearDownModule():
coverage.report(parser)

class ParserTestCase(unittest.TestCase):

def parser_for(self, code):
@@ -21,11 +25,11 @@ def lexer_next(**args):
return self.parser

def assertParses(self, ast, code):
self.assertEqual(ast, self.parser_for(code).eval_input())
self.assertEqual(ast, self.parser_for(code).file_input())

def assertDiagnoses(self, code, diag):
try:
self.parser_for(code).eval_input()
self.parser_for(code).file_input()
self.fail("Expected a diagnostic")
except diagnostic.DiagnosticException as e:
level, reason, args, loc = diag
@@ -43,5 +47,5 @@ def assertDiagnosesUnexpected(self, code, err_token, loc):
("error", "unexpected {actual}: expected {expected}", {'actual': err_token}, loc))

def test_pass(self):
self.assertParses(None, "pass")
self.assertParses(None, "pass\n")
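
With these changes the test suite drives the whole coverage pipeline: importing pyparser.coverage runs instrument() and regenerates the grammar, the tests exercise it through file_input(), and tearDownModule() writes doc/coverage/parser.html. The same flow by hand would look roughly like this (a sketch; how the parses are driven is elided, as ParserTestCase shows the details):

    from pyparser import coverage
    from pyparser.coverage import parser   # importing pyparser.coverage already ran instrument()

    # ... build parsers over the instrumented rules and parse some sources ...

    coverage.report(parser)   # prints the ratio and writes doc/coverage/parser.html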
