Skip to content

Commit a9fd0a0

Browse files
author
whitequark
committedApr 24, 2015
Add support for collecting grammar coverage.
1 parent fd7209a commit a9fd0a0

File tree

7 files changed

+229
-58
lines changed

7 files changed

+229
-58
lines changed
 

Diff for: ‎.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ _build/
44
*.egg-info/
55
/build/
66
/dist/
7+
/pyparser/coverage/parser.py
8+
/doc/coverage/*

Diff for: ‎doc/coverage/.gitkeep

Whitespace-only changes.

Diff for: ‎pyparser/coverage/__init__.py

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
from __future__ import absolute_import, division, print_function, unicode_literals
2+
from .. import source, lexer
3+
import os
4+
5+
_buf = None
6+
with open(os.path.join(os.path.dirname(__file__), '..', 'parser.py')) as f:
7+
_buf = source.Buffer(f.read(), f.name)
8+
9+
# Inject the grammar with locations of rules, because Python's
10+
# builtin tracebacks don't include column numbers.
11+
# This would really be more elegant if it used the parser,
12+
# but the parser doesn't work yet at the time of writing.
13+
def instrument():
    """
    Write an instrumented copy of ``../parser.py`` to ``parser.py`` in this package.

    The original source held in ``_buf`` is lexed token by token and rewritten:

    * a ``.`` token directly following ``from`` is replaced with ``..``;
    * once ``class Parser`` has been seen, every call to one of the known
      combinators receives an extra ``loc=(begin, end)`` argument holding the
      source offsets of the combinator's name token.

    Takes no arguments and returns nothing; the only effect is the written file.
    """
    combinator_names = ('action', 'Eps', 'Tok', 'Loc', 'Rule', 'Expect',
                        'Seq', 'SeqN', 'Alt', 'Opt', 'Star', 'Plus', 'List',
                        'Newline', 'Oper', 'BinOper', 'BeginEnd')

    rewriter = source.Rewriter(_buf)
    # NOTE(review): (3, 4) is presumably the Python version to lex as -- confirm
    # against lexer.Lexer.
    tokens = lexer.Lexer(_buf, (3, 4))
    inside_parser = False
    # One entry per currently open parenthesis: the text to insert before the
    # matching ")" for an instrumented call, or None for any other parenthesis.
    pending = []

    for token in tokens:
        if token.kind == 'from':
            token = tokens.next()
            if token.kind == '.':
                rewriter.replace(token.loc, "..")

        if token.kind == 'class':
            token = tokens.next()
            if token.kind == 'ident' and token.value == 'Parser':
                inside_parser = True

        if not inside_parser:
            continue

        if token.kind == 'ident' and token.value in combinator_names:
            span = (token.loc.begin_pos, token.loc.end_pos)
            opener = tokens.next()
            if opener.kind == '(':
                closer = tokens.peek()
                if closer.kind == ')':
                    # Call without arguments: no leading comma is needed.
                    tokens.next()
                    rewriter.insert_before(closer.loc, "loc=(%d,%d)" % span)
                else:
                    # Defer the insertion until the matching ")" is reached.
                    pending.append(", loc=(%d,%d)" % span)

        # ``token`` is still the name token after the branch above, so the
        # parenthesis consumed there is never counted a second time here.
        if token.kind == '(':
            pending.append(None)
        elif token.kind == ')':
            annotation = pending.pop()
            if annotation is not None:
                rewriter.insert_before(token.loc, annotation)

    target = os.path.join(os.path.dirname(__file__), 'parser.py')
    with open(target, 'w') as f:
        f.write(rewriter.rewrite().source)
56+
57+
# Produce an HTML report for test coverage of parser rules.
58+
def report(parser, name='parser'):
    """
    Produce an HTML report for test coverage of parser rules.

    Every rule in ``parser._all_rules`` is wrapped in a ``<span>`` whose class
    is ``uncovered``, ``partial`` or ``covered``, depending on how many entries
    of ``rule.covered`` are true. The annotated source is written to
    ``doc/coverage/<name>.html`` (relative to the directory two levels above
    this package), and the overall coverage ratio is printed.

    :param parser: object exposing ``_all_rules``; each rule must provide
        ``covered`` (a sequence of flags) and ``loc`` (arguments for
        ``source.Range`` after the buffer)
    :param name: base name of the generated HTML file

    If there are no coverage points at all, coverage is reported as zero
    instead of raising :class:`ZeroDivisionError`.
    """
    rewriter = source.Rewriter(_buf)
    total_pts = 0
    total_covered = 0
    for rule in parser._all_rules:
        pts = len(rule.covered)
        # Count with sum(): ``len(filter(...))`` fails on Python 3, where
        # filter() returns a lazy iterator without a length.
        covered = sum(1 for hit in rule.covered if hit)
        if covered == 0:
            klass = 'uncovered'
        elif covered < pts:
            klass = 'partial'
        else:
            klass = 'covered'

        loc = source.Range(_buf, *rule.loc)
        rewriter.insert_before(loc, r"<span class='%s'>" % klass)
        rewriter.insert_after(loc, r"</span>")

        total_pts += pts
        total_covered += covered

    # Guard against an empty rule set; float() keeps this a true division even
    # without ``from __future__ import division``.
    ratio = float(total_covered) / total_pts if total_pts else 0.0

    print("GRAMMAR COVERAGE: %.2f" % ratio)

    # NOTE(review): the source text is embedded without HTML escaping, so any
    # "<", ">" or "&" in it reaches the report verbatim. Escaping has to happen
    # before the <span> markers are inserted -- confirm how Rewriter should
    # support that.
    content = rewriter.rewrite().source
    content = '\n'.join(
        r"<span id='{0}' class='line'>{1}</span>".format(number, line)
        for number, line in enumerate(content.split("\n"), 1))

    with open(os.path.join(os.path.dirname(__file__), '..', '..',
                           'doc', 'coverage', name + '.html'), 'w') as f:
        # The template prints a "%" sign after the number, so it is given a
        # 0..100 percentage rather than the 0..1 ratio.
        f.write(r"""
<!DOCTYPE html>
<html>
<head>
<title>{percentage:.2f}%: {file} coverage report</title>
<style type="text/css">
.uncovered {{ background-color: #FFCAAD; }}
.partial {{ background-color: #FFFFB4; }}
.covered {{ background-color: #9CE4B7; }}
pre {{ counter-reset: line; }}
.line::before {{
display: inline-block;
width: 4ex;
padding-right: 1em;
text-align: right;
color: gray;
content: counter(line);
counter-increment: line;
}}
</style>
</head>
<body>
<h1>{percentage:.2f}% ({covered}/{pts}): {file} coverage report</h1>
<pre>{content}</pre>
</body>
</html>
""".format(percentage=ratio * 100,
           pts=total_pts, covered=total_covered,
           file=os.path.basename(_buf.name),
           content=content))
118+
119+
# Create the instrumented parser when `import pyparser.coverage.parser`
120+
# is invoked. Not intended for any use except running the internal testsuite.
121+
instrument()

Diff for: ‎pyparser/lexer.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,13 @@ def next(self, eof_token=False):
212212

213213
return self.queue.pop(0)
214214

215+
def peek(self, eof_token=False):
    """
    Same as :meth:`next`, except the token is not dequeued.

    :param eof_token: passed through to :meth:`_refill` when the queue is empty
    :returns: the token at the front of the queue, i.e. the one the following
        call to :meth:`next` will return
    """
    if len(self.queue) == 0:
        self._refill(eof_token)

    # next() dequeues with pop(0), so the upcoming token is the first queue
    # entry; the last entry differs whenever several tokens are queued.
    return self.queue[0]
221+
215222
def _refill(self, eof_token):
216223
if self.offset == len(self.source_buffer.source):
217224
if eof_token:
@@ -231,7 +238,8 @@ def _refill(self, eof_token):
231238
raise diagnostic.DiagnosticException(diag)
232239

233240
# Should we emit indent/dedent?
234-
if self.new_line:
241+
if self.new_line and \
242+
match.group(3) is None: # not a newline
235243
whitespace = match.string[match.start(0):match.start(1)]
236244
level = len(whitespace.expandtabs())
237245
range = source.Range(self.source_buffer, match.start(1), match.start(1))

Diff for: ‎pyparser/parser.py

+86-50
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,42 @@
11
"""
2-
The :mod:`parser` module concerns itself with LL(1) parsing.
2+
The :mod:`parser` module concerns itself with parsing Python source.
33
"""
44

55
from __future__ import absolute_import, division, print_function, unicode_literals
66
from . import source, diagnostic, lexer, ast
77

88
# Generic LL parsing combinators
9-
unmatched = object()
9+
class Unmatched(object):
    """
    Type of the :data:`unmatched` sentinel.

    Derives from ``object`` so that it is a new-style class on Python 2 too.
    """

    def __repr__(self):
        return "<can't parse>"

# Singleton that rules return to signal that they did not match; it is always
# compared by identity (``is unmatched``).
unmatched = Unmatched()
13+
14+
_all_rules = []
15+
16+
def llrule(loc, expected, cases=1):
    """
    Return a decorator that attaches parser-rule metadata to a function.

    :param loc: location of the rule, or None to disable coverage tracking
    :param expected: value stored as the rule's ``expected`` attribute
    :param cases: number of coverage flags allocated for the rule

    With ``loc`` set to None the decorated function only receives ``expected``
    and is returned as is. Otherwise the resulting rule also receives ``loc``
    and ``covered`` (a list of ``cases`` false flags) and is appended to
    ``_all_rules``. A rule with exactly one case is wrapped so that its single
    flag is set whenever it returns anything other than ``unmatched``; a rule
    with any other number of cases is left unwrapped and must update
    ``covered`` itself.
    """
    def plain_decorator(rule):
        rule.expected = expected
        return rule

    def tracking_decorator(inner_rule):
        if cases != 1:
            rule = inner_rule
        else:
            def rule(*args, **kwargs):
                outcome = inner_rule(*args, **kwargs)
                if outcome is not unmatched:
                    rule.covered[0] = True
                return outcome

        rule.loc = loc
        rule.expected = expected
        rule.covered = [False] * cases
        _all_rules.append(rule)

        return rule

    if loc is None:
        return plain_decorator
    return tracking_decorator
1038

11-
def action(inner_rule):
39+
def action(inner_rule, loc=None):
1240
"""
1341
A decorator returning a function that first runs ``inner_rule`` and then, if its
1442
return value is not None, maps that value using ``mapper``.
@@ -18,50 +46,51 @@ def action(inner_rule):
1846
Similar to attaching semantic actions to rules in traditional parser generators.
1947
"""
2048
def decorator(mapper):
49+
@llrule(loc, inner_rule.expected)
2150
def outer_rule(parser):
2251
result = inner_rule(parser)
2352
if isinstance(result, tuple):
2453
result = mapper(parser, *result)
2554
elif result is not unmatched:
2655
result = mapper(parser, result)
2756
return result
28-
outer_rule.expected = inner_rule.expected
2957
return outer_rule
3058
return decorator
3159

32-
def Eps(value=None):
60+
def Eps(value=None, loc=None):
3361
"""A rule that accepts no tokens (epsilon) and returns ``value``."""
62+
@llrule(loc, lambda parser: [])
3463
def rule(parser):
3564
return value
36-
rule.expected = lambda parser: [[]]
3765
return rule
3866

39-
def Tok(kind):
67+
def Tok(kind, loc=None):
4068
"""A rule that accepts a token of kind ``kind`` and returns it, or returns None."""
69+
@llrule(loc, lambda parser: [kind])
4170
def rule(parser):
4271
return parser._accept(kind)
43-
rule.expected = lambda parser: [[kind]]
4472
return rule
4573

46-
def Loc(kind):
74+
def Loc(kind, loc=None):
4775
"""A rule that accepts a token of kind ``kind`` and returns its location, or returns None."""
76+
@llrule(loc, lambda parser: [kind])
4877
def rule(parser):
4978
result = parser._accept(kind)
5079
if result is not unmatched:
5180
return result.loc
5281
return unmatched
53-
rule.expected = lambda parser: [[kind]]
5482
return rule
5583

56-
def Rule(name):
84+
def Rule(name, loc=None):
5785
"""A proxy for a rule called ``name`` which may not be yet defined."""
86+
@llrule(loc, lambda parser: getattr(parser, name).expected(parser))
5887
def rule(parser):
5988
return getattr(parser, name)()
60-
rule.expected = lambda parser: getattr(parser, name).expected(parser)
6189
return rule
6290

63-
def Expect(inner_rule):
91+
def Expect(inner_rule, loc=None):
6492
"""A rule that executes ``inner_rule`` and emits a diagnostic error if it returns None."""
93+
@llrule(loc, inner_rule.expected)
6594
def rule(parser):
6695
result = inner_rule(parser)
6796
if result is unmatched:
@@ -78,76 +107,84 @@ def rule(parser):
78107
parser.token.loc)
79108
raise diagnostic.DiagnosticException(error)
80109
return result
81-
rule.expected = inner_rule.expected
82110
return rule
83111

84-
def Seq(first_rule, *rest_of_rules):
112+
def Seq(first_rule, *rest_of_rules, **kwargs):
85113
"""
86114
A rule that accepts a sequence of tokens satisfying ``rules`` and returns a tuple
87115
containing their return values, or None if the first rule was not satisfied.
88116
"""
89117
rest_of_rules = map(Expect, rest_of_rules)
118+
@llrule(kwargs.get('loc', None), first_rule.expected)
90119
def rule(parser):
91120
first_result = first_rule(parser)
92121
if first_result is not unmatched:
93122
return tuple([first_result]) + tuple(map(lambda rule: rule(parser), rest_of_rules))
94123
return unmatched
95-
rule.expected = \
96-
lambda parser: first_rule.expected(parser) + \
97-
reduce(list.__add__, map(lambda rule: rule.expected(parser), rest_of_rules))
98124
return rule
99125

100-
def SeqN(n, *inner_rules):
126+
def SeqN(n, *inner_rules, **kwargs):
101127
"""
102128
A rule that accepts a sequence of tokens satisfying ``rules`` and returns
103129
the value returned by rule number ``n``, or None if the first rule was not satisfied.
104130
"""
105-
@action(Seq(*inner_rules))
131+
@action(Seq(*inner_rules), loc=kwargs.get('loc', None))
106132
def rule(parser, *values):
107133
return values[n]
108134
return rule
109135

110-
def Alt(*inner_rules):
136+
def Alt(*inner_rules, **kwargs):
111137
"""
112138
A rule that expects a sequence of tokens satisfying one of ``rules`` in sequence
113139
(a rule is satisfied when it returns anything but None) and returns the return
114140
value of that rule, or None if no rules were satisfied.
115141
"""
116-
def rule(parser):
117-
# semantically reduce(), but faster.
118-
for inner_rule in inner_rules:
119-
result = inner_rule(parser)
120-
if result is not unmatched:
121-
return result
122-
return unmatched
123-
rule.expected = \
124-
lambda parser: reduce(list.__add__, map(lambda x: x.expected(parser), inner_rules))
142+
loc = kwargs.get('loc', None)
143+
expected = lambda parser: reduce(list.__add__, map(lambda x: x.expected(parser), inner_rules))
144+
if loc is not None:
145+
@llrule(loc, expected, cases=len(inner_rules))
146+
def rule(parser):
147+
for idx, inner_rule in enumerate(inner_rules):
148+
result = inner_rule(parser)
149+
if result is not unmatched:
150+
rule.covered[idx] = True
151+
return result
152+
return unmatched
153+
else:
154+
@llrule(loc, expected, cases=len(inner_rules))
155+
def rule(parser):
156+
for inner_rule in inner_rules:
157+
result = inner_rule(parser)
158+
if result is not unmatched:
159+
return result
160+
return unmatched
125161
return rule
126162

127-
def Opt(inner_rule):
163+
def Opt(inner_rule, loc=None):
128164
"""Shorthand for ``Alt(inner_rule, Eps())``"""
129-
return Alt(inner_rule, Eps())
165+
return Alt(inner_rule, Eps(), loc=loc)
130166

131-
def Star(inner_rule):
167+
def Star(inner_rule, loc=None):
132168
"""
133169
A rule that accepts a sequence of tokens satisfying ``inner_rule`` zero or more times,
134170
and returns the returned values in a :class:`list`.
135171
"""
172+
@llrule(loc, lambda parser: [])
136173
def rule(parser):
137174
results = []
138175
while True:
139176
result = inner_rule(parser)
140177
if result is unmatched:
141178
return results
142179
results.append(result)
143-
rule.expected = lambda parser: []
144180
return rule
145181

146-
def Plus(inner_rule):
182+
def Plus(inner_rule, loc=None):
147183
"""
148184
A rule that accepts a sequence of tokens satisfying ``inner_rule`` one or more times,
149185
and returns the returned values in a :class:`list`.
150186
"""
187+
@llrule(loc, inner_rule.expected)
151188
def rule(parser):
152189
result = inner_rule(parser)
153190
if result is unmatched:
@@ -159,21 +196,21 @@ def rule(parser):
159196
if result is unmatched:
160197
return results
161198
results.append(result)
162-
rule.expected = inner_rule.expected
163199
return rule
164200

165-
def List(inner_rule, separator_tok, trailing, leading=True):
201+
def List(inner_rule, separator_tok, trailing, leading=True, loc=None):
166202
if not trailing:
167-
@action(Seq(inner_rule, Star(SeqN(1, Tok(separator_tok), inner_rule))))
168-
def rule(parser, first, rest):
203+
@action(Seq(inner_rule, Star(SeqN(1, Tok(separator_tok), inner_rule))), loc=loc)
204+
def outer_rule(parser, first, rest):
169205
return [first] + rest
170-
return rule
206+
return outer_rule
171207
else:
172208
# A rule like this: stmt (';' stmt)* [';']
173209
# This doesn't yield itself to combinators above, because disambiguating
174210
# another iteration of the Kleene star and the trailing separator
175211
# requires two lookahead tokens (naively).
176212
separator_rule = Tok(separator_tok)
213+
@llrule(loc, inner_rule.expected)
177214
def rule(parser):
178215
results = []
179216

@@ -196,41 +233,40 @@ def rule(parser):
196233
return results
197234
else:
198235
results.append(result)
199-
rule.expected = inner_rule.expected
200236
return rule
201237

202238
# Python AST specific parser combinators
203-
def Newline():
239+
def Newline(loc=None):
204240
"""A rule that accepts token of kind ``newline`` and returns []."""
241+
@llrule(loc, lambda parser: ['newline'])
205242
def rule(parser):
206243
if parser._accept('newline') is not unmatched:
207244
return []
208245
return unmatched
209-
rule.expected = lambda parser: [['newline']]
210246
return rule
211247

212-
def Oper(klass, *kinds):
248+
def Oper(klass, *kinds, **kwargs):
213249
"""
214250
A rule that accepts a sequence of tokens of kinds ``kinds`` and returns
215251
an instance of ``klass`` with ``loc`` encompassing the entire sequence
216252
or None if the first token is not of ``kinds[0]``.
217253
"""
218-
@action(Seq(*map(Loc, kinds)))
254+
@action(Seq(*map(Loc, kinds)), loc=kwargs.get('loc', None))
219255
def rule(parser, *tokens):
220256
return klass(loc=tokens[0].join(tokens[-1]))
221257
return rule
222258

223-
def BinOper(expr_rulename, op_rule, node=ast.BinOp):
224-
@action(Seq(Rule(expr_rulename), Star(Seq(op_rule, Rule(expr_rulename)))))
259+
def BinOper(expr_rulename, op_rule, node=ast.BinOp, loc=None):
260+
@action(Seq(Rule(expr_rulename), Star(Seq(op_rule, Rule(expr_rulename)))), loc=loc)
225261
def rule(parser, lhs, trailers):
226262
for (op, rhs) in trailers:
227263
lhs = node(left=lhs, op=op, right=rhs,
228264
loc=lhs.loc.join(rhs.loc))
229265
return lhs
230266
return rule
231267

232-
def BeginEnd(begin_tok, inner_rule, end_tok, empty=None):
233-
@action(Seq(Loc(begin_tok), inner_rule, Loc(end_tok)))
268+
def BeginEnd(begin_tok, inner_rule, end_tok, empty=None, loc=None):
269+
@action(Seq(Loc(begin_tok), inner_rule, Loc(end_tok)), loc=loc)
234270
def rule(parser, begin_loc, node, end_loc):
235271
if node is None:
236272
node = empty()
@@ -333,7 +369,7 @@ def varargslist_1(self, dstar_loc, kwarg_tok):
333369
dstar_loc=dstar_loc, kwarg_loc=kwarg_tok.loc)
334370

335371
@action(Seq(Loc('*'), Tok('ident'),
336-
Opt(Seq(Loc('**', Tok('ident'))))))
372+
Opt(Seq(Loc('**'), Tok('ident')))))
337373
def varargslist_2(self, star_loc, vararg_tok, kwarg_opt):
338374
dstar_loc, kwarg, kwarg_loc = None
339375
if kwarg_opt:

Diff for: ‎pyparser/source.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -201,15 +201,15 @@ def replace(self, range, replacement):
201201

202202
def remove(self, range):
203203
"""Remove `range`."""
204-
self.replace(self, range, "")
204+
self.replace(range, "")
205205

206206
def insert_before(self, range, text):
207207
"""Insert `text` before `range`."""
208-
self.replace(self, range.begin(), "")
208+
self.replace(range.begin(), text)
209209

210210
def insert_after(self, range, text):
211211
"""Insert `text` after `range`."""
212-
self.replace(self, range.end(), "")
212+
self.replace(range.end(), text)
213213

214214
def rewrite(self):
215215
"""Return the rewritten source. May raise :class:`RewriterConflict`."""

Diff for: ‎pyparser/test/test_parser.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
# coding:utf-8
22

33
from __future__ import absolute_import, division, print_function, unicode_literals
4-
from .. import source, lexer, diagnostic, parser
4+
from .. import source, lexer, diagnostic, coverage
5+
from ..coverage import parser
56
import unittest
67

8+
def tearDownModule():
9+
coverage.report(parser)
10+
711
class ParserTestCase(unittest.TestCase):
812

913
def parser_for(self, code):
@@ -21,11 +25,11 @@ def lexer_next(**args):
2125
return self.parser
2226

2327
def assertParses(self, ast, code):
24-
self.assertEqual(ast, self.parser_for(code).eval_input())
28+
self.assertEqual(ast, self.parser_for(code).file_input())
2529

2630
def assertDiagnoses(self, code, diag):
2731
try:
28-
self.parser_for(code).eval_input()
32+
self.parser_for(code).file_input()
2933
self.fail("Expected a diagnostic")
3034
except diagnostic.DiagnosticException as e:
3135
level, reason, args, loc = diag
@@ -43,5 +47,5 @@ def assertDiagnosesUnexpected(self, code, err_token, loc):
4347
("error", "unexpected {actual}: expected {expected}", {'actual': err_token}, loc))
4448

4549
def test_pass(self):
46-
self.assertParses(None, "pass")
50+
self.assertParses(None, "pass\n")
4751

0 commit comments

Comments
 (0)
Please sign in to comment.