Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 656fb73f02b7
Choose a base ref
...
head repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: f3d451d6f5e4
Choose a head ref
  • 2 commits
  • 4 files changed
  • 1 contributor

Commits on May 4, 2015

  1. Copy the full SHA
    eefc70d View commit details
  2. Test all toplevel parsing modes.

    whitequark committed May 4, 2015
    Copy the full SHA
    f3d451d View commit details
Showing with 96 additions and 23 deletions.
  1. +18 −6 pyparser/lexer.py
  2. +14 −8 pyparser/parser.py
  3. +14 −6 pyparser/test/test_lexer.py
  4. +50 −3 pyparser/test/test_parser.py
24 changes: 18 additions & 6 deletions pyparser/lexer.py
Original file line number Diff line number Diff line change
@@ -39,6 +39,8 @@ class Lexer:
the source buffer
:ivar offset: (integer) character offset into ``source_buffer``
indicating where the next token will be recognized
:ivar interactive: (boolean) whether a completely empty line
should generate a NEWLINE token, for use in REPLs
"""

_reserved_2_6 = frozenset([
@@ -91,9 +93,10 @@ class Lexer:
:class:`frozenset`\s of string prefixes.
"""

def __init__(self, source_buffer, version):
def __init__(self, source_buffer, version, interactive=False):
self.source_buffer = source_buffer
self.version = version
self.interactive = interactive

self.offset = 0
self.new_line = True
@@ -223,12 +226,19 @@ def peek(self, eof_token=False):
# generate several tokens, e.g. INDENT
def _refill(self, eof_token):
if self.offset == len(self.source_buffer.source):
range = source.Range(self.source_buffer, self.offset, self.offset)

for i in self.indent[1:]:
self.indent.pop(-1)
self.queue.append(Token(range, 'dedent'))

if eof_token:
self.queue.append(Token(
source.Range(self.source_buffer, self.offset, self.offset), 'eof'))
else:
self.queue.append(Token(range, 'eof'))
elif len(self.queue) == 0:
raise StopIteration

return

match = self._lex_token_re.match(self.source_buffer.source, self.offset)
if match is None:
diag = diagnostic.Diagnostic(
@@ -279,7 +289,8 @@ def _refill(self, eof_token):
if match.group(2) is not None:
# 2.1.5. Explicit line joining
return self._refill(eof_token)
if self.new_line:
if self.new_line and not \
(self.interactive and match.group(0) == match.group(3)): # REPL terminator
# 2.1.7. Blank lines
return self._refill(eof_token)

@@ -367,7 +378,8 @@ def _refill(self, eof_token):
self.queue.append(Token(tok_range, "ident", match.group(23)))

elif match.group(24) is not None: # end-of-file
self.queue.append(Token(tok_range, "eof"))
# Reuse the EOF logic
return self._refill(eof_token)

else:
assert False
22 changes: 14 additions & 8 deletions pyparser/parser.py
Original file line number Diff line number Diff line change
@@ -615,16 +615,18 @@ def assert_stmt(self, keyword_loc, test, msg):
return ast.Assert(test=test, msg=msg,
keyword_loc=keyword_loc)

compound_stmt = Alt(Rule('if_stmt'), Rule('while_stmt'), Rule('for_stmt'),
Rule('try_stmt'), Rule('with_stmt'), Rule('funcdef'),
Rule('classdef'), Rule('decorated'))
"""compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt |
funcdef | classdef | decorated"""
@action(Alt(Rule('if_stmt'), Rule('while_stmt'), Rule('for_stmt'),
Rule('try_stmt'), Rule('with_stmt'), Rule('funcdef'),
Rule('classdef'), Rule('decorated')))
def compound_stmt(self, stmt):
"""compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt |
funcdef | classdef | decorated"""
return [stmt]

@action(Seq(Loc('if'), Rule('test'), Loc(':'), Rule('suite'),
Star(Seq(Loc('elif'), Rule('test'), Loc(':'), Rule('suite'))),
Opt(Seq(Loc('else'), Loc(':'), Rule('suite')))))
def if_stmt(if_loc, test, if_colon_loc, body, elifs, else_opt):
def if_stmt(self, if_loc, test, if_colon_loc, body, elifs, else_opt):
"""if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]"""
stmt = ast.If(orelse=[],
else_loc=None, else_colon_loc=None)
@@ -633,12 +635,16 @@ def if_stmt(if_loc, test, if_colon_loc, body, elifs, else_opt):
stmt.else_loc, stmt.else_colon_loc, stmt.orelse = else_opt

for elif_ in elifs:
stmt.if_loc, stmt.test, stmt.if_colon_loc, stmt.body = elif_
stmt.keyword_loc, stmt.test, stmt.if_colon_loc, stmt.body = elif_
stmt.loc = stmt.keyword_loc.join(stmt.body[-1].loc)
if stmt.orelse: stmt.loc = stmt.loc.join(stmt.orelse[-1])
stmt = ast.If(orelse=[stmt],
else_loc=None, else_colon_loc=None)

stmt.if_loc, stmt.test, stmt.if_colon_loc, stmt.body = \
stmt.keyword_loc, stmt.test, stmt.if_colon_loc, stmt.body = \
if_loc, test, if_colon_loc, body
stmt.loc = stmt.keyword_loc.join(stmt.body[-1].loc)
if stmt.orelse: stmt.loc = stmt.loc.join(stmt.orelse[-1])
return stmt

@action(Seq(Loc('while'), Rule('test'), Loc(':'), Rule('suite'),
20 changes: 14 additions & 6 deletions pyparser/test/test_lexer.py
Original file line number Diff line number Diff line change
@@ -6,11 +6,11 @@

class LexerTestCase(unittest.TestCase):

def assertLexesVersions(self, input, versions, *expected_tokens):
def assertLexesVersions(self, input, versions, *expected_tokens, **kwargs):
for version in versions:
tokens = expected_tokens
self.buffer = source.Buffer(input)
self.lexer = lexer.Lexer(self.buffer, version)
self.lexer = lexer.Lexer(self.buffer, version, **kwargs)
for token in self.lexer:
if len(tokens) < 2:
raise Exception("stray tokens: %s" % repr(token))
@@ -34,8 +34,8 @@ def assertDiagnosesVersions(self, input, versions, diag, *tokens):

VERSIONS = [(2,6), (3,0), (3,1)]

def assertLexes(self, input, *tokens):
self.assertLexesVersions(input, self.VERSIONS, *tokens)
def assertLexes(self, input, *tokens, **kwargs):
self.assertLexesVersions(input, self.VERSIONS, *tokens, **kwargs)

def assertDiagnoses(self, input, diag, *tokens):
self.assertDiagnosesVersions(input, self.VERSIONS, diag, *tokens)
@@ -281,7 +281,8 @@ def test_indent(self):
"ident", "x",
"newline", None,
"ident", "x",
"newline", None)
"newline", None,
"dedent", None)

self.assertDiagnosesVersions(
" \tx\n\tx", [(3,0)],
@@ -293,7 +294,14 @@ def test_indent(self):
def test_eof(self):
self.assertLexes("\t",
"indent", None,
"eof", None)
"dedent", None)

def test_interactive(self):
self.assertLexes("x\n\n",
"ident", "x",
"newline", None,
"newline", None,
interactive=True)

def test_diag_unrecognized(self):
self.assertDiagnoses(
53 changes: 50 additions & 3 deletions pyparser/test/test_parser.py
Original file line number Diff line number Diff line change
@@ -15,12 +15,11 @@ class ParserTestCase(unittest.TestCase):

maxDiff = None

def parser_for(self, code, version=(2, 6)):
def parser_for(self, code, version=(2, 6), interactive=False):
code = code.replace("·", "\n")

self.source_buffer = source.Buffer(code)
self.lexer = lexer.Lexer(self.source_buffer, version)
self.parser = parser.Parser(self.lexer)
self.lexer = lexer.Lexer(self.source_buffer, version, interactive=interactive)

old_next = self.lexer.next
def lexer_next(**args):
@@ -29,6 +28,7 @@ def lexer_next(**args):
return token
self.lexer.next = lexer_next

self.parser = parser.Parser(self.lexer)
return self.parser

def flatten_ast(self, node):
@@ -106,6 +106,11 @@ def assertParsesExpr(self, expected_flat_ast, code, loc_matcher=""):
ast = self.assertParsesGen([{'ty': 'Expr', 'value': expected_flat_ast}], code)
self.match_loc(ast, loc_matcher, lambda x: x.body[0].value)

def assertParsesToplevel(self, expected_flat_ast, code, loc_matcher, mode, interactive):
ast = getattr(self.parser_for(code, interactive=interactive), mode)()
self.assertEqual(expected_flat_ast, self.flatten_ast(ast))
self.match_loc(ast, loc_matcher, lambda x: x.body)

def assertDiagnoses(self, code, diag):
try:
self.parser_for(code).file_input()
@@ -743,3 +748,45 @@ def test_augassign(self):
"~~~~~~ 0.loc"
" ~~ 0.op.loc")

#
# PARSING MODES
#

def test_single_input(self):
self.assertParsesToplevel(
{'ty': 'Interactive', 'body': []},
"\n",
"",
mode='single_input', interactive=True)

self.assertParsesToplevel(
{'ty': 'Interactive', 'body': [
{'ty': 'Expr', 'value': self.ast_1}
]},
"1\n",
"",
mode='single_input', interactive=True)

self.assertParsesToplevel(
{'ty': 'Interactive', 'body': [
{'ty': 'If', 'test': self.ast_x, 'body': [
{'ty': 'Expr', 'value': self.ast_1}
], 'orelse': []}
]},
"if x: 1\n\n",
"",
mode='single_input', interactive=True)

def test_file_input(self):
self.assertParsesToplevel(
{'ty': 'Module', 'body': []},
"\n",
"",
mode='file_input', interactive=True)

def test_eval_input(self):
self.assertParsesToplevel(
{'ty': 'Expression', 'body': [self.ast_1]},
"1\n",
"",
mode='eval_input', interactive=True)