Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 44644f2880dc
Choose a base ref
...
head repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 9178a50930d0
Choose a head ref
  • 6 commits
  • 3 files changed
  • 1 contributor

Commits on May 7, 2015

  1. Improve testbench.

    whitequark committed May 7, 2015
    Copy the full SHA
    1fcdcc8 View commit details
  2. Fix a namespace collision.

    whitequark committed May 7, 2015
    Copy the full SHA
    ba810c5 View commit details
  3. Implement from __future__ import print_function.

    whitequark committed May 7, 2015
    Copy the full SHA
    5517553 View commit details
  4. Fix call argument parsing.

    whitequark committed May 7, 2015
    Copy the full SHA
    d1c9edf View commit details
  5. Implement string concatenation.

    whitequark committed May 7, 2015
    Copy the full SHA
    ccb652f View commit details
  6. Copy the full SHA
    9178a50 View commit details
Showing with 109 additions and 35 deletions.
  1. +5 −1 pyparser/lexer.py
  2. +61 −34 pyparser/parser.py
  3. +43 −0 pyparser/test/test_parser.py
6 changes: 5 additions & 1 deletion pyparser/lexer.py
Original file line number Diff line number Diff line change
@@ -97,6 +97,7 @@ def __init__(self, source_buffer, version, interactive=False):
self.source_buffer = source_buffer
self.version = version
self.interactive = interactive
self.print_function = False

self.offset = 0
self.new_line = True
@@ -364,7 +365,10 @@ def _refill(self, eof_token):
elif match.group(21) is not None: # keywords and operators
kwop = match.group(21)
self._match_pair_delim(tok_range, kwop)
self.queue.append(Token(tok_range, kwop))
if kwop == 'print' and self.print_function:
self.queue.append(Token(tok_range, "ident", "print"))
else:
self.queue.append(Token(tok_range, kwop))

elif match.group(22) is not None: # identifier
self.queue.append(Token(tok_range, "ident", match.group(22)))
95 changes: 61 additions & 34 deletions pyparser/parser.py
Original file line number Diff line number Diff line change
@@ -352,26 +352,27 @@ class Parser:

# Generic LL parsing methods
def __init__(self, lexer):
self.lexer = lexer
self.tokens = list(lexer) + [self.lexer.next(eof_token=True)]
self.index = 0
self.token = self.tokens[self.index]
self.lexer = lexer
self._tokens = []
self._index = -1
self._advance()

def _save(self):
return self.index
return self._index

def _restore(self, data):
self.index = data
self.token = self.tokens[self.index]
self._index = data
self._token = self._tokens[self._index]

def _advance(self):
if self.index < len(self.tokens) - 1:
self.index += 1
self.token = self.tokens[self.index]
self._index += 1
if self._index == len(self._tokens):
self._tokens.append(self.lexer.next(eof_token=True))
self._token = self._tokens[self._index]

def _accept(self, expected_kind):
if self.token.kind == expected_kind:
result = self.token
if self._token.kind == expected_kind:
result = self._token
self._advance()
return result
return unmatched
@@ -399,6 +400,10 @@ def _empty_arglist(self):
star_loc=None, dstar_loc=None, loc=None)

# Python-specific methods
def add_flags(self, flags):
if 'print_function' in flags:
self.lexer.print_function = True

@action(Alt(Newline(),
Rule('simple_stmt'),
SeqN(0, Rule('compound_stmt'), Newline())))
@@ -471,17 +476,18 @@ def varargslist_2(self, dstar_loc, kwarg_tok):
return ast.arguments(args=[], defaults=[], vararg=None, kwarg=kwarg_tok.value,
star_loc=None, vararg_loc=None,
dstar_loc=dstar_loc, kwarg_loc=kwarg_tok.loc,
begin_loc=None, end_loc=None, equals_locs=[], loc=kwarg_tok.loc)
begin_loc=None, end_loc=None, equals_locs=[],
loc=dstar_loc.join(kwarg_tok.loc))

@action(Seq(Loc('*'), Tok('ident'),
Opt(Seq(Tok(','), Loc('**'), Tok('ident')))))
def varargslist_3(self, star_loc, vararg_tok, kwarg_opt):
dstar_loc = kwarg = kwarg_loc = None
loc = vararg_tok.loc
loc = star_loc.join(vararg_tok.loc)
if kwarg_opt:
_, dstar_loc, kwarg_tok = kwarg_opt
kwarg, kwarg_loc = kwarg_tok.value, kwarg_tok.loc
loc = kwarg_tok.loc
loc = star_loc.join(kwarg_tok.loc)
return ast.arguments(args=[], defaults=[], vararg=vararg_tok.value, kwarg=kwarg,
star_loc=star_loc, vararg_loc=vararg_tok.loc,
dstar_loc=dstar_loc, kwarg_loc=kwarg_loc,
@@ -519,7 +525,7 @@ def fparam_loc(fparam, default_opt):

if args.loc is None:
args.loc = fparam_loc(*fparams[0]).join(fparam_loc(*fparams[-1]))
else:
elif len(fparams) > 0:
args.loc = args.loc.join(fparam_loc(*fparams[0]))

return args
@@ -718,6 +724,10 @@ def import_from(self, from_loc, module_name, import_loc, names):
loc = from_loc.join(names[-1].loc)
if rparen_loc:
loc = loc.join(rparen_loc)

if module == '__future__':
self.add_flags([x.name for x in names])

return ast.ImportFrom(names=names, module=module, level=len(dots),
keyword_loc=from_loc, dots_loc=dots_loc, module_loc=module_loc,
import_loc=import_loc, lparen_loc=lparen_loc, rparen_loc=rparen_loc,
@@ -1065,24 +1075,29 @@ def power(self, atom, trailers, factor_opt):
loc=atom.loc.join(factor.loc))
return atom

@action(Rule('testlist1'))
def atom_1(self, expr):
return ast.Repr(value=expr, loc=None)

@action(Tok('ident'))
def atom_1(self, tok):
def atom_2(self, tok):
return ast.Name(id=tok.value, loc=tok.loc, ctx=None)

@action(Alt(Tok('int'), Tok('float'), Tok('complex')))
def atom_2(self, tok):
def atom_3(self, tok):
return ast.Num(n=tok.value, loc=tok.loc)

# TODO: does not handle string concatenation
@action(Seq(Tok('strbegin'), Tok('strdata'), Tok('strend')))
def atom_3(self, begin_tok, data_tok, end_tok):
def atom_4(self, begin_tok, data_tok, end_tok):
return ast.Str(s=data_tok.value,
begin_loc=begin_tok.loc, end_loc=end_tok.loc,
loc=begin_tok.loc.join(end_tok.loc))

@action(Rule('testlist1'))
def atom_4(self, expr):
return ast.Repr(value=expr, loc=None)
@action(Plus(atom_4))
def atom_5(self, strings):
return ast.Str(s=''.join([x.s for x in strings]),
begin_loc=strings[0].begin_loc, end_loc=strings[-1].end_loc,
loc=strings[0].loc.join(strings[-1].loc))

atom = Alt(BeginEnd('(', Opt(Alt(Rule('yield_expr'), Rule('testlist_gexp'))), ')',
empty=lambda: ast.Tuple(elts=[], ctx=None, loc=None)),
@@ -1091,8 +1106,8 @@ def atom_4(self, expr):
BeginEnd('{', Opt(Rule('dictmaker')), '}',
empty=lambda: ast.Dict(keys=[], values=[], colon_locs=[],
ctx=None, loc=None)),
BeginEnd('`', atom_4, '`'),
atom_1, atom_2, atom_3)
BeginEnd('`', atom_1, '`'),
atom_2, atom_3, atom_5)
"""atom: ('(' [yield_expr|testlist_gexp] ')' |
'[' [listmaker] ']' |
'{' [dictmaker] '}' |
@@ -1294,7 +1309,7 @@ def arglist(self, pre_args, rest):
for arg in pre_args + post_args:
if isinstance(arg, ast.keyword):
call.keywords.append(arg)
elif len(call.args) > 0:
elif len(call.keywords) > 0:
error = diagnostic.Diagnostic(
"error", "non-keyword arg after keyword arg", {}, arg.loc)
raise diagnostic.DiagnosticException(error)
@@ -1384,15 +1399,27 @@ def yield_expr(self, stmt_loc, exprs):
return ast.Yield(value=exprs,
yield_loc=stmt_loc, loc=stmt_loc.join(exprs.loc))

def for_code(code, version=(2,7)):
import sys

def for_code(code, version=sys.version_info[0:2]):
return Parser(lexer.Lexer(source.Buffer(code), version))

if __name__ == "__main__":
import sys, time, codecs
def main():
import time, codecs
for filename in sys.argv[1:]:
with codecs.open(filename, encoding='utf-8') as f:
start = time.time()
ast = for_code(f.read()).file_input()
interval = time.time() - start
print(ast)
print("elapsed: %.2f" % interval, file=sys.stderr)
input = f.read()
try:
start = time.time()
root = for_code(input).file_input()
interval = time.time() - start

print(root)
print("elapsed: %.2f (%.2f kb/s)" % (interval, len(input)/interval/1000),
file=sys.stderr)
except diagnostic.DiagnosticException as e:
print(e.render,
file=sys.stderr)

if __name__ == "__main__":
main()
43 changes: 43 additions & 0 deletions pyparser/test/test_parser.py
Original file line number Diff line number Diff line change
@@ -206,6 +206,13 @@ def test_string(self):
"^ begin_loc"
" ^ end_loc")

self.assertParsesExpr(
{'ty': 'Str', 's': 'foobar'},
"'foo' 'bar'",
"~~~~~~~~~~~ loc"
"^ begin_loc"
" ^ end_loc")

def test_ident(self):
self.assertParsesExpr(
{'ty': 'Name', 'id': 'foo', 'ctx': None},
@@ -617,6 +624,12 @@ def test_call(self):
" ^ end_loc"
"~~~ loc")

self.assertParsesExpr(
{'ty': 'Call', 'func': self.ast_x, 'starargs': None, 'kwargs': None,
'args': [self.ast_y, self.ast_z], 'keywords': []},
"x(y, z)",
"~~~~~~~ loc")

self.assertParsesExpr(
{'ty': 'Call', 'func': self.ast_x, 'starargs': None, 'kwargs': None,
'args': [self.ast_y], 'keywords': [
@@ -1385,6 +1398,14 @@ def test_args(self):
"x, y",
"~~~~ loc")

self.assertParsesArgs(
{'ty': 'arguments', 'args': [], 'defaults': [],
'vararg': 'y', 'kwarg': None},
"*y",
"^ star_loc"
" ~ vararg_loc"
"~~ loc")

self.assertParsesArgs(
{'ty': 'arguments', 'args': [self.ast_x], 'defaults': [],
'vararg': 'y', 'kwarg': None},
@@ -1393,6 +1414,14 @@ def test_args(self):
" ~ vararg_loc"
"~~~~~ loc")

self.assertParsesArgs(
{'ty': 'arguments', 'args': [], 'defaults': [],
'vararg': None, 'kwarg': 'y'},
"**y",
"^^ dstar_loc"
" ~ kwarg_loc"
"~~~ loc")

self.assertParsesArgs(
{'ty': 'arguments', 'args': [self.ast_x], 'defaults': [],
'vararg': None, 'kwarg': 'y'},
@@ -1477,3 +1506,17 @@ def test_eval_input(self):
{'ty': 'Expression', 'body': [self.ast_1]},
"1·",
mode='eval_input', interactive=True)

#
# FUTURE IMPORTS
#

def test_future_print(self):
self.assertParsesSuite(
[{'ty': 'ImportFrom',
'names': [{'ty': 'alias', 'name': 'print_function', 'asname': None}],
'module': '__future__', 'level': 0},
{'ty': 'Expr', 'value':
{'ty': 'Call', 'func': {'ty': 'Name', 'id': 'print', 'ctx': None},
'starargs': None, 'kwargs': None, 'args': [self.ast_x], 'keywords': []}}],
"from __future__ import print_function·print(x)")