Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: c8c29b356b9d
Choose a base ref
...
head repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 6eca9e698cd0
Choose a head ref
  • 2 commits
  • 15 files changed
  • 1 contributor

Commits on Apr 3, 2015

  1. Copy the full SHA
    f8dd2b4 View commit details
  2. Reproduce Python 3.0 lexer more strictly.

    whitequark committed Apr 3, 2015
    Copy the full SHA
    6eca9e6 View commit details
11 changes: 9 additions & 2 deletions pyparser/lexer.py
Original file line number Diff line number Diff line change
@@ -99,6 +99,12 @@ def __init__(self, source_buffer, version):
re_keywords = "|".join([kw for kw in re_reserved if kw.isalnum()])
re_operators = "|".join([re.escape(op) for op in re_reserved if not op.isalnum()])

# Python 3.0 uses ID_Start, >3.0 uses XID_Start
if self.version == (3, 0):
id_xid = ""
else:
id_xid = "X"

# To speed things up on CPython, we use the re module to generate a DFA
# from our token set and execute it in C. Every result yielded by
# iterating this regular expression has exactly one non-empty group
@@ -145,10 +151,11 @@ def __init__(self, source_buffer, version):
)
| ((?:{keywords})\b|{operators}) # 21 keywords and operators
| ([A-Za-z_][A-Za-z0-9_]*\b) # 22 identifier
| (\p{{XID_Start}}\p{{XID_Continue}}*) # 23 Unicode identifier
| (\p{{{id_xid}ID_Start}}\p{{{id_xid}ID_Continue}}*) # 23 Unicode identifier
| ($) # 24 end-of-file
)
""".format(keywords=re_keywords, operators=re_operators), re.VERBOSE|re.UNICODE)
""".format(keywords=re_keywords, operators=re_operators,
id_xid=id_xid), re.VERBOSE|re.UNICODE)

# These are identical for all lexer instances.
_lex_escape_re = re.compile(r"""
32 changes: 32 additions & 0 deletions upstream-doc/grammar-diff-2.6-2.7.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
--- 2.6 2015-04-03 11:08:37.607410329 +0300
+++ 2.7 2015-04-03 11:08:49.663443774 +0300
@@ -83,2 +71,2 @@
-with_stmt: 'with' test [ with_var ] ':' suite
-with_var: 'as' expr
+with_stmt: 'with' with_item (',' with_item)* ':' suite
+with_item: test ['as' expr]
@@ -115 +103 @@
-atom: ('(' [yield_expr|testlist_gexp] ')' |
+atom: ('(' [yield_expr|testlist_comp] ')' |
@@ -117 +105 @@
- '{' [dictmaker] '}' |
+ '{' [dictorsetmaker] '}' |
@@ -121 +109 @@
-testlist_gexp: test ( gen_for | (',' test)* [','] )
+testlist_comp: test ( comp_for | (',' test)* [','] )
@@ -129 +117,2 @@
-dictmaker: test ':' test (',' test ':' test)* [',']
+dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+ (test (comp_for | (',' test)* [','])) )
@@ -136 +125,3 @@
-argument: test [gen_for] | test '=' test # Really [keyword '='] test
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+argument: test [comp_for] | test '=' test
@@ -142,3 +133,3 @@
-gen_iter: gen_for | gen_if
-gen_for: 'for' exprlist 'in' or_test [gen_iter]
-gen_if: 'if' old_test [gen_iter]
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_if: 'if' old_test [comp_iter]
97 changes: 97 additions & 0 deletions upstream-doc/grammar-diff-2.7-3.0.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
--- 2.7 2015-04-03 11:08:49.663443774 +0300
+++ 3.0 2015-04-03 11:09:02.483479332 +0300
@@ -25,7 +25,10 @@
-funcdef: 'def' NAME parameters ':' suite
-parameters: '(' [varargslist] ')'
-varargslist: ((fpdef ['=' test] ',')*
- ('*' NAME [',' '**' NAME] | '**' NAME) |
- fpdef ['=' test] (',' fpdef ['=' test])* [','])
-fpdef: NAME | '(' fplist ')'
-fplist: fpdef (',' fpdef)* [',']
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: ((tfpdef ['=' test] ',')*
+ ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
+ | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+tfpdef: NAME [':' test]
+varargslist: ((vfpdef ['=' test] ',')*
+ ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
+ | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+vfpdef: NAME
@@ -35,2 +38,2 @@
-small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
- import_stmt | global_stmt | exec_stmt | assert_stmt)
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+ import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
@@ -42,2 +44,0 @@
-print_stmt: 'print' ( [ test (',' test)* [','] ] |
- '>>' test [ (',' test)+ [','] ] )
@@ -51 +52 @@
-raise_stmt: 'raise' [test [',' test [',' test]]]
+raise_stmt: 'raise' [test ['from' test]]
@@ -54 +55,2 @@
-import_from: ('from' ('.'* dotted_name | '.'+)
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
@@ -62 +64 @@
-exec_stmt: 'exec' expr ['in' test [',' test]]
+nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
@@ -71,5 +73,5 @@
-with_stmt: 'with' with_item (',' with_item)* ':' suite
-with_item: test ['as' expr]
+with_stmt: 'with' test [ with_var ] ':' suite
+with_var: 'as' expr
@@ -77 +79 @@
-except_clause: 'except' [test [('as' | ',') test]]
+except_clause: 'except' [test ['as' NAME]]
@@ -80,9 +81,0 @@
-# Backward compatibility cruft to support:
-# [ x for x in lambda: True, lambda: False if x() ]
-# even while also allowing:
-# lambda x: 5 if x else 2
-# (But not a mix of the two)
-testlist_safe: old_test [(',' old_test)+ [',']]
-old_test: or_test | old_lambdef
-old_lambdef: 'lambda' [varargslist] ':' old_test
-
@@ -89,0 +83,3 @@
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
@@ -93,2 +89,3 @@
-comparison: expr (comp_op expr)*
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+comparison: star_expr (comp_op star_expr)*
+comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: ['*'] expr
@@ -104 +101 @@
- '[' [listmaker] ']' |
+ '[' [testlist_comp] ']' |
@@ -106,3 +103 @@
- '`' testlist1 '`' |
- NAME | NUMBER | STRING+)
-listmaker: test ( list_for | (',' test)* [','] )
+ NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
@@ -110 +104,0 @@
-lambdef: 'lambda' [varargslist] ':' test
@@ -113 +107 @@
-subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
+subscript: test | [test] ':' [test] [sliceop]
@@ -115 +109 @@
-exprlist: expr (',' expr)* [',']
+exprlist: star_expr (',' star_expr)* [',']
@@ -120 +114 @@
-classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
@@ -125,7 +119 @@
-# The reason that keywords are test nodes instead of NAME is that using NAME
-# results in an ambiguity. ast.c makes sure it's a NAME.
-argument: test [comp_for] | test '=' test
-
-list_iter: list_for | list_if
-list_for: 'for' exprlist 'in' testlist_safe [list_iter]
-list_if: 'if' old_test [list_iter]
+argument: test [comp_for] | test '=' test # Really [keyword '='] test
@@ -135 +123 @@
-comp_if: 'if' old_test [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
13 changes: 13 additions & 0 deletions upstream-doc/grammar-diff-3.0-3.1.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
--- 3.0 2015-04-03 11:09:02.483479332 +0300
+++ 3.1 2015-04-03 11:09:12.495507099 +0300
@@ -76,2 +76,2 @@
-with_stmt: 'with' test [ with_var ] ':' suite
-with_var: 'as' expr
+with_stmt: 'with' with_item (',' with_item)* ':' suite
+with_item: test ['as' expr]
@@ -90 +90 @@
-comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
@@ -118,0 +119,2 @@
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
36 changes: 36 additions & 0 deletions upstream-doc/grammar-diff-3.1-3.2.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
--- 3.1 2015-04-03 11:09:12.495507099 +0300
+++ 3.2 2015-04-03 11:09:22.835535774 +0300
@@ -27,3 +27,3 @@
-typedargslist: ((tfpdef ['=' test] ',')*
- ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
- | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
+ ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
+ | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
@@ -31,3 +31,3 @@
-varargslist: ((vfpdef ['=' test] ',')*
- ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
- | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
+ ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
+ | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
@@ -40,2 +40,3 @@
-expr_stmt: testlist (augassign (yield_expr|testlist) |
- ('=' (yield_expr|testlist))*)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+ ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
@@ -89 +90,3 @@
-comparison: star_expr (comp_op star_expr)*
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401
@@ -104 +107 @@
-testlist_comp: test ( comp_for | (',' test)* [','] )
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
@@ -109 +112 @@
-exprlist: star_expr (',' star_expr)* [',']
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
@@ -125,2 +127,0 @@
-
-testlist1: test (',' test)*
6 changes: 6 additions & 0 deletions upstream-doc/grammar-diff-3.2-3.3.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--- 3.2 2015-04-03 11:09:22.835535774 +0300
+++ 3.3 2015-04-03 11:09:38.243578496 +0300
@@ -132 +132,2 @@
-yield_expr: 'yield' [testlist]
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
Empty file.
8 changes: 8 additions & 0 deletions upstream-doc/grammar-diff-3.4-3.5.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--- 3.4 2015-04-03 11:09:47.843605113 +0300
+++ 3.5 2015-04-03 11:09:57.671632359 +0300
@@ -43 +43 @@
-augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
@@ -100 +100 @@
-term: factor (('*'|'/'|'%'|'//') factor)*
+term: factor (('*'|'@'|'/'|'%'|'//') factor)*
15 changes: 15 additions & 0 deletions upstream-doc/lexer-diff-2.6-2.7.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
--- 2.6 2015-04-03 11:00:34.794067739 +0300
+++ 2.7 2015-04-03 10:54:38.481072544 +0300
@@ -125 +125,3 @@
-Changed in version 2.5: Both as and with are only recognized when the with_statement future feature has been enabled. It will always be enabled in Python 2.6. See section The with statement for details. Note that using as and with as identifiers will always issue a warning, even when the with_statement future directive is not in effect.
+Changed in version 2.5: Using as and with as identifiers triggers a warning. To use them as keywords, enable the with_statement future feature .
+
+Changed in version 2.6: as and with are full keywords.
@@ -145,0 +148 @@
+ | "b" | "B" | "br" | "Br" | "bR" | "BR"
@@ -156 +159 @@
-In plain English: String literals can be enclosed in matching single quotes (') or double quotes ("). They can also be enclosed in matching groups of three single or double quotes (these are generally referred to as triple-quoted strings). The backslash (\) character is used to escape characters that otherwise have a special meaning, such as newline, backslash itself, or the quote character. String literals may optionally be prefixed with a letter 'r' or 'R'; such strings are called raw strings and use different rules for interpreting backslash escape sequences. A prefix of 'u' or 'U' makes the string a Unicode string. Unicode strings use the Unicode character set as defined by the Unicode Consortium and ISO 10646. Some additional escape sequences, described below, are available in Unicode strings. The two prefix characters may be combined; in this case, 'u' must appear before 'r'.
+In plain English: String literals can be enclosed in matching single quotes (') or double quotes ("). They can also be enclosed in matching groups of three single or double quotes (these are generally referred to as triple-quoted strings). The backslash (\) character is used to escape characters that otherwise have a special meaning, such as newline, backslash itself, or the quote character. String literals may optionally be prefixed with a letter 'r' or 'R'; such strings are called raw strings and use different rules for interpreting backslash escape sequences. A prefix of 'u' or 'U' makes the string a Unicode string. Unicode strings use the Unicode character set as defined by the Unicode Consortium and ISO 10646. Some additional escape sequences, described below, are available in Unicode strings. A prefix of 'b' or 'B' is ignored in Python 2; it indicates that the literal should become a bytes literal in Python 3 (e.g. when code is automatically converted with 2to3). A 'u' or 'b' prefix may be followed by an 'r' prefix.
@@ -182 +185 @@
-Any Unicode character can be encoded this way, but characters outside the Basic Multilingual Plane (BMP) will be encoded using a surrogate pair if Python is compiled to use 16-bit code units (the default). Individual code units which form parts of a surrogate pair can be encoded using this escape sequence.
+Any Unicode character can be encoded this way, but characters outside the Basic Multilingual Plane (BMP) will be encoded using a surrogate pair if Python is compiled to use 16-bit code units (the default).
Loading