Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: e6b50820b3a0
Choose a base ref
...
head repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 4ba633b76505
Choose a head ref
  • 2 commits
  • 6 files changed
  • 1 contributor

Commits on Apr 2, 2015

  1. Implement Python 3 compliant integer lexing.

    whitequark committed Apr 2, 2015

    Verified

    This commit was signed with the committer’s verified signature. The key has expired.
    ondrejmirtes Ondřej Mirtes
    Copy the full SHA
    7b74ff2 View commit details
  2. Make use of unicode_literals future import.

    whitequark committed Apr 2, 2015

    Verified

    This commit was signed with the committer’s verified signature. The key has expired.
    ondrejmirtes Ondřej Mirtes
    Copy the full SHA
    4ba633b View commit details
Showing with 253 additions and 194 deletions.
  1. +6 −6 pyparser/diagnostic.py
  2. +86 −49 pyparser/lexer.py
  3. +1 −1 pyparser/source.py
  4. +8 −8 pyparser/test/test_diagnostic.py
  5. +141 −119 pyparser/test/test_lexer.py
  6. +11 −11 pyparser/test/test_source.py
12 changes: 6 additions & 6 deletions pyparser/diagnostic.py
Original file line number Diff line number Diff line change
@@ -68,18 +68,18 @@ def render(self):
x + (1 + "a")
~ ^ ~~~
"""
source_line = self.location.source_line().rstrip(u"\n")
highlight_line = bytearray(u" ", 'utf-8') * len(source_line)
source_line = self.location.source_line().rstrip("\n")
highlight_line = bytearray(" ", 'utf-8') * len(source_line)

for hilight in self.highlights:
lft, rgt = hilight.column_range()
highlight_line[lft:rgt] = bytearray(u"~", 'utf-8') * hilight.size()
highlight_line[lft:rgt] = bytearray("~", 'utf-8') * hilight.size()

lft, rgt = self.location.column_range()
highlight_line[lft:rgt] = bytearray(u"^", 'utf-8') * self.location.size()
highlight_line[lft:rgt] = bytearray("^", 'utf-8') * self.location.size()

return [
u"%s: %s: %s" % (str(self.location), self.level, self.message()),
"%s: %s: %s" % (str(self.location), self.level, self.message()),
source_line,
highlight_line.decode('utf-8')
]
@@ -96,4 +96,4 @@ def __init__(self, diagnostic):

def __str__(self):
return "\n".join(self.diagnostic.render() +
reduce(list.__add__, map(Diagnostic.render, self.diagnostic.notes)))
reduce(list.__add__, map(Diagnostic.render, self.diagnostic.notes), []))
135 changes: 86 additions & 49 deletions pyparser/lexer.py
Original file line number Diff line number Diff line change
@@ -22,21 +22,21 @@ class Lexer:
"""

_reserved_2_6 = frozenset([
u'!=', u'%', u'%=', u'&', u'&=', u'(', u')', u'*', u'**', u'**=', u'*=', u'+', u'+=',
u',', u'-', u'-=', u'.', u'/', u'//', u'//=', u'/=', u':', u';', u'<', u'<<', u'<<=',
u'<=', u'<>', u'=', u'==', u'>', u'>=', u'>>', u'>>=', u'@', u'[', u']', u'^', u'^=', u'`',
u'and', u'as', u'assert', u'break', u'class', u'continue', u'def', u'del', u'elif',
u'else', u'except', u'exec', u'finally', u'for', u'from', u'global', u'if', u'import',
u'in', u'is', u'lambda', u'not', u'or', u'pass', u'print', u'raise', u'return', u'try',
u'while', u'with', u'yield', u'{', u'|', u'|=', u'}', u'~'
"!=", "%", "%=", "&", "&=", "(", ")", "*", "**", "**=", "*=", "+", "+=",
",", "-", "-=", ".", "/", "//", "//=", "/=", ":", ";", "<", "<<", "<<=",
"<=", "<>", "=", "==", ">", ">=", ">>", ">>=", "@", "[", "]", "^", "^=", "`",
"and", "as", "assert", "break", "class", "continue", "def", "del", "elif",
"else", "except", "exec", "finally", "for", "from", "global", "if", "import",
"in", "is", "lambda", "not", "or", "pass", "print", "raise", "return", "try",
"while", "with", "yield", "{", "|", "|=", "}", "~"
])

_reserved_3_0 = _reserved_2_6 \
- set([u'<>', u'`', u'exec', u'print']) \
| set([u'->', u'...', u'False', u'None', u'nonlocal', u'True'])
- set(["<>", "`", "exec", "print"]) \
| set(["->", "...", "False", "None", "nonlocal", "True"])

_reserved_3_1 = _reserved_3_0 \
| set([u'<>'])
| set(["<>"])

_reserved = {
(2, 6): _reserved_2_6,
@@ -54,6 +54,8 @@ class Lexer:

def __init__(self, source_buffer, version):
self.source_buffer = source_buffer
self.version = version

self.offset = 0
self.comments = []
self.queue = []
@@ -84,7 +86,7 @@ def __init__(self, source_buffer, version):
# otherwise grab all keywords; it is made to work by making it impossible
# for the keyword case to match a word prefix, and ordering it before
# the identifier case.
self.lex_token = re.compile(u"""
self.lex_token = re.compile("""
[ \t\f]* # initial whitespace
( # 1
(\\\\)? # ?2 line continuation
@@ -99,16 +101,17 @@ def __init__(self, source_buffer, version):
) ([jJ])? # ?6 complex suffix
| ([0-9]+) [jJ] # 7 complex literal
| (?: # integer literal
( [1-9] [0-9]* ) # 8 dec
| 0[oO]? ( [0-7]+ ) # 9 oct
| 0[xX] ( [0-9A-Fa-f]+ ) # 10 hex
| 0[bB] ( [01]+ ) # 11 bin
( [1-9] [0-9]* ) # 8 dec
| 0[oO] ( [0-7]+ ) # 9 oct
| 0[xX] ( [0-9A-Fa-f]+ ) # 10 hex
| 0[bB] ( [01]+ ) # 11 bin
| ( [0-9] [0-9]* ) # 12 bare oct
)
[Ll]?
| ([BbUu]?[Rr]?) # ?12 string literal options
(""\"|"|'''|') # 13 string literal start
| ((?:{keywords})\\b|{operators}) # 14 keywords and operators
| ([A-Za-z_][A-Za-z0-9_]*) # 15 identifier
| ([BbUu]?[Rr]?) # ?13 string literal options
(""\"|"|'''|') # 14 string literal start
| ((?:{keywords})\\b|{operators}) # 15 keywords and operators
| ([A-Za-z_][A-Za-z0-9_]*) # 16 identifier
)
""".format(keywords=re_keywords, operators=re_operators), re.VERBOSE)

@@ -142,8 +145,8 @@ def _lex(self):
self.source_buffer.source, self.offset)
if match is None:
diag = diagnostic.Diagnostic(
"fatal", u"unexpected {character}",
{"character": repr(self.source_buffer.source[self.offset]).lstrip(u"u")},
"fatal", "unexpected {character}",
{"character": repr(self.source_buffer.source[self.offset]).lstrip("u")},
source.Range(self.source_buffer, self.offset, self.offset + 1))
raise diagnostic.DiagnosticException(diag)
self.offset = match.end(0)
@@ -157,70 +160,104 @@ def _lex(self):
# 2.1.5. Explicit line joining
return self._lex()
return tok_range, "newline", None

elif match.group(4) is not None: # comment
self.comments.append((tok_range, match.group(4)))
return self._lex()

elif match.group(5) is not None: # floating point or complex literal
if match.group(6) is None:
return tok_range, "float", float(match.group(5))
else:
return tok_range, "complex", float(match.group(5)) * 1j

elif match.group(7) is not None: # complex literal
return tok_range, "complex", int(match.group(7)) * 1j

elif match.group(8) is not None: # integer literal, dec
return tok_range, "int", int(match.group(8))
literal = match.group(8)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal)

elif match.group(9) is not None: # integer literal, oct
return tok_range, "int", int(match.group(9), 8)
literal = match.group(9)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal, 8)

elif match.group(10) is not None: # integer literal, hex
return tok_range, "int", int(match.group(10), 16)
literal = match.group(10)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal, 16)

elif match.group(11) is not None: # integer literal, bin
return tok_range, "int", int(match.group(11), 2)
elif match.group(13) is not None: # string literal start
options = match.group(12).lower()
return tok_range, match.group(13), options
elif match.group(14) is not None: # keywords and operators
self._match_pair_delim(tok_range, match.group(14))
return tok_range, match.group(14), None
elif match.group(15) is not None: # identifier
return tok_range, "ident", match.group(15)
else:
assert False
literal = match.group(11)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal, 2)

elif match.group(12) is not None: # integer literal, bare oct
literal = match.group(12)
if len(literal) > 1 and self.version >= (3, 0):
error = diagnostic.Diagnostic(
"error", "in Python 3, decimal literals must not start with a zero", {},
source.Range(self.source_buffer, tok_range.begin_pos, tok_range.begin_pos + 1))
raise diagnostic.DiagnosticException(error)
return tok_range, "int", int(literal, 8)

elif match.group(14) is not None: # string literal start
options = match.group(13).lower()
return tok_range, match.group(14), options

elif match.group(15) is not None: # keywords and operators
self._match_pair_delim(tok_range, match.group(15))
return tok_range, match.group(15), None

elif match.group(16) is not None: # identifier
return tok_range, "ident", match.group(16)

assert False

def _check_long_literal(self, range, literal):
if literal[-1] in "lL" and self.version >= (3, 0):
error = diagnostic.Diagnostic(
"error", "in Python 3, long integer literals were removed", {},
source.Range(self.source_buffer, range.end_pos - 1, range.end_pos))
raise diagnostic.DiagnosticException(error)

def _match_pair_delim(self, range, kwop):
if kwop == '(':
if kwop == "(":
self.parentheses.append(range)
elif kwop == '[':
elif kwop == "[":
self.square_braces.append(range)
elif kwop == '{':
elif kwop == "{":
self.curly_braces.append(range)
elif kwop == ')':
self._check_innermost_pair_delim(range, '(')
elif kwop == ")":
self._check_innermost_pair_delim(range, "(")
self.parentheses.pop()
elif kwop == ']':
self._check_innermost_pair_delim(range, '[')
elif kwop == "]":
self._check_innermost_pair_delim(range, "[")
self.square_braces.pop()
elif kwop == '}':
self._check_innermost_pair_delim(range, '{')
elif kwop == "}":
self._check_innermost_pair_delim(range, "{")
self.curly_braces.pop()

def _check_innermost_pair_delim(self, range, expected):
ranges = []
if len(self.parentheses) > 0:
ranges.append(('(', self.parentheses[-1]))
ranges.append(("(", self.parentheses[-1]))
if len(self.square_braces) > 0:
ranges.append(('[', self.square_braces[-1]))
ranges.append(("[", self.square_braces[-1]))
if len(self.curly_braces) > 0:
ranges.append(('{', self.curly_braces[-1]))
ranges.append(("{", self.curly_braces[-1]))

ranges.sort(key=lambda k: k[1].begin_pos)
compl_kind, compl_range = ranges[-1]
if compl_kind != expected:
note = diagnostic.Diagnostic(
"note", u"'{delimiter}' opened here",
"note", "'{delimiter}' opened here",
{"delimiter": compl_kind},
compl_range)
error = diagnostic.Diagnostic(
"fatal", u"mismatched '{delimiter}'",
"fatal", "mismatched '{delimiter}'",
{"delimiter": range.source()},
range, notes=[note])
raise diagnostic.DiagnosticException(error)
2 changes: 1 addition & 1 deletion pyparser/source.py
Original file line number Diff line number Diff line change
@@ -60,7 +60,7 @@ def _extract_line_begins(self):
self._line_begins = [0]
index = None
while True:
index = self.source.find(u"\n", index) + 1
index = self.source.find("\n", index) + 1
if index == 0:
return self._line_begins
self._line_begins.append(index)
16 changes: 8 additions & 8 deletions pyparser/test/test_diagnostic.py
Original file line number Diff line number Diff line change
@@ -5,23 +5,23 @@
class DiagnosticTestCase(unittest.TestCase):

def setUp(self):
self.buffer = source.Buffer(u"x + (1 + 'a')\n")
self.buffer = source.Buffer("x + (1 + 'a')\n")

def test_message(self):
diag = diagnostic.Diagnostic(
"error", u"{x} doesn't work", {"x": "everything"},
"error", "{x} doesn't work", {"x": "everything"},
source.Range(self.buffer, 0, 0))
self.assertEqual(u"everything doesn't work", diag.message())
self.assertEqual("everything doesn't work", diag.message())

def test_render(self):
diag = diagnostic.Diagnostic(
"error", u"cannot add {lft} and {rgt}",
{"lft": u"integer", "rgt": u"string"},
"error", "cannot add {lft} and {rgt}",
{"lft": "integer", "rgt": "string"},
source.Range(self.buffer, 7, 8),
[source.Range(self.buffer, 5, 6),
source.Range(self.buffer, 9, 12)])
self.assertEqual(
[u"<input>:1:8: error: cannot add integer and string",
u"x + (1 + 'a')",
u" ~ ^ ~~~ "],
["<input>:1:8: error: cannot add integer and string",
"x + (1 + 'a')",
" ~ ^ ~~~ "],
diag.render())
Loading