Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: e6b50820b3a0
Choose a base ref
...
head repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 4ba633b76505
Choose a head ref
  • 2 commits
  • 6 files changed
  • 1 contributor

Commits on Apr 2, 2015

  1. Implement Python 3 compliant integer lexing.

    whitequark committed Apr 2, 2015
    Copy the full SHA
    7b74ff2 View commit details
  2. Make use of unicode_literals future import.

    whitequark committed Apr 2, 2015
    Copy the full SHA
    4ba633b View commit details
Showing with 253 additions and 194 deletions.
  1. +6 −6 pyparser/diagnostic.py
  2. +86 −49 pyparser/lexer.py
  3. +1 −1 pyparser/source.py
  4. +8 −8 pyparser/test/test_diagnostic.py
  5. +141 −119 pyparser/test/test_lexer.py
  6. +11 −11 pyparser/test/test_source.py
12 changes: 6 additions & 6 deletions pyparser/diagnostic.py
Original file line number Diff line number Diff line change
@@ -68,18 +68,18 @@ def render(self):
x + (1 + "a")
~ ^ ~~~
"""
source_line = self.location.source_line().rstrip(u"\n")
highlight_line = bytearray(u" ", 'utf-8') * len(source_line)
source_line = self.location.source_line().rstrip("\n")
highlight_line = bytearray(" ", 'utf-8') * len(source_line)

for hilight in self.highlights:
lft, rgt = hilight.column_range()
highlight_line[lft:rgt] = bytearray(u"~", 'utf-8') * hilight.size()
highlight_line[lft:rgt] = bytearray("~", 'utf-8') * hilight.size()

lft, rgt = self.location.column_range()
highlight_line[lft:rgt] = bytearray(u"^", 'utf-8') * self.location.size()
highlight_line[lft:rgt] = bytearray("^", 'utf-8') * self.location.size()

return [
u"%s: %s: %s" % (str(self.location), self.level, self.message()),
"%s: %s: %s" % (str(self.location), self.level, self.message()),
source_line,
highlight_line.decode('utf-8')
]
@@ -96,4 +96,4 @@ def __init__(self, diagnostic):

def __str__(self):
return "\n".join(self.diagnostic.render() +
reduce(list.__add__, map(Diagnostic.render, self.diagnostic.notes)))
reduce(list.__add__, map(Diagnostic.render, self.diagnostic.notes), []))
135 changes: 86 additions & 49 deletions pyparser/lexer.py
Original file line number Diff line number Diff line change
@@ -22,21 +22,21 @@ class Lexer:
"""

_reserved_2_6 = frozenset([
u'!=', u'%', u'%=', u'&', u'&=', u'(', u')', u'*', u'**', u'**=', u'*=', u'+', u'+=',
u',', u'-', u'-=', u'.', u'/', u'//', u'//=', u'/=', u':', u';', u'<', u'<<', u'<<=',
u'<=', u'<>', u'=', u'==', u'>', u'>=', u'>>', u'>>=', u'@', u'[', u']', u'^', u'^=', u'`',
u'and', u'as', u'assert', u'break', u'class', u'continue', u'def', u'del', u'elif',
u'else', u'except', u'exec', u'finally', u'for', u'from', u'global', u'if', u'import',
u'in', u'is', u'lambda', u'not', u'or', u'pass', u'print', u'raise', u'return', u'try',
u'while', u'with', u'yield', u'{', u'|', u'|=', u'}', u'~'
"!=", "%", "%=", "&", "&=", "(", ")", "*", "**", "**=", "*=", "+", "+=",
",", "-", "-=", ".", "/", "//", "//=", "/=", ":", ";", "<", "<<", "<<=",
"<=", "<>", "=", "==", ">", ">=", ">>", ">>=", "@", "[", "]", "^", "^=", "`",
"and", "as", "assert", "break", "class", "continue", "def", "del", "elif",
"else", "except", "exec", "finally", "for", "from", "global", "if", "import",
"in", "is", "lambda", "not", "or", "pass", "print", "raise", "return", "try",
"while", "with", "yield", "{", "|", "|=", "}", "~"
])

_reserved_3_0 = _reserved_2_6 \
- set([u'<>', u'`', u'exec', u'print']) \
| set([u'->', u'...', u'False', u'None', u'nonlocal', u'True'])
- set(["<>", "`", "exec", "print"]) \
| set(["->", "...", "False", "None", "nonlocal", "True"])

_reserved_3_1 = _reserved_3_0 \
| set([u'<>'])
| set(["<>"])

_reserved = {
(2, 6): _reserved_2_6,
@@ -54,6 +54,8 @@ class Lexer:

def __init__(self, source_buffer, version):
self.source_buffer = source_buffer
self.version = version

self.offset = 0
self.comments = []
self.queue = []
@@ -84,7 +86,7 @@ def __init__(self, source_buffer, version):
# otherwise grab all keywords; it is made to work by making it impossible
# for the keyword case to match a word prefix, and ordering it before
# the identifier case.
self.lex_token = re.compile(u"""
self.lex_token = re.compile("""
[ \t\f]* # initial whitespace
( # 1
(\\\\)? # ?2 line continuation
@@ -99,16 +101,17 @@ def __init__(self, source_buffer, version):
) ([jJ])? # ?6 complex suffix
| ([0-9]+) [jJ] # 7 complex literal
| (?: # integer literal
( [1-9] [0-9]* ) # 8 dec
| 0[oO]? ( [0-7]+ ) # 9 oct
| 0[xX] ( [0-9A-Fa-f]+ ) # 10 hex
| 0[bB] ( [01]+ ) # 11 bin
( [1-9] [0-9]* ) # 8 dec
| 0[oO] ( [0-7]+ ) # 9 oct
| 0[xX] ( [0-9A-Fa-f]+ ) # 10 hex
| 0[bB] ( [01]+ ) # 11 bin
| ( [0-9] [0-9]* ) # 12 bare oct
)
[Ll]?
| ([BbUu]?[Rr]?) # ?12 string literal options
(""\"|"|'''|') # 13 string literal start
| ((?:{keywords})\\b|{operators}) # 14 keywords and operators
| ([A-Za-z_][A-Za-z0-9_]*) # 15 identifier
| ([BbUu]?[Rr]?) # ?13 string literal options
(""\"|"|'''|') # 14 string literal start
| ((?:{keywords})\\b|{operators}) # 15 keywords and operators
| ([A-Za-z_][A-Za-z0-9_]*) # 16 identifier
)
""".format(keywords=re_keywords, operators=re_operators), re.VERBOSE)

@@ -142,8 +145,8 @@ def _lex(self):
self.source_buffer.source, self.offset)
if match is None:
diag = diagnostic.Diagnostic(
"fatal", u"unexpected {character}",
{"character": repr(self.source_buffer.source[self.offset]).lstrip(u"u")},
"fatal", "unexpected {character}",
{"character": repr(self.source_buffer.source[self.offset]).lstrip("u")},
source.Range(self.source_buffer, self.offset, self.offset + 1))
raise diagnostic.DiagnosticException(diag)
self.offset = match.end(0)
@@ -157,70 +160,104 @@ def _lex(self):
# 2.1.5. Explicit line joining
return self._lex()
return tok_range, "newline", None

elif match.group(4) is not None: # comment
self.comments.append((tok_range, match.group(4)))
return self._lex()

elif match.group(5) is not None: # floating point or complex literal
if match.group(6) is None:
return tok_range, "float", float(match.group(5))
else:
return tok_range, "complex", float(match.group(5)) * 1j

elif match.group(7) is not None: # complex literal
return tok_range, "complex", int(match.group(7)) * 1j

elif match.group(8) is not None: # integer literal, dec
return tok_range, "int", int(match.group(8))
literal = match.group(8)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal)

elif match.group(9) is not None: # integer literal, oct
return tok_range, "int", int(match.group(9), 8)
literal = match.group(9)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal, 8)

elif match.group(10) is not None: # integer literal, hex
return tok_range, "int", int(match.group(10), 16)
literal = match.group(10)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal, 16)

elif match.group(11) is not None: # integer literal, bin
return tok_range, "int", int(match.group(11), 2)
elif match.group(13) is not None: # string literal start
options = match.group(12).lower()
return tok_range, match.group(13), options
elif match.group(14) is not None: # keywords and operators
self._match_pair_delim(tok_range, match.group(14))
return tok_range, match.group(14), None
elif match.group(15) is not None: # identifier
return tok_range, "ident", match.group(15)
else:
assert False
literal = match.group(11)
self._check_long_literal(tok_range, match.group(1))
return tok_range, "int", int(literal, 2)

elif match.group(12) is not None: # integer literal, bare oct
literal = match.group(12)
if len(literal) > 1 and self.version >= (3, 0):
error = diagnostic.Diagnostic(
"error", "in Python 3, decimal literals must not start with a zero", {},
source.Range(self.source_buffer, tok_range.begin_pos, tok_range.begin_pos + 1))
raise diagnostic.DiagnosticException(error)
return tok_range, "int", int(literal, 8)

elif match.group(14) is not None: # string literal start
options = match.group(13).lower()
return tok_range, match.group(14), options

elif match.group(15) is not None: # keywords and operators
self._match_pair_delim(tok_range, match.group(15))
return tok_range, match.group(15), None

elif match.group(16) is not None: # identifier
return tok_range, "ident", match.group(16)

assert False

def _check_long_literal(self, range, literal):
if literal[-1] in "lL" and self.version >= (3, 0):
error = diagnostic.Diagnostic(
"error", "in Python 3, long integer literals were removed", {},
source.Range(self.source_buffer, range.end_pos - 1, range.end_pos))
raise diagnostic.DiagnosticException(error)

def _match_pair_delim(self, range, kwop):
if kwop == '(':
if kwop == "(":
self.parentheses.append(range)
elif kwop == '[':
elif kwop == "[":
self.square_braces.append(range)
elif kwop == '{':
elif kwop == "{":
self.curly_braces.append(range)
elif kwop == ')':
self._check_innermost_pair_delim(range, '(')
elif kwop == ")":
self._check_innermost_pair_delim(range, "(")
self.parentheses.pop()
elif kwop == ']':
self._check_innermost_pair_delim(range, '[')
elif kwop == "]":
self._check_innermost_pair_delim(range, "[")
self.square_braces.pop()
elif kwop == '}':
self._check_innermost_pair_delim(range, '{')
elif kwop == "}":
self._check_innermost_pair_delim(range, "{")
self.curly_braces.pop()

def _check_innermost_pair_delim(self, range, expected):
ranges = []
if len(self.parentheses) > 0:
ranges.append(('(', self.parentheses[-1]))
ranges.append(("(", self.parentheses[-1]))
if len(self.square_braces) > 0:
ranges.append(('[', self.square_braces[-1]))
ranges.append(("[", self.square_braces[-1]))
if len(self.curly_braces) > 0:
ranges.append(('{', self.curly_braces[-1]))
ranges.append(("{", self.curly_braces[-1]))

ranges.sort(key=lambda k: k[1].begin_pos)
compl_kind, compl_range = ranges[-1]
if compl_kind != expected:
note = diagnostic.Diagnostic(
"note", u"'{delimiter}' opened here",
"note", "'{delimiter}' opened here",
{"delimiter": compl_kind},
compl_range)
error = diagnostic.Diagnostic(
"fatal", u"mismatched '{delimiter}'",
"fatal", "mismatched '{delimiter}'",
{"delimiter": range.source()},
range, notes=[note])
raise diagnostic.DiagnosticException(error)
2 changes: 1 addition & 1 deletion pyparser/source.py
Original file line number Diff line number Diff line change
@@ -60,7 +60,7 @@ def _extract_line_begins(self):
self._line_begins = [0]
index = None
while True:
index = self.source.find(u"\n", index) + 1
index = self.source.find("\n", index) + 1
if index == 0:
return self._line_begins
self._line_begins.append(index)
16 changes: 8 additions & 8 deletions pyparser/test/test_diagnostic.py
Original file line number Diff line number Diff line change
@@ -5,23 +5,23 @@
class DiagnosticTestCase(unittest.TestCase):

def setUp(self):
self.buffer = source.Buffer(u"x + (1 + 'a')\n")
self.buffer = source.Buffer("x + (1 + 'a')\n")

def test_message(self):
diag = diagnostic.Diagnostic(
"error", u"{x} doesn't work", {"x": "everything"},
"error", "{x} doesn't work", {"x": "everything"},
source.Range(self.buffer, 0, 0))
self.assertEqual(u"everything doesn't work", diag.message())
self.assertEqual("everything doesn't work", diag.message())

def test_render(self):
diag = diagnostic.Diagnostic(
"error", u"cannot add {lft} and {rgt}",
{"lft": u"integer", "rgt": u"string"},
"error", "cannot add {lft} and {rgt}",
{"lft": "integer", "rgt": "string"},
source.Range(self.buffer, 7, 8),
[source.Range(self.buffer, 5, 6),
source.Range(self.buffer, 9, 12)])
self.assertEqual(
[u"<input>:1:8: error: cannot add integer and string",
u"x + (1 + 'a')",
u" ~ ^ ~~~ "],
["<input>:1:8: error: cannot add integer and string",
"x + (1 + 'a')",
" ~ ^ ~~~ "],
diag.render())
Loading