
Comparing changes

base repository: m-labs/pythonparser
base: 92f471028f06
head repository: m-labs/pythonparser
compare: e6b50820b3a0
  • 2 commits
  • 7 files changed
  • 1 contributor

Commits on Apr 2, 2015

  1. Implement explicit line joining. (See the sketch after this list.)

     whitequark committed Apr 2, 2015 (commit 6ec06de)
  2. Python 3 compatibility.

     whitequark committed Apr 2, 2015 (commit e6b5082)
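
The two commit subjects name lexical features of Python itself. For readers unfamiliar with the terms: Python joins physical lines into one logical line either explicitly, with a trailing backslash (section 2.1.5 of the language reference), or implicitly, inside unclosed (), [] or {} delimiters (section 2.1.6). A minimal illustration, not taken from this diff:

    # Explicit line joining: the backslash-newline pair is consumed,
    # so the lexer emits no "newline" token inside the expression.
    total = 1 + \
            2

    # Implicit line joining: newlines inside open brackets are ignored.
    items = [1,
             2]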
Showing with 165 additions and 152 deletions.
  1. +1 −3 pyparser/__init__.py
  2. +7 −7 pyparser/diagnostic.py
  3. +49 −45 pyparser/lexer.py
  4. +1 −0 pyparser/source.py
  5. +2 −2 pyparser/test/test_diagnostic.py
  6. +103 −94 pyparser/test/test_lexer.py
  7. +2 −1 pyparser/test/test_source.py
4 changes: 1 addition & 3 deletions pyparser/__init__.py
@@ -1,3 +1 @@
-import source
-import diagnostic
-import lexer
+from . import source, diagnostic, lexer
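
Background for this hunk, not stated in the diff: on Python 3 a bare `import source` inside the pyparser package is treated as an absolute import and fails with ImportError, while the explicit relative form works on both Python 2.6+ and 3.x:

    # Inside pyparser/__init__.py:
    # import source          # Python 2 only (implicit relative import)
    from . import source     # explicit relative import; valid on 2.6+ and 3.x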
14 changes: 7 additions & 7 deletions pyparser/diagnostic.py
@@ -3,7 +3,7 @@
 and presentation of diagnostic messages.
 """

-import exceptions
+from __future__ import absolute_import, division, print_function, unicode_literals

 class Diagnostic:
     """
@@ -69,23 +69,23 @@ def render(self):
         ~ ^ ~~~
         """
         source_line = self.location.source_line().rstrip(u"\n")
-        highlight_line = bytearray(' ') * len(source_line)
+        highlight_line = bytearray(u" ", 'utf-8') * len(source_line)

         for hilight in self.highlights:
             lft, rgt = hilight.column_range()
-            highlight_line[lft:rgt] = bytearray('~') * hilight.size()
+            highlight_line[lft:rgt] = bytearray(u"~", 'utf-8') * hilight.size()

         lft, rgt = self.location.column_range()
-        highlight_line[lft:rgt] = bytearray('^') * self.location.size()
+        highlight_line[lft:rgt] = bytearray(u"^", 'utf-8') * self.location.size()

         return [
-            u"%s: %s: %s" % (unicode(self.location), self.level, self.message()),
+            u"%s: %s: %s" % (str(self.location), self.level, self.message()),
             source_line,
-            unicode(highlight_line)
+            highlight_line.decode('utf-8')
         ]


-class Exception(exceptions.Exception):
+class DiagnosticException(Exception):
     """
     :class:`Exception` is an exception which carries a :class:`Diagnostic`.
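
The bytearray changes above are needed because Python 3 refuses to build a bytearray from text without an encoding (a bare '' literal is bytes on Python 2 but text on Python 3). A minimal sketch of the pattern, independent of the diff:

    line = bytearray(u" ", 'utf-8') * 10      # works on Python 2 and 3
    # bytearray(" ") * 10                     # Python 2 only: str is already bytes there
    line[2:5] = bytearray(u"~", 'utf-8') * 3  # splice in highlight markers
    print(line.decode('utf-8'))               # '  ~~~     '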
94 changes: 49 additions & 45 deletions pyparser/lexer.py
@@ -2,8 +2,8 @@
 The :mod:`lexer` module concerns itself with tokenizing Python source.
 """

-import source
-import diagnostic
+from __future__ import absolute_import, division, print_function, unicode_literals
+from . import source, diagnostic
 import re

 class Lexer:
@@ -84,30 +84,31 @@ def __init__(self, source_buffer, version):
         # otherwise grab all keywords; it is made to work by making it impossible
         # for the keyword case to match a word prefix, and ordering it before
         # the identifier case.
-        self.lex_token = re.compile(ur"""
+        self.lex_token = re.compile(u"""
        [ \t\f]* # initial whitespace
        ( # 1
-         ([\n]) # 2 newline
-       | (\#.+) # 3 comment
-       | ( # 4 floating point or complex literal
+         (\\\\)? # ?2 line continuation
+         ([\n]|[\r][\n]|[\r]) # 3 newline
+       | (\#.+) # 4 comment
+       | ( # 5 floating point or complex literal
            (?: [0-9]* \. [0-9]+
             |  [0-9]+ \.?
            ) [eE] [+-]? [0-9]+
          | [0-9]* \. [0-9]+
          | [0-9]+ \.
-         ) ([jJ])? # ?5 complex suffix
-       | ([0-9]+) [jJ] # 6 complex literal
+         ) ([jJ])? # ?6 complex suffix
+       | ([0-9]+) [jJ] # 7 complex literal
        | (?: # integer literal
-           ( [1-9] [0-9]* ) # 7 dec
-         | 0[oO]? ( [0-7]+ ) # 8 oct
-         | 0[xX] ( [0-9A-Fa-f]+ ) # 9 hex
-         | 0[bB] ( [01]+ ) # 10 bin
+           ( [1-9] [0-9]* ) # 8 dec
+         | 0[oO]? ( [0-7]+ ) # 9 oct
+         | 0[xX] ( [0-9A-Fa-f]+ ) # 10 hex
+         | 0[bB] ( [01]+ ) # 11 bin
         )
         [Ll]?
-       | ([BbUu]?[Rr]?) # ?11 string literal options
-         (""\"|"|'''|') # 12 string literal start
-       | ((?:{keywords})\b|{operators}) # 13 keywords and operators
-       | ([A-Za-z_][A-Za-z0-9_]*) # 14 identifier
+       | ([BbUu]?[Rr]?) # ?12 string literal options
+         (""\"|"|'''|') # 13 string literal start
+       | ((?:{keywords})\\b|{operators}) # 14 keywords and operators
+       | ([A-Za-z_][A-Za-z0-9_]*) # 15 identifier
        )
        """.format(keywords=re_keywords, operators=re_operators), re.VERBOSE)

@@ -144,41 +145,44 @@ def _lex(self):
                 "fatal", u"unexpected {character}",
                 {"character": repr(self.source_buffer.source[self.offset]).lstrip(u"u")},
                 source.Range(self.source_buffer, self.offset, self.offset + 1))
-            raise diagnostic.Exception(diag)
+            raise diagnostic.DiagnosticException(diag)
         self.offset = match.end(0)

         tok_range = source.Range(self.source_buffer, *match.span(1))
-        if match.group(2) is not None: # newline
+        if match.group(3) is not None: # newline
             if len(self.parentheses) + len(self.square_braces) + len(self.curly_braces) > 0:
-                # Implicitly joined lines.
+                # 2.1.6 Implicit line joining
                 return self._lex()
+            if match.group(2) is not None:
+                # 2.1.5. Explicit line joining
+                return self._lex()
             return tok_range, "newline", None
-        elif match.group(3) is not None: # comment
-            self.comments.append((tok_range, match.group(3)))
+        elif match.group(4) is not None: # comment
+            self.comments.append((tok_range, match.group(4)))
             return self._lex()
-        elif match.group(4) is not None: # floating point or complex literal
-            if match.group(5) is None:
-                return tok_range, "float", float(match.group(4))
+        elif match.group(5) is not None: # floating point or complex literal
+            if match.group(6) is None:
+                return tok_range, "float", float(match.group(5))
             else:
-                return tok_range, "complex", float(match.group(4)) * 1j
-        elif match.group(6) is not None: # complex literal
-            return tok_range, "complex", int(match.group(6)) * 1j
-        elif match.group(7) is not None: # integer literal, dec
-            return tok_range, "int", int(match.group(7))
-        elif match.group(8) is not None: # integer literal, oct
-            return tok_range, "int", int(match.group(8), 8)
-        elif match.group(9) is not None: # integer literal, hex
-            return tok_range, "int", int(match.group(9), 16)
-        elif match.group(10) is not None: # integer literal, bin
-            return tok_range, "int", int(match.group(10), 2)
-        elif match.group(12) is not None: # string literal start
-            options = match.group(11).lower()
-            return tok_range, match.group(12), options
-        elif match.group(13) is not None: # keywords and operators
-            self._match_pair_delim(tok_range, match.group(13))
-            return tok_range, match.group(13), None
-        elif match.group(14) is not None: # identifier
-            return tok_range, "ident", match.group(14)
+                return tok_range, "complex", float(match.group(5)) * 1j
+        elif match.group(7) is not None: # complex literal
+            return tok_range, "complex", int(match.group(7)) * 1j
+        elif match.group(8) is not None: # integer literal, dec
+            return tok_range, "int", int(match.group(8))
+        elif match.group(9) is not None: # integer literal, oct
+            return tok_range, "int", int(match.group(9), 8)
+        elif match.group(10) is not None: # integer literal, hex
+            return tok_range, "int", int(match.group(10), 16)
+        elif match.group(11) is not None: # integer literal, bin
+            return tok_range, "int", int(match.group(11), 2)
+        elif match.group(13) is not None: # string literal start
+            options = match.group(12).lower()
+            return tok_range, match.group(13), options
+        elif match.group(14) is not None: # keywords and operators
+            self._match_pair_delim(tok_range, match.group(14))
+            return tok_range, match.group(14), None
+        elif match.group(15) is not None: # identifier
+            return tok_range, "ident", match.group(15)
        else:
            assert False

@@ -208,7 +212,7 @@ def _check_innermost_pair_delim(self, range, expected):
         if len(self.curly_braces) > 0:
             ranges.append(('{', self.curly_braces[-1]))

-        ranges.sort(key=lambda (_, range): range.begin_pos)
+        ranges.sort(key=lambda k: k[1].begin_pos)
         compl_kind, compl_range = ranges[-1]
         if compl_kind != expected:
             note = diagnostic.Diagnostic(
@@ -219,7 +223,7 @@ def _check_innermost_pair_delim(self, range, expected):
             "fatal", u"mismatched '{delimiter}'",
             {"delimiter": range.source()},
             range, notes=[note])
-        raise diagnostic.Exception(error)
+        raise diagnostic.DiagnosticException(error)

     def __iter__(self):
         return self
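
A side note on the sort-key change, not spelled out in the diff: Python 3 removed tuple parameter unpacking in function signatures (PEP 3113), so lambda (_, range): ... no longer parses and the key function must index its argument instead. For example:

    ranges = [('(', 7), ('[', 2), ('{', 5)]
    # ranges.sort(key=lambda (kind, pos): pos)  # Python 2 only; SyntaxError on Python 3
    ranges.sort(key=lambda k: k[1])             # index the tuple instead; works on both
    print(ranges)                               # [('[', 2), ('{', 5), ('(', 7)]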
1 change: 1 addition & 0 deletions pyparser/source.py
@@ -5,6 +5,7 @@
 location information and original source from a range.
 """

+from __future__ import absolute_import, division, print_function, unicode_literals
 import bisect

 class Buffer:
4 changes: 2 additions & 2 deletions pyparser/test/test_diagnostic.py
@@ -1,6 +1,6 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+from .. import source, diagnostic
 import unittest
-import pyparser.source as source
-import pyparser.diagnostic as diagnostic

 class DiagnosticTestCase(unittest.TestCase):

197 changes: 103 additions & 94 deletions pyparser/test/test_lexer.py
@@ -1,28 +1,32 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+from .. import source, lexer, diagnostic
 import unittest
-import pyparser

 class LexerTestCase(unittest.TestCase):

     def assertLexesVersion(self, input, version, *tokens):
-        self.buffer = pyparser.source.Buffer(unicode(input))
-        self.lexer = pyparser.lexer.Lexer(self.buffer, version)
+        self.buffer = source.Buffer(input)
+        self.lexer = lexer.Lexer(self.buffer, version)
         for (range, token, data) in self.lexer:
             if len(tokens) < 2:
-                raise Exception(u"stray tokens: %s" % unicode((token,data)))
+                raise Exception(u"stray tokens: %s" % (token,data))
             expected_token, expected_data = tokens[:2]
             tokens = tokens[2:]
-            self.assertEqual(unicode(expected_token), token)
+            self.assertEqual(expected_token, token)
             self.assertEqual(expected_data, data)
         self.assertEqual((), tokens)

-    def assertDiagnosesVersion(self, input, version, (reason, args, loc), *tokens):
+    def assertDiagnosesVersion(self, input, version, diag, *tokens):
         try:
             self.assertLexesVersion(input, version, *tokens)
-        except pyparser.diagnostic.Exception as e:
+        except diagnostic.DiagnosticException as e:
+            reason, args, loc = diag
             self.assertEqual(reason, e.diagnostic.reason)
             self.assertEqual(args, e.diagnostic.arguments)
-            self.assertEqual(pyparser.source.Range(self.buffer, *loc),
+            self.assertEqual(source.Range(self.buffer, *loc),
                              e.diagnostic.location)
             return
         self.assert_("Expected a diagnostic")

     VERSIONS = [(2,6), (3,0), (3,1)]

@@ -35,119 +39,124 @@ def assertDiagnoses(self, input, diag, *tokens):
             self.assertDiagnosesVersion(input, version, diag, *tokens)

     def test_empty(self):
-        self.assertLexes("")
+        self.assertLexes(u"")

     def test_newline(self):
-        self.assertLexes("\n",
-                         'newline', None)
+        self.assertLexes(u"\n",
+                         u"newline", None)
+        self.assertLexes(u"\r\n",
+                         u"newline", None)
+        self.assertLexes(u"\r",
+                         u"newline", None)
+        self.assertLexes(u"\\\n")

     def test_comment(self):
-        self.assertLexes("# foo")
-        self.assertEqual([(pyparser.source.Range(self.buffer, 0, 5), "# foo")],
+        self.assertLexes(u"# foo")
+        self.assertEqual([(source.Range(self.buffer, 0, 5), "# foo")],
                          self.lexer.comments)

     def test_float(self):
-        self.assertLexes("0.0",
-                         "float", 0.0)
-        self.assertLexes(".0",
-                         "float", 0.0)
-        self.assertLexes("0.",
-                         "float", 0.0)
-        self.assertLexes("0.0e0",
-                         "float", 0.0)
-        self.assertLexes(".0e0",
-                         "float", 0.0)
-        self.assertLexes("0.e0",
-                         "float", 0.0)
-        self.assertLexes("0e0",
-                         "float", 0.0)
-        self.assertLexes("0e00",
-                         "float", 0.0)
-        self.assertLexes("0e+0",
-                         "float", 0.0)
-        self.assertLexes("0e-0",
-                         "float", 0.0)
+        self.assertLexes(u"0.0",
+                         u"float", 0.0)
+        self.assertLexes(u".0",
+                         u"float", 0.0)
+        self.assertLexes(u"0.",
+                         u"float", 0.0)
+        self.assertLexes(u"0.0e0",
+                         u"float", 0.0)
+        self.assertLexes(u".0e0",
+                         u"float", 0.0)
+        self.assertLexes(u"0.e0",
+                         u"float", 0.0)
+        self.assertLexes(u"0e0",
+                         u"float", 0.0)
+        self.assertLexes(u"0e00",
+                         u"float", 0.0)
+        self.assertLexes(u"0e+0",
+                         u"float", 0.0)
+        self.assertLexes(u"0e-0",
+                         u"float", 0.0)

     def test_complex(self):
-        self.assertLexes("1e+1j",
-                         "complex", 10j)
-        self.assertLexes("10j",
-                         "complex", 10j)
+        self.assertLexes(u"1e+1j",
+                         u"complex", 10j)
+        self.assertLexes(u"10j",
+                         u"complex", 10j)

     def test_integer(self):
-        self.assertLexes("123",
-                         'int', 123)
-        self.assertLexes("0123",
-                         'int', 0123)
-        self.assertLexes("0o123",
-                         'int', 0o123)
-        self.assertLexes("0x123af",
-                         'int', 0x123af)
-        self.assertLexes("0b0101",
-                         'int', 0b0101)
-        self.assertLexes("123L",
-                         'int', 123L)
-        self.assertLexes("123l",
-                         'int', 123l)
+        self.assertLexes(u"123",
+                         u"int", 123)
+        self.assertLexes(u"0123",
+                         u"int", 83)
+        self.assertLexes(u"0o123",
+                         u"int", 0o123)
+        self.assertLexes(u"0x123af",
+                         u"int", 0x123af)
+        self.assertLexes(u"0b0101",
+                         u"int", 0b0101)
+        self.assertLexes(u"123L",
+                         u"int", 123)
+        self.assertLexes(u"123l",
+                         u"int", 123)

     def test_string_literal(self):
-        self.assertLexes("'",
-                         "'", "")
-        self.assertLexes("u'",
-                         "'", "u")
-        self.assertLexes("ur'",
-                         "'", "ur")
-        self.assertLexes("UR'",
-                         "'", "ur")
+        self.assertLexes(u"\"",
+                         u"\"", "")
+        self.assertLexes(u"u\"",
+                         u"\"", "u")
+        self.assertLexes(u"ur\"",
+                         u"\"", "ur")
+        self.assertLexes(u"UR\"",
+                         u"\"", "ur")

-        self.assertLexes("'''",
-                         "'''", "")
-        self.assertLexes("\"\"\"",
-                         "\"\"\"", "")
+        self.assertLexes(u"'''",
+                         u"'''", "")
+        self.assertLexes(u"\"\"\"",
+                         u"\"\"\"", "")

     def test_identifier(self):
-        self.assertLexes("a",
-                         "ident", "a")
-        self.assertLexes("andi",
-                         "ident", "andi")
+        self.assertLexes(u"a",
+                         u"ident", "a")
+        self.assertLexes(u"andi",
+                         u"ident", "andi")

     def test_keywords(self):
-        self.assertLexes("/",
-                         "/", None)
-        self.assertLexes("//",
-                         "//", None)
-        self.assertLexes("//=",
-                         "//=", None)
-        self.assertLexes("and",
-                         "and", None)
+        self.assertLexes(u"/",
+                         u"/", None)
+        self.assertLexes(u"//",
+                         u"//", None)
+        self.assertLexes(u"//=",
+                         u"//=", None)
+        self.assertLexes(u"and",
+                         u"and", None)

-        self.assertLexesVersion("<>", (2,6),
-                                "<>", None)
-        self.assertLexesVersion("<>", (3,0),
-                                "<", None,
-                                ">", None)
-        self.assertLexesVersion("<>", (3,1),
-                                "<>", None)
+        self.assertLexesVersion(u"<>", (2,6),
+                                u"<>", None)
+        self.assertLexesVersion(u"<>", (3,0),
+                                u"<", None,
+                                u">", None)
+        self.assertLexesVersion(u"<>", (3,1),
+                                u"<>", None)

     def test_implicit_joining(self):
-        self.assertLexes("[1,\n2]",
-                         '[', None,
-                         'int', 1,
-                         ',', None,
-                         'int', 2,
-                         ']', None)
+        self.assertLexes(u"[1,\n2]",
+                         u"[", None,
+                         u"int", 1,
+                         u",", None,
+                         u"int", 2,
+                         u"]", None)

     def test_diag_unrecognized(self):
-        self.assertDiagnoses("$",
+        self.assertDiagnoses(u"$",
             (u"unexpected {character}", {"character": "'$'"}, (0, 1)))

     def test_diag_delim_mismatch(self):
-        self.assertDiagnoses("[)",
+        self.assertDiagnoses(u"[)",
             (u"mismatched '{delimiter}'", {"delimiter": u")"}, (1, 2)),
-            '[', None)
+            u"[", None)

     """
     def test_(self):
-        self.assertLexes("",
+        self.assertLexes(u"",
                          )
     """
3 changes: 2 additions & 1 deletion pyparser/test/test_source.py
@@ -1,5 +1,6 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+from .. import source
 import unittest
-import pyparser.source as source

 class BufferTestCase(unittest.TestCase):