
Comparing changes

base repository: m-labs/pythonparser
base: 92f471028f06
head repository: m-labs/pythonparser
compare: e6b50820b3a0
  • 2 commits
  • 7 files changed
  • 1 contributor

Commits on Apr 2, 2015

  1. Implement explicit line joining.

    whitequark committed Apr 2, 2015 · 6ec06de
  2. Python 3 compatibility.

    whitequark committed Apr 2, 2015 · e6b5082
    (signed with GitHub’s verified signature; the key has since expired)
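
For context on the first commit: explicit line joining (section 2.1.5 of the Python language reference) means a backslash at the very end of a physical line continues the logical line, so the lexer must not emit a newline token there. A minimal illustration in plain Python:

    # The trailing backslash joins the two physical lines into one
    # logical line, so no statement break occurs between them.
    total = 1 + \
            2
    assert total == 3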
Showing with 165 additions and 152 deletions.
  1. +1 −3 pyparser/__init__.py
  2. +7 −7 pyparser/diagnostic.py
  3. +49 −45 pyparser/lexer.py
  4. +1 −0 pyparser/source.py
  5. +2 −2 pyparser/test/test_diagnostic.py
  6. +103 −94 pyparser/test/test_lexer.py
  7. +2 −1 pyparser/test/test_source.py
4 changes: 1 addition & 3 deletions pyparser/__init__.py
@@ -1,3 +1 @@
-import source
-import diagnostic
-import lexer
+from . import source, diagnostic, lexer
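
The bare `import source` form relies on Python 2's implicit relative imports, which Python 3 removed; the explicit relative spelling works on both. A minimal sketch of the difference:

    # On Python 2, "import source" inside the pyparser package silently
    # resolves to pyparser/source.py (an implicit relative import).
    # Python 3 treats it as absolute and raises ImportError, so the
    # portable spelling names the package explicitly:
    from . import source  # works on both Python 2 and Python 3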
14 changes: 7 additions & 7 deletions pyparser/diagnostic.py
@@ -3,7 +3,7 @@
 and presentation of diagnostic messages.
 """
 
-import exceptions
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 class Diagnostic:
     """
@@ -69,23 +69,23 @@ def render(self):
         ~ ^ ~~~
         """
         source_line = self.location.source_line().rstrip(u"\n")
-        highlight_line = bytearray(' ') * len(source_line)
+        highlight_line = bytearray(u" ", 'utf-8') * len(source_line)
 
         for hilight in self.highlights:
             lft, rgt = hilight.column_range()
-            highlight_line[lft:rgt] = bytearray('~') * hilight.size()
+            highlight_line[lft:rgt] = bytearray(u"~", 'utf-8') * hilight.size()
 
         lft, rgt = self.location.column_range()
-        highlight_line[lft:rgt] = bytearray('^') * self.location.size()
+        highlight_line[lft:rgt] = bytearray(u"^", 'utf-8') * self.location.size()
 
         return [
-            u"%s: %s: %s" % (unicode(self.location), self.level, self.message()),
+            u"%s: %s: %s" % (str(self.location), self.level, self.message()),
             source_line,
-            unicode(highlight_line)
+            highlight_line.decode('utf-8')
         ]
 
 
-class Exception(exceptions.Exception):
+class DiagnosticException(Exception):
     """
     :class:`Exception` is an exception which carries a :class:`Diagnostic`.
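
These are the Python 2-only spellings being replaced: on Python 3, a `bytearray` built from text needs an explicit encoding, `unicode()` does not exist, and neither does the `exceptions` module (hence the rename, which also stops shadowing the built-in `Exception`). A minimal sketch of the portable highlight-line construction, with made-up column values:

    # bytearray(text, encoding) is accepted by both Python 2 and 3;
    # bytearray(str) without an encoding is a TypeError on Python 3.
    source_line = "x = (1 +"
    highlight_line = bytearray(u" ", "utf-8") * len(source_line)
    highlight_line[4:8] = bytearray(u"~", "utf-8") * 4  # underline "(1 +"
    highlight_line[4:5] = bytearray(u"^", "utf-8") * 1  # caret at the paren
    print(highlight_line.decode("utf-8"))               # prints "    ^~~~"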
94 changes: 49 additions & 45 deletions pyparser/lexer.py
@@ -2,8 +2,8 @@
 The :mod:`lexer` module concerns itself with tokenizing Python source.
 """
 
-import source
-import diagnostic
+from __future__ import absolute_import, division, print_function, unicode_literals
+from . import source, diagnostic
 import re
 
 class Lexer:
@@ -84,30 +84,31 @@ def __init__(self, source_buffer, version):
         # otherwise grab all keywords; it is made to work by making it impossible
         # for the keyword case to match a word prefix, and ordering it before
         # the identifier case.
-        self.lex_token = re.compile(ur"""
+        self.lex_token = re.compile(u"""
         [ \t\f]* # initial whitespace
         ( # 1
-        ([\n]) # 2 newline
-        | (\#.+) # 3 comment
-        | ( # 4 floating point or complex literal
+        (\\\\)? # ?2 line continuation
+        ([\n]|[\r][\n]|[\r]) # 3 newline
+        | (\#.+) # 4 comment
+        | ( # 5 floating point or complex literal
         (?: [0-9]* \. [0-9]+
         | [0-9]+ \.?
         ) [eE] [+-]? [0-9]+
         | [0-9]* \. [0-9]+
         | [0-9]+ \.
-        ) ([jJ])? # ?5 complex suffix
-        | ([0-9]+) [jJ] # 6 complex literal
+        ) ([jJ])? # ?6 complex suffix
+        | ([0-9]+) [jJ] # 7 complex literal
         | (?: # integer literal
-        ( [1-9] [0-9]* ) # 7 dec
-        | 0[oO]? ( [0-7]+ ) # 8 oct
-        | 0[xX] ( [0-9A-Fa-f]+ ) # 9 hex
-        | 0[bB] ( [01]+ ) # 10 bin
+        ( [1-9] [0-9]* ) # 8 dec
+        | 0[oO]? ( [0-7]+ ) # 9 oct
+        | 0[xX] ( [0-9A-Fa-f]+ ) # 10 hex
+        | 0[bB] ( [01]+ ) # 11 bin
         )
         [Ll]?
-        | ([BbUu]?[Rr]?) # ?11 string literal options
-        (""\"|"|'''|') # 12 string literal start
-        | ((?:{keywords})\b|{operators}) # 13 keywords and operators
-        | ([A-Za-z_][A-Za-z0-9_]*) # 14 identifier
+        | ([BbUu]?[Rr]?) # ?12 string literal options
+        (""\"|"|'''|') # 13 string literal start
+        | ((?:{keywords})\\b|{operators}) # 14 keywords and operators
+        | ([A-Za-z_][A-Za-z0-9_]*) # 15 identifier
         )
         """.format(keywords=re_keywords, operators=re_operators), re.VERBOSE)

@@ -144,41 +145,44 @@ def _lex(self):
                 "fatal", u"unexpected {character}",
                 {"character": repr(self.source_buffer.source[self.offset]).lstrip(u"u")},
                 source.Range(self.source_buffer, self.offset, self.offset + 1))
-            raise diagnostic.Exception(diag)
+            raise diagnostic.DiagnosticException(diag)
         self.offset = match.end(0)
 
         tok_range = source.Range(self.source_buffer, *match.span(1))
-        if match.group(2) is not None: # newline
+        if match.group(3) is not None: # newline
             if len(self.parentheses) + len(self.square_braces) + len(self.curly_braces) > 0:
-                # Implicitly joined lines.
+                # 2.1.6 Implicit line joining
                 return self._lex()
+            if match.group(2) is not None:
+                # 2.1.5. Explicit line joining
+                return self._lex()
             return tok_range, "newline", None
-        elif match.group(3) is not None: # comment
-            self.comments.append((tok_range, match.group(3)))
+        elif match.group(4) is not None: # comment
+            self.comments.append((tok_range, match.group(4)))
             return self._lex()
-        elif match.group(4) is not None: # floating point or complex literal
-            if match.group(5) is None:
-                return tok_range, "float", float(match.group(4))
+        elif match.group(5) is not None: # floating point or complex literal
+            if match.group(6) is None:
+                return tok_range, "float", float(match.group(5))
             else:
-                return tok_range, "complex", float(match.group(4)) * 1j
-        elif match.group(6) is not None: # complex literal
-            return tok_range, "complex", int(match.group(6)) * 1j
-        elif match.group(7) is not None: # integer literal, dec
-            return tok_range, "int", int(match.group(7))
-        elif match.group(8) is not None: # integer literal, oct
-            return tok_range, "int", int(match.group(8), 8)
-        elif match.group(9) is not None: # integer literal, hex
-            return tok_range, "int", int(match.group(9), 16)
-        elif match.group(10) is not None: # integer literal, bin
-            return tok_range, "int", int(match.group(10), 2)
-        elif match.group(12) is not None: # string literal start
-            options = match.group(11).lower()
-            return tok_range, match.group(12), options
-        elif match.group(13) is not None: # keywords and operators
-            self._match_pair_delim(tok_range, match.group(13))
-            return tok_range, match.group(13), None
-        elif match.group(14) is not None: # identifier
-            return tok_range, "ident", match.group(14)
+                return tok_range, "complex", float(match.group(5)) * 1j
+        elif match.group(7) is not None: # complex literal
+            return tok_range, "complex", int(match.group(7)) * 1j
+        elif match.group(8) is not None: # integer literal, dec
+            return tok_range, "int", int(match.group(8))
+        elif match.group(9) is not None: # integer literal, oct
+            return tok_range, "int", int(match.group(9), 8)
+        elif match.group(10) is not None: # integer literal, hex
+            return tok_range, "int", int(match.group(10), 16)
+        elif match.group(11) is not None: # integer literal, bin
+            return tok_range, "int", int(match.group(11), 2)
+        elif match.group(13) is not None: # string literal start
+            options = match.group(12).lower()
+            return tok_range, match.group(13), options
+        elif match.group(14) is not None: # keywords and operators
+            self._match_pair_delim(tok_range, match.group(14))
+            return tok_range, match.group(14), None
+        elif match.group(15) is not None: # identifier
+            return tok_range, "ident", match.group(15)
         else:
             assert False
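
With the shifted numbering, group 3 is now the newline and group 2 the optional backslash before it; in both the implicit and the explicit case the newline is swallowed by recursing into `_lex` rather than being returned as a token. A conceptual sketch of that decision, not the real code:

    # A newline becomes a token only when it is neither explicitly
    # joined (trailing backslash) nor implicitly joined (open brackets).
    def should_emit_newline(has_backslash, open_brackets):
        if open_brackets > 0:    # 2.1.6 Implicit line joining
            return False
        if has_backslash:        # 2.1.5 Explicit line joining
            return False
        return True

    assert should_emit_newline(False, 0)      # bare newline -> token
    assert not should_emit_newline(True, 0)   # backslash at end of line
    assert not should_emit_newline(False, 2)  # inside ( [ or {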

@@ -208,7 +212,7 @@ def _check_innermost_pair_delim(self, range, expected):
         if len(self.curly_braces) > 0:
             ranges.append(('{', self.curly_braces[-1]))
 
-        ranges.sort(key=lambda (_, range): range.begin_pos)
+        ranges.sort(key=lambda k: k[1].begin_pos)
         compl_kind, compl_range = ranges[-1]
         if compl_kind != expected:
             note = diagnostic.Diagnostic(
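
The old lambda uses tuple parameter unpacking, which PEP 3113 removed in Python 3; the portable form takes a single argument and subscripts it. A minimal before/after sketch:

    pairs = [("(", 5), ("[", 2), ("{", 9)]

    # Python 2 only -- tuple parameters are a SyntaxError on Python 3:
    #   pairs.sort(key=lambda (kind, pos): pos)
    # Portable on both -- one argument, indexed:
    pairs.sort(key=lambda k: k[1])
    assert pairs == [("[", 2), ("(", 5), ("{", 9)]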
@@ -219,7 +223,7 @@ def _check_innermost_pair_delim(self, range, expected):
             "fatal", u"mismatched '{delimiter}'",
             {"delimiter": range.source()},
             range, notes=[note])
-        raise diagnostic.Exception(error)
+        raise diagnostic.DiagnosticException(error)
 
     def __iter__(self):
         return self
1 change: 1 addition & 0 deletions pyparser/source.py
@@ -5,6 +5,7 @@
 location information and original source from a range.
 """
 
+from __future__ import absolute_import, division, print_function, unicode_literals
 import bisect
 
 class Buffer:
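
Of the four `__future__` imports added across these files, `unicode_literals` is the one with the most visible effect: it makes every undecorated string literal a text (unicode) string on Python 2, matching Python 3's default. A minimal sketch:

    from __future__ import unicode_literals

    # On Python 2 this literal is now type 'unicode' rather than 'str',
    # i.e. the same text type that Python 3 simply calls 'str'.
    s = "caf\u00e9"
    assert len(s) == 4  # four characters, independent of byte encoding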
4 changes: 2 additions & 2 deletions pyparser/test/test_diagnostic.py
@@ -1,6 +1,6 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+from .. import source, diagnostic
 import unittest
-import pyparser.source as source
-import pyparser.diagnostic as diagnostic
 
 class DiagnosticTestCase(unittest.TestCase):

(diffs for pyparser/test/test_lexer.py and pyparser/test/test_source.py did not load)