Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 8742c25bb532
Choose a base ref
...
head repository: m-labs/pythonparser
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: e2369956fc48
Choose a head ref
  • 2 commits
  • 4 files changed
  • 1 contributor

Commits on Apr 2, 2015

  1. Replace re with regex.

    The only thing we need from regex is Unicode character classes
    for Python 3 identifiers.
    whitequark committed Apr 2, 2015
    Copy the full SHA
    ba6d766 View commit details
  2. Add support for Unicode identifiers.

    whitequark committed Apr 2, 2015
    Copy the full SHA
    e236995 View commit details
Showing with 29 additions and 6 deletions.
  1. +2 −1 .gitignore
  2. +12 −3 pyparser/lexer.py
  3. +13 −0 pyparser/test/test_lexer.py
  4. +2 −2 setup.py
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -2,4 +2,5 @@
__pycache__/
_build/
*.egg-info/

/build/
/dist/
15 changes: 12 additions & 3 deletions pyparser/lexer.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@

from __future__ import absolute_import, division, print_function, unicode_literals
from . import source, diagnostic
import re
import regex as re
import unicodedata
import sys

@@ -142,9 +142,10 @@ def __init__(self, source_buffer, version):
| (""\"|'''|"|')
)
| ((?:{keywords})\b|{operators}) # 21 keywords and operators
| ([A-Za-z_][A-Za-z0-9_]*) # 22 identifier
| ([A-Za-z_][A-Za-z0-9_]*\b) # 22 identifier
| (\p{{XID_Start}}\p{{XID_Continue}}*) # 23 Unicode identifier
)
""".format(keywords=re_keywords, operators=re_operators), re.VERBOSE)
""".format(keywords=re_keywords, operators=re_operators), re.VERBOSE|re.UNICODE)

# These are identical for all lexer instances.
_lex_escape_re = re.compile(r"""
@@ -280,6 +281,14 @@ def _lex(self):
elif match.group(22) is not None: # identifier
return tok_range, "ident", match.group(22)

elif match.group(23) is not None: # identifier
if self.version < (3, 0):
error = diagnostic.Diagnostic(
"error", "in Python 2, Unicode identifiers are not allowed", {},
tok_range)
raise diagnostic.DiagnosticException(error)
return tok_range, "ident", match.group(23)

assert False

def _string_literal(self, options, begin_span, data, data_span, end_span):
13 changes: 13 additions & 0 deletions pyparser/test/test_lexer.py
Original file line number Diff line number Diff line change
@@ -200,6 +200,19 @@ def test_identifier(self):
"ident", "a")
self.assertLexes("andi",
"ident", "andi")
self.assertLexesVersions(
"ышка", [(3,0)],
"ident", "ышка")
self.assertLexesVersions(
"ышкаs", [(3,0)],
"ident", "ышкаs")
self.assertLexesVersions(
"sышка", [(3,0)],
"ident", "sышка")

self.assertDiagnosesVersions(
"ышка", [(2,7)],
[("error", "in Python 2, Unicode identifiers are not allowed", (0, 4))])

def test_keywords(self):
self.assertLexes("/",
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -12,15 +12,15 @@ def run(self):
os.system('rsync -avz doc/_build/html/ shell.serverraum.org:~/web/m-labs.hk/pyparser')

setup(
name="artiq",
name="pyparser",
version="0.0+dev",
author="whitequark",
author_email="whitequark@whitequark.org",
url="http://m-labs.hk/pyparser",
description="A Python parser intended for use in tooling",
long_description=open("README.rst").read(),
license="BSD",
install_requires=[],
install_requires=['regex'],
extras_require={},
dependency_links=[],
packages=find_packages(exclude=['tests*']),