m-labs · Apr 2, 2015 · Apr 2, 2015
Showing with 29 additions and 6 deletions.

+2 −1 .gitignore

+12 −3 pyparser/lexer.py

+13 −0 pyparser/test/test_lexer.py

+2 −2 setup.py
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,5 @@
 __pycache__/
 _build/
 *.egg-info/
-
+/build/
+/dist/
diff --git a/pyparser/lexer.py b/pyparser/lexer.py
@@ -4,7 +4,7 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 from . import source, diagnostic
-import re
+import regex as re
 import unicodedata
 import sys
 
@@ -142,9 +142,10 @@ def __init__(self, source_buffer, version):
             |   (""\"|'''|"|')
             )
         |   ((?:{keywords})\b|{operators}) # 21 keywords and operators
-        |   ([A-Za-z_][A-Za-z0-9_]*) # 22 identifier
+        |   ([A-Za-z_][A-Za-z0-9_]*\b) # 22 identifier
+        |   (\p{{XID_Start}}\p{{XID_Continue}}*) # 23 Unicode identifier
         )
-        """.format(keywords=re_keywords, operators=re_operators), re.VERBOSE)
+        """.format(keywords=re_keywords, operators=re_operators), re.VERBOSE|re.UNICODE)
 
     # These are identical for all lexer instances.
     _lex_escape_re = re.compile(r"""
@@ -280,6 +281,14 @@ def _lex(self):
         elif match.group(22) is not None: # identifier
             return tok_range, "ident", match.group(22)
 
+        elif match.group(23) is not None: # Unicode identifier
+            if self.version < (3, 0):
+                error = diagnostic.Diagnostic(
+                    "error", "in Python 2, Unicode identifiers are not allowed", {},
+                    tok_range)
+                raise diagnostic.DiagnosticException(error)
+            return tok_range, "ident", match.group(23)
+
         assert False
 
     def _string_literal(self, options, begin_span, data, data_span, end_span):

diff --git a/pyparser/test/test_lexer.py b/pyparser/test/test_lexer.py
@@ -200,6 +200,19 @@ def test_identifier(self):
                          "ident", "a")
         self.assertLexes("andi",
                          "ident", "andi")
+        self.assertLexesVersions(
+                         "ышка", [(3,0)],
+                         "ident", "ышка")
+        self.assertLexesVersions(
+                         "ышкаs", [(3,0)],
+                         "ident", "ышкаs")
+        self.assertLexesVersions(
+                         "sышка", [(3,0)],
+                         "ident", "sышка")
+
+        self.assertDiagnosesVersions(
+                         "ышка", [(2,7)],
+                         [("error", "in Python 2, Unicode identifiers are not allowed", (0, 4))])
 
     def test_keywords(self):
         self.assertLexes("/",

diff --git a/setup.py b/setup.py
@@ -12,15 +12,15 @@ def run(self):
         os.system('rsync -avz doc/_build/html/ shell.serverraum.org:~/web/m-labs.hk/pyparser')
 
 setup(
-    name="artiq",
+    name="pyparser",
     version="0.0+dev",
     author="whitequark",
     author_email="whitequark@whitequark.org",
     url="http://m-labs.hk/pyparser",
     description="A Python parser intended for use in tooling",
     long_description=open("README.rst").read(),
     license="BSD",
-    install_requires=[],
+    install_requires=['regex'],
     extras_require={},
     dependency_links=[],
     packages=find_packages(exclude=['tests*']),
-Original file line number
+Diff line change
@@ -2,4 +2,5 @@
     __pycache__/
     _build/
     *.egg-info/
+    /build/
+    /dist/