Skip to content

Commit 7c51298

Browse files
author
whitequark
committedApr 4, 2015
Add lexer.Token.
It is not convenient to represent tokens as tuples.
1 parent 6eca9e6 commit 7c51298

File tree

2 files changed

+47
-33
lines changed

2 files changed

+47
-33
lines changed
 

‎pyparser/lexer.py

+42-28
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@
1111
if sys.version_info[0] == 3:
1212
unichr = chr
1313

14+
class Token:
    """
    The :class:`Token` encapsulates a single lexer token and its location
    in the source code.

    :ivar loc: (:class:`pyparser.source.Range`) token location
    :ivar kind: (string) token kind; interned (can be compared using ``is``)
    :ivar value: token value; None or a kind-specific class
    """
    def __init__(self, loc, kind, value=None):
        self.loc = loc
        self.kind = kind
        self.value = value

    def __repr__(self):
        # kind is rendered bare (it is an interned identifier string);
        # loc and value use their repr forms.
        return "Token(%r, %s, %r)" % (self.loc, self.kind, self.value)
28+
1429
class Lexer:
1530
"""
1631
The :class:`Lexer` class extracts tokens and comments from
@@ -178,19 +193,18 @@ def __init__(self, source_buffer, version):
178193

179194
def next(self):
180195
"""
181-
Returns token at ``offset`` as a tuple (*range*, *token*, *data*)
182-
and advances ``offset`` to point past the end of the token,
183-
where:
196+
Returns token at ``offset`` as a :class:`Token` and advances ``offset``
197+
to point past the end of the token, where the token has:
184198
185-
- *range* is a :class:`pyparser.source.Range` that includes
199+
- *range* which is a :class:`pyparser.source.Range` that includes
186200
the token but not surrounding whitespace,
187-
- *token* is a string containing one of Python keywords or operators,
201+
- *kind* which is a string containing one of Python keywords or operators,
188202
``newline``, ``float``, ``int``, ``complex``, ``strbegin``,
189203
``strdata``, ``strend``, ``ident``, ``indent`` or ``dedent``,
190-
- *data* is the flags as lowercase string if *token* is ``strbegin``,
191-
the string contents if *token* is ``strdata``,
192-
the numeric value if *token* is ``float``, ``int`` or ``complex``,
193-
the identifier if *token* is ``ident`` and ``None`` in any other case.
204+
- *value* which is the flags as lowercase string if *kind* is ``strbegin``,
205+
the string contents if *kind* is ``strdata``,
206+
the numeric value if *kind* is ``float``, ``int`` or ``complex``,
207+
the identifier if *kind* is ``ident`` and ``None`` in any other case.
194208
"""
195209
if len(self.queue) == 0:
196210
self._refill()
@@ -218,15 +232,15 @@ def _refill(self):
218232
range = source.Range(self.source_buffer, match.start(1), match.start(1))
219233
if level > self.indent[-1][0]:
220234
self.indent.append((level, range, whitespace))
221-
self.queue.append((range, 'indent', None))
235+
self.queue.append(Token(range, 'indent'))
222236
elif level < self.indent[-1][0]:
223237
exact = False
224238
while level <= self.indent[-1][0]:
225239
if level == self.indent[-1][0] or self.indent[-1][0] == 0:
226240
exact = True
227241
break
228242
self.indent.pop(-1)
229-
self.queue.append((range, 'dedent', None))
243+
self.queue.append(Token(range, 'dedent'))
230244
if not exact:
231245
note = diagnostic.Diagnostic(
232246
"note", "expected to match level here", {},
@@ -257,7 +271,7 @@ def _refill(self):
257271
return self._refill()
258272

259273
self.new_line = True
260-
self.queue.append((tok_range, "newline", None))
274+
self.queue.append(Token(tok_range, "newline"))
261275
return
262276

263277
# Lexing non-whitespace now.
@@ -269,32 +283,32 @@ def _refill(self):
269283

270284
elif match.group(5) is not None: # floating point or complex literal
271285
if match.group(6) is None:
272-
self.queue.append((tok_range, "float", float(match.group(5))))
286+
self.queue.append(Token(tok_range, "float", float(match.group(5))))
273287
else:
274-
self.queue.append((tok_range, "complex", float(match.group(5)) * 1j))
288+
self.queue.append(Token(tok_range, "complex", float(match.group(5)) * 1j))
275289

276290
elif match.group(7) is not None: # complex literal
277-
self.queue.append((tok_range, "complex", int(match.group(7)) * 1j))
291+
self.queue.append(Token(tok_range, "complex", int(match.group(7)) * 1j))
278292

279293
elif match.group(8) is not None: # integer literal, dec
280294
literal = match.group(8)
281295
self._check_long_literal(tok_range, match.group(1))
282-
self.queue.append((tok_range, "int", int(literal)))
296+
self.queue.append(Token(tok_range, "int", int(literal)))
283297

284298
elif match.group(9) is not None: # integer literal, oct
285299
literal = match.group(9)
286300
self._check_long_literal(tok_range, match.group(1))
287-
self.queue.append((tok_range, "int", int(literal, 8)))
301+
self.queue.append(Token(tok_range, "int", int(literal, 8)))
288302

289303
elif match.group(10) is not None: # integer literal, hex
290304
literal = match.group(10)
291305
self._check_long_literal(tok_range, match.group(1))
292-
self.queue.append((tok_range, "int", int(literal, 16)))
306+
self.queue.append(Token(tok_range, "int", int(literal, 16)))
293307

294308
elif match.group(11) is not None: # integer literal, bin
295309
literal = match.group(11)
296310
self._check_long_literal(tok_range, match.group(1))
297-
self.queue.append((tok_range, "int", int(literal, 2)))
311+
self.queue.append(Token(tok_range, "int", int(literal, 2)))
298312

299313
elif match.group(12) is not None: # integer literal, bare oct
300314
literal = match.group(12)
@@ -303,7 +317,7 @@ def _refill(self):
303317
"error", "in Python 3, decimal literals must not start with a zero", {},
304318
source.Range(self.source_buffer, tok_range.begin_pos, tok_range.begin_pos + 1))
305319
raise diagnostic.DiagnosticException(error)
306-
self.queue.append((tok_range, "int", int(literal, 8)))
320+
self.queue.append(Token(tok_range, "int", int(literal, 8)))
307321

308322
elif match.group(14) is not None: # long string literal
309323
self._string_literal(
@@ -326,21 +340,21 @@ def _refill(self):
326340
elif match.group(21) is not None: # keywords and operators
327341
kwop = match.group(21)
328342
self._match_pair_delim(tok_range, kwop)
329-
self.queue.append((tok_range, kwop, None))
343+
self.queue.append(Token(tok_range, kwop))
330344

331345
elif match.group(22) is not None: # identifier
332-
self.queue.append((tok_range, "ident", match.group(22)))
346+
self.queue.append(Token(tok_range, "ident", match.group(22)))
333347

334348
elif match.group(23) is not None: # Unicode identifier
335349
if self.version < (3, 0):
336350
error = diagnostic.Diagnostic(
337351
"error", "in Python 2, Unicode identifiers are not allowed", {},
338352
tok_range)
339353
raise diagnostic.DiagnosticException(error)
340-
self.queue.append((tok_range, "ident", match.group(23)))
354+
self.queue.append(Token(tok_range, "ident", match.group(23)))
341355

342356
elif match.group(24) is not None: # end-of-file
343-
self.queue.append((tok_range, "eof", None))
357+
self.queue.append(Token(tok_range, "eof"))
344358

345359
else:
346360
assert False
@@ -357,11 +371,11 @@ def _string_literal(self, options, begin_span, data, data_span, end_span):
357371
begin_range)
358372
raise diagnostic.DiagnosticException(error)
359373

360-
self.queue.append((begin_range, 'strbegin', options))
361-
self.queue.append((data_range,
374+
self.queue.append(Token(begin_range, 'strbegin', options))
375+
self.queue.append(Token(data_range,
362376
'strdata', self._replace_escape(data_range, options, data)))
363-
self.queue.append((source.Range(self.source_buffer, *end_span),
364-
'strend', None))
377+
self.queue.append(Token(source.Range(self.source_buffer, *end_span),
378+
'strend'))
365379

366380
def _replace_escape(self, range, mode, value):
367381
is_raw = ("r" in mode)

‎pyparser/test/test_lexer.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ def assertLexesVersions(self, input, versions, *expected_tokens):
1111
tokens = expected_tokens
1212
self.buffer = source.Buffer(input)
1313
self.lexer = lexer.Lexer(self.buffer, version)
14-
for (range, token, data) in self.lexer:
14+
for token in self.lexer:
1515
if len(tokens) < 2:
16-
raise Exception("stray tokens: %s" % ((token,data),))
17-
expected_token, expected_data = tokens[:2]
16+
raise Exception("stray tokens: %s" % repr(token))
17+
expected_kind, expected_value = tokens[:2]
1818
tokens = tokens[2:]
19-
self.assertEqual(expected_token, token)
20-
self.assertEqual(expected_data, data)
19+
self.assertEqual(expected_kind, token.kind)
20+
self.assertEqual(expected_value, token.value)
2121
self.assertEqual((), tokens)
2222

2323
def assertDiagnosesVersions(self, input, versions, diag, *tokens):

0 commit comments

Comments
 (0)
Please sign in to comment.