11
11
if sys .version_info [0 ] == 3 :
12
12
unichr = chr
13
13
14
class Token:
    """
    The :class:`Token` encapsulates a single lexer token and its location
    in the source code.

    A token is a plain record with three public attributes; it performs no
    validation or conversion of the values it is given.

    :ivar loc: (:class:`pyparser.source.Range`) token location
    :ivar kind: (string) token kind; interned (can be compared using ``is``)
    :ivar value: token value; None or a kind-specific class

    NOTE(review): the constructor stores ``kind`` exactly as passed and does
    not intern it itself -- confirm that every producer supplies an interned
    string before relying on ``is`` comparisons.
    """

    def __init__(self, loc, kind, value=None):
        """
        Create a token.

        :param loc: location of the token in the source
        :param kind: token kind
        :param value: optional payload for the token; defaults to ``None``
        """
        self.loc = loc
        self.kind = kind
        self.value = value

    def __repr__(self):
        """
        Return a debugging representation of the form
        ``Token(<loc repr>, <kind>, <value repr>)``.

        ``kind`` is interpolated as-is (without quotes), while ``loc`` and
        ``value`` are rendered through :func:`repr`.
        """
        return "Token(%s, %s, %s)" % (repr(self.loc), self.kind, repr(self.value))
28
+
14
29
class Lexer :
15
30
"""
16
31
The :class:`Lexer` class extracts tokens and comments from
@@ -178,19 +193,18 @@ def __init__(self, source_buffer, version):
178
193
179
194
def next (self ):
180
195
"""
181
- Returns token at ``offset`` as a tuple (*range*, *token*, *data*)
182
- and advances ``offset`` to point past the end of the token,
183
- where:
196
+ Returns token at ``offset`` as a :class:`Token` and advances ``offset``
197
+ to point past the end of the token, where the token has:
184
198
185
- - *range* is a :class:`pyparser.source.Range` that includes
199
+ - *range* which is a :class:`pyparser.source.Range` that includes
186
200
the token but not surrounding whitespace,
187
- - *token* is a string containing one of Python keywords or operators,
201
+ - *kind* which is a string containing one of Python keywords or operators,
188
202
``newline``, ``float``, ``int``, ``complex``, ``strbegin``,
189
203
``strdata``, ``strend``, ``ident``, ``indent`` or ``dedent``,
190
- - *data* is the flags as lowercase string if *token * is ``strbegin``,
191
- the string contents if *token * is ``strdata``,
192
- the numeric value if *token * is ``float``, ``int`` or ``complex``,
193
- the identifier if *token * is ``ident`` and ``None`` in any other case.
204
+ - *value* which is the flags as lowercase string if *kind * is ``strbegin``,
205
+ the string contents if *kind * is ``strdata``,
206
+ the numeric value if *kind * is ``float``, ``int`` or ``complex``,
207
+ the identifier if *kind * is ``ident`` and ``None`` in any other case.
194
208
"""
195
209
if len (self .queue ) == 0 :
196
210
self ._refill ()
@@ -218,15 +232,15 @@ def _refill(self):
218
232
range = source .Range (self .source_buffer , match .start (1 ), match .start (1 ))
219
233
if level > self .indent [- 1 ][0 ]:
220
234
self .indent .append ((level , range , whitespace ))
221
- self .queue .append ((range , 'indent' , None ))
235
+ self .queue .append (Token (range , 'indent' ))
222
236
elif level < self .indent [- 1 ][0 ]:
223
237
exact = False
224
238
while level <= self .indent [- 1 ][0 ]:
225
239
if level == self .indent [- 1 ][0 ] or self .indent [- 1 ][0 ] == 0 :
226
240
exact = True
227
241
break
228
242
self .indent .pop (- 1 )
229
- self .queue .append ((range , 'dedent' , None ))
243
+ self .queue .append (Token (range , 'dedent' ))
230
244
if not exact :
231
245
note = diagnostic .Diagnostic (
232
246
"note" , "expected to match level here" , {},
@@ -257,7 +271,7 @@ def _refill(self):
257
271
return self ._refill ()
258
272
259
273
self .new_line = True
260
- self .queue .append ((tok_range , "newline" , None ))
274
+ self .queue .append (Token (tok_range , "newline" ))
261
275
return
262
276
263
277
# Lexing non-whitespace now.
@@ -269,32 +283,32 @@ def _refill(self):
269
283
270
284
elif match .group (5 ) is not None : # floating point or complex literal
271
285
if match .group (6 ) is None :
272
- self .queue .append ((tok_range , "float" , float (match .group (5 ))))
286
+ self .queue .append (Token (tok_range , "float" , float (match .group (5 ))))
273
287
else :
274
- self .queue .append ((tok_range , "complex" , float (match .group (5 )) * 1j ))
288
+ self .queue .append (Token (tok_range , "complex" , float (match .group (5 )) * 1j ))
275
289
276
290
elif match .group (7 ) is not None : # complex literal
277
- self .queue .append ((tok_range , "complex" , int (match .group (7 )) * 1j ))
291
+ self .queue .append (Token (tok_range , "complex" , int (match .group (7 )) * 1j ))
278
292
279
293
elif match .group (8 ) is not None : # integer literal, dec
280
294
literal = match .group (8 )
281
295
self ._check_long_literal (tok_range , match .group (1 ))
282
- self .queue .append ((tok_range , "int" , int (literal )))
296
+ self .queue .append (Token (tok_range , "int" , int (literal )))
283
297
284
298
elif match .group (9 ) is not None : # integer literal, oct
285
299
literal = match .group (9 )
286
300
self ._check_long_literal (tok_range , match .group (1 ))
287
- self .queue .append ((tok_range , "int" , int (literal , 8 )))
301
+ self .queue .append (Token (tok_range , "int" , int (literal , 8 )))
288
302
289
303
elif match .group (10 ) is not None : # integer literal, hex
290
304
literal = match .group (10 )
291
305
self ._check_long_literal (tok_range , match .group (1 ))
292
- self .queue .append ((tok_range , "int" , int (literal , 16 )))
306
+ self .queue .append (Token (tok_range , "int" , int (literal , 16 )))
293
307
294
308
elif match .group (11 ) is not None : # integer literal, bin
295
309
literal = match .group (11 )
296
310
self ._check_long_literal (tok_range , match .group (1 ))
297
- self .queue .append ((tok_range , "int" , int (literal , 2 )))
311
+ self .queue .append (Token (tok_range , "int" , int (literal , 2 )))
298
312
299
313
elif match .group (12 ) is not None : # integer literal, bare oct
300
314
literal = match .group (12 )
@@ -303,7 +317,7 @@ def _refill(self):
303
317
"error" , "in Python 3, decimal literals must not start with a zero" , {},
304
318
source .Range (self .source_buffer , tok_range .begin_pos , tok_range .begin_pos + 1 ))
305
319
raise diagnostic .DiagnosticException (error )
306
- self .queue .append ((tok_range , "int" , int (literal , 8 )))
320
+ self .queue .append (Token (tok_range , "int" , int (literal , 8 )))
307
321
308
322
elif match .group (14 ) is not None : # long string literal
309
323
self ._string_literal (
@@ -326,21 +340,21 @@ def _refill(self):
326
340
elif match .group (21 ) is not None : # keywords and operators
327
341
kwop = match .group (21 )
328
342
self ._match_pair_delim (tok_range , kwop )
329
- self .queue .append ((tok_range , kwop , None ))
343
+ self .queue .append (Token (tok_range , kwop ))
330
344
331
345
elif match .group (22 ) is not None : # identifier
332
- self .queue .append ((tok_range , "ident" , match .group (22 )))
346
+ self .queue .append (Token (tok_range , "ident" , match .group (22 )))
333
347
334
348
elif match .group (23 ) is not None : # Unicode identifier
335
349
if self .version < (3 , 0 ):
336
350
error = diagnostic .Diagnostic (
337
351
"error" , "in Python 2, Unicode identifiers are not allowed" , {},
338
352
tok_range )
339
353
raise diagnostic .DiagnosticException (error )
340
- self .queue .append ((tok_range , "ident" , match .group (23 )))
354
+ self .queue .append (Token (tok_range , "ident" , match .group (23 )))
341
355
342
356
elif match .group (24 ) is not None : # end-of-file
343
- self .queue .append ((tok_range , "eof" , None ))
357
+ self .queue .append (Token (tok_range , "eof" ))
344
358
345
359
else :
346
360
assert False
@@ -357,11 +371,11 @@ def _string_literal(self, options, begin_span, data, data_span, end_span):
357
371
begin_range )
358
372
raise diagnostic .DiagnosticException (error )
359
373
360
- self .queue .append ((begin_range , 'strbegin' , options ))
361
- self .queue .append ((data_range ,
374
+ self .queue .append (Token (begin_range , 'strbegin' , options ))
375
+ self .queue .append (Token (data_range ,
362
376
'strdata' , self ._replace_escape (data_range , options , data )))
363
- self .queue .append ((source .Range (self .source_buffer , * end_span ),
364
- 'strend' , None ))
377
+ self .queue .append (Token (source .Range (self .source_buffer , * end_span ),
378
+ 'strend' ))
365
379
366
380
def _replace_escape (self , range , mode , value ):
367
381
is_raw = ("r" in mode )
0 commit comments