46
46
47
47
# Generic LL parsing combinators
48
48
class Unmatched:
    """Type of the sentinel a rule returns when it cannot match.

    The class carries no state. Rules report failure by returning the one
    shared instance, and callers detect it by identity
    (``result is unmatched``) rather than with ``isinstance``.
    """

    def __repr__(self):
        # Keep the sentinel recognizable in debug output and tracebacks
        # instead of the default ``<... instance at 0x...>`` form.
        return "<can't parse>"
57
50
58
51
# Shared sentinel instance: rules return it to signal "no match" and callers
# test for it by identity (`result is unmatched`).
unmatched = Unmatched ()
59
52
@@ -67,7 +60,7 @@ def decorator(inner_rule):
67
60
if cases == 1 :
68
61
def rule (* args , ** kwargs ):
69
62
result = inner_rule (* args , ** kwargs )
70
- if not isinstance ( result , Unmatched ) :
63
+ if result is not unmatched :
71
64
rule .covered [0 ] = True
72
65
return result
73
66
else :
@@ -93,7 +86,7 @@ def decorator(mapper):
93
86
@llrule (loc , inner_rule .expected )
94
87
def outer_rule (parser ):
95
88
result = inner_rule (parser )
96
- if isinstance ( result , Unmatched ) :
89
+ if result is unmatched :
97
90
return result
98
91
if isinstance (result , tuple ):
99
92
return mapper (parser , * result )
@@ -121,7 +114,7 @@ def Loc(kind, loc=None):
121
114
# Rule built by Loc: asks the parser to accept `kind`. If that fails the
# `unmatched` sentinel is passed through unchanged; otherwise the rule's
# value is the `loc` attribute of whatever `_accept` returned.
# The rule advertises `[kind]` as its expected set.
@llrule (loc , lambda parser : [kind ])
122
115
def rule (parser ):
123
116
result = parser ._accept (kind )
124
- if isinstance ( result , Unmatched ) :
117
+ if result is unmatched :
125
118
return result
126
119
return result .loc
127
120
return rule
@@ -138,19 +131,23 @@ def Expect(inner_rule, loc=None):
138
131
# Rule built by Expect: runs `inner_rule` and, instead of returning the
# `unmatched` sentinel on failure, raises a DiagnosticException describing
# the token at the parser's recorded error index and what was expected there.
@llrule(loc, inner_rule.expected)
def rule(parser):
    result = inner_rule(parser)
    if result is not unmatched:
        return result

    # The rules recorded at the error position describe what could have come
    # next. If nothing was recorded (the inner rule failed without any
    # backtracking), fall back to the rule that was just tried, so the
    # aggregation below never sees an empty list.
    failed_rules = parser._errrules or [inner_rule]

    # Merge, de-duplicate and sort the expected names. The loop variable is
    # deliberately not called `rule`, to avoid shadowing this function's name.
    expected = sorted(set(
        name
        for failed_rule in failed_rules
        for name in failed_rule.expected(parser)))

    # Render as "a, b or c", "a", or a placeholder when nothing is expected.
    if len(expected) > 1:
        expected = ' or '.join([', '.join(expected[0:-1]), expected[-1]])
    elif len(expected) == 1:
        expected = expected[0]
    else:
        expected = '(impossible)'

    # Report against the token at the recorded error index rather than the
    # parser's current token.
    error_tok = parser._tokens[parser._errindex]
    error = diagnostic.Diagnostic(
        "error", "unexpected {actual}: expected {expected}",
        {'actual': error_tok.kind, 'expected': expected},
        error_tok.loc)
    raise diagnostic.DiagnosticException(error)
155
152
return rule
156
153
@@ -162,13 +159,13 @@ def Seq(first_rule, *rest_of_rules, **kwargs):
162
159
@llrule (kwargs .get ('loc' , None ), first_rule .expected )
163
160
def rule (parser ):
164
161
result = first_rule (parser )
165
- if isinstance ( result , Unmatched ) :
162
+ if result is unmatched :
166
163
return result
167
164
168
165
results = [result ]
169
166
for rule in rest_of_rules :
170
167
result = rule (parser )
171
- if isinstance ( result , Unmatched ) :
168
+ if result is unmatched :
172
169
return result
173
170
results .append (result )
174
171
return tuple (results )
@@ -198,8 +195,8 @@ def rule(parser):
198
195
data = parser ._save ()
199
196
for idx , inner_rule in enumerate (inner_rules ):
200
197
result = inner_rule (parser )
201
- if isinstance ( result , Unmatched ) :
202
- parser ._restore (data )
198
+ if result is unmatched :
199
+ parser ._restore (data , rule = inner_rule )
203
200
else :
204
201
rule .covered [idx ] = True
205
202
return result
@@ -210,8 +207,8 @@ def rule(parser):
210
207
data = parser ._save ()
211
208
for inner_rule in inner_rules :
212
209
result = inner_rule (parser )
213
- if isinstance ( result , Unmatched ) :
214
- parser ._restore (data )
210
+ if result is unmatched :
211
+ parser ._restore (data , rule = inner_rule )
215
212
else :
216
213
return result
217
214
return unmatched
@@ -232,8 +229,8 @@ def rule(parser):
232
229
while True :
233
230
data = parser ._save ()
234
231
result = inner_rule (parser )
235
- if isinstance ( result , Unmatched ) :
236
- parser ._restore (data )
232
+ if result is unmatched :
233
+ parser ._restore (data , rule = inner_rule )
237
234
return results
238
235
results .append (result )
239
236
return rule
@@ -246,15 +243,15 @@ def Plus(inner_rule, loc=None):
246
243
# Rule built by Plus: matches `inner_rule` one or more times.
# The first match is mandatory: if it fails, `unmatched` is returned as-is.
# After that the rule keeps matching; when an attempt fails, the parser is
# rewound to the position saved just before that attempt and the list of
# results collected so far is returned.
@llrule (loc , inner_rule .expected )
247
244
def rule (parser ):
248
245
result = inner_rule (parser )
249
- if isinstance ( result , Unmatched ) :
246
+ if result is unmatched :
250
247
return result
251
248
252
249
results = [result ]
253
250
while True :
254
251
data = parser ._save ()
255
252
result = inner_rule (parser )
256
- if isinstance ( result , Unmatched ) :
257
- parser ._restore (data )
253
+ if result is unmatched :
254
+ parser ._restore (data , rule = inner_rule )
258
255
return results
259
256
results .append (result )
260
257
return rule
@@ -280,19 +277,19 @@ def rule(parser):
280
277
281
278
if leading :
282
279
result = inner_rule (parser )
283
- if isinstance ( result , Unmatched ) :
280
+ if result is unmatched :
284
281
return result
285
282
else :
286
283
results .append (result )
287
284
288
285
while True :
289
286
result = separator_rule (parser )
290
- if isinstance ( result , Unmatched ) :
287
+ if result is unmatched :
291
288
results .trailing_comma = None
292
289
return results
293
290
294
291
result_1 = inner_rule (parser )
295
- if isinstance ( result_1 , Unmatched ) :
292
+ if result_1 is unmatched :
296
293
results .trailing_comma = result
297
294
return results
298
295
else :
@@ -305,7 +302,7 @@ def Newline(loc=None):
305
302
# Rule built by Newline: asks the parser to accept 'newline'. Failure passes
# the `unmatched` sentinel through; success yields an empty list (the accepted
# result itself is discarded). The rule advertises ['newline'] as expected.
@llrule (loc , lambda parser : ['newline' ])
306
303
def rule (parser ):
307
304
result = parser ._accept ('newline' )
308
- if isinstance ( result , Unmatched ) :
305
+ if result is unmatched :
309
306
return result
310
307
return []
311
308
return rule
@@ -352,18 +349,32 @@ class Parser:
352
349
353
350
# Generic LL parsing methods
354
351
# Set up parser state over `lexer`:
#   _tokens   - list of tokens, initially empty
#   _index    - current position in _tokens; starts at -1 so the _advance()
#               call at the end moves it to 0
#   _errindex - index recorded by _restore for the furthest failure seen
#               (-1: nothing recorded yet)
#   _errrules - rules that failed at _errindex, as collected by _restore
def __init__ (self , lexer ):
355
- self .lexer = lexer
356
- self ._tokens = []
357
- self ._index = - 1
352
+ self .lexer = lexer
353
+ self ._tokens = []
354
+ self ._index = - 1
355
+ self ._errindex = - 1
356
+ self ._errrules = []
358
357
self ._advance ()
359
358
360
359
# Return the current token index; the value is what _restore takes as `data`
# to rewind the parser to this position.
def _save (self ):
361
360
return self ._index
362
361
363
- def _restore (self , data ):
362
+ def _restore (self , data , rule ):
364
363
self ._index = data
365
364
self ._token = self ._tokens [self ._index ]
366
365
366
+ if self ._index > self ._errindex :
367
+ # We have advanced since last error
368
+ self ._errindex = self ._index
369
+ self ._errrules = [rule ]
370
+ elif self ._index == self ._errindex :
371
+ # We're at the same place as last error
372
+ self ._errrules .append (rule )
373
+ else :
374
+ # We've backtracked far and are now just failing the
375
+ # whole parse
376
+ pass
377
+
367
378
def _advance (self ):
368
379
self ._index += 1
369
380
if self ._index == len (self ._tokens ):
@@ -404,22 +415,22 @@ def add_flags(self, flags):
404
415
if 'print_function' in flags :
405
416
self .lexer .print_function = True
406
417
407
# Entry rule for interactive input. On the '+' lines the grammar is wrapped
# in Expect, so a failed match raises a DiagnosticException instead of
# returning the `unmatched` sentinel.
# `body` may be the empty list (e.g. when only a Newline matched), in which
# case the resulting node gets loc=None; otherwise the first element's loc
# is used.
- @action (Alt (Newline (),
408
- Rule ('simple_stmt' ),
409
- SeqN (0 , Rule ('compound_stmt' ), Newline ())))
418
+ @action (Expect ( Alt (Newline (),
419
+ Rule ('simple_stmt' ),
420
+ SeqN (0 , Rule ('compound_stmt' ), Newline () ))))
410
421
def single_input (self , body ):
411
422
"""single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE"""
412
423
loc = None if body == [] else body [0 ].loc
413
424
return ast .Interactive (body = body , loc = loc )
414
425
415
# Entry rule for a whole file. On the '+' line the grammar is wrapped in
# Expect, so a failed match raises a DiagnosticException instead of returning
# the `unmatched` sentinel.
# `body` arrives as a list of lists (one per Newline/stmt match) and is
# flattened with reduce(list.__add__, ..., []); an empty result gives loc=None.
# NOTE(review): the first parameter is named `parser` here while the sibling
# entry rules call it `self` - presumably the same object; confirm.
- @action (SeqN (0 , Star (Alt (Newline (), Rule ('stmt' ))), Tok ('eof' )))
426
+ @action (Expect ( SeqN (0 , Star (Alt (Newline (), Rule ('stmt' ))), Tok ('eof' ) )))
416
427
def file_input (parser , body ):
417
428
"""file_input: (NEWLINE | stmt)* ENDMARKER"""
418
429
body = reduce (list .__add__ , body , [])
419
430
loc = None if body == [] else body [0 ].loc
420
431
return ast .Module (body = body , loc = loc )
421
432
422
# Entry rule for a single expression. On the '+' line the grammar is wrapped
# in Expect, so a failed match raises a DiagnosticException instead of
# returning the `unmatched` sentinel.
# The matched expression is wrapped in a one-element body and its own loc is
# reused for the resulting node.
- @action (SeqN (0 , Rule ('testlist' ), Star (Tok ('newline' )), Tok ('eof' )))
433
+ @action (Expect ( SeqN (0 , Rule ('testlist' ), Star (Tok ('newline' )), Tok ('eof' ) )))
423
434
def eval_input (self , expr ):
424
435
"""eval_input: testlist NEWLINE* ENDMARKER"""
425
436
return ast .Expression (body = [expr ], loc = expr .loc )
0 commit comments