1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 __version__ = 'SPARK-0.6.1'
23
24 import re
25 import sys
26
28 namelist, namedict, classlist = [], {}, [instance.__class__]
29 for c in classlist:
30 for b in c.__bases__:
31 classlist.append(b)
32 for name in dir(c):
33 if name not in namedict:
34 namelist.append(name)
35 namedict[name] = 1
36 return namelist
37
40 pattern = self.reflect()
41 self.re = re.compile(pattern, re.VERBOSE)
42
43 self.index2func = {}
44 for name, number in self.re.groupindex.items():
45 self.index2func[number-1] = getattr(self, 't_' + name)
46
48 doc = getattr(self, name).__doc__
49 rv = '(?P<%s>%s)' % (name[2:], doc)
50 return rv
51
60
62 print "Lexical error at position %s" % pos
63 raise SystemExit
64
78
80 r'( . | \n )+'
81 pass
82
85 self.rules = {}
86 self.rule2func = {}
87 self.rule2name = {}
88 self.collectRules()
89 self.startRule = self.augment(start)
90 self.ruleschanged = 1
91
92 _START = 'START'
93 _EOF = 'EOF'
94
95
96
97
99
123
130
132
133
134
135
136
137 startRule = (self._START, ( start, self._EOF ))
138 self.rule2func[startRule] = lambda args: args[0]
139 self.rules[self._START] = [ startRule ]
140 self.rule2name[startRule] = ''
141 return startRule
142
144 union = {}
145 self.first = {}
146
147 for rulelist in self.rules.values():
148 for lhs, rhs in rulelist:
149 if lhs not in self.first:
150 self.first[lhs] = {}
151
152 if len(rhs) == 0:
153 self.first[lhs][None] = 1
154 continue
155
156 sym = rhs[0]
157 if sym not in self.rules:
158 self.first[lhs][sym] = 1
159 else:
160 union[(sym, lhs)] = 1
161 changes = 1
162 while changes:
163 changes = 0
164 for src, dest in union.keys():
165 destlen = len(self.first[dest])
166 self.first[dest].update(self.first[src])
167 if len(self.first[dest]) != destlen:
168 changes = 1
169
170
171
172
173
174
175
176
179
181 print "Syntax error at or near `%s' token" % token
182 raise SystemExit
183
184 - def parse(self, tokens):
185 tree = {}
186 tokens.append(self._EOF)
187 states = { 0: [ (self.startRule, 0, 0) ] }
188
189 if self.ruleschanged:
190 self.makeFIRST()
191
192 for i in xrange(len(tokens)):
193 states[i+1] = []
194
195 if states[i] == []:
196 break
197 self.buildState(tokens[i], states, i, tree)
198
199
200
201 if i < len(tokens)-1 or states[i+1] != [(self.startRule, 2, 0)]:
202 del tokens[-1]
203 self.error(tokens[i-1])
204 rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1))
205 del tokens[-1]
206 return rv
207
209 needsCompletion = {}
210 state = states[i]
211 predicted = {}
212
213 for item in state:
214 rule, pos, parent = item
215 lhs, rhs = rule
216
217
218
219
220 if pos == len(rhs):
221 if len(rhs) == 0:
222 needsCompletion[lhs] = (item, i)
223
224 for pitem in states[parent]:
225 if pitem is item:
226 break
227
228 prule, ppos, pparent = pitem
229 plhs, prhs = prule
230
231 if prhs[ppos:ppos+1] == (lhs,):
232 new = (prule,
233 ppos+1,
234 pparent)
235 if new not in state:
236 state.append(new)
237 tree[(new, i)] = [(item, i)]
238 else:
239 tree[(new, i)].append((item, i))
240 continue
241
242 nextSym = rhs[pos]
243
244
245
246
247 if nextSym in self.rules:
248
249
250
251
252
253
254
255 if nextSym in needsCompletion:
256 new = (rule, pos+1, parent)
257 olditem_i = needsCompletion[nextSym]
258 if new not in state:
259 state.append(new)
260 tree[(new, i)] = [olditem_i]
261 else:
262 tree[(new, i)].append(olditem_i)
263
264
265
266
267 if nextSym in predicted:
268 continue
269 predicted[nextSym] = 1
270
271 ttype = token is not self._EOF and \
272 self.typestring(token) or \
273 None
274 if ttype is not None:
275
276
277
278
279
280
281
282
283
284 for prule in self.rules[nextSym]:
285 new = (prule, 0, i)
286 prhs = prule[1]
287 if len(prhs) == 0:
288 state.append(new)
289 continue
290 prhs0 = prhs[0]
291 if prhs0 not in self.rules:
292 if prhs0 != ttype:
293 continue
294 else:
295 state.append(new)
296 continue
297 first = self.first[prhs0]
298 if None not in first and \
299 ttype not in first:
300 continue
301 state.append(new)
302 continue
303
304 for prule in self.rules[nextSym]:
305
306
307
308
309
310 prhs = prule[1]
311 if len(prhs) > 0 and \
312 prhs[0] not in self.rules and \
313 token != prhs[0]:
314 continue
315 state.append((prule, 0, i))
316
317
318
319
320 elif token == nextSym:
321
322 states[i+1].append((rule, pos+1, parent))
323
325 stack = []
326 self.buildTree_r(stack, tokens, -1, tree, root)
327 return stack[0]
328
329 - def buildTree_r(self, stack, tokens, tokpos, tree, root):
330 (rule, pos, parent), state = root
331
332 while pos > 0:
333 want = ((rule, pos, parent), state)
334 if want not in tree:
335
336
337
338
339
340
341 pos = pos - 1
342 state = state - 1
343 stack.insert(0, tokens[tokpos])
344 tokpos = tokpos - 1
345 else:
346
347
348
349
350
351
352
353
354
355 children = tree[want]
356 if len(children) > 1:
357 child = self.ambiguity(children)
358 else:
359 child = children[0]
360
361 tokpos = self.buildTree_r(stack,
362 tokens, tokpos,
363 tree, child)
364 pos = pos - 1
365 (crule, cpos, cparent), cstate = child
366 state = cparent
367
368 lhs, rhs = rule
369 result = self.rule2func[rule](stack[:len(rhs)])
370 stack[:len(rhs)] = [result]
371 return tokpos
372
392
394
395
396
397
398
399 return list[0]
400
401
402
403
404
405
406
407
408
413
415 rebind = lambda lhs, self=self: \
416 lambda args, lhs=lhs, self=self: \
417 self.buildASTNode(args, lhs)
418 lhs, rhs = rule
419 return rule, rebind(lhs)
420
429
430 - def terminal(self, token): return token
431
433 rv = self.AST(type)
434 rv[:len(args)] = args
435 return rv
436
437
438
439
440
441
442
443
444
445
446
449
499
500
501
502
503
504
505
506
511
513 rebind = lambda func, self=self: \
514 lambda args, func=func, self=self: \
515 self.foundMatch(args, func)
516 lhs, rhs = rule
517 rhslist = list(rhs)
518 rhslist.reverse()
519
520 return (lhs, tuple(rhslist)), rebind(func)
521
523 func(args[-1])
524 return args[-1]
525
538
539 - def match(self, ast=None):
540 if ast is None:
541 ast = self.ast
542 self.input = []
543
544 self.match_r(ast)
545 self.parse(self.input)
546
548
549
550
551 return list[-1]
552
553 -def _dump(tokens, states):
554 for i in range(len(states)):
555 print 'state', i
556 for (lhs, rhs), pos, parent in states[i]:
557 print '\t', lhs, '::=',
558 print ' '.join(rhs[:pos]),
559 print '.',
560 print ' '.join(rhs[pos:]),
561 print ',', parent
562 if i < len(tokens):
563 print
564 print 'token', str(tokens[i])
565 print
566