1 | ## This file is part of PyANTLR. See LICENSE.txt for license |
---|
2 | ## details..........Copyright (C) Wolfgang Haefelinger, 2004. |
---|
3 | |
---|
4 | ## get sys module |
---|
5 | import sys |
---|
6 | |
---|
7 | version = sys.version.split()[0] |
---|
8 | if version < '2.2.1': |
---|
9 | False = 0 |
---|
10 | if version < '2.3': |
---|
11 | True = not False |
---|
12 | |
---|
13 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
14 | ### global symbols ### |
---|
15 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
16 | |
---|
17 | ### ANTLR Standard Tokens |
---|
18 | SKIP = -1 |
---|
19 | INVALID_TYPE = 0 |
---|
20 | EOF_TYPE = 1 |
---|
21 | EOF = 1 |
---|
22 | NULL_TREE_LOOKAHEAD = 3 |
---|
23 | MIN_USER_TYPE = 4 |
---|
24 | |
---|
25 | ### ANTLR's EOF Symbol |
---|
26 | EOF_CHAR = '' |
---|
27 | |
---|
28 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
29 | ### general functions ### |
---|
30 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
31 | |
---|
32 | ## Version should be automatically derived from configure.in. For now, |
---|
33 | ## we need to bump it ourselfs. Don't remove the <version> tags. |
---|
34 | ## <version> |
---|
35 | def version(): |
---|
36 | r = { |
---|
37 | 'major' : '2', |
---|
38 | 'minor' : '7', |
---|
39 | 'micro' : '5', |
---|
40 | 'patch' : '' , |
---|
41 | 'version': '2.7.5' |
---|
42 | } |
---|
43 | return r |
---|
44 | ## </version> |
---|
45 | |
---|
46 | def error(fmt,*args): |
---|
47 | if fmt: |
---|
48 | print "error: ", fmt % tuple(args) |
---|
49 | |
---|
50 | def ifelse(cond,_then,_else): |
---|
51 | if cond : |
---|
52 | r = _then |
---|
53 | else: |
---|
54 | r = _else |
---|
55 | return r |
---|
56 | |
---|
57 | def is_string_type(x): |
---|
58 | return (isinstance(x,str) or isinstance(x,unicode)) |
---|
59 | |
---|
60 | def assert_string_type(x): |
---|
61 | assert is_string_type(x) |
---|
62 | pass |
---|
63 | |
---|
64 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
65 | ### ANTLR Exceptions ### |
---|
66 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
67 | |
---|
68 | class ANTLRException(Exception): |
---|
69 | |
---|
70 | def __init__(self, *args): |
---|
71 | Exception.__init__(self, *args) |
---|
72 | |
---|
73 | |
---|
74 | class RecognitionException(ANTLRException): |
---|
75 | |
---|
76 | def __init__(self, *args): |
---|
77 | ANTLRException.__init__(self, *args) |
---|
78 | self.fileName = None |
---|
79 | self.line = -1 |
---|
80 | self.column = -1 |
---|
81 | if len(args) >= 2: |
---|
82 | self.fileName = args[1] |
---|
83 | if len(args) >= 3: |
---|
84 | self.line = args[2] |
---|
85 | if len(args) >= 4: |
---|
86 | self.column = args[3] |
---|
87 | |
---|
88 | def __str__(self): |
---|
89 | buf = [''] |
---|
90 | if self.fileName: |
---|
91 | buf.append(self.fileName + ":") |
---|
92 | if self.line != -1: |
---|
93 | if not self.fileName: |
---|
94 | buf.append("line ") |
---|
95 | buf.append(str(self.line)) |
---|
96 | if self.column != -1: |
---|
97 | buf.append(":" + str(self.column)) |
---|
98 | buf.append(":") |
---|
99 | buf.append(" ") |
---|
100 | return str('').join(buf) |
---|
101 | |
---|
102 | __repr__ = __str__ |
---|
103 | |
---|
104 | |
---|
105 | class NoViableAltException(RecognitionException): |
---|
106 | |
---|
107 | def __init__(self, *args): |
---|
108 | RecognitionException.__init__(self, *args) |
---|
109 | self.token = None |
---|
110 | self.node = None |
---|
111 | if isinstance(args[0],AST): |
---|
112 | self.node = args[0] |
---|
113 | elif isinstance(args[0],Token): |
---|
114 | self.token = args[0] |
---|
115 | else: |
---|
116 | raise TypeError("NoViableAltException requires Token or AST argument") |
---|
117 | |
---|
118 | def __str__(self): |
---|
119 | if self.token: |
---|
120 | line = self.token.getLine() |
---|
121 | col = self.token.getColumn() |
---|
122 | text = self.token.getText() |
---|
123 | return "unexpected symbol at line %s (column %s): \"%s\"" % (line,col,text) |
---|
124 | if self.node == ASTNULL: |
---|
125 | return "unexpected end of subtree" |
---|
126 | assert self.node |
---|
127 | ### hackish, we assume that an AST contains method getText |
---|
128 | return "unexpected node: %s" % (self.node.getText()) |
---|
129 | |
---|
130 | __repr__ = __str__ |
---|
131 | |
---|
132 | |
---|
133 | class NoViableAltForCharException(RecognitionException): |
---|
134 | |
---|
135 | def __init__(self, *args): |
---|
136 | self.foundChar = None |
---|
137 | if len(args) == 2: |
---|
138 | self.foundChar = args[0] |
---|
139 | scanner = args[1] |
---|
140 | RecognitionException.__init__(self, "NoViableAlt", |
---|
141 | scanner.getFilename(), |
---|
142 | scanner.getLine(), |
---|
143 | scanner.getColumn()) |
---|
144 | elif len(args) == 4: |
---|
145 | self.foundChar = args[0] |
---|
146 | fileName = args[1] |
---|
147 | line = args[2] |
---|
148 | column = args[3] |
---|
149 | RecognitionException.__init__(self, "NoViableAlt", |
---|
150 | fileName, line, column) |
---|
151 | else: |
---|
152 | RecognitionException.__init__(self, "NoViableAlt", |
---|
153 | '', -1, -1) |
---|
154 | |
---|
155 | def __str__(self): |
---|
156 | mesg = "unexpected char: " |
---|
157 | if self.foundChar >= ' ' and self.foundChar <= '~': |
---|
158 | mesg += "'" + self.foundChar + "'" |
---|
159 | elif self.foundChar: |
---|
160 | mesg += "0x" + hex(ord(self.foundChar)).upper()[2:] |
---|
161 | else: |
---|
162 | mesg += "<None>" |
---|
163 | return mesg |
---|
164 | |
---|
165 | __repr__ = __str__ |
---|
166 | |
---|
167 | |
---|
168 | class SemanticException(RecognitionException): |
---|
169 | |
---|
170 | def __init__(self, *args): |
---|
171 | RecognitionException.__init__(self, *args) |
---|
172 | |
---|
173 | |
---|
174 | class MismatchedCharException(RecognitionException): |
---|
175 | |
---|
176 | NONE = 0 |
---|
177 | CHAR = 1 |
---|
178 | NOT_CHAR = 2 |
---|
179 | RANGE = 3 |
---|
180 | NOT_RANGE = 4 |
---|
181 | SET = 5 |
---|
182 | NOT_SET = 6 |
---|
183 | |
---|
184 | def __init__(self, *args): |
---|
185 | self.args = args |
---|
186 | if len(args) == 5: |
---|
187 | # Expected range / not range |
---|
188 | if args[3]: |
---|
189 | self.mismatchType = MismatchedCharException.NOT_RANGE |
---|
190 | else: |
---|
191 | self.mismatchType = MismatchedCharException.RANGE |
---|
192 | self.foundChar = args[0] |
---|
193 | self.expecting = args[1] |
---|
194 | self.upper = args[2] |
---|
195 | self.scanner = args[4] |
---|
196 | RecognitionException.__init__(self, "Mismatched char range", |
---|
197 | self.scanner.getFilename(), |
---|
198 | self.scanner.getLine(), |
---|
199 | self.scanner.getColumn()) |
---|
200 | elif len(args) == 4 and is_string_type(args[1]): |
---|
201 | # Expected char / not char |
---|
202 | if args[2]: |
---|
203 | self.mismatchType = MismatchedCharException.NOT_CHAR |
---|
204 | else: |
---|
205 | self.mismatchType = MismatchedCharException.CHAR |
---|
206 | self.foundChar = args[0] |
---|
207 | self.expecting = args[1] |
---|
208 | self.scanner = args[3] |
---|
209 | RecognitionException.__init__(self, "Mismatched char", |
---|
210 | self.scanner.getFilename(), |
---|
211 | self.scanner.getLine(), |
---|
212 | self.scanner.getColumn()) |
---|
213 | elif len(args) == 4 and isinstance(args[1], BitSet): |
---|
214 | # Expected BitSet / not BitSet |
---|
215 | if args[2]: |
---|
216 | self.mismatchType = MismatchedCharException.NOT_SET |
---|
217 | else: |
---|
218 | self.mismatchType = MismatchedCharException.SET |
---|
219 | self.foundChar = args[0] |
---|
220 | self.set = args[1] |
---|
221 | self.scanner = args[3] |
---|
222 | RecognitionException.__init__(self, "Mismatched char set", |
---|
223 | self.scanner.getFilename(), |
---|
224 | self.scanner.getLine(), |
---|
225 | self.scanner.getColumn()) |
---|
226 | else: |
---|
227 | self.mismatchType = MismatchedCharException.NONE |
---|
228 | RecognitionException.__init__(self, "Mismatched char") |
---|
229 | |
---|
230 | ## Append a char to the msg buffer. If special, |
---|
231 | # then show escaped version |
---|
232 | # |
---|
233 | def appendCharName(self, sb, c): |
---|
234 | if not c or c == 65535: |
---|
235 | # 65535 = (char) -1 = EOF |
---|
236 | sb.append("'<EOF>'") |
---|
237 | elif c == '\n': |
---|
238 | sb.append("'\\n'") |
---|
239 | elif c == '\r': |
---|
240 | sb.append("'\\r'"); |
---|
241 | elif c == '\t': |
---|
242 | sb.append("'\\t'") |
---|
243 | else: |
---|
244 | sb.append('\'' + c + '\'') |
---|
245 | |
---|
246 | ## |
---|
247 | # Returns an error message with line number/column information |
---|
248 | # |
---|
249 | def __str__(self): |
---|
250 | sb = [''] |
---|
251 | sb.append(RecognitionException.__str__(self)) |
---|
252 | |
---|
253 | if self.mismatchType == MismatchedCharException.CHAR: |
---|
254 | sb.append("expecting ") |
---|
255 | self.appendCharName(sb, self.expecting) |
---|
256 | sb.append(", found ") |
---|
257 | self.appendCharName(sb, self.foundChar) |
---|
258 | elif self.mismatchType == MismatchedCharException.NOT_CHAR: |
---|
259 | sb.append("expecting anything but '") |
---|
260 | self.appendCharName(sb, self.expecting) |
---|
261 | sb.append("'; got it anyway") |
---|
262 | elif self.mismatchType in [MismatchedCharException.RANGE, MismatchedCharException.NOT_RANGE]: |
---|
263 | sb.append("expecting char ") |
---|
264 | if self.mismatchType == MismatchedCharException.NOT_RANGE: |
---|
265 | sb.append("NOT ") |
---|
266 | sb.append("in range: ") |
---|
267 | appendCharName(sb, self.expecting) |
---|
268 | sb.append("..") |
---|
269 | appendCharName(sb, self.upper) |
---|
270 | sb.append(", found ") |
---|
271 | appendCharName(sb, self.foundChar) |
---|
272 | elif self.mismatchType in [MismatchedCharException.SET, MismatchedCharException.NOT_SET]: |
---|
273 | sb.append("expecting ") |
---|
274 | if self.mismatchType == MismatchedCharException.NOT_SET: |
---|
275 | sb.append("NOT ") |
---|
276 | sb.append("one of (") |
---|
277 | for i in range(len(self.set)): |
---|
278 | self.appendCharName(sb, self.set[i]) |
---|
279 | sb.append("), found ") |
---|
280 | self.appendCharName(sb, self.foundChar) |
---|
281 | |
---|
282 | return str().join(sb).strip() |
---|
283 | |
---|
284 | __repr__ = __str__ |
---|
285 | |
---|
286 | |
---|
287 | class MismatchedTokenException(RecognitionException): |
---|
288 | |
---|
289 | NONE = 0 |
---|
290 | TOKEN = 1 |
---|
291 | NOT_TOKEN = 2 |
---|
292 | RANGE = 3 |
---|
293 | NOT_RANGE = 4 |
---|
294 | SET = 5 |
---|
295 | NOT_SET = 6 |
---|
296 | |
---|
297 | def __init__(self, *args): |
---|
298 | self.args = args |
---|
299 | self.tokenNames = [] |
---|
300 | self.token = None |
---|
301 | self.tokenText = '' |
---|
302 | self.node = None |
---|
303 | if len(args) == 6: |
---|
304 | # Expected range / not range |
---|
305 | if args[3]: |
---|
306 | self.mismatchType = MismatchedTokenException.NOT_RANGE |
---|
307 | else: |
---|
308 | self.mismatchType = MismatchedTokenException.RANGE |
---|
309 | self.tokenNames = args[0] |
---|
310 | self.expecting = args[2] |
---|
311 | self.upper = args[3] |
---|
312 | self.fileName = args[5] |
---|
313 | |
---|
314 | elif len(args) == 4 and isinstance(args[2], int): |
---|
315 | # Expected token / not token |
---|
316 | if args[3]: |
---|
317 | self.mismatchType = MismatchedTokenException.NOT_TOKEN |
---|
318 | else: |
---|
319 | self.mismatchType = MismatchedTokenException.TOKEN |
---|
320 | self.tokenNames = args[0] |
---|
321 | self.expecting = args[2] |
---|
322 | |
---|
323 | elif len(args) == 4 and isinstance(args[2], BitSet): |
---|
324 | # Expected BitSet / not BitSet |
---|
325 | if args[3]: |
---|
326 | self.mismatchType = MismatchedTokenException.NOT_SET |
---|
327 | else: |
---|
328 | self.mismatchType = MismatchedTokenException.SET |
---|
329 | self.tokenNames = args[0] |
---|
330 | self.set = args[2] |
---|
331 | |
---|
332 | else: |
---|
333 | self.mismatchType = MismatchedTokenException.NONE |
---|
334 | RecognitionException.__init__(self, "Mismatched Token: expecting any AST node", "<AST>", -1, -1) |
---|
335 | |
---|
336 | if len(args) >= 2: |
---|
337 | if isinstance(args[1],Token): |
---|
338 | self.token = args[1] |
---|
339 | self.tokenText = self.token.getText() |
---|
340 | RecognitionException.__init__(self, "Mismatched Token", |
---|
341 | self.fileName, |
---|
342 | self.token.getLine(), |
---|
343 | self.token.getColumn()) |
---|
344 | elif isinstance(args[1],AST): |
---|
345 | self.node = args[1] |
---|
346 | self.tokenText = str(self.node) |
---|
347 | RecognitionException.__init__(self, "Mismatched Token", |
---|
348 | "<AST>", |
---|
349 | self.node.getLine(), |
---|
350 | self.node.getColumn()) |
---|
351 | else: |
---|
352 | self.tokenText = "<empty tree>" |
---|
353 | RecognitionException.__init__(self, "Mismatched Token", |
---|
354 | "<AST>", -1, -1) |
---|
355 | |
---|
356 | def appendTokenName(self, sb, tokenType): |
---|
357 | if tokenType == INVALID_TYPE: |
---|
358 | sb.append("<Set of tokens>") |
---|
359 | elif tokenType < 0 or tokenType >= len(self.tokenNames): |
---|
360 | sb.append("<" + str(tokenType) + ">") |
---|
361 | else: |
---|
362 | sb.append(self.tokenNames[tokenType]) |
---|
363 | |
---|
364 | ## |
---|
365 | # Returns an error message with line number/column information |
---|
366 | # |
---|
367 | def __str__(self): |
---|
368 | sb = [''] |
---|
369 | sb.append(RecognitionException.__str__(self)) |
---|
370 | |
---|
371 | if self.mismatchType == MismatchedTokenException.TOKEN: |
---|
372 | sb.append("expecting ") |
---|
373 | self.appendTokenName(sb, self.expecting) |
---|
374 | sb.append(", found " + self.tokenText) |
---|
375 | elif self.mismatchType == MismatchedTokenException.NOT_TOKEN: |
---|
376 | sb.append("expecting anything but '") |
---|
377 | self.appendTokenName(sb, self.expecting) |
---|
378 | sb.append("'; got it anyway") |
---|
379 | elif self.mismatchType in [MismatchedTokenException.RANGE, MismatchedTokenException.NOT_RANGE]: |
---|
380 | sb.append("expecting token ") |
---|
381 | if self.mismatchType == MismatchedTokenException.NOT_RANGE: |
---|
382 | sb.append("NOT ") |
---|
383 | sb.append("in range: ") |
---|
384 | appendTokenName(sb, self.expecting) |
---|
385 | sb.append("..") |
---|
386 | appendTokenName(sb, self.upper) |
---|
387 | sb.append(", found " + self.tokenText) |
---|
388 | elif self.mismatchType in [MismatchedTokenException.SET, MismatchedTokenException.NOT_SET]: |
---|
389 | sb.append("expecting ") |
---|
390 | if self.mismatchType == MismatchedTokenException.NOT_SET: |
---|
391 | sb.append("NOT ") |
---|
392 | sb.append("one of (") |
---|
393 | for i in range(len(self.set)): |
---|
394 | self.appendTokenName(sb, self.set[i]) |
---|
395 | sb.append("), found " + self.tokenText) |
---|
396 | |
---|
397 | return str().join(sb).strip() |
---|
398 | |
---|
399 | __repr__ = __str__ |
---|
400 | |
---|
401 | |
---|
402 | class TokenStreamException(ANTLRException): |
---|
403 | |
---|
404 | def __init__(self, *args): |
---|
405 | ANTLRException.__init__(self, *args) |
---|
406 | |
---|
407 | |
---|
408 | # Wraps an Exception in a TokenStreamException |
---|
409 | class TokenStreamIOException(TokenStreamException): |
---|
410 | |
---|
411 | def __init__(self, *args): |
---|
412 | if args and isinstance(args[0], Exception): |
---|
413 | io = args[0] |
---|
414 | TokenStreamException.__init__(self, str(io)) |
---|
415 | self.io = io |
---|
416 | else: |
---|
417 | TokenStreamException.__init__(self, *args) |
---|
418 | self.io = self |
---|
419 | |
---|
420 | |
---|
421 | # Wraps a RecognitionException in a TokenStreamException |
---|
422 | class TokenStreamRecognitionException(TokenStreamException): |
---|
423 | |
---|
424 | def __init__(self, *args): |
---|
425 | if args and isinstance(args[0], RecognitionException): |
---|
426 | recog = args[0] |
---|
427 | TokenStreamException.__init__(self, str(recog)) |
---|
428 | self.recog = recog |
---|
429 | else: |
---|
430 | raise TypeError("TokenStreamRecognitionException requires RecognitionException argument") |
---|
431 | |
---|
432 | def __str__(self): |
---|
433 | return str(self.recog) |
---|
434 | |
---|
435 | __repr__ = __str__ |
---|
436 | |
---|
437 | |
---|
438 | class TokenStreamRetryException(TokenStreamException): |
---|
439 | |
---|
440 | def __init__(self, *args): |
---|
441 | TokenStreamException.__init__(self, *args) |
---|
442 | |
---|
443 | |
---|
444 | class CharStreamException(ANTLRException): |
---|
445 | |
---|
446 | def __init__(self, *args): |
---|
447 | ANTLRException.__init__(self, *args) |
---|
448 | |
---|
449 | |
---|
450 | # Wraps an Exception in a CharStreamException |
---|
451 | class CharStreamIOException(CharStreamException): |
---|
452 | |
---|
453 | def __init__(self, *args): |
---|
454 | if args and isinstance(args[0], Exception): |
---|
455 | io = args[0] |
---|
456 | CharStreamException.__init__(self, str(io)) |
---|
457 | self.io = io |
---|
458 | else: |
---|
459 | CharStreamException.__init__(self, *args) |
---|
460 | self.io = self |
---|
461 | |
---|
462 | |
---|
463 | class TryAgain(Exception): |
---|
464 | pass |
---|
465 | |
---|
466 | |
---|
467 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
468 | ### Token ### |
---|
469 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
470 | |
---|
471 | class Token(object): |
---|
472 | SKIP = -1 |
---|
473 | INVALID_TYPE = 0 |
---|
474 | EOF_TYPE = 1 |
---|
475 | EOF = 1 |
---|
476 | NULL_TREE_LOOKAHEAD = 3 |
---|
477 | MIN_USER_TYPE = 4 |
---|
478 | |
---|
479 | def __init__(self,**argv): |
---|
480 | try: |
---|
481 | self.type = argv['type'] |
---|
482 | except: |
---|
483 | self.type = INVALID_TYPE |
---|
484 | try: |
---|
485 | self.text = argv['text'] |
---|
486 | except: |
---|
487 | self.text = "<no text>" |
---|
488 | |
---|
489 | def isEOF(self): |
---|
490 | return (self.type == EOF_TYPE) |
---|
491 | |
---|
492 | def getColumn(self): |
---|
493 | return 0 |
---|
494 | |
---|
495 | def getLine(self): |
---|
496 | return 0 |
---|
497 | |
---|
498 | def getFilename(self): |
---|
499 | return None |
---|
500 | |
---|
501 | def setFilename(self,name): |
---|
502 | return self |
---|
503 | |
---|
504 | def getText(self): |
---|
505 | return "<no text>" |
---|
506 | |
---|
507 | def setText(self,text): |
---|
508 | if is_string_type(text): |
---|
509 | pass |
---|
510 | else: |
---|
511 | raise TypeError("Token.setText requires string argument") |
---|
512 | return self |
---|
513 | |
---|
514 | def setColumn(self,column): |
---|
515 | return self |
---|
516 | |
---|
517 | def setLine(self,line): |
---|
518 | return self |
---|
519 | |
---|
520 | def getType(self): |
---|
521 | return self.type |
---|
522 | |
---|
523 | def setType(self,type): |
---|
524 | if isinstance(type,int): |
---|
525 | self.type = type |
---|
526 | else: |
---|
527 | raise TypeError("Token.setType requires integer argument") |
---|
528 | return self |
---|
529 | |
---|
530 | def toString(self): |
---|
531 | ## not optimal |
---|
532 | type_ = self.type |
---|
533 | if type_ == 3: |
---|
534 | tval = 'NULL_TREE_LOOKAHEAD' |
---|
535 | elif type_ == 1: |
---|
536 | tval = 'EOF_TYPE' |
---|
537 | elif type_ == 0: |
---|
538 | tval = 'INVALID_TYPE' |
---|
539 | elif type_ == -1: |
---|
540 | tval = 'SKIP' |
---|
541 | else: |
---|
542 | tval = type_ |
---|
543 | return '["%s",<%s>]' % (self.getText(),tval) |
---|
544 | |
---|
545 | __str__ = toString |
---|
546 | __repr__ = toString |
---|
547 | |
---|
548 | ### static attribute .. |
---|
549 | Token.badToken = Token( type=INVALID_TYPE, text="<no text>") |
---|
550 | |
---|
551 | if __name__ == "__main__": |
---|
552 | print "testing .." |
---|
553 | T = Token.badToken |
---|
554 | print T |
---|
555 | |
---|
556 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
557 | ### CommonToken ### |
---|
558 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
559 | |
---|
560 | class CommonToken(Token): |
---|
561 | |
---|
562 | def __init__(self,**argv): |
---|
563 | Token.__init__(self,**argv) |
---|
564 | self.line = 0 |
---|
565 | self.col = 0 |
---|
566 | try: |
---|
567 | self.line = argv['line'] |
---|
568 | except: |
---|
569 | pass |
---|
570 | try: |
---|
571 | self.col = argv['col'] |
---|
572 | except: |
---|
573 | pass |
---|
574 | |
---|
575 | def getLine(self): |
---|
576 | return self.line |
---|
577 | |
---|
578 | def getText(self): |
---|
579 | return self.text |
---|
580 | |
---|
581 | def getColumn(self): |
---|
582 | return self.col |
---|
583 | |
---|
584 | def setLine(self,line): |
---|
585 | self.line = line |
---|
586 | return self |
---|
587 | |
---|
588 | def setText(self,text): |
---|
589 | self.text = text |
---|
590 | return self |
---|
591 | |
---|
592 | def setColumn(self,col): |
---|
593 | self.col = col |
---|
594 | return self |
---|
595 | |
---|
596 | def toString(self): |
---|
597 | ## not optimal |
---|
598 | type_ = self.type |
---|
599 | if type_ == 3: |
---|
600 | tval = 'NULL_TREE_LOOKAHEAD' |
---|
601 | elif type_ == 1: |
---|
602 | tval = 'EOF_TYPE' |
---|
603 | elif type_ == 0: |
---|
604 | tval = 'INVALID_TYPE' |
---|
605 | elif type_ == -1: |
---|
606 | tval = 'SKIP' |
---|
607 | else: |
---|
608 | tval = type_ |
---|
609 | d = { |
---|
610 | 'text' : self.text, |
---|
611 | 'type' : tval, |
---|
612 | 'line' : self.line, |
---|
613 | 'colm' : self.col |
---|
614 | } |
---|
615 | |
---|
616 | fmt = '["%(text)s",<%(type)s>,line=%(line)s,col=%(colm)s]' |
---|
617 | return fmt % d |
---|
618 | |
---|
619 | __str__ = toString |
---|
620 | __repr__ = toString |
---|
621 | |
---|
622 | |
---|
623 | if __name__ == '__main__' : |
---|
624 | T = CommonToken() |
---|
625 | print T |
---|
626 | T = CommonToken(col=15,line=1,text="some text", type=5) |
---|
627 | print T |
---|
628 | T = CommonToken() |
---|
629 | T.setLine(1).setColumn(15).setText("some text").setType(5) |
---|
630 | print T |
---|
631 | print T.getLine() |
---|
632 | print T.getColumn() |
---|
633 | print T.getText() |
---|
634 | print T.getType() |
---|
635 | |
---|
636 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
637 | ### CommonHiddenStreamToken ### |
---|
638 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
639 | |
---|
640 | class CommonHiddenStreamToken(CommonToken): |
---|
641 | def __init__(self,*args): |
---|
642 | CommonToken.__init__(self,*args) |
---|
643 | self.hiddenBefore = None |
---|
644 | self.hiddenAfter = None |
---|
645 | |
---|
646 | def getHiddenAfter(self): |
---|
647 | return self.hiddenAfter |
---|
648 | |
---|
649 | def getHiddenBefore(self): |
---|
650 | return self.hiddenBefore |
---|
651 | |
---|
652 | def setHiddenAfter(self,t): |
---|
653 | self.hiddenAfter = t |
---|
654 | |
---|
655 | def setHiddenBefore(self, t): |
---|
656 | self.hiddenBefore = t |
---|
657 | |
---|
658 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
659 | ### Queue ### |
---|
660 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
661 | |
---|
662 | ## Shall be a circular buffer on tokens .. |
---|
663 | class Queue(object): |
---|
664 | |
---|
665 | def __init__(self): |
---|
666 | self.buffer = [] # empty list |
---|
667 | |
---|
668 | def append(self,item): |
---|
669 | self.buffer.append(item) |
---|
670 | |
---|
671 | def elementAt(self,index): |
---|
672 | return self.buffer[index] |
---|
673 | |
---|
674 | def reset(self): |
---|
675 | self.buffer = [] |
---|
676 | |
---|
677 | def removeFirst(self): |
---|
678 | self.buffer.pop(0) |
---|
679 | |
---|
680 | def length(self): |
---|
681 | return len(self.buffer) |
---|
682 | |
---|
683 | def __str__(self): |
---|
684 | return str(self.buffer) |
---|
685 | |
---|
686 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
687 | ### InputBuffer ### |
---|
688 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
689 | |
---|
690 | class InputBuffer(object): |
---|
691 | def __init__(self): |
---|
692 | self.nMarkers = 0 |
---|
693 | self.markerOffset = 0 |
---|
694 | self.numToConsume = 0 |
---|
695 | self.queue = Queue() |
---|
696 | |
---|
697 | def __str__(self): |
---|
698 | return "(%s,%s,%s,%s)" % ( |
---|
699 | self.nMarkers, |
---|
700 | self.markerOffset, |
---|
701 | self.numToConsume, |
---|
702 | self.queue) |
---|
703 | |
---|
704 | def __repr__(self): |
---|
705 | return str(self) |
---|
706 | |
---|
707 | def commit(self): |
---|
708 | self.nMarkers -= 1 |
---|
709 | |
---|
710 | def consume(self) : |
---|
711 | self.numToConsume += 1 |
---|
712 | |
---|
713 | ## probably better to return a list of items |
---|
714 | ## because of unicode. Or return a unicode |
---|
715 | ## string .. |
---|
716 | def getLAChars(self) : |
---|
717 | i = self.markerOffset |
---|
718 | n = self.queue.length() |
---|
719 | s = '' |
---|
720 | while i<n: |
---|
721 | s += self.queue.elementAt(i) |
---|
722 | return s |
---|
723 | |
---|
724 | ## probably better to return a list of items |
---|
725 | ## because of unicode chars |
---|
726 | def getMarkedChars(self) : |
---|
727 | s = '' |
---|
728 | i = 0 |
---|
729 | n = self.markerOffset |
---|
730 | while i<n: |
---|
731 | s += self.queue.elementAt(i) |
---|
732 | return s |
---|
733 | |
---|
734 | def isMarked(self) : |
---|
735 | return self.nMarkers != 0 |
---|
736 | |
---|
737 | def fill(self,k): |
---|
738 | ### abstract method |
---|
739 | raise NotImplementedError() |
---|
740 | |
---|
741 | def LA(self,k) : |
---|
742 | self.fill(k) |
---|
743 | return self.queue.elementAt(self.markerOffset + k - 1) |
---|
744 | |
---|
745 | def mark(self) : |
---|
746 | self.syncConsume() |
---|
747 | self.nMarkers += 1 |
---|
748 | return self.markerOffset |
---|
749 | |
---|
750 | def rewind(self,mark) : |
---|
751 | self.syncConsume() |
---|
752 | self.markerOffset = mark |
---|
753 | self.nMarkers -= 1 |
---|
754 | |
---|
755 | def reset(self) : |
---|
756 | self.nMarkers = 0 |
---|
757 | self.markerOffset = 0 |
---|
758 | self.numToConsume = 0 |
---|
759 | self.queue.reset() |
---|
760 | |
---|
761 | def syncConsume(self) : |
---|
762 | while self.numToConsume > 0: |
---|
763 | if self.nMarkers > 0: |
---|
764 | # guess mode -- leave leading characters and bump offset. |
---|
765 | self.markerOffset += 1 |
---|
766 | else: |
---|
767 | # normal mode -- remove first character |
---|
768 | self.queue.removeFirst() |
---|
769 | self.numToConsume -= 1 |
---|
770 | |
---|
771 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
772 | ### CharBuffer ### |
---|
773 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
774 | |
---|
775 | class CharBuffer(InputBuffer): |
---|
776 | def __init__(self,reader): |
---|
777 | ##assert isinstance(reader,file) |
---|
778 | super(CharBuffer,self).__init__() |
---|
779 | ## a reader is supposed to be anything that has |
---|
780 | ## a method 'read(int)'. |
---|
781 | self.input = reader |
---|
782 | |
---|
783 | def __str__(self): |
---|
784 | base = super(CharBuffer,self).__str__() |
---|
785 | return "CharBuffer{%s,%s" % (base,str(input)) |
---|
786 | |
---|
787 | def fill(self,amount): |
---|
788 | try: |
---|
789 | self.syncConsume() |
---|
790 | while self.queue.length() < (amount + self.markerOffset) : |
---|
791 | ## retrieve just one char - what happend at end |
---|
792 | ## of input? |
---|
793 | c = self.input.read(1) |
---|
794 | ### python's behaviour is to return the empty string on |
---|
795 | ### EOF, ie. no exception whatsoever is thrown. An empty |
---|
796 | ### python string has the nice feature that it is of |
---|
797 | ### type 'str' and "not ''" would return true. Contrary, |
---|
798 | ### one can't do this: '' in 'abc'. This should return |
---|
799 | ### false, but all we get is then a TypeError as an |
---|
800 | ### empty string is not a character. |
---|
801 | |
---|
802 | ### Let's assure then that we have either seen a |
---|
803 | ### character or an empty string (EOF). |
---|
804 | assert len(c) == 0 or len(c) == 1 |
---|
805 | |
---|
806 | ### And it shall be of type string (ASCII or UNICODE). |
---|
807 | assert is_string_type(c) |
---|
808 | |
---|
809 | ### Just append EOF char to buffer. Note that buffer may |
---|
810 | ### contain then just more than one EOF char .. |
---|
811 | |
---|
812 | ### use unicode chars instead of ASCII .. |
---|
813 | self.queue.append(c) |
---|
814 | except Exception,e: |
---|
815 | raise CharStreamIOException(e) |
---|
816 | ##except: # (mk) Cannot happen ... |
---|
817 | ##error ("unexpected exception caught ..") |
---|
818 | ##assert 0 |
---|
819 | |
---|
820 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
821 | ### LexerSharedInputState ### |
---|
822 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
823 | |
---|
824 | class LexerSharedInputState(object): |
---|
825 | def __init__(self,ibuf): |
---|
826 | assert isinstance(ibuf,InputBuffer) |
---|
827 | self.input = ibuf |
---|
828 | self.column = 1 |
---|
829 | self.line = 1 |
---|
830 | self.tokenStartColumn = 1 |
---|
831 | self.tokenStartLine = 1 |
---|
832 | self.guessing = 0 |
---|
833 | self.filename = None |
---|
834 | |
---|
835 | def reset(self): |
---|
836 | self.column = 1 |
---|
837 | self.line = 1 |
---|
838 | self.tokenStartColumn = 1 |
---|
839 | self.tokenStartLine = 1 |
---|
840 | self.guessing = 0 |
---|
841 | self.filename = None |
---|
842 | self.input.reset() |
---|
843 | |
---|
844 | def LA(self,k): |
---|
845 | return self.input.LA(k) |
---|
846 | |
---|
847 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
848 | ### TokenStream ### |
---|
849 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
850 | |
---|
851 | class TokenStream(object): |
---|
852 | def nextToken(self): |
---|
853 | pass |
---|
854 | |
---|
855 | def __iter__(self): |
---|
856 | return TokenStreamIterator(self) |
---|
857 | |
---|
858 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
859 | ### TokenStreamIterator ### |
---|
860 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
861 | |
---|
862 | class TokenStreamIterator(object): |
---|
863 | def __init__(self,inst): |
---|
864 | if isinstance(inst,TokenStream): |
---|
865 | self.inst = inst |
---|
866 | return |
---|
867 | raise TypeError("TokenStreamIterator requires TokenStream object") |
---|
868 | |
---|
869 | def next(self): |
---|
870 | assert self.inst |
---|
871 | item = self.inst.nextToken() |
---|
872 | if not item or item.isEOF(): |
---|
873 | raise StopIteration() |
---|
874 | return item |
---|
875 | |
---|
876 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
877 | ### TokenStreamSelector ### |
---|
878 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
879 | |
---|
880 | class TokenStreamSelector(TokenStream): |
---|
881 | |
---|
882 | def __init__(self): |
---|
883 | self._input = None |
---|
884 | self._stmap = {} |
---|
885 | self._stack = [] |
---|
886 | |
---|
887 | def addInputStream(self,stream,key): |
---|
888 | self._stmap[key] = stream |
---|
889 | |
---|
890 | def getCurrentStream(self): |
---|
891 | return self._input |
---|
892 | |
---|
893 | def getStream(self,sname): |
---|
894 | try: |
---|
895 | stream = self._stmap[sname] |
---|
896 | except: |
---|
897 | raise ValueError("TokenStream " + sname + " not found"); |
---|
898 | return stream; |
---|
899 | |
---|
900 | def nextToken(self): |
---|
901 | while 1: |
---|
902 | try: |
---|
903 | return self._input.nextToken() |
---|
904 | except TokenStreamRetryException,r: |
---|
905 | ### just retry "forever" |
---|
906 | pass |
---|
907 | |
---|
908 | def pop(self): |
---|
909 | stream = self._stack.pop(); |
---|
910 | self.select(stream); |
---|
911 | return stream; |
---|
912 | |
---|
913 | def push(self,arg): |
---|
914 | self._stack.append(self._input); |
---|
915 | self.select(arg) |
---|
916 | |
---|
917 | def retry(self): |
---|
918 | raise TokenStreamRetryException() |
---|
919 | |
---|
920 | def select(self,arg): |
---|
921 | if isinstance(arg,TokenStream): |
---|
922 | self._input = arg |
---|
923 | return |
---|
924 | if is_string_type(arg): |
---|
925 | self._input = self.getStream(arg) |
---|
926 | return |
---|
927 | raise TypeError("TokenStreamSelector.select requires " + |
---|
928 | "TokenStream or string argument") |
---|
929 | |
---|
930 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
931 | ### TokenStreamBasicFilter ### |
---|
932 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
933 | |
---|
934 | class TokenStreamBasicFilter(TokenStream): |
---|
935 | |
---|
936 | def __init__(self,input): |
---|
937 | |
---|
938 | self.input = input; |
---|
939 | self.discardMask = BitSet() |
---|
940 | |
---|
941 | def discard(self,arg): |
---|
942 | if isinstance(arg,int): |
---|
943 | self.discardMask.add(arg) |
---|
944 | return |
---|
945 | if isinstance(arg,BitSet): |
---|
946 | self.discardMark = arg |
---|
947 | return |
---|
948 | raise TypeError("TokenStreamBasicFilter.discard requires" + |
---|
949 | "integer or BitSet argument") |
---|
950 | |
---|
951 | def nextToken(self): |
---|
952 | tok = self.input.nextToken() |
---|
953 | while tok and self.discardMask.member(tok.getType()): |
---|
954 | tok = self.input.nextToken() |
---|
955 | return tok |
---|
956 | |
---|
957 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
958 | ### TokenStreamHiddenTokenFilter ### |
---|
959 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
960 | |
---|
961 | class TokenStreamHiddenTokenFilter(TokenStreamBasicFilter): |
---|
962 | |
---|
963 | def __init__(self,input): |
---|
964 | TokenStreamBasicFilter.__init__(self,input) |
---|
965 | self.hideMask = BitSet() |
---|
966 | self.nextMonitoredToken = None |
---|
967 | self.lastHiddenToken = None |
---|
968 | self.firstHidden = None |
---|
969 | |
---|
970 | def consume(self): |
---|
971 | self.nextMonitoredToken = self.input.nextToken() |
---|
972 | |
---|
973 | def consumeFirst(self): |
---|
974 | self.consume() |
---|
975 | |
---|
976 | p = None; |
---|
977 | while self.hideMask.member(self.LA(1).getType()) or \ |
---|
978 | self.discardMask.member(self.LA(1).getType()): |
---|
979 | if self.hideMask.member(self.LA(1).getType()): |
---|
980 | if not p: |
---|
981 | p = self.LA(1) |
---|
982 | else: |
---|
983 | p.setHiddenAfter(self.LA(1)) |
---|
984 | self.LA(1).setHiddenBefore(p) |
---|
985 | p = self.LA(1) |
---|
986 | self.lastHiddenToken = p |
---|
987 | if not self.firstHidden: |
---|
988 | self.firstHidden = p |
---|
989 | self.consume() |
---|
990 | |
---|
991 | def getDiscardMask(self): |
---|
992 | return self.discardMask |
---|
993 | |
---|
994 | def getHiddenAfter(self,t): |
---|
995 | return t.getHiddenAfter() |
---|
996 | |
---|
997 | def getHiddenBefore(self,t): |
---|
998 | return t.getHiddenBefore() |
---|
999 | |
---|
1000 | def getHideMask(self): |
---|
1001 | return self.hideMask |
---|
1002 | |
---|
1003 | def getInitialHiddenToken(self): |
---|
1004 | return self.firstHidden |
---|
1005 | |
---|
1006 | def hide(self,m): |
---|
1007 | if isinstance(m,int): |
---|
1008 | self.hideMask.add(m) |
---|
1009 | return |
---|
1010 | if isinstance(m.BitMask): |
---|
1011 | self.hideMask = m |
---|
1012 | return |
---|
1013 | |
---|
1014 | def LA(self,i): |
---|
1015 | return self.nextMonitoredToken |
---|
1016 | |
---|
1017 | def nextToken(self): |
---|
1018 | if not self.LA(1): |
---|
1019 | self.consumeFirst() |
---|
1020 | |
---|
1021 | monitored = self.LA(1) |
---|
1022 | |
---|
1023 | monitored.setHiddenBefore(self.lastHiddenToken) |
---|
1024 | self.lastHiddenToken = None |
---|
1025 | |
---|
1026 | self.consume() |
---|
1027 | p = monitored |
---|
1028 | |
---|
1029 | while self.hideMask.member(self.LA(1).getType()) or \ |
---|
1030 | self.discardMask.member(self.LA(1).getType()): |
---|
1031 | if self.hideMask.member(self.LA(1).getType()): |
---|
1032 | p.setHiddenAfter(self.LA(1)) |
---|
1033 | if p != monitored: |
---|
1034 | self.LA(1).setHiddenBefore(p) |
---|
1035 | p = self.lastHiddenToken = self.LA(1) |
---|
1036 | self.consume() |
---|
1037 | return monitored |
---|
1038 | |
---|
1039 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1040 | ### StringBuffer ### |
---|
1041 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1042 | |
---|
1043 | class StringBuffer: |
---|
1044 | def __init__(self,string=None): |
---|
1045 | if string: |
---|
1046 | self.text = list(string) |
---|
1047 | else: |
---|
1048 | self.text = [] |
---|
1049 | |
---|
1050 | def setLength(self,sz): |
---|
1051 | if not sz : |
---|
1052 | self.text = [] |
---|
1053 | return |
---|
1054 | assert sz>0 |
---|
1055 | if sz >= self.length(): |
---|
1056 | return |
---|
1057 | ### just reset to empty buffer |
---|
1058 | self.text = self.text[0:sz] |
---|
1059 | |
---|
1060 | def length(self): |
---|
1061 | return len(self.text) |
---|
1062 | |
---|
1063 | def append(self,c): |
---|
1064 | self.text.append(c) |
---|
1065 | |
---|
1066 | ### return buffer as string. Arg 'a' is used as index |
---|
1067 | ## into the buffer and 2nd argument shall be the length. |
---|
1068 | ## If 2nd args is absent, we return chars till end of |
---|
1069 | ## buffer starting with 'a'. |
---|
1070 | def getString(self,a=None,length=None): |
---|
1071 | if not a : |
---|
1072 | a = 0 |
---|
1073 | assert a>=0 |
---|
1074 | if a>= len(self.text) : |
---|
1075 | return "" |
---|
1076 | |
---|
1077 | if not length: |
---|
1078 | ## no second argument |
---|
1079 | L = self.text[a:] |
---|
1080 | else: |
---|
1081 | assert (a+length) <= len(self.text) |
---|
1082 | b = a + length |
---|
1083 | L = self.text[a:b] |
---|
1084 | s = "" |
---|
1085 | for x in L : s += x |
---|
1086 | return s |
---|
1087 | |
---|
1088 | toString = getString ## alias |
---|
1089 | |
---|
1090 | def __str__(self): |
---|
1091 | return str(self.text) |
---|
1092 | |
---|
1093 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1094 | ### Reader ### |
---|
1095 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1096 | |
---|
1097 | ## When reading Japanese chars, it happens that a stream returns a |
---|
1098 | ## 'char' of length 2. This looks like a bug in the appropriate |
---|
1099 | ## codecs - but I'm rather unsure about this. Anyway, if this is |
---|
1100 | ## the case, I'm going to split this string into a list of chars |
---|
1101 | ## and put them on hold, ie. on a buffer. Next time when called |
---|
1102 | ## we read from buffer until buffer is empty. |
---|
1103 | ## wh: nov, 25th -> problem does not appear in Python 2.4.0.c1. |
---|
1104 | |
---|
1105 | class Reader(object): |
---|
1106 | def __init__(self,stream): |
---|
1107 | self.cin = stream |
---|
1108 | self.buf = [] |
---|
1109 | |
---|
1110 | def read(self,num): |
---|
1111 | assert num==1 |
---|
1112 | |
---|
1113 | if len(self.buf): |
---|
1114 | return self.buf.pop() |
---|
1115 | |
---|
1116 | ## Read a char - this may return a string. |
---|
1117 | ## Is this a bug in codecs/Python? |
---|
1118 | c = self.cin.read(1) |
---|
1119 | |
---|
1120 | if not c or len(c)==1: |
---|
1121 | return c |
---|
1122 | |
---|
1123 | L = list(c) |
---|
1124 | L.reverse() |
---|
1125 | for x in L: |
---|
1126 | self.buf.append(x) |
---|
1127 | |
---|
1128 | ## read one char .. |
---|
1129 | return self.read(1) |
---|
1130 | |
---|
1131 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1132 | ### CharScanner ### |
---|
1133 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1134 | |
---|
1135 | class CharScanner(TokenStream): |
---|
1136 | ## class members |
---|
1137 | NO_CHAR = 0 |
---|
1138 | EOF_CHAR = '' ### EOF shall be the empty string. |
---|
1139 | |
---|
1140 | def __init__(self, *argv, **kwargs): |
---|
1141 | super(CharScanner, self).__init__() |
---|
1142 | self.saveConsumedInput = True |
---|
1143 | self.tokenClass = None |
---|
1144 | self.caseSensitive = True |
---|
1145 | self.caseSensitiveLiterals = True |
---|
1146 | self.literals = None |
---|
1147 | self.tabsize = 8 |
---|
1148 | self._returnToken = None |
---|
1149 | self.commitToPath = False |
---|
1150 | self.traceDepth = 0 |
---|
1151 | self.text = StringBuffer() |
---|
1152 | self.hashString = hash(self) |
---|
1153 | self.setTokenObjectClass(CommonToken) |
---|
1154 | self.setInput(*argv) |
---|
1155 | |
---|
1156 | def __iter__(self): |
---|
1157 | return CharScannerIterator(self) |
---|
1158 | |
---|
1159 | def setInput(self,*argv): |
---|
1160 | ## case 1: |
---|
1161 | ## if there's no arg we default to read from |
---|
1162 | ## standard input |
---|
1163 | if not argv: |
---|
1164 | import sys |
---|
1165 | self.setInput(sys.stdin) |
---|
1166 | return |
---|
1167 | |
---|
1168 | ## get 1st argument |
---|
1169 | arg1 = argv[0] |
---|
1170 | |
---|
1171 | ## case 2: |
---|
1172 | ## if arg1 is a string, we assume it's a file name |
---|
1173 | ## and open a stream using 2nd argument as open |
---|
1174 | ## mode. If there's no 2nd argument we fall back to |
---|
1175 | ## mode '+rb'. |
---|
1176 | if is_string_type(arg1): |
---|
1177 | f = open(arg1,"rb") |
---|
1178 | self.setInput(f) |
---|
1179 | self.setFilename(arg1) |
---|
1180 | return |
---|
1181 | |
---|
1182 | ## case 3: |
---|
1183 | ## if arg1 is a file we wrap it by a char buffer ( |
---|
1184 | ## some additional checks?? No, can't do this in |
---|
1185 | ## general). |
---|
1186 | if isinstance(arg1,file): |
---|
1187 | self.setInput(CharBuffer(arg1)) |
---|
1188 | return |
---|
1189 | |
---|
1190 | ## case 4: |
---|
1191 | ## if arg1 is of type SharedLexerInputState we use |
---|
1192 | ## argument as is. |
---|
1193 | if isinstance(arg1,LexerSharedInputState): |
---|
1194 | self.inputState = arg1 |
---|
1195 | return |
---|
1196 | |
---|
1197 | ## case 5: |
---|
1198 | ## check whether argument type is of type input |
---|
1199 | ## buffer. If so create a SharedLexerInputState and |
---|
1200 | ## go ahead. |
---|
1201 | if isinstance(arg1,InputBuffer): |
---|
1202 | self.setInput(LexerSharedInputState(arg1)) |
---|
1203 | return |
---|
1204 | |
---|
1205 | ## case 6: |
---|
1206 | ## check whether argument type has a method read(int) |
---|
1207 | ## If so create CharBuffer ... |
---|
1208 | try: |
---|
1209 | if arg1.read: |
---|
1210 | rd = Reader(arg1) |
---|
1211 | cb = CharBuffer(rd) |
---|
1212 | ss = LexerSharedInputState(cb) |
---|
1213 | self.inputState = ss |
---|
1214 | return |
---|
1215 | except: |
---|
1216 | pass |
---|
1217 | |
---|
1218 | ## case 7: |
---|
1219 | ## raise wrong argument exception |
---|
1220 | raise TypeError(argv) |
---|
1221 | |
---|
1222 | def setTabSize(self,size) : |
---|
1223 | self.tabsize = size |
---|
1224 | |
---|
1225 | def getTabSize(self) : |
---|
1226 | return self.tabsize |
---|
1227 | |
---|
1228 | def setCaseSensitive(self,t) : |
---|
1229 | self.caseSensitive = t |
---|
1230 | |
---|
1231 | def setCommitToPath(self,commit) : |
---|
1232 | self.commitToPath = commit |
---|
1233 | |
---|
1234 | def setFilename(self,f) : |
---|
1235 | self.inputState.filename = f |
---|
1236 | |
---|
1237 | def setLine(self,line) : |
---|
1238 | self.inputState.line = line |
---|
1239 | |
---|
1240 | def setText(self,s) : |
---|
1241 | self.resetText() |
---|
1242 | self.text.append(s) |
---|
1243 | |
---|
1244 | def getCaseSensitive(self) : |
---|
1245 | return self.caseSensitive |
---|
1246 | |
---|
1247 | def getCaseSensitiveLiterals(self) : |
---|
1248 | return self.caseSensitiveLiterals |
---|
1249 | |
---|
1250 | def getColumn(self) : |
---|
1251 | return self.inputState.column |
---|
1252 | |
---|
1253 | def setColumn(self,c) : |
---|
1254 | self.inputState.column = c |
---|
1255 | |
---|
1256 | def getCommitToPath(self) : |
---|
1257 | return self.commitToPath |
---|
1258 | |
---|
1259 | def getFilename(self) : |
---|
1260 | return self.inputState.filename |
---|
1261 | |
---|
1262 | def getInputBuffer(self) : |
---|
1263 | return self.inputState.input |
---|
1264 | |
---|
1265 | def getInputState(self) : |
---|
1266 | return self.inputState |
---|
1267 | |
---|
1268 | def setInputState(self,state) : |
---|
1269 | assert isinstance(state,LexerSharedInputState) |
---|
1270 | self.inputState = state |
---|
1271 | |
---|
1272 | def getLine(self) : |
---|
1273 | return self.inputState.line |
---|
1274 | |
---|
1275 | def getText(self) : |
---|
1276 | return str(self.text) |
---|
1277 | |
---|
1278 | def getTokenObject(self) : |
---|
1279 | return self._returnToken |
---|
1280 | |
---|
1281 | def LA(self,i) : |
---|
1282 | c = self.inputState.input.LA(i) |
---|
1283 | if not self.caseSensitive: |
---|
1284 | ### E0006 |
---|
1285 | c = c.__class__.lower(c) |
---|
1286 | return c |
---|
1287 | |
---|
1288 | def makeToken(self,type) : |
---|
1289 | try: |
---|
1290 | ## dynamically load a class |
---|
1291 | assert self.tokenClass |
---|
1292 | tok = self.tokenClass() |
---|
1293 | tok.setType(type) |
---|
1294 | tok.setColumn(self.inputState.tokenStartColumn) |
---|
1295 | tok.setLine(self.inputState.tokenStartLine) |
---|
1296 | return tok |
---|
1297 | except: |
---|
1298 | self.panic("unable to create new token") |
---|
1299 | return Token.badToken |
---|
1300 | |
---|
1301 | def mark(self) : |
---|
1302 | return self.inputState.input.mark() |
---|
1303 | |
---|
1304 | def _match_bitset(self,b) : |
---|
1305 | if b.member(self.LA(1)): |
---|
1306 | self.consume() |
---|
1307 | else: |
---|
1308 | raise MismatchedCharException(self.LA(1), b, False, self) |
---|
1309 | |
---|
1310 | def _match_string(self,s) : |
---|
1311 | for c in s: |
---|
1312 | if self.LA(1) == c: |
---|
1313 | self.consume() |
---|
1314 | else: |
---|
1315 | raise MismatchedCharException(self.LA(1), c, False, self) |
---|
1316 | |
---|
1317 | def match(self,item): |
---|
1318 | if is_string_type(item): |
---|
1319 | return self._match_string(item) |
---|
1320 | else: |
---|
1321 | return self._match_bitset(item) |
---|
1322 | |
---|
1323 | def matchNot(self,c) : |
---|
1324 | if self.LA(1) != c: |
---|
1325 | self.consume() |
---|
1326 | else: |
---|
1327 | raise MismatchedCharException(self.LA(1), c, True, self) |
---|
1328 | |
---|
1329 | def matchRange(self,c1,c2) : |
---|
1330 | if self.LA(1) < c1 or self.LA(1) > c2 : |
---|
1331 | raise MismatchedCharException(self.LA(1), c1, c2, False, self) |
---|
1332 | else: |
---|
1333 | self.consume() |
---|
1334 | |
---|
1335 | def newline(self) : |
---|
1336 | self.inputState.line += 1 |
---|
1337 | self.inputState.column = 1 |
---|
1338 | |
---|
1339 | def tab(self) : |
---|
1340 | c = self.getColumn() |
---|
1341 | nc = ( ((c-1)/self.tabsize) + 1) * self.tabsize + 1 |
---|
1342 | self.setColumn(nc) |
---|
1343 | |
---|
1344 | def panic(self,s='') : |
---|
1345 | print "CharScanner: panic: " + s |
---|
1346 | sys.exit(1) |
---|
1347 | |
---|
1348 | def reportError(self,ex) : |
---|
1349 | print ex |
---|
1350 | |
---|
1351 | def reportError(self,s) : |
---|
1352 | if not self.getFilename(): |
---|
1353 | print "error: " + str(s) |
---|
1354 | else: |
---|
1355 | print self.getFilename() + ": error: " + str(s) |
---|
1356 | |
---|
1357 | def reportWarning(self,s) : |
---|
1358 | if not self.getFilename(): |
---|
1359 | print "warning: " + str(s) |
---|
1360 | else: |
---|
1361 | print self.getFilename() + ": warning: " + str(s) |
---|
1362 | |
---|
1363 | def resetText(self) : |
---|
1364 | self.text.setLength(0) |
---|
1365 | self.inputState.tokenStartColumn = self.inputState.column |
---|
1366 | self.inputState.tokenStartLine = self.inputState.line |
---|
1367 | |
---|
1368 | def rewind(self,pos) : |
---|
1369 | self.inputState.input.rewind(pos) |
---|
1370 | |
---|
1371 | def setTokenObjectClass(self,cl): |
---|
1372 | self.tokenClass = cl |
---|
1373 | |
---|
1374 | def testForLiteral(self,token): |
---|
1375 | if not token: |
---|
1376 | return |
---|
1377 | assert isinstance(token,Token) |
---|
1378 | |
---|
1379 | _type = token.getType() |
---|
1380 | |
---|
1381 | ## special tokens can't be literals |
---|
1382 | if _type in [SKIP,INVALID_TYPE,EOF_TYPE,NULL_TREE_LOOKAHEAD] : |
---|
1383 | return |
---|
1384 | |
---|
1385 | _text = token.getText() |
---|
1386 | if not _text: |
---|
1387 | return |
---|
1388 | |
---|
1389 | assert is_string_type(_text) |
---|
1390 | _type = self.testLiteralsTable(_text,_type) |
---|
1391 | token.setType(_type) |
---|
1392 | return _type |
---|
1393 | |
---|
1394 | def testLiteralsTable(self,*args): |
---|
1395 | if is_string_type(args[0]): |
---|
1396 | s = args[0] |
---|
1397 | i = args[1] |
---|
1398 | else: |
---|
1399 | s = self.text.getString() |
---|
1400 | i = args[0] |
---|
1401 | |
---|
1402 | ## check whether integer has been given |
---|
1403 | if not isinstance(i,int): |
---|
1404 | assert isinstance(i,int) |
---|
1405 | |
---|
1406 | ## check whether we have a dict |
---|
1407 | assert isinstance(self.literals,dict) |
---|
1408 | try: |
---|
1409 | ## E0010 |
---|
1410 | if not self.caseSensitiveLiterals: |
---|
1411 | s = s.__class__.lower(s) |
---|
1412 | i = self.literals[s] |
---|
1413 | except: |
---|
1414 | pass |
---|
1415 | return i |
---|
1416 | |
---|
1417 | def toLower(self,c): |
---|
1418 | return c.__class__.lower() |
---|
1419 | |
---|
1420 | def traceIndent(self): |
---|
1421 | print ' ' * self.traceDepth |
---|
1422 | |
---|
1423 | def traceIn(self,rname): |
---|
1424 | self.traceDepth += 1 |
---|
1425 | self.traceIndent() |
---|
1426 | print "> lexer %s c== %s" % (rname,self.LA(1)) |
---|
1427 | |
---|
1428 | def traceOut(self,rname): |
---|
1429 | self.traceIndent() |
---|
1430 | print "< lexer %s c== %s" % (rname,self.LA(1)) |
---|
1431 | self.traceDepth -= 1 |
---|
1432 | |
---|
1433 | def uponEOF(self): |
---|
1434 | pass |
---|
1435 | |
---|
1436 | def append(self,c): |
---|
1437 | if self.saveConsumedInput : |
---|
1438 | self.text.append(c) |
---|
1439 | |
---|
1440 | def commit(self): |
---|
1441 | self.inputState.input.commit() |
---|
1442 | |
---|
1443 | def consume(self): |
---|
1444 | if not self.inputState.guessing: |
---|
1445 | c = self.LA(1) |
---|
1446 | if self.caseSensitive: |
---|
1447 | self.append(c) |
---|
1448 | else: |
---|
1449 | # use input.LA(), not LA(), to get original case |
---|
1450 | # CharScanner.LA() would toLower it. |
---|
1451 | c = self.inputState.input.LA(1) |
---|
1452 | self.append(c) |
---|
1453 | |
---|
1454 | if c and c in "\t": |
---|
1455 | self.tab() |
---|
1456 | else: |
---|
1457 | self.inputState.column += 1 |
---|
1458 | self.inputState.input.consume() |
---|
1459 | |
---|
1460 | ## Consume chars until one matches the given char |
---|
1461 | def consumeUntil_char(self,c): |
---|
1462 | while self.LA(1) != EOF_CHAR and self.LA(1) != c: |
---|
1463 | self.consume() |
---|
1464 | |
---|
1465 | ## Consume chars until one matches the given set |
---|
1466 | def consumeUntil_bitset(self,bitset): |
---|
1467 | while self.LA(1) != EOF_CHAR and not self.set.member(self.LA(1)): |
---|
1468 | self.consume() |
---|
1469 | |
---|
1470 | ### If symbol seen is EOF then generate and set token, otherwise |
---|
1471 | ### throw exception. |
---|
1472 | def default(self,la1): |
---|
1473 | if not la1 : |
---|
1474 | self.uponEOF() |
---|
1475 | self._returnToken = self.makeToken(EOF_TYPE) |
---|
1476 | else: |
---|
1477 | self.raise_NoViableAlt(la1) |
---|
1478 | |
---|
1479 | def filterdefault(self,la1,*args): |
---|
1480 | if not la1: |
---|
1481 | self.uponEOF() |
---|
1482 | self._returnToken = self.makeToken(EOF_TYPE) |
---|
1483 | return |
---|
1484 | |
---|
1485 | if not args: |
---|
1486 | self.consume() |
---|
1487 | raise TryAgain() |
---|
1488 | else: |
---|
1489 | ### apply filter object |
---|
1490 | self.commit(); |
---|
1491 | try: |
---|
1492 | func=args[0] |
---|
1493 | args=args[1:] |
---|
1494 | apply(func,args) |
---|
1495 | except RecognitionException, e: |
---|
1496 | ## catastrophic failure |
---|
1497 | self.reportError(e); |
---|
1498 | self.consume(); |
---|
1499 | raise TryAgain() |
---|
1500 | |
---|
1501 | def raise_NoViableAlt(self,la1=None): |
---|
1502 | if not la1: la1 = self.LA(1) |
---|
1503 | fname = self.getFilename() |
---|
1504 | line = self.getLine() |
---|
1505 | col = self.getColumn() |
---|
1506 | raise NoViableAltForCharException(la1,fname,line,col) |
---|
1507 | |
---|
1508 | def set_return_token(self,_create,_token,_ttype,_offset): |
---|
1509 | if _create and not _token and (not _ttype == SKIP): |
---|
1510 | string = self.text.getString(_offset) |
---|
1511 | _token = self.makeToken(_ttype) |
---|
1512 | _token.setText(string) |
---|
1513 | self._returnToken = _token |
---|
1514 | return _token |
---|
1515 | |
---|
1516 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1517 | ### CharScannerIterator ### |
---|
1518 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1519 | |
---|
1520 | class CharScannerIterator: |
---|
1521 | |
---|
1522 | def __init__(self,inst): |
---|
1523 | if isinstance(inst,CharScanner): |
---|
1524 | self.inst = inst |
---|
1525 | return |
---|
1526 | raise TypeError("CharScannerIterator requires CharScanner object") |
---|
1527 | |
---|
1528 | def next(self): |
---|
1529 | assert self.inst |
---|
1530 | item = self.inst.nextToken() |
---|
1531 | if not item or item.isEOF(): |
---|
1532 | raise StopIteration() |
---|
1533 | return item |
---|
1534 | |
---|
1535 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1536 | ### BitSet ### |
---|
1537 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1538 | |
---|
1539 | ### I'm assuming here that a long is 64bits. It appears however, that |
---|
1540 | ### a long is of any size. That means we can use a single long as the |
---|
1541 | ### bitset (!), ie. Python would do almost all the work (TBD). |
---|
1542 | |
---|
1543 | class BitSet(object): |
---|
1544 | BITS = 64 |
---|
1545 | NIBBLE = 4 |
---|
1546 | LOG_BITS = 6 |
---|
1547 | MOD_MASK = BITS -1 |
---|
1548 | |
---|
1549 | def __init__(self,data=None): |
---|
1550 | if not data: |
---|
1551 | BitSet.__init__(self,[long(0)]) |
---|
1552 | return |
---|
1553 | if isinstance(data,int): |
---|
1554 | BitSet.__init__(self,[long(data)]) |
---|
1555 | return |
---|
1556 | if isinstance(data,long): |
---|
1557 | BitSet.__init__(self,[data]) |
---|
1558 | return |
---|
1559 | if not isinstance(data,list): |
---|
1560 | raise TypeError("BitSet requires integer, long, or " + |
---|
1561 | "list argument") |
---|
1562 | for x in data: |
---|
1563 | if not isinstance(x,long): |
---|
1564 | raise TypeError(self,"List argument item is " + |
---|
1565 | "not a long: %s" % (x)) |
---|
1566 | self.data = data |
---|
1567 | |
---|
1568 | def __str__(self): |
---|
1569 | bits = len(self.data) * BitSet.BITS |
---|
1570 | s = "" |
---|
1571 | for i in xrange(0,bits): |
---|
1572 | if self.at(i): |
---|
1573 | s += "1" |
---|
1574 | else: |
---|
1575 | s += "o" |
---|
1576 | if not ((i+1) % 10): |
---|
1577 | s += '|%s|' % (i+1) |
---|
1578 | return s |
---|
1579 | |
---|
1580 | def __repr__(self): |
---|
1581 | return str(self) |
---|
1582 | |
---|
1583 | def member(self,item): |
---|
1584 | if not item: |
---|
1585 | return False |
---|
1586 | |
---|
1587 | if isinstance(item,int): |
---|
1588 | return self.at(item) |
---|
1589 | |
---|
1590 | if not is_string_type(item): |
---|
1591 | raise TypeError(self,"char or unichar expected: %s" % (item)) |
---|
1592 | |
---|
1593 | ## char is a (unicode) string with at most lenght 1, ie. |
---|
1594 | ## a char. |
---|
1595 | |
---|
1596 | if len(item) != 1: |
---|
1597 | raise TypeError(self,"char expected: %s" % (item)) |
---|
1598 | |
---|
1599 | ### handle ASCII/UNICODE char |
---|
1600 | num = ord(item) |
---|
1601 | |
---|
1602 | ### check whether position num is in bitset |
---|
1603 | return self.at(num) |
---|
1604 | |
---|
1605 | def wordNumber(self,bit): |
---|
1606 | return bit >> BitSet.LOG_BITS |
---|
1607 | |
---|
1608 | def bitMask(self,bit): |
---|
1609 | pos = bit & BitSet.MOD_MASK ## bit mod BITS |
---|
1610 | return (1L << pos) |
---|
1611 | |
---|
1612 | def set(self,bit,on=True): |
---|
1613 | # grow bitset as required (use with care!) |
---|
1614 | i = self.wordNumber(bit) |
---|
1615 | mask = self.bitMask(bit) |
---|
1616 | if i>=len(self.data): |
---|
1617 | d = i - len(self.data) + 1 |
---|
1618 | for x in xrange(0,d): |
---|
1619 | self.data.append(0L) |
---|
1620 | assert len(self.data) == i+1 |
---|
1621 | if on: |
---|
1622 | self.data[i] |= mask |
---|
1623 | else: |
---|
1624 | self.data[i] &= (~mask) |
---|
1625 | |
---|
1626 | ### make add an alias for set |
---|
1627 | add = set |
---|
1628 | |
---|
1629 | def off(self,bit,off=True): |
---|
1630 | self.set(bit,not off) |
---|
1631 | |
---|
1632 | def at(self,bit): |
---|
1633 | i = self.wordNumber(bit) |
---|
1634 | v = self.data[i] |
---|
1635 | m = self.bitMask(bit) |
---|
1636 | return v & m |
---|
1637 | |
---|
1638 | |
---|
1639 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1640 | ### some further funcs ### |
---|
1641 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1642 | |
---|
1643 | def illegalarg_ex(func): |
---|
1644 | raise ValueError( |
---|
1645 | "%s is only valid if parser is built for debugging" % |
---|
1646 | (func.func_name)) |
---|
1647 | |
---|
1648 | def runtime_ex(func): |
---|
1649 | raise RuntimeException( |
---|
1650 | "%s is only valid if parser is built for debugging" % |
---|
1651 | (func.func_name)) |
---|
1652 | |
---|
1653 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1654 | ### TokenBuffer ### |
---|
1655 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1656 | |
---|
1657 | class TokenBuffer(object): |
---|
1658 | def __init__(self,stream): |
---|
1659 | self.input = stream |
---|
1660 | self.nMarkers = 0 |
---|
1661 | self.markerOffset = 0 |
---|
1662 | self.numToConsume = 0 |
---|
1663 | self.queue = Queue() |
---|
1664 | |
---|
1665 | def reset(self) : |
---|
1666 | self.nMarkers = 0 |
---|
1667 | self.markerOffset = 0 |
---|
1668 | self.numToConsume = 0 |
---|
1669 | self.queue.reset() |
---|
1670 | |
---|
1671 | def consume(self) : |
---|
1672 | self.numToConsume += 1 |
---|
1673 | |
---|
1674 | def fill(self, amount): |
---|
1675 | self.syncConsume() |
---|
1676 | while self.queue.length() < (amount + self.markerOffset): |
---|
1677 | self.queue.append(self.input.nextToken()) |
---|
1678 | |
---|
1679 | def getInput(self): |
---|
1680 | return self.input |
---|
1681 | |
---|
1682 | def LA(self,k) : |
---|
1683 | self.fill(k) |
---|
1684 | return self.queue.elementAt(self.markerOffset + k - 1).type |
---|
1685 | |
---|
1686 | def LT(self,k) : |
---|
1687 | self.fill(k) |
---|
1688 | return self.queue.elementAt(self.markerOffset + k - 1) |
---|
1689 | |
---|
1690 | def mark(self) : |
---|
1691 | self.syncConsume() |
---|
1692 | self.nMarkers += 1 |
---|
1693 | return self.markerOffset |
---|
1694 | |
---|
1695 | def rewind(self,mark) : |
---|
1696 | self.syncConsume() |
---|
1697 | self.markerOffset = mark |
---|
1698 | self.nMarkers -= 1 |
---|
1699 | |
---|
1700 | def syncConsume(self) : |
---|
1701 | while self.numToConsume > 0: |
---|
1702 | if self.nMarkers > 0: |
---|
1703 | # guess mode -- leave leading characters and bump offset. |
---|
1704 | self.markerOffset += 1 |
---|
1705 | else: |
---|
1706 | # normal mode -- remove first character |
---|
1707 | self.queue.removeFirst() |
---|
1708 | self.numToConsume -= 1 |
---|
1709 | |
---|
1710 | def __str__(self): |
---|
1711 | return "(%s,%s,%s,%s,%s)" % ( |
---|
1712 | self.input, |
---|
1713 | self.nMarkers, |
---|
1714 | self.markerOffset, |
---|
1715 | self.numToConsume, |
---|
1716 | self.queue) |
---|
1717 | |
---|
1718 | def __repr__(self): |
---|
1719 | return str(self) |
---|
1720 | |
---|
1721 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1722 | ### ParserSharedInputState ### |
---|
1723 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1724 | |
---|
1725 | class ParserSharedInputState(object): |
---|
1726 | |
---|
1727 | def __init__(self): |
---|
1728 | self.input = None |
---|
1729 | self.reset() |
---|
1730 | |
---|
1731 | def reset(self): |
---|
1732 | self.guessing = 0 |
---|
1733 | self.filename = None |
---|
1734 | if self.input: |
---|
1735 | self.input.reset() |
---|
1736 | |
---|
1737 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1738 | ### Parser ### |
---|
1739 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1740 | |
---|
1741 | class Parser(object): |
---|
1742 | |
---|
1743 | def __init__(self, *args, **kwargs): |
---|
1744 | self.tokenNames = None |
---|
1745 | self.returnAST = None |
---|
1746 | self.astFactory = None |
---|
1747 | self.tokenTypeToASTClassMap = {} |
---|
1748 | self.ignoreInvalidDebugCalls = False |
---|
1749 | self.traceDepth = 0 |
---|
1750 | if not args: |
---|
1751 | self.inputState = ParserSharedInputState() |
---|
1752 | return |
---|
1753 | arg0 = args[0] |
---|
1754 | assert isinstance(arg0,ParserSharedInputState) |
---|
1755 | self.inputState = arg0 |
---|
1756 | return |
---|
1757 | |
---|
1758 | def getTokenTypeToASTClassMap(self): |
---|
1759 | return self.tokenTypeToASTClassMap |
---|
1760 | |
---|
1761 | |
---|
1762 | def addMessageListener(self, l): |
---|
1763 | if not self.ignoreInvalidDebugCalls: |
---|
1764 | illegalarg_ex(addMessageListener) |
---|
1765 | |
---|
1766 | def addParserListener(self,l) : |
---|
1767 | if (not self.ignoreInvalidDebugCalls) : |
---|
1768 | illegalarg_ex(addParserListener) |
---|
1769 | |
---|
1770 | def addParserMatchListener(self, l) : |
---|
1771 | if (not self.ignoreInvalidDebugCalls) : |
---|
1772 | illegalarg_ex(addParserMatchListener) |
---|
1773 | |
---|
1774 | def addParserTokenListener(self, l) : |
---|
1775 | if (not self.ignoreInvalidDebugCalls): |
---|
1776 | illegalarg_ex(addParserTokenListener) |
---|
1777 | |
---|
1778 | def addSemanticPredicateListener(self, l) : |
---|
1779 | if (not self.ignoreInvalidDebugCalls): |
---|
1780 | illegalarg_ex(addSemanticPredicateListener) |
---|
1781 | |
---|
1782 | def addSyntacticPredicateListener(self, l) : |
---|
1783 | if (not self.ignoreInvalidDebugCalls): |
---|
1784 | illegalarg_ex(addSyntacticPredicateListener) |
---|
1785 | |
---|
1786 | def addTraceListener(self, l) : |
---|
1787 | if (not self.ignoreInvalidDebugCalls): |
---|
1788 | illegalarg_ex(addTraceListener) |
---|
1789 | |
---|
1790 | def consume(self): |
---|
1791 | raise NotImplementedError() |
---|
1792 | |
---|
1793 | def _consumeUntil_type(self,tokenType): |
---|
1794 | while self.LA(1) != EOF_TYPE and self.LA(1) != tokenType: |
---|
1795 | self.consume() |
---|
1796 | |
---|
1797 | def _consumeUntil_bitset(self, set): |
---|
1798 | while self.LA(1) != EOF_TYPE and not set.member(self.LA(1)): |
---|
1799 | self.consume() |
---|
1800 | |
---|
1801 | def consumeUntil(self,arg): |
---|
1802 | if isinstance(arg,int): |
---|
1803 | self._consumeUntil_type(arg) |
---|
1804 | else: |
---|
1805 | self._consumeUntil_bitset(arg) |
---|
1806 | |
---|
1807 | def defaultDebuggingSetup(self): |
---|
1808 | pass |
---|
1809 | |
---|
1810 | def getAST(self) : |
---|
1811 | return self.returnAST |
---|
1812 | |
---|
1813 | def getASTFactory(self) : |
---|
1814 | return self.astFactory |
---|
1815 | |
---|
1816 | def getFilename(self) : |
---|
1817 | return self.inputState.filename |
---|
1818 | |
---|
1819 | def getInputState(self) : |
---|
1820 | return self.inputState |
---|
1821 | |
---|
1822 | def setInputState(self, state) : |
---|
1823 | self.inputState = state |
---|
1824 | |
---|
1825 | def getTokenName(self,num) : |
---|
1826 | return self.tokenNames[num] |
---|
1827 | |
---|
1828 | def getTokenNames(self) : |
---|
1829 | return self.tokenNames |
---|
1830 | |
---|
1831 | def isDebugMode(self) : |
---|
1832 | return self.false |
---|
1833 | |
---|
1834 | def LA(self, i): |
---|
1835 | raise NotImplementedError() |
---|
1836 | |
---|
1837 | def LT(self, i): |
---|
1838 | raise NotImplementedError() |
---|
1839 | |
---|
1840 | def mark(self): |
---|
1841 | return self.inputState.input.mark() |
---|
1842 | |
---|
1843 | def _match_int(self,t): |
---|
1844 | if (self.LA(1) != t): |
---|
1845 | raise MismatchedTokenException( |
---|
1846 | self.tokenNames, self.LT(1), t, False, self.getFilename()) |
---|
1847 | else: |
---|
1848 | self.consume() |
---|
1849 | |
---|
1850 | def _match_set(self, b): |
---|
1851 | if (not b.member(self.LA(1))): |
---|
1852 | raise MismatchedTokenException( |
---|
1853 | self.tokenNames,self.LT(1), b, False, self.getFilename()) |
---|
1854 | else: |
---|
1855 | self.consume() |
---|
1856 | |
---|
1857 | def match(self,set) : |
---|
1858 | if isinstance(set,int): |
---|
1859 | self._match_int(set) |
---|
1860 | return |
---|
1861 | if isinstance(set,BitSet): |
---|
1862 | self._match_set(set) |
---|
1863 | return |
---|
1864 | raise TypeError("Parser.match requires integer ot BitSet argument") |
---|
1865 | |
---|
1866 | def matchNot(self,t): |
---|
1867 | if self.LA(1) == t: |
---|
1868 | raise MismatchedTokenException( |
---|
1869 | tokenNames, self.LT(1), t, True, self.getFilename()) |
---|
1870 | else: |
---|
1871 | self.consume() |
---|
1872 | |
---|
1873 | def removeMessageListener(self, l) : |
---|
1874 | if (not self.ignoreInvalidDebugCalls): |
---|
1875 | runtime_ex(removeMessageListener) |
---|
1876 | |
---|
1877 | def removeParserListener(self, l) : |
---|
1878 | if (not self.ignoreInvalidDebugCalls): |
---|
1879 | runtime_ex(removeParserListener) |
---|
1880 | |
---|
1881 | def removeParserMatchListener(self, l) : |
---|
1882 | if (not self.ignoreInvalidDebugCalls): |
---|
1883 | runtime_ex(removeParserMatchListener) |
---|
1884 | |
---|
1885 | def removeParserTokenListener(self, l) : |
---|
1886 | if (not self.ignoreInvalidDebugCalls): |
---|
1887 | runtime_ex(removeParserTokenListener) |
---|
1888 | |
---|
1889 | def removeSemanticPredicateListener(self, l) : |
---|
1890 | if (not self.ignoreInvalidDebugCalls): |
---|
1891 | runtime_ex(removeSemanticPredicateListener) |
---|
1892 | |
---|
1893 | def removeSyntacticPredicateListener(self, l) : |
---|
1894 | if (not self.ignoreInvalidDebugCalls): |
---|
1895 | runtime_ex(removeSyntacticPredicateListener) |
---|
1896 | |
---|
1897 | def removeTraceListener(self, l) : |
---|
1898 | if (not self.ignoreInvalidDebugCalls): |
---|
1899 | runtime_ex(removeTraceListener) |
---|
1900 | |
---|
1901 | def reportError(self,x) : |
---|
1902 | fmt = "syntax error:" |
---|
1903 | f = self.getFilename() |
---|
1904 | if f: |
---|
1905 | fmt = ("%s:" % f) + fmt |
---|
1906 | if isinstance(x,Token): |
---|
1907 | line = x.getColumn() |
---|
1908 | col = x.getLine() |
---|
1909 | text = x.getText() |
---|
1910 | fmt = fmt + 'unexpected symbol at line %s (column %s) : "%s"' |
---|
1911 | print >>sys.stderr, fmt % (line,col,text) |
---|
1912 | else: |
---|
1913 | print >>sys.stderr, fmt,str(x) |
---|
1914 | |
---|
1915 | def reportWarning(self,s): |
---|
1916 | f = self.getFilename() |
---|
1917 | if f: |
---|
1918 | print "%s:warning: %s" % (f,str(x)) |
---|
1919 | else: |
---|
1920 | print "warning: %s" % (str(x)) |
---|
1921 | |
---|
1922 | def rewind(self, pos) : |
---|
1923 | self.inputState.input.rewind(pos) |
---|
1924 | |
---|
1925 | def setASTFactory(self, f) : |
---|
1926 | self.astFactory = f |
---|
1927 | |
---|
1928 | def setASTNodeClass(self, cl) : |
---|
1929 | self.astFactory.setASTNodeType(cl) |
---|
1930 | |
---|
1931 | def setASTNodeType(self, nodeType) : |
---|
1932 | self.setASTNodeClass(nodeType) |
---|
1933 | |
---|
1934 | def setDebugMode(self, debugMode) : |
---|
1935 | if (not self.ignoreInvalidDebugCalls): |
---|
1936 | runtime_ex(setDebugMode) |
---|
1937 | |
---|
1938 | def setFilename(self, f) : |
---|
1939 | self.inputState.filename = f |
---|
1940 | |
---|
1941 | def setIgnoreInvalidDebugCalls(self, value) : |
---|
1942 | self.ignoreInvalidDebugCalls = value |
---|
1943 | |
---|
1944 | def setTokenBuffer(self, t) : |
---|
1945 | self.inputState.input = t |
---|
1946 | |
---|
1947 | def traceIndent(self): |
---|
1948 | print " " * self.traceDepth |
---|
1949 | |
---|
1950 | def traceIn(self,rname): |
---|
1951 | self.traceDepth += 1 |
---|
1952 | self.trace("> ", rname) |
---|
1953 | |
---|
1954 | def traceOut(self,rname): |
---|
1955 | self.trace("< ", rname) |
---|
1956 | self.traceDepth -= 1 |
---|
1957 | |
---|
1958 | ### wh: moved from ASTFactory to Parser |
---|
1959 | def addASTChild(self,currentAST, child): |
---|
1960 | if not child: |
---|
1961 | return |
---|
1962 | if not currentAST.root: |
---|
1963 | currentAST.root = child |
---|
1964 | elif not currentAST.child: |
---|
1965 | currentAST.root.setFirstChild(child) |
---|
1966 | else: |
---|
1967 | currentAST.child.setNextSibling(child) |
---|
1968 | currentAST.child = child |
---|
1969 | currentAST.advanceChildToEnd() |
---|
1970 | |
---|
1971 | ### wh: moved from ASTFactory to Parser |
---|
1972 | def makeASTRoot(self,currentAST,root) : |
---|
1973 | if root: |
---|
1974 | ### Add the current root as a child of new root |
---|
1975 | root.addChild(currentAST.root) |
---|
1976 | ### The new current child is the last sibling of the old root |
---|
1977 | currentAST.child = currentAST.root |
---|
1978 | currentAST.advanceChildToEnd() |
---|
1979 | ### Set the new root |
---|
1980 | currentAST.root = root |
---|
1981 | |
---|
1982 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1983 | ### LLkParser ### |
---|
1984 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
1985 | |
---|
1986 | class LLkParser(Parser): |
---|
1987 | |
---|
1988 | def __init__(self, *args, **kwargs): |
---|
1989 | try: |
---|
1990 | arg1 = args[0] |
---|
1991 | except: |
---|
1992 | arg1 = 1 |
---|
1993 | |
---|
1994 | if isinstance(arg1,int): |
---|
1995 | super(LLkParser,self).__init__() |
---|
1996 | self.k = arg1 |
---|
1997 | return |
---|
1998 | |
---|
1999 | if isinstance(arg1,ParserSharedInputState): |
---|
2000 | super(LLkParser,self).__init__(arg1) |
---|
2001 | self.set_k(1,*args) |
---|
2002 | return |
---|
2003 | |
---|
2004 | if isinstance(arg1,TokenBuffer): |
---|
2005 | super(LLkParser,self).__init__() |
---|
2006 | self.setTokenBuffer(arg1) |
---|
2007 | self.set_k(1,*args) |
---|
2008 | return |
---|
2009 | |
---|
2010 | if isinstance(arg1,TokenStream): |
---|
2011 | super(LLkParser,self).__init__() |
---|
2012 | tokenBuf = TokenBuffer(arg1) |
---|
2013 | self.setTokenBuffer(tokenBuf) |
---|
2014 | self.set_k(1,*args) |
---|
2015 | return |
---|
2016 | |
---|
2017 | ### unknown argument |
---|
2018 | raise TypeError("LLkParser requires integer, " + |
---|
2019 | "ParserSharedInputStream or TokenStream argument") |
---|
2020 | |
---|
2021 | def consume(self): |
---|
2022 | self.inputState.input.consume() |
---|
2023 | |
---|
2024 | def LA(self,i): |
---|
2025 | return self.inputState.input.LA(i) |
---|
2026 | |
---|
2027 | def LT(self,i): |
---|
2028 | return self.inputState.input.LT(i) |
---|
2029 | |
---|
2030 | def set_k(self,index,*args): |
---|
2031 | try: |
---|
2032 | self.k = args[index] |
---|
2033 | except: |
---|
2034 | self.k = 1 |
---|
2035 | |
---|
2036 | def trace(self,ee,rname): |
---|
2037 | print type(self) |
---|
2038 | self.traceIndent() |
---|
2039 | guess = "" |
---|
2040 | if self.inputState.guessing > 0: |
---|
2041 | guess = " [guessing]" |
---|
2042 | print(ee + rname + guess) |
---|
2043 | for i in xrange(1,self.k+1): |
---|
2044 | if i != 1: |
---|
2045 | print(", ") |
---|
2046 | if self.LT(i) : |
---|
2047 | v = self.LT(i).getText() |
---|
2048 | else: |
---|
2049 | v = "null" |
---|
2050 | print "LA(%s) == %s" % (i,v) |
---|
2051 | print("\n") |
---|
2052 | |
---|
2053 | def traceIn(self,rname): |
---|
2054 | self.traceDepth += 1; |
---|
2055 | self.trace("> ", rname); |
---|
2056 | |
---|
2057 | def traceOut(self,rname): |
---|
2058 | self.trace("< ", rname); |
---|
2059 | self.traceDepth -= 1; |
---|
2060 | |
---|
2061 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2062 | ### TreeParserSharedInputState ### |
---|
2063 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2064 | |
---|
2065 | class TreeParserSharedInputState(object): |
---|
2066 | def __init__(self): |
---|
2067 | self.guessing = 0 |
---|
2068 | |
---|
2069 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2070 | ### TreeParser ### |
---|
2071 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2072 | |
---|
2073 | class TreeParser(object): |
---|
2074 | |
---|
2075 | def __init__(self, *args, **kwargs): |
---|
2076 | self.inputState = TreeParserSharedInputState() |
---|
2077 | self._retTree = None |
---|
2078 | self.tokenNames = [] |
---|
2079 | self.returnAST = None |
---|
2080 | self.astFactory = ASTFactory() |
---|
2081 | self.traceDepth = 0 |
---|
2082 | |
---|
2083 | def getAST(self): |
---|
2084 | return self.returnAST |
---|
2085 | |
---|
2086 | def getASTFactory(self): |
---|
2087 | return self.astFactory |
---|
2088 | |
---|
2089 | def getTokenName(self,num) : |
---|
2090 | return self.tokenNames[num] |
---|
2091 | |
---|
2092 | def getTokenNames(self): |
---|
2093 | return self.tokenNames |
---|
2094 | |
---|
2095 | def match(self,t,set) : |
---|
2096 | assert isinstance(set,int) or isinstance(set,BitSet) |
---|
2097 | if not t or t == ASTNULL: |
---|
2098 | raise MismatchedTokenException(self.getTokenNames(), t,set, False) |
---|
2099 | |
---|
2100 | if isinstance(set,int) and t.getType() != set: |
---|
2101 | raise MismatchedTokenException(self.getTokenNames(), t,set, False) |
---|
2102 | |
---|
2103 | if isinstance(set,BitSet) and not set.member(t.getType): |
---|
2104 | raise MismatchedTokenException(self.getTokenNames(), t,set, False) |
---|
2105 | |
---|
2106 | def matchNot(self,t, ttype) : |
---|
2107 | if not t or (t == ASTNULL) or (t.getType() == ttype): |
---|
2108 | raise MismatchedTokenException(getTokenNames(), t, ttype, True) |
---|
2109 | |
---|
2110 | def reportError(self,ex): |
---|
2111 | print >>sys.stderr,"error:",ex |
---|
2112 | |
---|
2113 | def reportWarning(self, s): |
---|
2114 | print "warning:",s |
---|
2115 | |
---|
2116 | def setASTFactory(self,f): |
---|
2117 | self.astFactory = f |
---|
2118 | |
---|
2119 | def setASTNodeType(self,nodeType): |
---|
2120 | self.setASTNodeClass(nodeType) |
---|
2121 | |
---|
2122 | def setASTNodeClass(self,nodeType): |
---|
2123 | self.astFactory.setASTNodeType(nodeType) |
---|
2124 | |
---|
2125 | def traceIndent(self): |
---|
2126 | print " " * self.traceDepth |
---|
2127 | |
---|
2128 | def traceIn(self,rname,t): |
---|
2129 | self.traceDepth += 1 |
---|
2130 | self.traceIndent() |
---|
2131 | print("> " + rname + "(" + |
---|
2132 | ifelse(t,str(t),"null") + ")" + |
---|
2133 | ifelse(self.inputState.guessing>0,"[guessing]","")) |
---|
2134 | |
---|
2135 | def traceOut(self,rname,t): |
---|
2136 | self.traceIndent() |
---|
2137 | print("< " + rname + "(" + |
---|
2138 | ifelse(t,str(t),"null") + ")" + |
---|
2139 | ifelse(self.inputState.guessing>0,"[guessing]","")) |
---|
2140 | self.traceDepth -= 1 |
---|
2141 | |
---|
2142 | ### wh: moved from ASTFactory to TreeParser |
---|
2143 | def addASTChild(self,currentAST, child): |
---|
2144 | if not child: |
---|
2145 | return |
---|
2146 | if not currentAST.root: |
---|
2147 | currentAST.root = child |
---|
2148 | elif not currentAST.child: |
---|
2149 | currentAST.root.setFirstChild(child) |
---|
2150 | else: |
---|
2151 | currentAST.child.setNextSibling(child) |
---|
2152 | currentAST.child = child |
---|
2153 | currentAST.advanceChildToEnd() |
---|
2154 | |
---|
2155 | ### wh: moved from ASTFactory to TreeParser |
---|
2156 | def makeASTRoot(self,currentAST,root): |
---|
2157 | if root: |
---|
2158 | ### Add the current root as a child of new root |
---|
2159 | root.addChild(currentAST.root) |
---|
2160 | ### The new current child is the last sibling of the old root |
---|
2161 | currentAST.child = currentAST.root |
---|
2162 | currentAST.advanceChildToEnd() |
---|
2163 | ### Set the new root |
---|
2164 | currentAST.root = root |
---|
2165 | |
---|
2166 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2167 | ### funcs to work on trees ### |
---|
2168 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2169 | |
---|
2170 | def rightmost(ast): |
---|
2171 | if ast: |
---|
2172 | while(ast.right): |
---|
2173 | ast = ast.right |
---|
2174 | return ast |
---|
2175 | |
---|
2176 | def cmptree(s,t,partial): |
---|
2177 | while(s and t): |
---|
2178 | ### as a quick optimization, check roots first. |
---|
2179 | if not s.equals(t): |
---|
2180 | return False |
---|
2181 | |
---|
2182 | ### if roots match, do full list match test on children. |
---|
2183 | if not cmptree(s.getFirstChild(),t.getFirstChild(),partial): |
---|
2184 | return False |
---|
2185 | |
---|
2186 | s = s.getNextSibling() |
---|
2187 | t = t.getNextSibling() |
---|
2188 | |
---|
2189 | r = ifelse(partial,not t,not s and not t) |
---|
2190 | return r |
---|
2191 | |
---|
2192 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2193 | ### AST ### |
---|
2194 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2195 | |
---|
2196 | class AST(object): |
---|
2197 | def __init__(self): |
---|
2198 | pass |
---|
2199 | |
---|
2200 | def addChild(self, c): |
---|
2201 | pass |
---|
2202 | |
---|
2203 | def equals(self, t): |
---|
2204 | return False |
---|
2205 | |
---|
2206 | def equalsList(self, t): |
---|
2207 | return False |
---|
2208 | |
---|
2209 | def equalsListPartial(self, t): |
---|
2210 | return False |
---|
2211 | |
---|
2212 | def equalsTree(self, t): |
---|
2213 | return False |
---|
2214 | |
---|
2215 | def equalsTreePartial(self, t): |
---|
2216 | return False |
---|
2217 | |
---|
2218 | def findAll(self, tree): |
---|
2219 | return None |
---|
2220 | |
---|
2221 | def findAllPartial(self, subtree): |
---|
2222 | return None |
---|
2223 | |
---|
2224 | def getFirstChild(self): |
---|
2225 | return self |
---|
2226 | |
---|
2227 | def getNextSibling(self): |
---|
2228 | return self |
---|
2229 | |
---|
2230 | def getText(self): |
---|
2231 | return "" |
---|
2232 | |
---|
2233 | def getType(self): |
---|
2234 | return INVALID_TYPE |
---|
2235 | |
---|
2236 | def getLine(self): |
---|
2237 | return 0 |
---|
2238 | |
---|
2239 | def getColumn(self): |
---|
2240 | return 0 |
---|
2241 | |
---|
2242 | def getNumberOfChildren(self): |
---|
2243 | return 0 |
---|
2244 | |
---|
2245 | def initialize(self, t, txt): |
---|
2246 | pass |
---|
2247 | |
---|
2248 | def initialize(self, t): |
---|
2249 | pass |
---|
2250 | |
---|
2251 | def setFirstChild(self, c): |
---|
2252 | pass |
---|
2253 | |
---|
2254 | def setNextSibling(self, n): |
---|
2255 | pass |
---|
2256 | |
---|
2257 | def setText(self, text): |
---|
2258 | pass |
---|
2259 | |
---|
2260 | def setType(self, ttype): |
---|
2261 | pass |
---|
2262 | |
---|
2263 | def toString(self): |
---|
2264 | self.getText() |
---|
2265 | |
---|
2266 | __str__ = toString |
---|
2267 | |
---|
2268 | def toStringList(self): |
---|
2269 | return self.getText() |
---|
2270 | |
---|
2271 | def toStringTree(self): |
---|
2272 | return self.getText() |
---|
2273 | |
---|
2274 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2275 | ### ASTNULLType ### |
---|
2276 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2277 | |
---|
2278 | ### There is only one instance of this class **/ |
---|
2279 | class ASTNULLType(AST): |
---|
2280 | def __init__(self): |
---|
2281 | AST.__init__(self) |
---|
2282 | pass |
---|
2283 | |
---|
2284 | def getText(self): |
---|
2285 | return "<ASTNULL>" |
---|
2286 | |
---|
2287 | def getType(self): |
---|
2288 | return NULL_TREE_LOOKAHEAD |
---|
2289 | |
---|
2290 | |
---|
2291 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2292 | ### BaseAST ### |
---|
2293 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2294 | |
---|
2295 | class BaseAST(AST): |
---|
2296 | |
---|
2297 | verboseStringConversion = False |
---|
2298 | tokenNames = None |
---|
2299 | |
---|
2300 | def __init__(self): |
---|
2301 | self.down = None ## kid |
---|
2302 | self.right = None ## sibling |
---|
2303 | |
---|
2304 | def addChild(self,node): |
---|
2305 | if node: |
---|
2306 | t = rightmost(self.down) |
---|
2307 | if t: |
---|
2308 | t.right = node |
---|
2309 | else: |
---|
2310 | assert not self.down |
---|
2311 | self.down = node |
---|
2312 | |
---|
2313 | def getNumberOfChildren(self): |
---|
2314 | t = self.down |
---|
2315 | n = 0 |
---|
2316 | while t: |
---|
2317 | n += 1 |
---|
2318 | t = t.right |
---|
2319 | return n |
---|
2320 | |
---|
2321 | def doWorkForFindAll(self,v,target,partialMatch): |
---|
2322 | sibling = self |
---|
2323 | |
---|
2324 | while sibling: |
---|
2325 | c1 = partialMatch and sibling.equalsTreePartial(target) |
---|
2326 | if c1: |
---|
2327 | v.append(sibling) |
---|
2328 | else: |
---|
2329 | c2 = not partialMatch and sibling.equalsTree(target) |
---|
2330 | if c2: |
---|
2331 | v.append(sibling) |
---|
2332 | |
---|
2333 | ### regardless of match or not, check any children for matches |
---|
2334 | if sibling.getFirstChild(): |
---|
2335 | sibling.getFirstChild().doWorkForFindAll(v,target,partialMatch) |
---|
2336 | |
---|
2337 | sibling = sibling.getNextSibling() |
---|
2338 | |
---|
2339 | ### Is node t equal to 'self' in terms of token type and text? |
---|
2340 | def equals(self,t): |
---|
2341 | if not t: |
---|
2342 | return False |
---|
2343 | return self.getText() == t.getText() and self.getType() == t.getType() |
---|
2344 | |
---|
2345 | ### Is t an exact structural and equals() match of this tree. The |
---|
2346 | ### 'self' reference is considered the start of a sibling list. |
---|
2347 | ### |
---|
2348 | def equalsList(self, t): |
---|
2349 | return cmptree(self, t, partial=False) |
---|
2350 | |
---|
2351 | ### Is 't' a subtree of this list? |
---|
2352 | ### The siblings of the root are NOT ignored. |
---|
2353 | ### |
---|
2354 | def equalsListPartial(self,t): |
---|
2355 | return cmptree(self,t,partial=True) |
---|
2356 | |
---|
2357 | ### Is tree rooted at 'self' equal to 't'? The siblings |
---|
2358 | ### of 'self' are ignored. |
---|
2359 | ### |
---|
2360 | def equalsTree(self, t): |
---|
2361 | return self.equals(t) and \ |
---|
2362 | cmptree(self.getFirstChild(), t.getFirstChild(), partial=False) |
---|
2363 | |
---|
2364 | ### Is 't' a subtree of the tree rooted at 'self'? The siblings |
---|
2365 | ### of 'self' are ignored. |
---|
2366 | ### |
---|
2367 | def equalsTreePartial(self, t): |
---|
2368 | if not t: |
---|
2369 | return True |
---|
2370 | return self.equals(t) and cmptree( |
---|
2371 | self.getFirstChild(), t.getFirstChild(), partial=True) |
---|
2372 | |
---|
2373 | ### Walk the tree looking for all exact subtree matches. Return |
---|
2374 | ### an ASTEnumerator that lets the caller walk the list |
---|
2375 | ### of subtree roots found herein. |
---|
2376 | def findAll(self,target): |
---|
2377 | roots = [] |
---|
2378 | |
---|
2379 | ### the empty tree cannot result in an enumeration |
---|
2380 | if not target: |
---|
2381 | return None |
---|
2382 | # find all matches recursively |
---|
2383 | self.doWorkForFindAll(roots, target, False) |
---|
2384 | return roots |
---|
2385 | |
---|
2386 | ### Walk the tree looking for all subtrees. Return |
---|
2387 | ### an ASTEnumerator that lets the caller walk the list |
---|
2388 | ### of subtree roots found herein. |
---|
2389 | def findAllPartial(self,sub): |
---|
2390 | roots = [] |
---|
2391 | |
---|
2392 | ### the empty tree cannot result in an enumeration |
---|
2393 | if not sub: |
---|
2394 | return None |
---|
2395 | |
---|
2396 | self.doWorkForFindAll(roots, sub, True) ### find all matches recursively |
---|
2397 | return roots |
---|
2398 | |
---|
2399 | ### Get the first child of this node None if not children |
---|
2400 | def getFirstChild(self): |
---|
2401 | return self.down |
---|
2402 | |
---|
2403 | ### Get the next sibling in line after this one |
---|
2404 | def getNextSibling(self): |
---|
2405 | return self.right |
---|
2406 | |
---|
2407 | ### Get the token text for this node |
---|
2408 | def getText(self): |
---|
2409 | return "" |
---|
2410 | |
---|
2411 | ### Get the token type for this node |
---|
2412 | def getType(self): |
---|
2413 | return 0 |
---|
2414 | |
---|
2415 | def getLine(self): |
---|
2416 | return 0 |
---|
2417 | |
---|
2418 | def getColumn(self): |
---|
2419 | return 0 |
---|
2420 | |
---|
2421 | ### Remove all children */ |
---|
2422 | def removeChildren(self): |
---|
2423 | self.down = None |
---|
2424 | |
---|
2425 | def setFirstChild(self,c): |
---|
2426 | self.down = c |
---|
2427 | |
---|
2428 | def setNextSibling(self, n): |
---|
2429 | self.right = n |
---|
2430 | |
---|
2431 | ### Set the token text for this node |
---|
2432 | def setText(self, text): |
---|
2433 | pass |
---|
2434 | |
---|
2435 | ### Set the token type for this node |
---|
2436 | def setType(self, ttype): |
---|
2437 | pass |
---|
2438 | |
---|
2439 | ### static |
---|
2440 | def setVerboseStringConversion(verbose,names): |
---|
2441 | verboseStringConversion = verbose |
---|
2442 | tokenNames = names |
---|
2443 | setVerboseStringConversion = staticmethod(setVerboseStringConversion) |
---|
2444 | |
---|
2445 | ### Return an array of strings that maps token ID to it's text. |
---|
2446 | ## @since 2.7.3 |
---|
2447 | def getTokenNames(): |
---|
2448 | return tokenNames |
---|
2449 | |
---|
2450 | def toString(self): |
---|
2451 | return self.getText() |
---|
2452 | |
---|
2453 | ### return tree as lisp string - sibling included |
---|
2454 | def toStringList(self): |
---|
2455 | ts = self.toStringTree() |
---|
2456 | sib = self.getNextSibling() |
---|
2457 | if sib: |
---|
2458 | ts += sib.toStringList() |
---|
2459 | return ts |
---|
2460 | |
---|
2461 | __str__ = toStringList |
---|
2462 | |
---|
2463 | ### return tree as string - siblings ignored |
---|
2464 | def toStringTree(self): |
---|
2465 | ts = "" |
---|
2466 | kid = self.getFirstChild() |
---|
2467 | if kid: |
---|
2468 | ts += " (" |
---|
2469 | ts += " " + self.toString() |
---|
2470 | if kid: |
---|
2471 | ts += kid.toStringList() |
---|
2472 | ts += " )" |
---|
2473 | return ts |
---|
2474 | |
---|
2475 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2476 | ### CommonAST ### |
---|
2477 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2478 | |
---|
2479 | ### Common AST node implementation |
---|
2480 | class CommonAST(BaseAST): |
---|
2481 | def __init__(self,token=None): |
---|
2482 | super(CommonAST,self).__init__() |
---|
2483 | self.ttype = INVALID_TYPE |
---|
2484 | self.text = "<no text>" |
---|
2485 | self.line = 0 |
---|
2486 | self.column= 0 |
---|
2487 | self.initialize(token) |
---|
2488 | #assert self.text |
---|
2489 | |
---|
2490 | ### Get the token text for this node |
---|
2491 | def getText(self): |
---|
2492 | return self.text |
---|
2493 | |
---|
2494 | ### Get the token type for this node |
---|
2495 | def getType(self): |
---|
2496 | return self.ttype |
---|
2497 | |
---|
2498 | ### Get the line for this node |
---|
2499 | def getLine(self): |
---|
2500 | return self.line |
---|
2501 | |
---|
2502 | ### Get the column for this node |
---|
2503 | def getColumn(self): |
---|
2504 | return self.column |
---|
2505 | |
---|
2506 | def initialize(self,*args): |
---|
2507 | if not args: |
---|
2508 | return |
---|
2509 | |
---|
2510 | arg0 = args[0] |
---|
2511 | |
---|
2512 | if isinstance(arg0,int): |
---|
2513 | arg1 = args[1] |
---|
2514 | self.setType(arg0) |
---|
2515 | self.setText(arg1) |
---|
2516 | return |
---|
2517 | |
---|
2518 | if isinstance(arg0,AST) or isinstance(arg0,Token): |
---|
2519 | self.setText(arg0.getText()) |
---|
2520 | self.setType(arg0.getType()) |
---|
2521 | self.line = arg0.getLine() |
---|
2522 | self.column = arg0.getColumn() |
---|
2523 | return |
---|
2524 | |
---|
2525 | ### Set the token text for this node |
---|
2526 | def setText(self,text_): |
---|
2527 | assert is_string_type(text_) |
---|
2528 | self.text = text_ |
---|
2529 | |
---|
2530 | ### Set the token type for this node |
---|
2531 | def setType(self,ttype_): |
---|
2532 | assert isinstance(ttype_,int) |
---|
2533 | self.ttype = ttype_ |
---|
2534 | |
---|
2535 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2536 | ### CommonASTWithHiddenTokens ### |
---|
2537 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2538 | |
---|
2539 | class CommonASTWithHiddenTokens(CommonAST): |
---|
2540 | |
---|
2541 | def __init__(self,*args): |
---|
2542 | CommonAST.__init__(self,*args) |
---|
2543 | self.hiddenBefore = None |
---|
2544 | self.hiddenAfter = None |
---|
2545 | |
---|
2546 | def getHiddenAfter(self): |
---|
2547 | return self.hiddenAfter |
---|
2548 | |
---|
2549 | def getHiddenBefore(self): |
---|
2550 | return self.hiddenBefore |
---|
2551 | |
---|
2552 | def initialize(self,*args): |
---|
2553 | CommonAST.initialize(self,*args) |
---|
2554 | if args and isinstance(args[0],Token): |
---|
2555 | assert isinstance(args[0],CommonHiddenStreamToken) |
---|
2556 | self.hiddenBefore = args[0].getHiddenBefore() |
---|
2557 | self.hiddenAfter = args[0].getHiddenAfter() |
---|
2558 | |
---|
2559 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2560 | ### ASTPair ### |
---|
2561 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2562 | |
---|
2563 | class ASTPair(object): |
---|
2564 | def __init__(self): |
---|
2565 | self.root = None ### current root of tree |
---|
2566 | self.child = None ### current child to which siblings are added |
---|
2567 | |
---|
2568 | ### Make sure that child is the last sibling */ |
---|
2569 | def advanceChildToEnd(self): |
---|
2570 | if self.child: |
---|
2571 | while self.child.getNextSibling(): |
---|
2572 | self.child = self.child.getNextSibling() |
---|
2573 | |
---|
2574 | ### Copy an ASTPair. Don't call it clone() because we want type-safety */ |
---|
2575 | def copy(self): |
---|
2576 | tmp = ASTPair() |
---|
2577 | tmp.root = self.root |
---|
2578 | tmp.child = self.child |
---|
2579 | return tmp |
---|
2580 | |
---|
2581 | def toString(self): |
---|
2582 | r = ifelse(not root,"null",self.root.getText()) |
---|
2583 | c = ifelse(not child,"null",self.child.getText()) |
---|
2584 | return "[%s,%s]" % (r,c) |
---|
2585 | |
---|
2586 | __str__ = toString |
---|
2587 | __repr__ = toString |
---|
2588 | |
---|
2589 | |
---|
2590 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2591 | ### ASTFactory ### |
---|
2592 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2593 | |
---|
2594 | class ASTFactory(object): |
---|
2595 | def __init__(self,table=None): |
---|
2596 | self._class = None |
---|
2597 | self._classmap = ifelse(table,table,None) |
---|
2598 | |
---|
2599 | def create(self,*args): |
---|
2600 | if not args: |
---|
2601 | return self.create(INVALID_TYPE) |
---|
2602 | |
---|
2603 | arg0 = args[0] |
---|
2604 | arg1 = None |
---|
2605 | arg2 = None |
---|
2606 | |
---|
2607 | try: |
---|
2608 | arg1 = args[1] |
---|
2609 | arg2 = args[2] |
---|
2610 | except: |
---|
2611 | pass |
---|
2612 | |
---|
2613 | # ctor(int) |
---|
2614 | if isinstance(arg0,int) and not arg2: |
---|
2615 | ### get class for 'self' type |
---|
2616 | c = self.getASTNodeType(arg0) |
---|
2617 | t = self.create(c) |
---|
2618 | if t: |
---|
2619 | t.initialize(arg0, ifelse(arg1,arg1,"")) |
---|
2620 | return t |
---|
2621 | |
---|
2622 | # ctor(int,something) |
---|
2623 | if isinstance(arg0,int) and arg2: |
---|
2624 | t = self.create(arg2) |
---|
2625 | if t: |
---|
2626 | t.initialize(arg0,arg1) |
---|
2627 | return t |
---|
2628 | |
---|
2629 | # ctor(AST) |
---|
2630 | if isinstance(arg0,AST): |
---|
2631 | t = self.create(arg0.getType()) |
---|
2632 | if t: |
---|
2633 | t.initialize(arg0) |
---|
2634 | return t |
---|
2635 | |
---|
2636 | # ctor(token) |
---|
2637 | if isinstance(arg0,Token) and not arg1: |
---|
2638 | ttype = arg0.getType() |
---|
2639 | assert isinstance(ttype,int) |
---|
2640 | t = self.create(ttype) |
---|
2641 | if t: |
---|
2642 | t.initialize(arg0) |
---|
2643 | return t |
---|
2644 | |
---|
2645 | # ctor(token,class) |
---|
2646 | if isinstance(arg0,Token) and arg1: |
---|
2647 | assert isinstance(arg1,type) |
---|
2648 | assert issubclass(arg1,AST) |
---|
2649 | # this creates instance of 'arg1' using 'arg0' as |
---|
2650 | # argument. Wow, that's magic! |
---|
2651 | t = arg1(arg0) |
---|
2652 | assert t and isinstance(t,AST) |
---|
2653 | return t |
---|
2654 | |
---|
2655 | # ctor(class) |
---|
2656 | if isinstance(arg0,type): |
---|
2657 | ### next statement creates instance of type (!) |
---|
2658 | t = arg0() |
---|
2659 | assert isinstance(t,AST) |
---|
2660 | return t |
---|
2661 | |
---|
2662 | |
---|
2663 | def setASTNodeClass(self,className=None): |
---|
2664 | if not className: |
---|
2665 | return |
---|
2666 | assert isinstance(className,type) |
---|
2667 | assert issubclass(className,AST) |
---|
2668 | self._class = className |
---|
2669 | |
---|
2670 | ### kind of misnomer - use setASTNodeClass instead. |
---|
2671 | setASTNodeType = setASTNodeClass |
---|
2672 | |
---|
2673 | def getASTNodeClass(self): |
---|
2674 | return self._class |
---|
2675 | |
---|
2676 | |
---|
2677 | |
---|
2678 | def getTokenTypeToASTClassMap(self): |
---|
2679 | return self._classmap |
---|
2680 | |
---|
2681 | def setTokenTypeToASTClassMap(self,amap): |
---|
2682 | self._classmap = amap |
---|
2683 | |
---|
2684 | def error(self, e): |
---|
2685 | import sys |
---|
2686 | print >> sys.stderr, e |
---|
2687 | |
---|
2688 | def setTokenTypeASTNodeType(self, tokenType, className): |
---|
2689 | """ |
---|
2690 | Specify a mapping between a token type and a (AST) class. |
---|
2691 | """ |
---|
2692 | if not self._classmap: |
---|
2693 | self._classmap = {} |
---|
2694 | |
---|
2695 | if not className: |
---|
2696 | try: |
---|
2697 | del self._classmap[tokenType] |
---|
2698 | except: |
---|
2699 | pass |
---|
2700 | else: |
---|
2701 | ### here we should also perform actions to ensure that |
---|
2702 | ### a. class can be loaded |
---|
2703 | ### b. class is a subclass of AST |
---|
2704 | ### |
---|
2705 | assert isinstance(className,type) |
---|
2706 | assert issubclass(className,AST) ## a & b |
---|
2707 | ### enter the class |
---|
2708 | self._classmap[tokenType] = className |
---|
2709 | |
---|
2710 | def getASTNodeType(self,tokenType): |
---|
2711 | """ |
---|
2712 | For a given token type return the AST node type. First we |
---|
2713 | lookup a mapping table, second we try _class |
---|
2714 | and finally we resolve to "antlr.CommonAST". |
---|
2715 | """ |
---|
2716 | |
---|
2717 | # first |
---|
2718 | if self._classmap: |
---|
2719 | try: |
---|
2720 | c = self._classmap[tokenType] |
---|
2721 | if c: |
---|
2722 | return c |
---|
2723 | except: |
---|
2724 | pass |
---|
2725 | # second |
---|
2726 | if self._class: |
---|
2727 | return self._class |
---|
2728 | |
---|
2729 | # default |
---|
2730 | return CommonAST |
---|
2731 | |
---|
2732 | ### methods that have been moved to file scope - just listed |
---|
2733 | ### here to be somewhat consistent with original API |
---|
2734 | def dup(self,t): |
---|
2735 | return antlr.dup(t,self) |
---|
2736 | |
---|
2737 | def dupList(self,t): |
---|
2738 | return antlr.dupList(t,self) |
---|
2739 | |
---|
2740 | def dupTree(self,t): |
---|
2741 | return antlr.dupTree(t,self) |
---|
2742 | |
---|
2743 | ### methods moved to other classes |
---|
2744 | ### 1. makeASTRoot -> Parser |
---|
2745 | ### 2. addASTChild -> Parser |
---|
2746 | |
---|
2747 | ### non-standard: create alias for longish method name |
---|
2748 | maptype = setTokenTypeASTNodeType |
---|
2749 | |
---|
2750 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2751 | ### ASTVisitor ### |
---|
2752 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2753 | |
---|
2754 | class ASTVisitor(object): |
---|
2755 | def __init__(self,*args): |
---|
2756 | pass |
---|
2757 | |
---|
2758 | def visit(self,ast): |
---|
2759 | pass |
---|
2760 | |
---|
2761 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2762 | ### static methods and variables ### |
---|
2763 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx### |
---|
2764 | |
---|
2765 | ASTNULL = ASTNULLType() |
---|
2766 | |
---|
2767 | ### wh: moved from ASTFactory as there's nothing ASTFactory-specific |
---|
2768 | ### in this method. |
---|
2769 | def make(*nodes): |
---|
2770 | if not nodes: |
---|
2771 | return None |
---|
2772 | |
---|
2773 | for i in xrange(0,len(nodes)): |
---|
2774 | node = nodes[i] |
---|
2775 | if node: |
---|
2776 | assert isinstance(node,AST) |
---|
2777 | |
---|
2778 | root = nodes[0] |
---|
2779 | tail = None |
---|
2780 | if root: |
---|
2781 | root.setFirstChild(None) |
---|
2782 | |
---|
2783 | for i in xrange(1,len(nodes)): |
---|
2784 | if not nodes[i]: |
---|
2785 | continue |
---|
2786 | if not root: |
---|
2787 | root = tail = nodes[i] |
---|
2788 | elif not tail: |
---|
2789 | root.setFirstChild(nodes[i]) |
---|
2790 | tail = root.getFirstChild() |
---|
2791 | else: |
---|
2792 | tail.setNextSibling(nodes[i]) |
---|
2793 | tail = tail.getNextSibling() |
---|
2794 | |
---|
2795 | ### Chase tail to last sibling |
---|
2796 | while tail.getNextSibling(): |
---|
2797 | tail = tail.getNextSibling() |
---|
2798 | return root |
---|
2799 | |
---|
2800 | def dup(t,factory): |
---|
2801 | if not t: |
---|
2802 | return None |
---|
2803 | |
---|
2804 | if factory: |
---|
2805 | dup_t = factory.create(t.__class__) |
---|
2806 | else: |
---|
2807 | raise TypeError("dup function requires ASTFactory argument") |
---|
2808 | dup_t.initialize(t) |
---|
2809 | return dup_t |
---|
2810 | |
---|
2811 | def dupList(t,factory): |
---|
2812 | result = dupTree(t,factory) |
---|
2813 | nt = result |
---|
2814 | while t: |
---|
2815 | ## for each sibling of the root |
---|
2816 | t = t.getNextSibling() |
---|
2817 | nt.setNextSibling(dupTree(t,factory)) |
---|
2818 | nt = nt.getNextSibling() |
---|
2819 | return result |
---|
2820 | |
---|
2821 | def dupTree(t,factory): |
---|
2822 | result = dup(t,factory) |
---|
2823 | if t: |
---|
2824 | result.setFirstChild(dupList(t.getFirstChild(),factory)) |
---|
2825 | return result |
---|
2826 | |
---|
2827 | ###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx |
---|
2828 | ### $Id: antlr.py,v 1.1.1.1 2005/02/02 10:24:36 geronimo Exp $ |
---|
2829 | |
---|
2830 | # Local Variables: *** |
---|
2831 | # mode: python *** |
---|
2832 | # py-indent-offset: 4 *** |
---|
2833 | # End: *** |
---|