1 | // |
---|
2 | // Pascal Grammar |
---|
3 | // |
---|
4 | // Adapted from, |
---|
5 | // Pascal User Manual And Report (Second Edition-1978) |
---|
6 | // Kathleen Jensen - Niklaus Wirth |
---|
7 | // |
---|
8 | // By |
---|
9 | // |
---|
10 | // Hakki Dogusan dogusanh@tr-net.net.tr |
---|
11 | // |
---|
12 | // Then significantly enhanced by Piet Schoutteten |
---|
13 | // with some guidance by Terence Parr. Piet added tree |
---|
14 | // construction, and some tree walkers. |
---|
15 | // |
---|
16 | |
---|
17 | { |
---|
18 | import java.util.*; |
---|
19 | import java.io.*; |
---|
20 | import antlr.collections.AST; |
---|
21 | import antlr.collections.impl.*; |
---|
22 | import antlr.debug.misc.*; |
---|
23 | import antlr.*; |
---|
24 | } |
---|
25 | |
---|
26 | |
---|
27 | class PascalParser extends Parser; |
---|
28 | options { |
---|
29 | k = 2; // two token lookahead |
---|
30 | exportVocab=Pascal; // Call its vocabulary "Pascal" |
---|
31 | codeGenMakeSwitchThreshold = 2; // Some optimizations |
---|
32 | codeGenBitsetTestThreshold = 3; |
---|
33 | defaultErrorHandler = false; // Don't generate parser error handlers |
---|
34 | buildAST = true; |
---|
35 | ASTLabelType = "PascalAST"; |
---|
36 | } |
---|
37 | |
---|
38 | /* Define imaginary tokens used to organize tree |
---|
39 | * |
---|
40 | * One of the principles here is that any time you have a list of |
---|
41 | * stuff, you usually want to treat it like one thing (a list) a some |
---|
42 | * point in the grammar. You want trees to have a fixed number of children |
---|
43 | * as much as possible. For example, the definition of a procedure should |
---|
44 | * be something like #(PROCEDURE ID #(ARGDECLS ARG1 ARG2...)) not |
---|
45 | * #(PROCEDURE ID ARG1 ARG2 ... ) since this is harder to parse and |
---|
46 | * harder to manipulate. Same is true for statement lists (BLOCK) etc... |
---|
47 | */ |
---|
48 | tokens { |
---|
49 | BLOCK; // list of statements |
---|
50 | IDLIST; // list of identifiers; e.g., #(PROGRAM #(IDLIST ID ID...)) |
---|
51 | ELIST; // expression list for proc args etc... |
---|
52 | FUNC_CALL; |
---|
53 | PROC_CALL; |
---|
54 | SCALARTYPE; // IDLIST that is really a scalar type like (Mon,Tue,Wed) |
---|
55 | TYPELIST; // list of types such as for array declarations |
---|
56 | VARIANT_TAG;// for CASEs in a RECORD |
---|
57 | VARIANT_TAG_NO_ID;// for CASEs in a RECORD (no id, just a type) |
---|
58 | VARIANT_CASE;// a case of the variant |
---|
59 | CONSTLIST; // List of constants |
---|
60 | FIELDLIST; // list of fields in a record |
---|
61 | ARGDECLS; // overall group of declarations of args for proc/func. |
---|
62 | VARDECL; // declaration of a variable |
---|
63 | ARGDECL; // declaration of a parameter |
---|
64 | ARGLIST; // list of actual arguments (expressions) |
---|
65 | TYPEDECL; // declaration of a type |
---|
66 | FIELD; // the root a RECORD field |
---|
67 | } |
---|
68 | |
---|
69 | // Define some methods and variables to use in the generated parser. |
---|
70 | { |
---|
71 | /** Overall symbol table for translator */ |
---|
72 | public static SymbolTable symbolTable = new SymbolTable(); |
---|
73 | |
---|
74 | // This method decides what action to take based on the type of |
---|
75 | // file we are looking at |
---|
76 | public static void doFile(File f) throws Exception { |
---|
77 | // If this is a directory, walk each file/dir in that directory |
---|
78 | translateFilePath = f.getParent(); |
---|
79 | if (f.isDirectory()) { |
---|
80 | String files[] = f.list(); |
---|
81 | for(int i=0; i < files.length; i++) |
---|
82 | { |
---|
83 | doFile(new File(f, files[i])); |
---|
84 | } |
---|
85 | } |
---|
86 | // otherwise, if this is a Pascal file, parse it! |
---|
87 | else if ((f.getName().length()>4) && |
---|
88 | f.getName().substring(f.getName().length()-4).toLowerCase().equals(".pas")) { |
---|
89 | System.err.println(" "+f.getAbsolutePath()); |
---|
90 | |
---|
91 | if (translateFileName == null) { |
---|
92 | translateFileName = f.getName(); //set this file as the one to translate |
---|
93 | currentFileName = f.getName(); |
---|
94 | } |
---|
95 | |
---|
96 | parseFile(f.getName(),new FileInputStream(f)); |
---|
97 | } |
---|
98 | else { |
---|
99 | System.err.println("Can not parse: "+f.getAbsolutePath()); |
---|
100 | } |
---|
101 | } |
---|
102 | |
---|
103 | // Here's where we do the real work... |
---|
104 | public static void parseFile(String f,InputStream s) throws Exception { |
---|
105 | try { |
---|
106 | currentFileName = f; // set this File as the currentFileName |
---|
107 | |
---|
108 | // Create a scanner that reads from the input stream passed to us |
---|
109 | PascalLexer lexer = new PascalLexer(s); |
---|
110 | |
---|
111 | // Create a parser that reads from the scanner |
---|
112 | PascalParser parser = new PascalParser(lexer); |
---|
113 | |
---|
114 | // set AST type to PascalAST (has symbol) |
---|
115 | parser.setASTNodeClass("PascalAST"); |
---|
116 | |
---|
117 | // start parsing at the program rule |
---|
118 | parser.program(); |
---|
119 | |
---|
120 | CommonAST t = (CommonAST)parser.getAST(); |
---|
121 | |
---|
122 | // do something with the tree |
---|
123 | parser.doTreeAction(f, parser.getAST(), parser.getTokenNames()); |
---|
124 | //System.out.println(parser.getAST().toStringList()); |
---|
125 | |
---|
126 | |
---|
127 | |
---|
128 | // build symbol table |
---|
129 | |
---|
130 | // Get the tree out of the parser |
---|
131 | AST resultTree1 = parser.getAST(); |
---|
132 | |
---|
133 | // Make an instance of the tree parser |
---|
134 | // PascalTreeParserSuper treeParser1 = new PascalTreeParserSuper(); |
---|
135 | SymtabPhase treeParser1 = new SymtabPhase(); |
---|
136 | |
---|
137 | treeParser1.setASTNodeClass("PascalAST"); |
---|
138 | |
---|
139 | // Begin tree parser at only rule |
---|
140 | treeParser1.program(resultTree1); |
---|
141 | |
---|
142 | |
---|
143 | |
---|
144 | // parser.doTreeAction(f, treeParser1.getAST(), treeParser1.getTokenNames()); |
---|
145 | |
---|
146 | |
---|
147 | |
---|
148 | |
---|
149 | } |
---|
150 | catch (Exception e) { |
---|
151 | System.err.println("parser exception: "+e); |
---|
152 | e.printStackTrace(); // so we can get stack trace |
---|
153 | } |
---|
154 | } |
---|
155 | public void doTreeAction(String f, AST t, String[] tokenNames) { |
---|
156 | if ( t==null ) return; |
---|
157 | if ( showTree ) { |
---|
158 | ((CommonAST)t).setVerboseStringConversion(true, tokenNames); |
---|
159 | ASTFactory factory = new ASTFactory(); |
---|
160 | AST r = factory.create(0,"AST ROOT"); |
---|
161 | r.setFirstChild(t); |
---|
162 | ASTFrame frame = new ASTFrame("Pascal AST", r); |
---|
163 | frame.setVisible(true); |
---|
164 | //System.out.println(t.toStringList()); |
---|
165 | } |
---|
166 | |
---|
167 | } |
---|
168 | static boolean showTree = true; |
---|
169 | public static String translateFilePath; |
---|
170 | public static String translateFileName; |
---|
171 | public static String currentFileName; // not static, recursive USES ... other FileName in currentFileName |
---|
172 | public static String oldtranslateFileName; |
---|
173 | |
---|
174 | |
---|
175 | // main |
---|
176 | public static void main(String[] args) { |
---|
177 | // Use a try/catch block for parser exceptions |
---|
178 | try { |
---|
179 | // if we have at least one command-line argument |
---|
180 | if (args.length > 0 ) { |
---|
181 | |
---|
182 | // for each directory/file specified on the command line |
---|
183 | for(int i=0; i< args.length;i++) |
---|
184 | { |
---|
185 | if ( args[i].equals("-showtree") ) { |
---|
186 | showTree = true; |
---|
187 | } |
---|
188 | else { |
---|
189 | System.err.println("Parsing..."); |
---|
190 | doFile(new File(args[i])); // parse it |
---|
191 | } |
---|
192 | } |
---|
193 | } |
---|
194 | else |
---|
195 | System.err.println("Usage: java PascalParser <file/directory name>"); |
---|
196 | |
---|
197 | } |
---|
198 | catch(Exception e) { |
---|
199 | System.err.println("exception: "+e); |
---|
200 | e.printStackTrace(System.err); // so we can get stack trace |
---|
201 | } |
---|
202 | } |
---|
203 | |
---|
204 | } |
---|
205 | |
---|
206 | |
---|
207 | program |
---|
208 | : programHeading (INTERFACE!)? |
---|
209 | block |
---|
210 | DOT! |
---|
211 | ; |
---|
212 | |
---|
213 | programHeading |
---|
214 | : PROGRAM^ identifier LPAREN! identifierList RPAREN! SEMI! |
---|
215 | | UNIT^ identifier SEMI! |
---|
216 | ; |
---|
217 | |
---|
218 | identifier |
---|
219 | : IDENT |
---|
220 | ; |
---|
221 | |
---|
222 | block |
---|
223 | : ( labelDeclarationPart |
---|
224 | | constantDefinitionPart |
---|
225 | | typeDefinitionPart |
---|
226 | | variableDeclarationPart |
---|
227 | | procedureAndFunctionDeclarationPart |
---|
228 | | usesUnitsPart |
---|
229 | | IMPLEMENTATION |
---|
230 | )* |
---|
231 | compoundStatement |
---|
232 | ; |
---|
233 | |
---|
234 | usesUnitsPart |
---|
235 | : USES^ identifierList SEMI! |
---|
236 | ; |
---|
237 | |
---|
238 | labelDeclarationPart |
---|
239 | : LABEL^ label ( COMMA! label )* SEMI! |
---|
240 | ; |
---|
241 | |
---|
242 | label |
---|
243 | : unsignedInteger |
---|
244 | ; |
---|
245 | |
---|
246 | constantDefinitionPart |
---|
247 | : CONST^ constantDefinition ( SEMI! constantDefinition )* SEMI! |
---|
248 | ; |
---|
249 | |
---|
250 | constantDefinition |
---|
251 | : identifier EQUAL^ constant |
---|
252 | ; |
---|
253 | |
---|
254 | constantChr |
---|
255 | : CHR^ LPAREN! unsignedInteger RPAREN! |
---|
256 | ; |
---|
257 | |
---|
258 | constant |
---|
259 | : unsignedNumber |
---|
260 | |! s:sign n:unsignedNumber { #constant=#(s,n); } |
---|
261 | | identifier |
---|
262 | |! s2:sign id:identifier { #constant=#(s2,id); } |
---|
263 | | string |
---|
264 | | constantChr |
---|
265 | ; |
---|
266 | |
---|
267 | unsignedNumber |
---|
268 | : unsignedInteger |
---|
269 | | unsignedReal |
---|
270 | ; |
---|
271 | |
---|
272 | unsignedInteger |
---|
273 | : NUM_INT |
---|
274 | ; |
---|
275 | |
---|
276 | unsignedReal |
---|
277 | : NUM_REAL |
---|
278 | ; |
---|
279 | |
---|
280 | sign |
---|
281 | : PLUS | MINUS |
---|
282 | ; |
---|
283 | |
---|
284 | string |
---|
285 | : STRING_LITERAL |
---|
286 | ; |
---|
287 | |
---|
288 | typeDefinitionPart |
---|
289 | : TYPE^ typeDefinition ( SEMI! typeDefinition )* SEMI! |
---|
290 | ; |
---|
291 | |
---|
292 | //PSPSPS |
---|
293 | typeDefinition |
---|
294 | : identifier e:EQUAL^ {#e.setType(TYPEDECL);} |
---|
295 | ( type |
---|
296 | | functionType |
---|
297 | // | FUNCTION^ (formalParameterList)? COLON! resultType |
---|
298 | | procedureType |
---|
299 | // | PROCEDURE^ (formalParameterList)? |
---|
300 | ) |
---|
301 | ; |
---|
302 | |
---|
303 | functionType |
---|
304 | : FUNCTION^ (formalParameterList)? COLON! resultType |
---|
305 | ; |
---|
306 | |
---|
307 | procedureType |
---|
308 | : PROCEDURE^ (formalParameterList)? |
---|
309 | ; |
---|
310 | |
---|
311 | type |
---|
312 | : simpleType |
---|
313 | | structuredType |
---|
314 | | pointerType |
---|
315 | ; |
---|
316 | |
---|
317 | simpleType |
---|
318 | : scalarType |
---|
319 | | subrangeType |
---|
320 | | typeIdentifier |
---|
321 | | stringtype |
---|
322 | ; |
---|
323 | |
---|
324 | scalarType |
---|
325 | : LPAREN^ identifierList RPAREN! {#scalarType.setType(SCALARTYPE);} |
---|
326 | ; |
---|
327 | |
---|
328 | subrangeType |
---|
329 | : constant DOTDOT^ constant |
---|
330 | ; |
---|
331 | |
---|
332 | typeIdentifier |
---|
333 | : identifier |
---|
334 | | CHAR |
---|
335 | | BOOLEAN |
---|
336 | | INTEGER |
---|
337 | | REAL |
---|
338 | | STRING // as in return type: FUNCTION ... (...): string; |
---|
339 | ; |
---|
340 | |
---|
341 | structuredType |
---|
342 | : PACKED^ unpackedStructuredType |
---|
343 | | unpackedStructuredType |
---|
344 | ; |
---|
345 | |
---|
346 | unpackedStructuredType |
---|
347 | : arrayType |
---|
348 | | recordType |
---|
349 | | setType |
---|
350 | | fileType |
---|
351 | ; |
---|
352 | |
---|
353 | stringtype |
---|
354 | : STRING^ LBRACK! (identifier|unsignedNumber) RBRACK! |
---|
355 | ; |
---|
356 | |
---|
357 | arrayType |
---|
358 | : ARRAY^ LBRACK! typeList RBRACK! OF! componentType |
---|
359 | | ARRAY^ LBRACK2! typeList RBRACK2! OF! componentType |
---|
360 | ; |
---|
361 | |
---|
362 | typeList |
---|
363 | : indexType ( COMMA! indexType )* |
---|
364 | {#typeList = #(#[TYPELIST],#typeList);} |
---|
365 | ; |
---|
366 | |
---|
367 | indexType |
---|
368 | : simpleType |
---|
369 | ; |
---|
370 | |
---|
371 | componentType |
---|
372 | : type |
---|
373 | ; |
---|
374 | |
---|
375 | recordType |
---|
376 | : RECORD^ fieldList END! |
---|
377 | ; |
---|
378 | |
---|
379 | fieldList |
---|
380 | : ( fixedPart ( SEMI! variantPart | SEMI! )? |
---|
381 | | variantPart |
---|
382 | ) |
---|
383 | {#fieldList=#([FIELDLIST],#fieldList);} |
---|
384 | ; |
---|
385 | |
---|
386 | fixedPart |
---|
387 | : recordSection ( SEMI! recordSection )* |
---|
388 | ; |
---|
389 | |
---|
390 | recordSection |
---|
391 | : identifierList COLON! type |
---|
392 | {#recordSection = #([FIELD],#recordSection);} |
---|
393 | ; |
---|
394 | |
---|
395 | variantPart |
---|
396 | : CASE^ tag OF! variant ( SEMI! variant | SEMI! )* |
---|
397 | ; |
---|
398 | |
---|
399 | tag! |
---|
400 | : id:identifier COLON t:typeIdentifier {#tag=#([VARIANT_TAG],id,t);} |
---|
401 | | t2:typeIdentifier {#tag=#([VARIANT_TAG_NO_ID],t2);} |
---|
402 | ; |
---|
403 | |
---|
404 | variant |
---|
405 | : constList c:COLON^ {#c.setType(VARIANT_CASE);} |
---|
406 | LPAREN! fieldList RPAREN! |
---|
407 | ; |
---|
408 | |
---|
409 | setType |
---|
410 | : SET^ OF! baseType |
---|
411 | ; |
---|
412 | |
---|
413 | baseType |
---|
414 | : simpleType |
---|
415 | ; |
---|
416 | |
---|
417 | fileType |
---|
418 | : FILE^ OF! type |
---|
419 | | FILE |
---|
420 | ; |
---|
421 | |
---|
422 | pointerType |
---|
423 | : POINTER^ typeIdentifier |
---|
424 | ; |
---|
425 | |
---|
426 | /** Yields a list of VARDECL-rooted subtrees with VAR at the overall root */ |
---|
427 | variableDeclarationPart |
---|
428 | : VAR^ variableDeclaration ( SEMI! variableDeclaration )* SEMI! |
---|
429 | ; |
---|
430 | |
---|
431 | variableDeclaration |
---|
432 | : identifierList c:COLON^ {#c.setType(VARDECL);} type |
---|
433 | ; |
---|
434 | |
---|
435 | procedureAndFunctionDeclarationPart |
---|
436 | : procedureOrFunctionDeclaration SEMI! |
---|
437 | ; |
---|
438 | |
---|
439 | procedureOrFunctionDeclaration |
---|
440 | : procedureDeclaration |
---|
441 | | functionDeclaration |
---|
442 | ; |
---|
443 | |
---|
444 | procedureDeclaration |
---|
445 | : PROCEDURE^ identifier (formalParameterList)? SEMI! |
---|
446 | block |
---|
447 | ; |
---|
448 | |
---|
449 | formalParameterList |
---|
450 | : LPAREN^ formalParameterSection ( SEMI! formalParameterSection )* RPAREN! |
---|
451 | {#formalParameterList.setType(ARGDECLS);} |
---|
452 | ; |
---|
453 | |
---|
454 | formalParameterSection |
---|
455 | : parameterGroup |
---|
456 | | VAR^ parameterGroup |
---|
457 | | FUNCTION^ parameterGroup |
---|
458 | | PROCEDURE^ parameterGroup |
---|
459 | ; |
---|
460 | |
---|
461 | parameterGroup! |
---|
462 | : ids:identifierList COLON! t:typeIdentifier |
---|
463 | {#parameterGroup = #([ARGDECL],ids,t);} |
---|
464 | ; |
---|
465 | |
---|
466 | identifierList |
---|
467 | : identifier ( COMMA! identifier )* |
---|
468 | {#identifierList = #(#[IDLIST],#identifierList);} |
---|
469 | ; |
---|
470 | |
---|
471 | constList |
---|
472 | : constant ( COMMA! constant )* |
---|
473 | {#constList = #([CONSTLIST],#constList);} |
---|
474 | ; |
---|
475 | |
---|
476 | functionDeclaration |
---|
477 | : FUNCTION^ identifier (formalParameterList)? COLON! resultType SEMI! |
---|
478 | block |
---|
479 | ; |
---|
480 | |
---|
481 | resultType |
---|
482 | : typeIdentifier |
---|
483 | ; |
---|
484 | |
---|
485 | statement |
---|
486 | : label COLON^ unlabelledStatement |
---|
487 | | unlabelledStatement |
---|
488 | ; |
---|
489 | |
---|
490 | unlabelledStatement |
---|
491 | : simpleStatement |
---|
492 | | structuredStatement |
---|
493 | ; |
---|
494 | |
---|
495 | simpleStatement |
---|
496 | : assignmentStatement |
---|
497 | | procedureStatement |
---|
498 | | gotoStatement |
---|
499 | | emptyStatement |
---|
500 | ; |
---|
501 | |
---|
502 | assignmentStatement |
---|
503 | : variable ASSIGN^ expression |
---|
504 | ; |
---|
505 | |
---|
506 | /** A variable is an id with a suffix and can look like: |
---|
507 | * id |
---|
508 | * id[expr,...] |
---|
509 | * id.id |
---|
510 | * id.id[expr,...] |
---|
511 | * id^ |
---|
512 | * id^.id |
---|
513 | * id^.id[expr,...] |
---|
514 | * ... |
---|
515 | * |
---|
516 | * LL has a really hard time with this construct as it's naturally |
---|
517 | * left-recursive. We have to turn into a simple loop rather than |
---|
518 | * recursive loop, hence, the suffixes. I keep in the same rule |
---|
519 | * for easy tree construction. |
---|
520 | */ |
---|
521 | variable |
---|
522 | : ( AT^ identifier // AT is root of identifier; then other op becomes root |
---|
523 | | identifier |
---|
524 | ) |
---|
525 | ( LBRACK^ expression ( COMMA! expression)* RBRACK! |
---|
526 | | LBRACK2^ expression ( COMMA! expression)* RBRACK2! |
---|
527 | | DOT^ identifier |
---|
528 | | POINTER^ |
---|
529 | )* |
---|
530 | ; |
---|
531 | |
---|
532 | expression |
---|
533 | : simpleExpression |
---|
534 | ( (EQUAL^ | NOT_EQUAL^ | LT^ | LE^ | GE^ | GT^ | IN^) simpleExpression )* |
---|
535 | ; |
---|
536 | |
---|
537 | simpleExpression |
---|
538 | : term ( (PLUS^ | MINUS^ | OR^) term )* |
---|
539 | ; |
---|
540 | |
---|
541 | term |
---|
542 | : signedFactor ( (STAR^ | SLASH^ | DIV^ | MOD^ | AND^) signedFactor )* |
---|
543 | ; |
---|
544 | |
---|
545 | signedFactor |
---|
546 | : (PLUS^|MINUS^)? factor |
---|
547 | ; |
---|
548 | |
---|
549 | factor |
---|
550 | : variable |
---|
551 | | LPAREN! expression RPAREN! |
---|
552 | | functionDesignator |
---|
553 | | unsignedConstant |
---|
554 | | set |
---|
555 | | NOT^ factor |
---|
556 | ; |
---|
557 | |
---|
558 | unsignedConstant |
---|
559 | : unsignedNumber |
---|
560 | | constantChr //pspsps added |
---|
561 | | string |
---|
562 | | NIL |
---|
563 | ; |
---|
564 | |
---|
565 | functionDesignator! |
---|
566 | : id:identifier LPAREN! args:parameterList RPAREN! |
---|
567 | {#functionDesignator = #([FUNC_CALL],id,args);} |
---|
568 | ; |
---|
569 | |
---|
570 | parameterList |
---|
571 | : actualParameter ( COMMA! actualParameter )* |
---|
572 | {#parameterList = #([ARGLIST],#parameterList);} |
---|
573 | ; |
---|
574 | |
---|
575 | set |
---|
576 | : LBRACK^ elementList RBRACK! {#set.setType(SET);} |
---|
577 | | LBRACK2^ elementList RBRACK2! {#set.setType(SET);} |
---|
578 | ; |
---|
579 | |
---|
580 | elementList |
---|
581 | : element ( COMMA! element )* |
---|
582 | | |
---|
583 | ; |
---|
584 | |
---|
585 | element |
---|
586 | : expression ( DOTDOT^ expression )? |
---|
587 | ; |
---|
588 | |
---|
589 | procedureStatement! |
---|
590 | : id:identifier ( LPAREN! args:parameterList RPAREN! )? |
---|
591 | {#procedureStatement = #([PROC_CALL],id,args);} |
---|
592 | ; |
---|
593 | |
---|
594 | actualParameter |
---|
595 | : expression |
---|
596 | ; |
---|
597 | |
---|
598 | gotoStatement |
---|
599 | : GOTO^ label |
---|
600 | ; |
---|
601 | |
---|
602 | emptyStatement |
---|
603 | : |
---|
604 | ; |
---|
605 | |
---|
606 | empty |
---|
607 | : /* empty */ |
---|
608 | ; |
---|
609 | |
---|
610 | structuredStatement |
---|
611 | : compoundStatement |
---|
612 | | conditionalStatement |
---|
613 | | repetetiveStatement |
---|
614 | | withStatement |
---|
615 | ; |
---|
616 | |
---|
617 | compoundStatement |
---|
618 | : BEGIN! |
---|
619 | statements |
---|
620 | END! |
---|
621 | ; |
---|
622 | |
---|
623 | statements |
---|
624 | : statement ( SEMI! statement )* {#statements = #([BLOCK],#statements);} |
---|
625 | ; |
---|
626 | |
---|
627 | conditionalStatement |
---|
628 | : ifStatement |
---|
629 | | caseStatement |
---|
630 | ; |
---|
631 | |
---|
632 | ifStatement |
---|
633 | : IF^ expression THEN! statement |
---|
634 | ( |
---|
635 | // CONFLICT: the old "dangling-else" problem... |
---|
636 | // ANTLR generates proper code matching |
---|
637 | // as soon as possible. Hush warning. |
---|
638 | options { |
---|
639 | generateAmbigWarnings=false; |
---|
640 | } |
---|
641 | : ELSE! statement |
---|
642 | )? |
---|
643 | ; |
---|
644 | |
---|
645 | caseStatement //pspsps ??? |
---|
646 | : CASE^ expression OF! |
---|
647 | caseListElement ( SEMI! caseListElement )* |
---|
648 | ( SEMI! ELSE! statements )? |
---|
649 | END! |
---|
650 | ; |
---|
651 | |
---|
652 | caseListElement |
---|
653 | : constList COLON^ statement |
---|
654 | ; |
---|
655 | |
---|
656 | repetetiveStatement |
---|
657 | : whileStatement |
---|
658 | | repeatStatement |
---|
659 | | forStatement |
---|
660 | ; |
---|
661 | |
---|
662 | whileStatement |
---|
663 | : WHILE^ expression DO! statement |
---|
664 | ; |
---|
665 | |
---|
666 | repeatStatement |
---|
667 | : REPEAT^ statements UNTIL! expression |
---|
668 | ; |
---|
669 | |
---|
670 | forStatement |
---|
671 | : FOR^ identifier ASSIGN! forList DO! statement |
---|
672 | ; |
---|
673 | |
---|
674 | forList |
---|
675 | : initialValue (TO^ | DOWNTO^) finalValue |
---|
676 | ; |
---|
677 | |
---|
678 | initialValue |
---|
679 | : expression |
---|
680 | ; |
---|
681 | |
---|
682 | finalValue |
---|
683 | : expression |
---|
684 | ; |
---|
685 | |
---|
686 | withStatement |
---|
687 | : WITH^ recordVariableList DO! statement |
---|
688 | ; |
---|
689 | |
---|
690 | recordVariableList |
---|
691 | : variable ( COMMA! variable )* |
---|
692 | ; |
---|
693 | |
---|
694 | //---------------------------------------------------------------------------- |
---|
695 | // The Pascal scanner |
---|
696 | //---------------------------------------------------------------------------- |
---|
697 | class PascalLexer extends Lexer; |
---|
698 | |
---|
699 | options { |
---|
700 | charVocabulary = '\0'..'\377'; |
---|
701 | exportVocab = Pascal; // call the vocabulary "Pascal" |
---|
702 | testLiterals = false; // don't automatically test for literals |
---|
703 | k = 4; // four characters of lookahead |
---|
704 | caseSensitive = false; |
---|
705 | caseSensitiveLiterals = false; |
---|
706 | } |
---|
707 | |
---|
708 | tokens { |
---|
709 | AND = "and" ; |
---|
710 | ARRAY = "array" ; |
---|
711 | BEGIN = "begin" ; |
---|
712 | BOOLEAN = "boolean" ; |
---|
713 | CASE = "case" ; |
---|
714 | CHAR = "char" ; |
---|
715 | CHR = "chr" ; |
---|
716 | CONST = "const" ; |
---|
717 | DIV = "div" ; |
---|
718 | DO = "do" ; |
---|
719 | DOWNTO = "downto" ; |
---|
720 | ELSE = "else" ; |
---|
721 | END = "end" ; |
---|
722 | FILE = "file" ; |
---|
723 | FOR = "for" ; |
---|
724 | FUNCTION = "function" ; |
---|
725 | GOTO = "goto" ; |
---|
726 | IF = "if" ; |
---|
727 | IN = "in" ; |
---|
728 | INTEGER = "integer" ; |
---|
729 | LABEL = "label" ; |
---|
730 | MOD = "mod" ; |
---|
731 | NIL = "nil" ; |
---|
732 | NOT = "not" ; |
---|
733 | OF = "of" ; |
---|
734 | OR = "or" ; |
---|
735 | PACKED = "packed" ; |
---|
736 | PROCEDURE = "procedure" ; |
---|
737 | PROGRAM = "program" ; |
---|
738 | REAL = "real" ; |
---|
739 | RECORD = "record" ; |
---|
740 | REPEAT = "repeat" ; |
---|
741 | SET = "set" ; |
---|
742 | THEN = "then" ; |
---|
743 | TO = "to" ; |
---|
744 | TYPE = "type" ; |
---|
745 | UNTIL = "until" ; |
---|
746 | VAR = "var" ; |
---|
747 | WHILE = "while" ; |
---|
748 | WITH = "with" ; |
---|
749 | METHOD ; |
---|
750 | ADDSUBOR ; |
---|
751 | ASSIGNEQUAL ; |
---|
752 | SIGN ; |
---|
753 | FUNC ; |
---|
754 | NODE_NOT_EMIT ; |
---|
755 | MYASTVAR ; |
---|
756 | LF ; |
---|
757 | UNIT = "unit" ; |
---|
758 | INTERFACE = "interface" ; |
---|
759 | USES = "uses" ; |
---|
760 | STRING = "string" ; |
---|
761 | IMPLEMENTATION = "implementation" ; |
---|
762 | //pspsps ??? |
---|
763 | } |
---|
764 | |
---|
765 | //---------------------------------------------------------------------------- |
---|
766 | // OPERATORS |
---|
767 | //---------------------------------------------------------------------------- |
---|
768 | PLUS : '+' ; |
---|
769 | MINUS : '-' ; |
---|
770 | STAR : '*' ; |
---|
771 | SLASH : '/' ; |
---|
772 | ASSIGN : ":=" ; |
---|
773 | COMMA : ',' ; |
---|
774 | SEMI : ';' ; |
---|
775 | COLON : ':' ; |
---|
776 | EQUAL : '=' ; |
---|
777 | NOT_EQUAL : "<>" ; |
---|
778 | LT : '<' ; |
---|
779 | LE : "<=" ; |
---|
780 | GE : ">=" ; |
---|
781 | GT : '>' ; |
---|
782 | LPAREN : '(' ; |
---|
783 | RPAREN : ')' ; |
---|
784 | LBRACK : '[' ; // line_tab[line] |
---|
785 | LBRACK2 : "(." ; // line_tab(.line.) |
---|
786 | RBRACK : ']' ; |
---|
787 | RBRACK2 : ".)" ; |
---|
788 | POINTER : '^' ; |
---|
789 | AT : '@' ; |
---|
790 | DOT : '.' ('.' {$setType(DOTDOT);})? ; |
---|
791 | LCURLY : "{" ; |
---|
792 | RCURLY : "}" ; |
---|
793 | |
---|
794 | |
---|
795 | // Whitespace -- ignored |
---|
796 | WS : ( ' ' |
---|
797 | | '\t' |
---|
798 | | '\f' |
---|
799 | // handle newlines |
---|
800 | | ( "\r\n" // Evil DOS |
---|
801 | | '\r' // Macintosh |
---|
802 | | '\n' // Unix (the right way) |
---|
803 | ) |
---|
804 | { newline(); } |
---|
805 | ) |
---|
806 | { _ttype = Token.SKIP; } |
---|
807 | ; |
---|
808 | |
---|
809 | |
---|
810 | COMMENT_1 |
---|
811 | : "(*" |
---|
812 | ( options { generateAmbigWarnings=false; } |
---|
813 | : { LA(2) != ')' }? '*' |
---|
814 | | '\r' '\n' {newline();} |
---|
815 | | '\r' {newline();} |
---|
816 | | '\n' {newline();} |
---|
817 | | ~('*' | '\n' | '\r') |
---|
818 | )* |
---|
819 | "*)" |
---|
820 | {$setType(Token.SKIP);} |
---|
821 | ; |
---|
822 | |
---|
823 | COMMENT_2 |
---|
824 | : '{' |
---|
825 | ( options {generateAmbigWarnings=false;} |
---|
826 | : '\r' '\n' {newline();} |
---|
827 | | '\r' {newline();} |
---|
828 | | '\n' {newline();} |
---|
829 | | ~('}' | '\n' | '\r') |
---|
830 | )* |
---|
831 | '}' |
---|
832 | {$setType(Token.SKIP);} |
---|
833 | ; |
---|
834 | |
---|
835 | // an identifier. Note that testLiterals is set to true! This means |
---|
836 | // that after we match the rule, we look in the literals table to see |
---|
837 | // if it's a literal or really an identifer |
---|
838 | IDENT |
---|
839 | options {testLiterals=true;} |
---|
840 | : ('a'..'z') ('a'..'z'|'0'..'9'|'_')* //pspsps |
---|
841 | ; |
---|
842 | |
---|
843 | // string literals |
---|
844 | STRING_LITERAL |
---|
845 | : '\'' ("\'\'" | ~('\''))* '\'' //pspsps * in stead of + because of e.g. '' |
---|
846 | ; |
---|
847 | |
---|
848 | /** a numeric literal. Form is (from Wirth) |
---|
849 | * digits |
---|
850 | * digits . digits |
---|
851 | * digits . digits exponent |
---|
852 | * digits exponent |
---|
853 | */ |
---|
854 | NUM_INT |
---|
855 | : ('0'..'9')+ // everything starts with a digit sequence |
---|
856 | ( ( {(LA(2)!='.')&&(LA(2)!=')')}? // force k=2; avoid ".." |
---|
857 | //PSPSPS example ARRAY (.1..99.) OF char; // after .. thinks it's a NUM_REAL |
---|
858 | '.' {$setType(NUM_REAL);} // dot means we are float |
---|
859 | ('0'..'9')+ (EXPONENT)? |
---|
860 | )? |
---|
861 | | EXPONENT {$setType(NUM_REAL);} // 'E' means we are float |
---|
862 | ) |
---|
863 | ; |
---|
864 | |
---|
865 | // a couple protected methods to assist in matching floating point numbers |
---|
866 | protected |
---|
867 | EXPONENT |
---|
868 | : ('e') ('+'|'-')? ('0'..'9')+ |
---|
869 | ; |
---|