Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

CharScanner.hpp

Last change on this file was 28, checked in by lnalod, 15 years ago
Update of the YAO generator code and the ANTLR source code for compatibility with Mandriva 2009 and 2010. These distributions ship a different version of the gcc compiler, which did not allow the old sources to compile correctly.
File size: 13.6 KB

Line
1	#ifndef INC_CharScanner_hpp__
2	#define INC_CharScanner_hpp__
3
4	/* ANTLR Translator Generator
5	* Project led by Terence Parr at http://www.jGuru.com
6	* Software rights: http://www.antlr.org/license.html
7	*
8	* $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
9	*/
10
11	#include <cstdio>
12	#include <antlr/config.hpp>
13
14	#include <map>
15
16	#ifdef HAS_NOT_CCTYPE_H
17	#include <ctype.h>
18	#else
19	#include <cctype>
20	#endif
21
22	#if ( _MSC_VER == 1200 )
23	// VC6 seems to need this
24	// note that this is not a standard C++ include file.
25	# include <stdio.h>
26	#endif
27
28	#include <antlr/TokenStream.hpp>
29	#include <antlr/RecognitionException.hpp>
30	#include <antlr/SemanticException.hpp>
31	#include <antlr/MismatchedCharException.hpp>
32	#include <antlr/InputBuffer.hpp>
33	#include <antlr/BitSet.hpp>
34	#include <antlr/LexerSharedInputState.hpp>
35
36	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37	namespace antlr {
38	#endif
39
40	class ANTLR_API CharScanner;
41
42	ANTLR_C_USING(tolower)
43
44	// Modification done by hand because of compilation problems in the 2009 and 2010 version of Mandriva
45	//#ifdef ANTLR_REALLY_NO_STRCASECMP
46
47
/**
 * Case-insensitive comparison of two NUL-terminated C strings.
 *
 * Neither strcasecmp nor stricmp is standard, and Codewarrior on the Mac
 * has neither, so a portable version is defined here.
 *
 * @param s1 first string (NUL-terminated, must not be null)
 * @param s2 second string (NUL-terminated, must not be null)
 * @return -1 if s1 orders before s2, 1 if it orders after s2, and 0 if
 *         both strings are equal when compared without regard to case
 */
inline int strcasecmp(const char* s1, const char* s2)
{
	while (true)
	{
		// tolower() is only defined for EOF and for values representable as
		// unsigned char, so go through unsigned char before calling it.
		const char c1 = static_cast<char>(tolower(static_cast<unsigned char>(*s1++)));
		const char c2 = static_cast<char>(tolower(static_cast<unsigned char>(*s2++)));
		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// c1 == c2 here; a terminator means both strings ended together.
		if (c1 == 0) return 0;
	}
}
61	// Modification done by hand because of compilation problems in the 2009 and 2010 version of Mandriva
62	/*#else
63	#ifdef NO_STRCASECMP
64	ANTLR_C_USING(stricmp)
65	#else
66	ANTLR_C_USING(strcasecmp)
67	#endif
68	#endif
69	*/
70	/** Functor for the literals map
71	*/
72	class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
73	private:
74	const CharScanner* scanner;
75	public:
76	#ifdef NO_TEMPLATE_PARTS
77	CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
78	#endif
79	CharScannerLiteralsLess(const CharScanner* theScanner)
80	: scanner(theScanner)
81	{
82	}
83	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
84	// defaults are good enough..
85	// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
86	// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
87	};
88
89	/** Superclass of generated lexers
90	*/
91	class ANTLR_API CharScanner : public TokenStream {
92	protected:
93	typedef RefToken (*factory_type)();
94	public:
95	CharScanner(InputBuffer& cb, bool case_sensitive );
96	CharScanner(InputBuffer* cb, bool case_sensitive );
97	CharScanner(const LexerSharedInputState& state, bool case_sensitive );
98
99	virtual ~CharScanner()
100	{
101	}
102
103	virtual int LA(unsigned int i);
104
105	virtual void append(char c)
106	{
107	if (saveConsumedInput)
108	{
109	size_t l = text.length();
110
111	if ((l%256) == 0)
112	text.reserve(l+256);
113
114	text.replace(l,0,&c,1);
115	}
116	}
117
118	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
119	{
120	if( saveConsumedInput )
121	text += s;
122	}
123
124	virtual void commit()
125	{
126	inputState->getInput().commit();
127	}
128
129	/** called by the generated lexer to do error recovery, override to
130	* customize the behaviour.
131	*/
132	virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
133	{
134	consume();
135	consumeUntil(tokenSet);
136	}
137
138	virtual void consume()
139	{
140	if (inputState->guessing == 0)
141	{
142	int c = LA(1);
143	if (caseSensitive)
144	{
145	append(c);
146	}
147	else
148	{
149	// use input.LA(), not LA(), to get original case
150	// CharScanner.LA() would toLower it.
151	append(inputState->getInput().LA(1));
152	}
153
154	// RK: in a sense I don't like this automatic handling.
155	if (c == '\t')
156	tab();
157	else
158	inputState->column++;
159	}
160	inputState->getInput().consume();
161	}
162
163	/** Consume chars until one matches the given char */
164	virtual void consumeUntil(int c)
165	{
166	for(;;)
167	{
168	int la_1 = LA(1);
169	if( la_1 == EOF_CHAR \|\| la_1 == c )
170	break;
171	consume();
172	}
173	}
174
175	/** Consume chars until one matches the given set */
176	virtual void consumeUntil(const BitSet& set)
177	{
178	for(;;)
179	{
180	int la_1 = LA(1);
181	if( la_1 == EOF_CHAR \|\| set.member(la_1) )
182	break;
183	consume();
184	}
185	}
186
187	/// Mark the current position and return a id for it
188	virtual unsigned int mark()
189	{
190	return inputState->getInput().mark();
191	}
192	/// Rewind the scanner to a previously marked position
193	virtual void rewind(unsigned int pos)
194	{
195	inputState->getInput().rewind(pos);
196	}
197
198	/// See if input contains character 'c' throw MismatchedCharException if not
199	virtual void match(int c)
200	{
201	int la_1 = LA(1);
202	if ( la_1 != c )
203	throw MismatchedCharException(la_1, c, false, this);
204	consume();
205	}
206
207	/** See if input contains element from bitset b
208	* throw MismatchedCharException if not
209	*/
210	virtual void match(const BitSet& b)
211	{
212	int la_1 = LA(1);
213
214	if ( !b.member(la_1) )
215	throw MismatchedCharException( la_1, b, false, this );
216	consume();
217	}
218
219	/** See if input contains string 's' throw MismatchedCharException if not
220	* @note the string cannot match EOF
221	*/
222	virtual void match( const char* s )
223	{
224	while( *s != '\0' )
225	{
226	// the & 0xFF is here to prevent sign extension lateron
227	int la_1 = LA(1), c = (*s++ & 0xFF);
228
229	if ( la_1 != c )
230	throw MismatchedCharException(la_1, c, false, this);
231
232	consume();
233	}
234	}
235	/** See if input contains string 's' throw MismatchedCharException if not
236	* @note the string cannot match EOF
237	*/
238	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
239	{
240	size_t len = s.length();
241
242	for (size_t i = 0; i < len; i++)
243	{
244	// the & 0xFF is here to prevent sign extension lateron
245	int la_1 = LA(1), c = (s[i] & 0xFF);
246
247	if ( la_1 != c )
248	throw MismatchedCharException(la_1, c, false, this);
249
250	consume();
251	}
252	}
253	/** See if input does not contain character 'c'
254	* throw MismatchedCharException if not
255	*/
256	virtual void matchNot(int c)
257	{
258	int la_1 = LA(1);
259
260	if ( la_1 == c )
261	throw MismatchedCharException(la_1, c, true, this);
262
263	consume();
264	}
265	/** See if input contains character in range c1-c2
266	* throw MismatchedCharException if not
267	*/
268	virtual void matchRange(int c1, int c2)
269	{
270	int la_1 = LA(1);
271
272	if ( la_1 < c1 \|\| la_1 > c2 )
273	throw MismatchedCharException(la_1, c1, c2, false, this);
274
275	consume();
276	}
277
278	virtual bool getCaseSensitive() const
279	{
280	return caseSensitive;
281	}
282
283	virtual void setCaseSensitive(bool t)
284	{
285	caseSensitive = t;
286	}
287
288	virtual bool getCaseSensitiveLiterals() const=0;
289
290	/// Get the line the scanner currently is in (starts at 1)
291	virtual int getLine() const
292	{
293	return inputState->line;
294	}
295
296	/// set the line number
297	virtual void setLine(int l)
298	{
299	inputState->line = l;
300	}
301
302	/// Get the column the scanner currently is in (starts at 1)
303	virtual int getColumn() const
304	{
305	return inputState->column;
306	}
307	/// set the column number
308	virtual void setColumn(int c)
309	{
310	inputState->column = c;
311	}
312
313	/// get the filename for the file currently used
314	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
315	{
316	return inputState->filename;
317	}
318	/// Set the filename the scanner is using (used in error messages)
319	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
320	{
321	inputState->filename = f;
322	}
323
324	virtual bool getCommitToPath() const
325	{
326	return commitToPath;
327	}
328
329	virtual void setCommitToPath(bool commit)
330	{
331	commitToPath = commit;
332	}
333
334	/** return a copy of the current text buffer */
335	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
336	{
337	return text;
338	}
339
340	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
341	{
342	text = s;
343	}
344
345	virtual void resetText()
346	{
347	text = "";
348	inputState->tokenStartColumn = inputState->column;
349	inputState->tokenStartLine = inputState->line;
350	}
351
352	virtual RefToken getTokenObject() const
353	{
354	return _returnToken;
355	}
356
357	/** Used to keep track of line breaks, needs to be called from
358	* within generated lexers when a \n \r is encountered.
359	*/
360	virtual void newline()
361	{
362	++inputState->line;
363	inputState->column = 1;
364	}
365
366	/** Advance the current column number by an appropriate amount according
367	* to the tabsize. This method needs to be explicitly called from the
368	* lexer rules encountering tabs.
369	*/
370	virtual void tab()
371	{
372	int c = getColumn();
373	int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
374	setColumn( nc );
375	}
376	/// set the tabsize. Returns the old tabsize
377	int setTabsize( int size )
378	{
379	int oldsize = tabsize;
380	tabsize = size;
381	return oldsize;
382	}
383	/// Return the tabsize used by the scanner
384	int getTabSize() const
385	{
386	return tabsize;
387	}
388
389	/** Report exception errors caught in nextToken() */
390	virtual void reportError(const RecognitionException& e);
391
392	/** Parser error-reporting function can be overridden in subclass */
393	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
394
395	/** Parser warning-reporting function can be overridden in subclass */
396	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
397
398	virtual InputBuffer& getInputBuffer()
399	{
400	return inputState->getInput();
401	}
402
403	virtual LexerSharedInputState getInputState()
404	{
405	return inputState;
406	}
407
408	/** set the input state for the lexer.
409	* @note state is a reference counted object, hence no reference */
410	virtual void setInputState(LexerSharedInputState state)
411	{
412	inputState = state;
413	}
414
415	/// Set the factory for created tokens
416	virtual void setTokenObjectFactory(factory_type factory)
417	{
418	tokenFactory = factory;
419	}
420
421	/** Test the token text against the literals table
422	* Override this method to perform a different literals test
423	*/
424	virtual int testLiteralsTable(int ttype) const
425	{
426	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
427	if (i != literals.end())
428	ttype = (*i).second;
429	return ttype;
430	}
431
432	/** Test the text passed in against the literals table
433	* Override this method to perform a different literals test
434	* This is used primarily when you want to test a portion of
435	* a token
436	*/
437	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
438	{
439	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
440	if (i != literals.end())
441	ttype = (*i).second;
442	return ttype;
443	}
444
445	/// Override this method to get more specific case handling
446	virtual int toLower(int c) const
447	{
448	// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
449	// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
450	// this one is more structural. Maybe make this configurable.
451	return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
452	}
453
454	/** This method is called by YourLexer::nextToken() when the lexer has
455	* hit EOF condition. EOF is NOT a character.
456	* This method is not called if EOF is reached during
457	* syntactic predicate evaluation or during evaluation
458	* of normal lexical rules, which presumably would be
459	* an IOException. This traps the "normal" EOF condition.
460	*
461	* uponEOF() is called after the complete evaluation of
462	* the previous token and only if your parser asks
463	* for another token beyond that last non-EOF token.
464	*
465	* You might want to throw token or char stream exceptions
466	* like: "Heh, premature eof" or a retry stream exception
467	* ("I found the end of this file, go back to referencing file").
468	*/
469	virtual void uponEOF()
470	{
471	}
472
473	/// Methods used to change tracing behavior
474	virtual void traceIndent();
475	virtual void traceIn(const char* rname);
476	virtual void traceOut(const char* rname);
477
478	#ifndef NO_STATIC_CONSTS
479	static const int EOF_CHAR = EOF;
480	#else
481	enum {
482	EOF_CHAR = EOF
483	};
484	#endif
485	protected:
486	ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
487	/// flag indicating wether consume saves characters
488	bool saveConsumedInput;
489	factory_type tokenFactory; ///< Factory for tokens
490	bool caseSensitive; ///< Is this lexer case sensitive
491	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
492
493	RefToken _returnToken; ///< used to return tokens w/o using return val
494
495	/// Input state, gives access to input stream, shared among different lexers
496	LexerSharedInputState inputState;
497
498	/** Used during filter mode to indicate that path is desired.
499	* A subsequent scan error will report an error as usual
500	* if acceptPath=true;
501	*/
502	bool commitToPath;
503
504	int tabsize; ///< tab size the scanner uses.
505
506	/// Create a new RefToken of type t
507	virtual RefToken makeToken(int t)
508	{
509	RefToken tok = tokenFactory();
510	tok->setType(t);
511	tok->setColumn(inputState->tokenStartColumn);
512	tok->setLine(inputState->tokenStartLine);
513	return tok;
514	}
515
516	/** Tracer class, used when -traceLexer is passed to antlr
517	*/
518	class Tracer {
519	private:
520	CharScanner* parser;
521	const char* text;
522
523	Tracer(const Tracer& other); // undefined
524	Tracer& operator=(const Tracer& other); // undefined
525	public:
526	Tracer( CharScanner* p,const char* t )
527	: parser(p), text(t)
528	{
529	parser->traceIn(text);
530	}
531	~Tracer()
532	{
533	parser->traceOut(text);
534	}
535	};
536
537	int traceDepth;
538	private:
539	CharScanner( const CharScanner& other ); // undefined
540	CharScanner& operator=( const CharScanner& other ); // undefined
541
542	#ifndef NO_STATIC_CONSTS
543	static const int NO_CHAR = 0;
544	#else
545	enum {
546	NO_CHAR = 0
547	};
548	#endif
549	};
550
551	inline int CharScanner::LA(unsigned int i)
552	{
553	int c = inputState->getInput().LA(i);
554
555	if ( caseSensitive )
556	return c;
557	else
558	return toLower(c); // VC 6 tolower bug caught in toLower.
559	}
560
561	inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
562	{
563	if (scanner->getCaseSensitiveLiterals())
564	return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
565	else
566	{
567	#ifdef NO_STRCASECMP
568	return (stricmp(x.c_str(),y.c_str())<0);
569	#else
570	return (strcasecmp(x.c_str(),y.c_str())<0);
571	#endif
572	}
573	}
574
575	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
576	}
577	#endif
578
579	#endif //INC_CharScanner_hpp__

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/yao/share/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp

Download in other formats: