Context Navigation

source: trunk/yao/share/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp @ 1

Last change on this file since 1 was 1, checked in by lnalod, 15 years ago
Initial import of YAO sources
File size: 13.4 KB

Line
1	#ifndef INC_CharScanner_hpp__
2	#define INC_CharScanner_hpp__
3
4	/* ANTLR Translator Generator
5	* Project led by Terence Parr at http://www.jGuru.com
6	* Software rights: http://www.antlr.org/license.html
7	*
8	* $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
9	*/
10
11	#include <antlr/config.hpp>
12
13	#include <map>
14
15	#ifdef HAS_NOT_CCTYPE_H
16	#include <ctype.h>
17	#else
18	#include <cctype>
19	#endif
20
21	#if ( _MSC_VER == 1200 )
22	// VC6 seems to need this
23	// note that this is not a standard C++ include file.
24	# include <stdio.h>
25	#endif
26
27	#include <antlr/TokenStream.hpp>
28	#include <antlr/RecognitionException.hpp>
29	#include <antlr/SemanticException.hpp>
30	#include <antlr/MismatchedCharException.hpp>
31	#include <antlr/InputBuffer.hpp>
32	#include <antlr/BitSet.hpp>
33	#include <antlr/LexerSharedInputState.hpp>
34
35	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36	namespace antlr {
37	#endif
38
39	class ANTLR_API CharScanner;
40
41	ANTLR_C_USING(tolower)
42
43	#ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither, so a minimal replacement is supplied here.
/** Case-insensitive comparison of two NUL-terminated C strings.
 *
 * Both strings are walked in lock step; each character is folded with
 * tolower() before being compared.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if the two
 *         strings are equal ignoring case
 */
inline int strcasecmp(const char* s1, const char* s2)
{
	for (;;)
	{
		// Convert through unsigned char: handing a negative value (other
		// than EOF) to tolower() is undefined, and comparing as unsigned
		// keeps the ordering independent of the signedness of plain char.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// c1 == c2 here; a shared terminator means the strings matched.
		if (c1 == 0) return 0;
	}
}
57	#else
58	#ifdef NO_STRCASECMP
59	ANTLR_C_USING(stricmp)
60	#else
61	ANTLR_C_USING(strcasecmp)
62	#endif
63	#endif
64
65	/** Functor for the literals map
66	*/
67	class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68	private:
69	const CharScanner* scanner;
70	public:
71	#ifdef NO_TEMPLATE_PARTS
72	CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73	#endif
74	CharScannerLiteralsLess(const CharScanner* theScanner)
75	: scanner(theScanner)
76	{
77	}
78	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79	// defaults are good enough..
80	// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81	// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82	};
83
84	/** Superclass of generated lexers
85	*/
86	class ANTLR_API CharScanner : public TokenStream {
87	protected:
88	typedef RefToken (*factory_type)();
89	public:
90	CharScanner(InputBuffer& cb, bool case_sensitive );
91	CharScanner(InputBuffer* cb, bool case_sensitive );
92	CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93
94	virtual ~CharScanner()
95	{
96	}
97
98	virtual int LA(unsigned int i);
99
100	virtual void append(char c)
101	{
102	if (saveConsumedInput)
103	{
104	size_t l = text.length();
105
106	if ((l%256) == 0)
107	text.reserve(l+256);
108
109	text.replace(l,0,&c,1);
110	}
111	}
112
113	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114	{
115	if( saveConsumedInput )
116	text += s;
117	}
118
119	virtual void commit()
120	{
121	inputState->getInput().commit();
122	}
123
124	/** called by the generated lexer to do error recovery, override to
125	* customize the behaviour.
126	*/
127	virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
128	{
129	consume();
130	consumeUntil(tokenSet);
131	}
132
133	virtual void consume()
134	{
135	if (inputState->guessing == 0)
136	{
137	int c = LA(1);
138	if (caseSensitive)
139	{
140	append(c);
141	}
142	else
143	{
144	// use input.LA(), not LA(), to get original case
145	// CharScanner.LA() would toLower it.
146	append(inputState->getInput().LA(1));
147	}
148
149	// RK: in a sense I don't like this automatic handling.
150	if (c == '\t')
151	tab();
152	else
153	inputState->column++;
154	}
155	inputState->getInput().consume();
156	}
157
158	/** Consume chars until one matches the given char */
159	virtual void consumeUntil(int c)
160	{
161	for(;;)
162	{
163	int la_1 = LA(1);
164	if( la_1 == EOF_CHAR \|\| la_1 == c )
165	break;
166	consume();
167	}
168	}
169
170	/** Consume chars until one matches the given set */
171	virtual void consumeUntil(const BitSet& set)
172	{
173	for(;;)
174	{
175	int la_1 = LA(1);
176	if( la_1 == EOF_CHAR \|\| set.member(la_1) )
177	break;
178	consume();
179	}
180	}
181
182	/// Mark the current position and return a id for it
183	virtual unsigned int mark()
184	{
185	return inputState->getInput().mark();
186	}
187	/// Rewind the scanner to a previously marked position
188	virtual void rewind(unsigned int pos)
189	{
190	inputState->getInput().rewind(pos);
191	}
192
193	/// See if input contains character 'c' throw MismatchedCharException if not
194	virtual void match(int c)
195	{
196	int la_1 = LA(1);
197	if ( la_1 != c )
198	throw MismatchedCharException(la_1, c, false, this);
199	consume();
200	}
201
202	/** See if input contains element from bitset b
203	* throw MismatchedCharException if not
204	*/
205	virtual void match(const BitSet& b)
206	{
207	int la_1 = LA(1);
208
209	if ( !b.member(la_1) )
210	throw MismatchedCharException( la_1, b, false, this );
211	consume();
212	}
213
214	/** See if input contains string 's' throw MismatchedCharException if not
215	* @note the string cannot match EOF
216	*/
217	virtual void match( const char* s )
218	{
219	while( *s != '\0' )
220	{
221	// the & 0xFF is here to prevent sign extension lateron
222	int la_1 = LA(1), c = (*s++ & 0xFF);
223
224	if ( la_1 != c )
225	throw MismatchedCharException(la_1, c, false, this);
226
227	consume();
228	}
229	}
230	/** See if input contains string 's' throw MismatchedCharException if not
231	* @note the string cannot match EOF
232	*/
233	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
234	{
235	size_t len = s.length();
236
237	for (size_t i = 0; i < len; i++)
238	{
239	// the & 0xFF is here to prevent sign extension lateron
240	int la_1 = LA(1), c = (s[i] & 0xFF);
241
242	if ( la_1 != c )
243	throw MismatchedCharException(la_1, c, false, this);
244
245	consume();
246	}
247	}
248	/** See if input does not contain character 'c'
249	* throw MismatchedCharException if not
250	*/
251	virtual void matchNot(int c)
252	{
253	int la_1 = LA(1);
254
255	if ( la_1 == c )
256	throw MismatchedCharException(la_1, c, true, this);
257
258	consume();
259	}
260	/** See if input contains character in range c1-c2
261	* throw MismatchedCharException if not
262	*/
263	virtual void matchRange(int c1, int c2)
264	{
265	int la_1 = LA(1);
266
267	if ( la_1 < c1 \|\| la_1 > c2 )
268	throw MismatchedCharException(la_1, c1, c2, false, this);
269
270	consume();
271	}
272
273	virtual bool getCaseSensitive() const
274	{
275	return caseSensitive;
276	}
277
278	virtual void setCaseSensitive(bool t)
279	{
280	caseSensitive = t;
281	}
282
283	virtual bool getCaseSensitiveLiterals() const=0;
284
285	/// Get the line the scanner currently is in (starts at 1)
286	virtual int getLine() const
287	{
288	return inputState->line;
289	}
290
291	/// set the line number
292	virtual void setLine(int l)
293	{
294	inputState->line = l;
295	}
296
297	/// Get the column the scanner currently is in (starts at 1)
298	virtual int getColumn() const
299	{
300	return inputState->column;
301	}
302	/// set the column number
303	virtual void setColumn(int c)
304	{
305	inputState->column = c;
306	}
307
308	/// get the filename for the file currently used
309	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
310	{
311	return inputState->filename;
312	}
313	/// Set the filename the scanner is using (used in error messages)
314	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
315	{
316	inputState->filename = f;
317	}
318
319	virtual bool getCommitToPath() const
320	{
321	return commitToPath;
322	}
323
324	virtual void setCommitToPath(bool commit)
325	{
326	commitToPath = commit;
327	}
328
329	/** return a copy of the current text buffer */
330	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
331	{
332	return text;
333	}
334
335	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
336	{
337	text = s;
338	}
339
340	virtual void resetText()
341	{
342	text = "";
343	inputState->tokenStartColumn = inputState->column;
344	inputState->tokenStartLine = inputState->line;
345	}
346
347	virtual RefToken getTokenObject() const
348	{
349	return _returnToken;
350	}
351
352	/** Used to keep track of line breaks, needs to be called from
353	* within generated lexers when a \n \r is encountered.
354	*/
355	virtual void newline()
356	{
357	++inputState->line;
358	inputState->column = 1;
359	}
360
361	/** Advance the current column number by an appropriate amount according
362	* to the tabsize. This method needs to be explicitly called from the
363	* lexer rules encountering tabs.
364	*/
365	virtual void tab()
366	{
367	int c = getColumn();
368	int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
369	setColumn( nc );
370	}
371	/// set the tabsize. Returns the old tabsize
372	int setTabsize( int size )
373	{
374	int oldsize = tabsize;
375	tabsize = size;
376	return oldsize;
377	}
378	/// Return the tabsize used by the scanner
379	int getTabSize() const
380	{
381	return tabsize;
382	}
383
384	/** Report exception errors caught in nextToken() */
385	virtual void reportError(const RecognitionException& e);
386
387	/** Parser error-reporting function can be overridden in subclass */
388	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
389
390	/** Parser warning-reporting function can be overridden in subclass */
391	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
392
393	virtual InputBuffer& getInputBuffer()
394	{
395	return inputState->getInput();
396	}
397
398	virtual LexerSharedInputState getInputState()
399	{
400	return inputState;
401	}
402
403	/** set the input state for the lexer.
404	* @note state is a reference counted object, hence no reference */
405	virtual void setInputState(LexerSharedInputState state)
406	{
407	inputState = state;
408	}
409
410	/// Set the factory for created tokens
411	virtual void setTokenObjectFactory(factory_type factory)
412	{
413	tokenFactory = factory;
414	}
415
416	/** Test the token text against the literals table
417	* Override this method to perform a different literals test
418	*/
419	virtual int testLiteralsTable(int ttype) const
420	{
421	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
422	if (i != literals.end())
423	ttype = (*i).second;
424	return ttype;
425	}
426
427	/** Test the text passed in against the literals table
428	* Override this method to perform a different literals test
429	* This is used primarily when you want to test a portion of
430	* a token
431	*/
432	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
433	{
434	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
435	if (i != literals.end())
436	ttype = (*i).second;
437	return ttype;
438	}
439
440	/// Override this method to get more specific case handling
441	virtual int toLower(int c) const
442	{
443	// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
444	// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
445	// this one is more structural. Maybe make this configurable.
446	return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
447	}
448
449	/** This method is called by YourLexer::nextToken() when the lexer has
450	* hit EOF condition. EOF is NOT a character.
451	* This method is not called if EOF is reached during
452	* syntactic predicate evaluation or during evaluation
453	* of normal lexical rules, which presumably would be
454	* an IOException. This traps the "normal" EOF condition.
455	*
456	* uponEOF() is called after the complete evaluation of
457	* the previous token and only if your parser asks
458	* for another token beyond that last non-EOF token.
459	*
460	* You might want to throw token or char stream exceptions
461	* like: "Heh, premature eof" or a retry stream exception
462	* ("I found the end of this file, go back to referencing file").
463	*/
464	virtual void uponEOF()
465	{
466	}
467
468	/// Methods used to change tracing behavior
469	virtual void traceIndent();
470	virtual void traceIn(const char* rname);
471	virtual void traceOut(const char* rname);
472
473	#ifndef NO_STATIC_CONSTS
474	static const int EOF_CHAR = EOF;
475	#else
476	enum {
477	EOF_CHAR = EOF
478	};
479	#endif
480	protected:
481	ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
482	/// flag indicating wether consume saves characters
483	bool saveConsumedInput;
484	factory_type tokenFactory; ///< Factory for tokens
485	bool caseSensitive; ///< Is this lexer case sensitive
486	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
487
488	RefToken _returnToken; ///< used to return tokens w/o using return val
489
490	/// Input state, gives access to input stream, shared among different lexers
491	LexerSharedInputState inputState;
492
493	/** Used during filter mode to indicate that path is desired.
494	* A subsequent scan error will report an error as usual
495	* if acceptPath=true;
496	*/
497	bool commitToPath;
498
499	int tabsize; ///< tab size the scanner uses.
500
501	/// Create a new RefToken of type t
502	virtual RefToken makeToken(int t)
503	{
504	RefToken tok = tokenFactory();
505	tok->setType(t);
506	tok->setColumn(inputState->tokenStartColumn);
507	tok->setLine(inputState->tokenStartLine);
508	return tok;
509	}
510
511	/** Tracer class, used when -traceLexer is passed to antlr
512	*/
513	class Tracer {
514	private:
515	CharScanner* parser;
516	const char* text;
517
518	Tracer(const Tracer& other); // undefined
519	Tracer& operator=(const Tracer& other); // undefined
520	public:
521	Tracer( CharScanner* p,const char* t )
522	: parser(p), text(t)
523	{
524	parser->traceIn(text);
525	}
526	~Tracer()
527	{
528	parser->traceOut(text);
529	}
530	};
531
532	int traceDepth;
533	private:
534	CharScanner( const CharScanner& other ); // undefined
535	CharScanner& operator=( const CharScanner& other ); // undefined
536
537	#ifndef NO_STATIC_CONSTS
538	static const int NO_CHAR = 0;
539	#else
540	enum {
541	NO_CHAR = 0
542	};
543	#endif
544	};
545
546	inline int CharScanner::LA(unsigned int i)
547	{
548	int c = inputState->getInput().LA(i);
549
550	if ( caseSensitive )
551	return c;
552	else
553	return toLower(c); // VC 6 tolower bug caught in toLower.
554	}
555
556	inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
557	{
558	if (scanner->getCaseSensitiveLiterals())
559	return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
560	else
561	{
562	#ifdef NO_STRCASECMP
563	return (stricmp(x.c_str(),y.c_str())<0);
564	#else
565	return (strcasecmp(x.c_str(),y.c_str())<0);
566	#endif
567	}
568	}
569
570	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
571	}
572	#endif
573
574	#endif //INC_CharScanner_hpp__

Note: See TracBrowser for help on using the repository browser.

Download in other formats: