1b8e80941Smrg%{
2b8e80941Smrg/*
3b8e80941Smrg * Copyright © 2010 Intel Corporation
4b8e80941Smrg *
5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
7b8e80941Smrg * to deal in the Software without restriction, including without limitation
8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
10b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
11b8e80941Smrg *
12b8e80941Smrg * The above copyright notice and this permission notice (including the next
13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
14b8e80941Smrg * Software.
15b8e80941Smrg *
16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22b8e80941Smrg * DEALINGS IN THE SOFTWARE.
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#include <stdio.h>
26b8e80941Smrg#include <string.h>
27b8e80941Smrg#include <ctype.h>
28b8e80941Smrg
29b8e80941Smrg#include "glcpp.h"
30b8e80941Smrg#include "glcpp-parse.h"
31b8e80941Smrg
32b8e80941Smrg/* Flex annoyingly generates some functions without making them
33b8e80941Smrg * static. Let's declare them here. */
34b8e80941Smrgint glcpp_get_column  (yyscan_t yyscanner);
35b8e80941Smrgvoid glcpp_set_column (int  column_no , yyscan_t yyscanner);
36b8e80941Smrg
37b8e80941Smrg#ifdef _MSC_VER
38b8e80941Smrg#define YY_NO_UNISTD_H
39b8e80941Smrg#endif
40b8e80941Smrg
41b8e80941Smrg#define YY_NO_INPUT
42b8e80941Smrg
/* Hook that flex runs just before the action of every matched rule.
 *
 * First consume any pending line-number or source-number override
 * recorded in the parser (has_new_line_number/has_new_source_number),
 * then describe the matched text in yylloc and advance yycolumn past
 * it. Columns stored in yylloc are 1-based while yycolumn is 0-based.
 *
 * Note: The trailing semicolon is intentional. Flex expands this macro
 * in statement position without supplying one of its own.
 */
#define YY_USER_ACTION							\
	do {								\
		if (parser->has_new_line_number) {			\
			yylineno = parser->new_line_number;		\
			parser->has_new_line_number = 0;		\
		}							\
		if (parser->has_new_source_number) {			\
			yylloc->source = parser->new_source_number;	\
			parser->has_new_source_number = 0;		\
		}							\
		yylloc->last_line = yylineno;				\
		yylloc->first_line = yylloc->last_line;			\
		yylloc->first_column = yycolumn + 1;			\
		yycolumn += yyleng;					\
		yylloc->last_column = yycolumn + 1;			\
	} while(0);

/* Hook that flex runs once, before the first scan: start counting at
 * line 1, column 0 of source string 0. */
#define YY_USER_INIT			\
	do {				\
		yylloc->source = 0;	\
		yycolumn = 0;		\
		yylineno = 1;		\
	} while(0)
63b8e80941Smrg
/* Token-returning macros.
 *
 * Flex-based lexers hand tokens to the parser with a plain return
 * statement, so these helpers unavoidably hide a return inside a
 * macro. To mitigate the ugliness, the word RETURN is prominent in the
 * name of every macro which may return, and as much of the logic as
 * possible lives in an actual function rather than in the macros (see
 * glcpp_lex_update_state_per_token).
 *
 * RETURN_TOKEN_NEVER_SKIP is the low-level primitive: it performs the
 * per-token state update and returns the token unless that update
 * says the token must be dropped. It ignores parser->skipping, which
 * is needed for things like #if and the tokens of its condition,
 * (since these must be evaluated by the parser even when otherwise
 * skipping).
 *
 * RETURN_TOKEN is the most-commonly-used macro. It does nothing at
 * all while the parser is skipping tokens, (such as within #if
 * 0...#else), and otherwise behaves like RETURN_TOKEN_NEVER_SKIP.
 *
 * RETURN_STRING_TOKEN is a convenience wrapper with the same skipping
 * behavior as RETURN_TOKEN that first stores a copy of yytext in
 * yylval->str. The copy is sized from yyleng, (which flex has already
 * computed), instead of going through linear_strdup and its implicit
 * strlen() call.
 */
#define RETURN_TOKEN_NEVER_SKIP(token)					\
	do {								\
		if (glcpp_lex_update_state_per_token (parser, (token)))	\
			return (token);					\
	} while (0)

#define RETURN_TOKEN(token)						\
	do {								\
		if (parser->skipping)					\
			break;						\
		RETURN_TOKEN_NEVER_SKIP (token);			\
	} while (0)

#define RETURN_STRING_TOKEN(token)					\
	do {								\
		if (parser->skipping)					\
			break;						\
		yylval->str = linear_alloc_child (yyextra->linalloc,	\
						  yyleng + 1);		\
		memcpy (yylval->str, yytext, yyleng + 1);		\
		RETURN_TOKEN_NEVER_SKIP (token);			\
	} while (0)
115b8e80941Smrg
116b8e80941Smrg
117b8e80941Smrg/* Update all state necessary for each token being returned.
118b8e80941Smrg *
119b8e80941Smrg * Here we'll be tracking newlines and spaces so that the lexer can
120b8e80941Smrg * alter its behavior as necessary, (for example, '#' has special
121b8e80941Smrg * significance if it is the first non-whitespace, non-comment token
122b8e80941Smrg * in a line, but does not otherwise).
123b8e80941Smrg *
124b8e80941Smrg * NOTE: If this function returns FALSE, then no token should be
125b8e80941Smrg * returned at all. This is used to suprress duplicate SPACE tokens.
126b8e80941Smrg */
127b8e80941Smrgstatic int
128b8e80941Smrgglcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
129b8e80941Smrg{
130b8e80941Smrg	if (token != NEWLINE && token != SPACE && token != HASH_TOKEN &&
131b8e80941Smrg	    !parser->lexing_version_directive) {
132b8e80941Smrg		glcpp_parser_resolve_implicit_version(parser);
133b8e80941Smrg	}
134b8e80941Smrg
135b8e80941Smrg	/* After the first non-space token in a line, we won't
136b8e80941Smrg	 * allow any '#' to introduce a directive. */
137b8e80941Smrg	if (token == NEWLINE) {
138b8e80941Smrg		parser->first_non_space_token_this_line = 1;
139b8e80941Smrg	} else if (token != SPACE) {
140b8e80941Smrg		parser->first_non_space_token_this_line = 0;
141b8e80941Smrg	}
142b8e80941Smrg
143b8e80941Smrg	/* Track newlines just to know whether a newline needs
144b8e80941Smrg	 * to be inserted if end-of-file comes early. */
145b8e80941Smrg	if (token == NEWLINE) {
146b8e80941Smrg		parser->last_token_was_newline = 1;
147b8e80941Smrg	} else {
148b8e80941Smrg		parser->last_token_was_newline = 0;
149b8e80941Smrg	}
150b8e80941Smrg
151b8e80941Smrg	/* Track spaces to avoid emitting multiple SPACE
152b8e80941Smrg	 * tokens in a row. */
153b8e80941Smrg	if (token == SPACE) {
154b8e80941Smrg		if (! parser->last_token_was_space) {
155b8e80941Smrg			parser->last_token_was_space = 1;
156b8e80941Smrg			return 1;
157b8e80941Smrg		} else {
158b8e80941Smrg			parser->last_token_was_space = 1;
159b8e80941Smrg			return 0;
160b8e80941Smrg		}
161b8e80941Smrg	} else {
162b8e80941Smrg		parser->last_token_was_space = 0;
163b8e80941Smrg		return 1;
164b8e80941Smrg	}
165b8e80941Smrg}
166b8e80941Smrg
167b8e80941Smrg
168b8e80941Smrg%}
169b8e80941Smrg
170b8e80941Smrg%option bison-bridge bison-locations reentrant noyywrap
171b8e80941Smrg%option extra-type="glcpp_parser_t *"
172b8e80941Smrg%option prefix="glcpp_"
173b8e80941Smrg%option stack
174b8e80941Smrg%option never-interactive
175b8e80941Smrg%option warn nodefault
176b8e80941Smrg
177b8e80941Smrg	/* Note: When adding any start conditions to this list, you must also
178b8e80941Smrg	 * update the "Internal compiler error" catch-all rule near the end of
179b8e80941Smrg	 * this file. */
180b8e80941Smrg
181b8e80941Smrg%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE
182b8e80941Smrg
183b8e80941SmrgSPACE		[[:space:]]
184b8e80941SmrgNONSPACE	[^[:space:]]
185b8e80941SmrgHSPACE		[ \t\v\f]
186b8e80941SmrgHASH		#
187b8e80941SmrgNEWLINE		(\r\n|\n\r|\r|\n)
188b8e80941SmrgIDENTIFIER	[_a-zA-Z][_a-zA-Z0-9]*
189b8e80941SmrgPP_NUMBER	[.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
190b8e80941SmrgPUNCTUATION	[][(){}.&*~!/%<>^|;,=+-]
191b8e80941Smrg
192b8e80941Smrg/* The OTHER class is simply a catch-all for things that the CPP
193b8e80941Smrgparser just doesn't care about. Since flex regular expressions that
194b8e80941Smrgmatch longer strings take priority over those matching shorter
195b8e80941Smrgstrings, we have to be careful to avoid OTHER matching and hiding
196b8e80941Smrgsomething that CPP does care about. So we simply exclude all
197b8e80941Smrgcharacters that appear in any other expressions. */
198b8e80941Smrg
199b8e80941SmrgOTHER		[^][_#[:space:]#a-zA-Z0-9(){}.&*~!/%<>^|;,=+-]
200b8e80941Smrg
201b8e80941SmrgDIGITS			[0-9][0-9]*
202b8e80941SmrgDECIMAL_INTEGER		[1-9][0-9]*[uU]?
203b8e80941SmrgOCTAL_INTEGER		0[0-7]*[uU]?
204b8e80941SmrgHEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
205b8e80941Smrg
206b8e80941Smrg%%
207b8e80941Smrg
208b8e80941Smrg	glcpp_parser_t *parser = yyextra;
209b8e80941Smrg
210b8e80941Smrg	/* When we lex a multi-line comment, we replace it (as
211b8e80941Smrg	 * specified) with a single space. But if the comment spanned
212b8e80941Smrg	 * multiple lines, then subsequent parsing stages will not
213b8e80941Smrg	 * count correct line numbers. To avoid this problem we keep
214b8e80941Smrg	 * track of all newlines that were commented out by a
215b8e80941Smrg	 * multi-line comment, and we emit a NEWLINE token for each at
216b8e80941Smrg	 * the next legal opportunity, (which is when the lexer would
217b8e80941Smrg	 * be emitting a NEWLINE token anyway).
218b8e80941Smrg	 */
219b8e80941Smrg	if (YY_START == NEWLINE_CATCHUP) {
220b8e80941Smrg		if (parser->commented_newlines)
221b8e80941Smrg			parser->commented_newlines--;
222b8e80941Smrg		if (parser->commented_newlines == 0)
223b8e80941Smrg			BEGIN INITIAL;
224b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (NEWLINE);
225b8e80941Smrg	}
226b8e80941Smrg
227b8e80941Smrg	/* Set up the parser->skipping bit here before doing any lexing.
228b8e80941Smrg	 *
229b8e80941Smrg	 * This bit controls whether tokens are skipped, (as implemented by
230b8e80941Smrg         * RETURN_TOKEN), such as between "#if 0" and "#endif".
231b8e80941Smrg	 *
232b8e80941Smrg	 * The parser maintains a skip_stack indicating whether we should be
233b8e80941Smrg         * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will
234b8e80941Smrg         * push and pop items from the stack.
235b8e80941Smrg	 *
236b8e80941Smrg	 * Here are the rules for determining whether we are skipping:
237b8e80941Smrg	 *
238b8e80941Smrg	 *	1. If the skip stack is NULL, we are outside of all #if blocks
239b8e80941Smrg	 *         and we are not skipping.
240b8e80941Smrg	 *
241b8e80941Smrg	 *	2. If the skip stack is non-NULL, the type of the top node in
242b8e80941Smrg	 *	   the stack determines whether to skip. A type of
243b8e80941Smrg	 *	   SKIP_NO_SKIP is used for blocks where we are emitting
244b8e80941Smrg	 *	   tokens, (such as between #if 1 and #endif, or after the
245b8e80941Smrg	 *	   #else of an #if 0, etc.).
246b8e80941Smrg	 *
247b8e80941Smrg	 *	3. The lexing_directive bit overrides the skip stack. This bit
248b8e80941Smrg	 *	   is set when we are actively lexing the expression for a
249b8e80941Smrg	 *	   pre-processor condition, (such as #if, #elif, or #else). In
250b8e80941Smrg	 *	   this case, even if otherwise skipping, we need to emit the
251b8e80941Smrg	 *	   tokens for this condition so that the parser can evaluate
252b8e80941Smrg	 *	   the expression. (For, #else, there's no expression, but we
253b8e80941Smrg	 *	   emit tokens so the parser can generate a nice error message
254b8e80941Smrg	 *	   if there are any tokens here).
255b8e80941Smrg	 */
256b8e80941Smrg	if (parser->skip_stack &&
257b8e80941Smrg	    parser->skip_stack->type != SKIP_NO_SKIP &&
258b8e80941Smrg	    ! parser->lexing_directive)
259b8e80941Smrg	{
260b8e80941Smrg		parser->skipping = 1;
261b8e80941Smrg	} else {
262b8e80941Smrg		parser->skipping = 0;
263b8e80941Smrg	}
264b8e80941Smrg
265b8e80941Smrg	/* Single-line comments */
266b8e80941Smrg<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
267b8e80941Smrg}
268b8e80941Smrg
269b8e80941Smrg	/* Multi-line comments */
270b8e80941Smrg<INITIAL,DEFINE,HASH>"/*"   { yy_push_state(COMMENT, yyscanner); }
271b8e80941Smrg<COMMENT>[^*\r\n]*
272b8e80941Smrg<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
273b8e80941Smrg<COMMENT>"*"+[^*/\r\n]*
274b8e80941Smrg<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
275b8e80941Smrg<COMMENT>"*"+"/"        {
276b8e80941Smrg	yy_pop_state(yyscanner);
277b8e80941Smrg	/* In the <HASH> start condition, we don't want any SPACE token. */
278b8e80941Smrg	if (yyextra->space_tokens && YY_START != HASH)
279b8e80941Smrg		RETURN_TOKEN (SPACE);
280b8e80941Smrg}
281b8e80941Smrg
282b8e80941Smrg{HASH} {
283b8e80941Smrg
284b8e80941Smrg	/* If the '#' is the first non-whitespace, non-comment token on this
285b8e80941Smrg	 * line, then it introduces a directive, switch to the <HASH> start
286b8e80941Smrg	 * condition.
287b8e80941Smrg	 *
288b8e80941Smrg	 * Otherwise, this is just punctuation, so return the HASH_TOKEN
289b8e80941Smrg         * token. */
290b8e80941Smrg	if (parser->first_non_space_token_this_line) {
291b8e80941Smrg		BEGIN HASH;
292b8e80941Smrg		yyextra->in_define = false;
293b8e80941Smrg	}
294b8e80941Smrg
295b8e80941Smrg	RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
296b8e80941Smrg}
297b8e80941Smrg
298b8e80941Smrg<HASH>version{HSPACE}+ {
299b8e80941Smrg	BEGIN INITIAL;
300b8e80941Smrg	yyextra->space_tokens = 0;
301b8e80941Smrg	yyextra->lexing_version_directive = 1;
302b8e80941Smrg	RETURN_STRING_TOKEN (VERSION_TOKEN);
303b8e80941Smrg}
304b8e80941Smrg
305b8e80941Smrg	/* Swallow empty #pragma directives, (to avoid confusing the
306b8e80941Smrg	 * downstream compiler).
307b8e80941Smrg	 *
308b8e80941Smrg	 * Note: We use a simple regular expression for the lookahead
309b8e80941Smrg	 * here. Specifically, we cannot use the complete {NEWLINE} expression
310b8e80941Smrg	 * since it uses alternation and we've found that there's a flex bug
311b8e80941Smrg	 * where using alternation in the lookahead portion of a pattern
312b8e80941Smrg	 * triggers a buffer overrun. */
313b8e80941Smrg<HASH>pragma{HSPACE}*/[\r\n] {
314b8e80941Smrg	BEGIN INITIAL;
315b8e80941Smrg}
316b8e80941Smrg
317b8e80941Smrg	/* glcpp doesn't handle #extension, #version, or #pragma directives.
318b8e80941Smrg	 * Simply pass them through to the main compiler's lexer/parser. */
319b8e80941Smrg<HASH>(extension|pragma)[^\r\n]* {
320b8e80941Smrg	BEGIN INITIAL;
321b8e80941Smrg	RETURN_STRING_TOKEN (PRAGMA);
322b8e80941Smrg}
323b8e80941Smrg
324b8e80941Smrg<HASH>line{HSPACE}+ {
325b8e80941Smrg	BEGIN INITIAL;
326b8e80941Smrg	RETURN_TOKEN (LINE);
327b8e80941Smrg}
328b8e80941Smrg
329b8e80941Smrg<HASH>{NEWLINE} {
330b8e80941Smrg	BEGIN INITIAL;
331b8e80941Smrg	yyextra->space_tokens = 0;
332b8e80941Smrg	yylineno++;
333b8e80941Smrg	yycolumn = 0;
334b8e80941Smrg	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
335b8e80941Smrg}
336b8e80941Smrg
337b8e80941Smrg	/* For the pre-processor directives, we return these tokens
338b8e80941Smrg	 * even when we are otherwise skipping. */
339b8e80941Smrg<HASH>ifdef {
340b8e80941Smrg	if (!yyextra->in_define) {
341b8e80941Smrg		BEGIN INITIAL;
342b8e80941Smrg		yyextra->lexing_directive = 1;
343b8e80941Smrg		yyextra->space_tokens = 0;
344b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (IFDEF);
345b8e80941Smrg	}
346b8e80941Smrg}
347b8e80941Smrg
348b8e80941Smrg<HASH>ifndef {
349b8e80941Smrg	if (!yyextra->in_define) {
350b8e80941Smrg		BEGIN INITIAL;
351b8e80941Smrg		yyextra->lexing_directive = 1;
352b8e80941Smrg		yyextra->space_tokens = 0;
353b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (IFNDEF);
354b8e80941Smrg	}
355b8e80941Smrg}
356b8e80941Smrg
357b8e80941Smrg<HASH>if/[^_a-zA-Z0-9] {
358b8e80941Smrg	if (!yyextra->in_define) {
359b8e80941Smrg		BEGIN INITIAL;
360b8e80941Smrg		yyextra->lexing_directive = 1;
361b8e80941Smrg		yyextra->space_tokens = 0;
362b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (IF);
363b8e80941Smrg	}
364b8e80941Smrg}
365b8e80941Smrg
366b8e80941Smrg<HASH>elif/[^_a-zA-Z0-9] {
367b8e80941Smrg	if (!yyextra->in_define) {
368b8e80941Smrg		BEGIN INITIAL;
369b8e80941Smrg		yyextra->lexing_directive = 1;
370b8e80941Smrg		yyextra->space_tokens = 0;
371b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (ELIF);
372b8e80941Smrg	}
373b8e80941Smrg}
374b8e80941Smrg
375b8e80941Smrg<HASH>else {
376b8e80941Smrg	if (!yyextra->in_define) {
377b8e80941Smrg		BEGIN INITIAL;
378b8e80941Smrg		yyextra->space_tokens = 0;
379b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (ELSE);
380b8e80941Smrg	}
381b8e80941Smrg}
382b8e80941Smrg
383b8e80941Smrg<HASH>endif {
384b8e80941Smrg	if (!yyextra->in_define) {
385b8e80941Smrg		BEGIN INITIAL;
386b8e80941Smrg		yyextra->space_tokens = 0;
387b8e80941Smrg		RETURN_TOKEN_NEVER_SKIP (ENDIF);
388b8e80941Smrg	}
389b8e80941Smrg}
390b8e80941Smrg
391b8e80941Smrg<HASH>error[^\r\n]* {
392b8e80941Smrg	BEGIN INITIAL;
393b8e80941Smrg	RETURN_STRING_TOKEN (ERROR_TOKEN);
394b8e80941Smrg}
395b8e80941Smrg
396b8e80941Smrg	/* After we see a "#define" we enter the <DEFINE> start state
397b8e80941Smrg	 * for the lexer. Within <DEFINE> we are looking for the first
398b8e80941Smrg	 * identifier and specifically checking whether the identifier
399b8e80941Smrg	 * is followed by a '(' or not, (to lex either a
400b8e80941Smrg	 * FUNC_IDENTIFIER or an OBJ_IDENTIFIER token).
401b8e80941Smrg	 *
402b8e80941Smrg	 * While in the <DEFINE> state we also need to explicitly
403b8e80941Smrg	 * handle a few other things that may appear before the
404b8e80941Smrg	 * identifier:
405b8e80941Smrg	 *
406b8e80941Smrg	 * 	* Comments, (handled above with the main support for
407b8e80941Smrg	 * 	  comments).
408b8e80941Smrg	 *
409b8e80941Smrg	 *	* Whitespace (simply ignored)
410b8e80941Smrg	 *
411b8e80941Smrg	 *	* Anything else, (not an identifier, not a comment,
412b8e80941Smrg	 *	  and not whitespace). This will generate an error.
413b8e80941Smrg	 */
414b8e80941Smrg<HASH>define{HSPACE}* {
415b8e80941Smrg	yyextra->in_define = true;
416b8e80941Smrg	if (!parser->skipping) {
417b8e80941Smrg		BEGIN DEFINE;
418b8e80941Smrg		yyextra->space_tokens = 0;
419b8e80941Smrg		RETURN_TOKEN (DEFINE_TOKEN);
420b8e80941Smrg	}
421b8e80941Smrg}
422b8e80941Smrg
423b8e80941Smrg<HASH>undef {
424b8e80941Smrg	BEGIN INITIAL;
425b8e80941Smrg	yyextra->space_tokens = 0;
426b8e80941Smrg	RETURN_TOKEN (UNDEF);
427b8e80941Smrg}
428b8e80941Smrg
429b8e80941Smrg<HASH>{HSPACE}+ {
430b8e80941Smrg	/* Nothing to do here. Importantly, don't leave the <HASH>
431b8e80941Smrg	 * start condition, since it's legal to have space between the
432b8e80941Smrg	 * '#' and the directive.. */
433b8e80941Smrg}
434b8e80941Smrg
435b8e80941Smrg	/* This will catch any non-directive garbage after a HASH */
436b8e80941Smrg<HASH>{NONSPACE} {
437b8e80941Smrg	if (!parser->skipping) {
438b8e80941Smrg		BEGIN INITIAL;
439b8e80941Smrg		RETURN_TOKEN (GARBAGE);
440b8e80941Smrg	}
441b8e80941Smrg}
442b8e80941Smrg
443b8e80941Smrg	/* An identifier immediately followed by '(' */
444b8e80941Smrg<DEFINE>{IDENTIFIER}/"(" {
445b8e80941Smrg	BEGIN INITIAL;
446b8e80941Smrg	RETURN_STRING_TOKEN (FUNC_IDENTIFIER);
447b8e80941Smrg}
448b8e80941Smrg
449b8e80941Smrg	/* An identifier not immediately followed by '(' */
450b8e80941Smrg<DEFINE>{IDENTIFIER} {
451b8e80941Smrg	BEGIN INITIAL;
452b8e80941Smrg	RETURN_STRING_TOKEN (OBJ_IDENTIFIER);
453b8e80941Smrg}
454b8e80941Smrg
455b8e80941Smrg	/* Whitespace */
456b8e80941Smrg<DEFINE>{HSPACE}+ {
457b8e80941Smrg	/* Just ignore it. Nothing to do here. */
458b8e80941Smrg}
459b8e80941Smrg
460b8e80941Smrg	/* '/' not followed by '*', so not a comment. This is an error. */
461b8e80941Smrg<DEFINE>[/][^*]{NONSPACE}* {
462b8e80941Smrg	BEGIN INITIAL;
463b8e80941Smrg	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
464b8e80941Smrg	RETURN_STRING_TOKEN (INTEGER_STRING);
465b8e80941Smrg}
466b8e80941Smrg
467b8e80941Smrg	/* A character that can't start an identifier, comment, or
468b8e80941Smrg	 * space. This is an error. */
469b8e80941Smrg<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* {
470b8e80941Smrg	BEGIN INITIAL;
471b8e80941Smrg	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
472b8e80941Smrg	RETURN_STRING_TOKEN (INTEGER_STRING);
473b8e80941Smrg}
474b8e80941Smrg
475b8e80941Smrg{DECIMAL_INTEGER} {
476b8e80941Smrg	RETURN_STRING_TOKEN (INTEGER_STRING);
477b8e80941Smrg}
478b8e80941Smrg
479b8e80941Smrg{OCTAL_INTEGER} {
480b8e80941Smrg	RETURN_STRING_TOKEN (INTEGER_STRING);
481b8e80941Smrg}
482b8e80941Smrg
483b8e80941Smrg{HEXADECIMAL_INTEGER} {
484b8e80941Smrg	RETURN_STRING_TOKEN (INTEGER_STRING);
485b8e80941Smrg}
486b8e80941Smrg
487b8e80941Smrg"<<"  {
488b8e80941Smrg	RETURN_TOKEN (LEFT_SHIFT);
489b8e80941Smrg}
490b8e80941Smrg
491b8e80941Smrg">>" {
492b8e80941Smrg	RETURN_TOKEN (RIGHT_SHIFT);
493b8e80941Smrg}
494b8e80941Smrg
495b8e80941Smrg"<=" {
496b8e80941Smrg	RETURN_TOKEN (LESS_OR_EQUAL);
497b8e80941Smrg}
498b8e80941Smrg
499b8e80941Smrg">=" {
500b8e80941Smrg	RETURN_TOKEN (GREATER_OR_EQUAL);
501b8e80941Smrg}
502b8e80941Smrg
503b8e80941Smrg"==" {
504b8e80941Smrg	RETURN_TOKEN (EQUAL);
505b8e80941Smrg}
506b8e80941Smrg
507b8e80941Smrg"!=" {
508b8e80941Smrg	RETURN_TOKEN (NOT_EQUAL);
509b8e80941Smrg}
510b8e80941Smrg
511b8e80941Smrg"&&" {
512b8e80941Smrg	RETURN_TOKEN (AND);
513b8e80941Smrg}
514b8e80941Smrg
515b8e80941Smrg"||" {
516b8e80941Smrg	RETURN_TOKEN (OR);
517b8e80941Smrg}
518b8e80941Smrg
519b8e80941Smrg"++" {
520b8e80941Smrg	RETURN_TOKEN (PLUS_PLUS);
521b8e80941Smrg}
522b8e80941Smrg
523b8e80941Smrg"--" {
524b8e80941Smrg	RETURN_TOKEN (MINUS_MINUS);
525b8e80941Smrg}
526b8e80941Smrg
527b8e80941Smrg"##" {
528b8e80941Smrg	if (! parser->skipping) {
529b8e80941Smrg		if (parser->is_gles)
530b8e80941Smrg			glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
531b8e80941Smrg		RETURN_TOKEN (PASTE);
532b8e80941Smrg	}
533b8e80941Smrg}
534b8e80941Smrg
535b8e80941Smrg"defined" {
536b8e80941Smrg	RETURN_TOKEN (DEFINED);
537b8e80941Smrg}
538b8e80941Smrg
539b8e80941Smrg{IDENTIFIER} {
540b8e80941Smrg	RETURN_STRING_TOKEN (IDENTIFIER);
541b8e80941Smrg}
542b8e80941Smrg
543b8e80941Smrg{PP_NUMBER} {
544b8e80941Smrg	RETURN_STRING_TOKEN (OTHER);
545b8e80941Smrg}
546b8e80941Smrg
547b8e80941Smrg{PUNCTUATION} {
548b8e80941Smrg	RETURN_TOKEN (yytext[0]);
549b8e80941Smrg}
550b8e80941Smrg
551b8e80941Smrg{OTHER}+ {
552b8e80941Smrg	RETURN_STRING_TOKEN (OTHER);
553b8e80941Smrg}
554b8e80941Smrg
555b8e80941Smrg{HSPACE} {
556b8e80941Smrg	if (yyextra->space_tokens) {
557b8e80941Smrg		RETURN_TOKEN (SPACE);
558b8e80941Smrg	}
559b8e80941Smrg}
560b8e80941Smrg
561b8e80941Smrg	/* We preserve all newlines, even between #if 0..#endif, so no
562b8e80941Smrg	skipping.. */
563b8e80941Smrg<*>{NEWLINE} {
564b8e80941Smrg	if (parser->commented_newlines) {
565b8e80941Smrg		BEGIN NEWLINE_CATCHUP;
566b8e80941Smrg	} else {
567b8e80941Smrg		BEGIN INITIAL;
568b8e80941Smrg	}
569b8e80941Smrg	yyextra->space_tokens = 1;
570b8e80941Smrg	yyextra->lexing_directive = 0;
571b8e80941Smrg	yyextra->lexing_version_directive = 0;
572b8e80941Smrg	yylineno++;
573b8e80941Smrg	yycolumn = 0;
574b8e80941Smrg	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
575b8e80941Smrg}
576b8e80941Smrg
577b8e80941Smrg<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
578b8e80941Smrg	if (YY_START == COMMENT)
579b8e80941Smrg		glcpp_error(yylloc, yyextra, "Unterminated comment");
580b8e80941Smrg	BEGIN DONE; /* Don't keep matching this rule forever. */
581b8e80941Smrg	yyextra->lexing_directive = 0;
582b8e80941Smrg	yyextra->lexing_version_directive = 0;
583b8e80941Smrg	if (! parser->last_token_was_newline)
584b8e80941Smrg		RETURN_TOKEN (NEWLINE);
585b8e80941Smrg}
586b8e80941Smrg
587b8e80941Smrg	/* This is a catch-all to avoid the annoying default flex action which
588b8e80941Smrg	 * matches any character and prints it. If any input ever matches this
589b8e80941Smrg	 * rule, then we have made a mistake above and need to fix one or more
590b8e80941Smrg	 * of the preceding patterns to match that input. */
591b8e80941Smrg
<*>. {
	glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);

	/* We don't actually use the UNREACHABLE start condition. We
	 * only have this block here so that we can pretend to call some
	 * generated functions, (to avoid "defined but not used"
	 * warnings).
	 *
	 * yy_top_state() takes the scanner handle, not the parser stored
	 * in yyextra, so pass yyscanner exactly as the yy_push_state()
	 * and yy_pop_state() calls in the comment rules do. */
	if (YY_START == UNREACHABLE) {
		unput('.');
		yy_top_state(yyscanner);
	}
}
604b8e80941Smrg
605b8e80941Smrg%%
606b8e80941Smrg
607b8e80941Smrgvoid
608b8e80941Smrgglcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader)
609b8e80941Smrg{
610b8e80941Smrg	yy_scan_string(shader, parser->scanner);
611b8e80941Smrg}
612