%{
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "glcpp.h"
#include "glcpp-parse.h"

/* Flex annoyingly generates some functions without making them
 * static. Let's declare them here. */
int glcpp_get_column (yyscan_t yyscanner);
void glcpp_set_column (int column_no, yyscan_t yyscanner);

#ifdef _MSC_VER
#define YY_NO_UNISTD_H
#endif

#define YY_NO_INPUT

/* Bookkeeping run before every rule action: apply any pending line or
 * source-number override recorded in the parser, then set the token's
 * location from yylineno/yycolumn and advance yycolumn by yyleng.
 *
 * NOTE: the ';' after "while(0)" is intentional. Flex expands this
 * macro (through YY_RULE_SETUP) without adding a statement terminator
 * of its own, so the macro has to supply it. */
#define YY_USER_ACTION \
	do { \
		if (parser->has_new_line_number) \
			yylineno = parser->new_line_number; \
		if (parser->has_new_source_number) \
			yylloc->source = parser->new_source_number; \
		yylloc->first_column = yycolumn + 1; \
		yylloc->first_line = yylloc->last_line = yylineno; \
		yycolumn += yyleng; \
		yylloc->last_column = yycolumn + 1; \
		parser->has_new_line_number = 0; \
		parser->has_new_source_number = 0; \
	} while(0);

/* One-time scanner initialization: start at line 1, column 0,
 * source 0. */
#define YY_USER_INIT \
	do { \
		yylineno = 1; \
		yycolumn = 0; \
		yylloc->source = 0; \
	} while(0)

/* It's ugly to have macros that have return statements inside of
 * them, but flex-based lexer generation is all built around the
 * return statement.
 *
 * To mitigate the ugliness, we defer as much of the logic as possible
 * to an actual function, not a macro (see
 * glcpp_lex_update_state_per_token) and we make the word RETURN
 * prominent in all of the macros which may return.
 *
 * The most-commonly-used macro is RETURN_TOKEN which will perform all
 * necessary state updates based on the provided token, then
 * conditionally return the token. It will not return a token if the
 * parser is currently skipping tokens, (such as within #if
 * 0...#else).
 *
 * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that
 * makes the token returning unconditional. This is needed for things
 * like #if and the tokens of its condition, (since these must be
 * evaluated by the parser even when otherwise skipping).
 *
 * Finally, RETURN_STRING_TOKEN behaves like RETURN_TOKEN, (it honors
 * the skipping state), but additionally stores a copy of yytext in
 * yylval->str before the return.
 */
#define RETURN_TOKEN_NEVER_SKIP(token) \
	do { \
		if (glcpp_lex_update_state_per_token (parser, token)) \
			return token; \
	} while (0)

#define RETURN_TOKEN(token) \
	do { \
		if (! parser->skipping) { \
			RETURN_TOKEN_NEVER_SKIP(token); \
		} \
	} while(0)

#define RETURN_STRING_TOKEN(token) \
	do { \
		if (! parser->skipping) { \
			/* We're not doing linear_strdup here, to avoid \
			 * an implicit call on strlen() for the length \
			 * of the string, as this is already found by \
			 * flex and stored in yyleng */ \
			void *mem_ctx = yyextra->linalloc; \
			yylval->str = linear_alloc_child(mem_ctx, \
							 yyleng + 1); \
			memcpy(yylval->str, yytext, yyleng + 1); \
			RETURN_TOKEN_NEVER_SKIP (token); \
		} \
	} while(0)


/* Update all state necessary for each token being returned.
 *
 * Here we'll be tracking newlines and spaces so that the lexer can
 * alter its behavior as necessary, (for example, '#' has special
 * significance if it is the first non-whitespace, non-comment token
 * in a line, but does not otherwise).
 *
 * parser: preprocessor state whose per-token flags are updated.
 * token:  type of the token the lexer is about to return.
 *
 * Returns non-zero if the token should be returned to the caller.
 *
 * NOTE: If this function returns zero, then no token should be
 * returned at all. This is used to suppress duplicate SPACE tokens.
 */
static int
glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
{
	/* Any token other than NEWLINE, SPACE or HASH_TOKEN, when we
	 * are not lexing a #version directive, resolves the implicit
	 * version. */
	if (token != NEWLINE && token != SPACE && token != HASH_TOKEN &&
	    !parser->lexing_version_directive) {
		glcpp_parser_resolve_implicit_version(parser);
	}

	/* After the first non-space token in a line, we won't
	 * allow any '#' to introduce a directive. A SPACE token
	 * leaves the flag untouched. */
	if (token == NEWLINE) {
		parser->first_non_space_token_this_line = 1;
	} else if (token != SPACE) {
		parser->first_non_space_token_this_line = 0;
	}

	/* Track newlines just to know whether a newline needs
	 * to be inserted if end-of-file comes early. */
	parser->last_token_was_newline = (token == NEWLINE);

	/* Track spaces to avoid emitting multiple SPACE
	 * tokens in a row: only the first SPACE of a run is
	 * returned. */
	if (token == SPACE) {
		int repeated_space = parser->last_token_was_space;

		parser->last_token_was_space = 1;
		return !repeated_space;
	}

	parser->last_token_was_space = 0;
	return 1;
}


%}

%option bison-bridge bison-locations reentrant noyywrap
%option extra-type="glcpp_parser_t *"
%option prefix="glcpp_"
%option stack
%option never-interactive
%option warn nodefault

	/* Note: When adding any start conditions to this list, you must also
	 * update the "Internal compiler error" catch-all rule near the end of
	 * this file. */

%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE

SPACE		[[:space:]]
NONSPACE	[^[:space:]]
HSPACE		[ \t\v\f]
HASH		#
NEWLINE		(\r\n|\n\r|\r|\n)
IDENTIFIER	[_a-zA-Z][_a-zA-Z0-9]*
PP_NUMBER	[.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
PUNCTUATION	[][(){}.&*~!/%<>^|;,=+-]

/* The OTHER class is simply a catch-all for things that the CPP
parser just doesn't care about. Since flex regular expressions that
match longer strings take priority over those matching shorter
strings, we have to be careful to avoid OTHER matching and hiding
something that CPP does care about. So we simply exclude all
characters that appear in any other expressions. */

OTHER		[^][_#[:space:]#a-zA-Z0-9(){}.&*~!/%<>^|;,=+-]

DIGITS			[0-9][0-9]*
DECIMAL_INTEGER		[1-9][0-9]*[uU]?
OCTAL_INTEGER		0[0-7]*[uU]?
HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
PATH			["][]^./ _A-Za-z0-9+*%[(){}|&~=!:;,?-]*["]

%%

	glcpp_parser_t *parser = yyextra;

	/* When we lex a multi-line comment, we replace it (as
	 * specified) with a single space. But if the comment spanned
	 * multiple lines, then subsequent parsing stages will not
	 * count correct line numbers. To avoid this problem we keep
	 * track of all newlines that were commented out by a
	 * multi-line comment, and we emit a NEWLINE token for each at
	 * the next legal opportunity, (which is when the lexer would
	 * be emitting a NEWLINE token anyway).
	 */
	if (YY_START == NEWLINE_CATCHUP) {
		if (parser->commented_newlines)
			parser->commented_newlines--;
		if (parser->commented_newlines == 0)
			BEGIN INITIAL;
		RETURN_TOKEN_NEVER_SKIP (NEWLINE);
	}

	/* Set up the parser->skipping bit here before doing any lexing.
	 *
	 * This bit controls whether tokens are skipped, (as implemented by
	 * RETURN_TOKEN), such as between "#if 0" and "#endif".
	 *
	 * The parser maintains a skip_stack indicating whether we should be
	 * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will
	 * push and pop items from the stack.
	 *
	 * Here are the rules for determining whether we are skipping:
	 *
	 *	1. If the skip stack is NULL, we are outside of all #if blocks
	 *         and we are not skipping.
	 *
	 *	2. If the skip stack is non-NULL, the type of the top node in
	 *	   the stack determines whether to skip. A type of
	 *	   SKIP_NO_SKIP is used for blocks where we are emitting
	 *	   tokens, (such as between #if 1 and #endif, or after the
	 *	   #else of an #if 0, etc.).
	 *
	 *	3. The lexing_directive bit overrides the skip stack. This bit
	 *	   is set when we are actively lexing the expression for a
	 *	   pre-processor condition, (such as #if, #elif, or #else). In
	 *	   this case, even if otherwise skipping, we need to emit the
	 *	   tokens for this condition so that the parser can evaluate
	 *	   the expression. (For, #else, there's no expression, but we
	 *	   emit tokens so the parser can generate a nice error message
	 *	   if there are any tokens here).
	 */
	if (parser->skip_stack &&
	    parser->skip_stack->type != SKIP_NO_SKIP &&
	    ! parser->lexing_directive)
	{
		parser->skipping = 1;
	} else {
		parser->skipping = 0;
	}

	/* Single-line comments */
<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
}

	/* Multi-line comments */
<INITIAL,DEFINE,HASH>"/*"		{ yy_push_state(COMMENT, yyscanner); }
<COMMENT>[^*\r\n]*
<COMMENT>[^*\r\n]*{NEWLINE}	{ yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+[^*/\r\n]*
<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+"/"		{
	yy_pop_state(yyscanner);
	/* In the <HASH> start condition, we don't want any SPACE token. */
	if (yyextra->space_tokens && YY_START != HASH)
		RETURN_TOKEN (SPACE);
}

{HASH} {

	/* If the '#' is the first non-whitespace, non-comment token on this
	 * line, then it introduces a directive, switch to the <HASH> start
	 * condition.
	 *
	 * Otherwise, this is just punctuation, so return the HASH_TOKEN
	 * token. */
	if (parser->first_non_space_token_this_line) {
		BEGIN HASH;
		yyextra->in_define = false;
	}

	RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
}

<HASH>version{HSPACE}+ {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	yyextra->lexing_version_directive = 1;
	RETURN_STRING_TOKEN (VERSION_TOKEN);
}

	/* Swallow empty #pragma directives, (to avoid confusing the
	 * downstream compiler).
	 *
	 * Note: We use a simple regular expression for the lookahead
	 * here. Specifically, we cannot use the complete {NEWLINE} expression
	 * since it uses alternation and we've found that there's a flex bug
	 * where using alternation in the lookahead portion of a pattern
	 * triggers a buffer overrun. */
<HASH>pragma{HSPACE}*/[\r\n] {
	BEGIN INITIAL;
}

	/* glcpp doesn't handle #extension, #version, or #pragma directives.
	 * Simply pass them through to the main compiler's lexer/parser. */
<HASH>(extension|pragma)[^\r\n]* {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (PRAGMA);
}

<HASH>include{HSPACE}+["<][]^./ _A-Za-z0-9+*%[(){}|&~=!:;,?-]+[">] {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (INCLUDE);
}

<HASH>line{HSPACE}+ {
	BEGIN INITIAL;
	RETURN_TOKEN (LINE);
}

<HASH>{NEWLINE} {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	yylineno++;
	yycolumn = 0;
	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}

	/* For the pre-processor directives, we return these tokens
	 * even when we are otherwise skipping. */
<HASH>ifdef {
	if (!yyextra->in_define) {
		BEGIN INITIAL;
		yyextra->lexing_directive = 1;
		yyextra->space_tokens = 0;
		RETURN_TOKEN_NEVER_SKIP (IFDEF);
	}
}

<HASH>ifndef {
	if (!yyextra->in_define) {
		BEGIN INITIAL;
		yyextra->lexing_directive = 1;
		yyextra->space_tokens = 0;
		RETURN_TOKEN_NEVER_SKIP (IFNDEF);
	}
}

<HASH>if/[^_a-zA-Z0-9] {
	if (!yyextra->in_define) {
		BEGIN INITIAL;
		yyextra->lexing_directive = 1;
		yyextra->space_tokens = 0;
		RETURN_TOKEN_NEVER_SKIP (IF);
	}
}

<HASH>elif/[^_a-zA-Z0-9] {
	if (!yyextra->in_define) {
		BEGIN INITIAL;
		yyextra->lexing_directive = 1;
		yyextra->space_tokens = 0;
		RETURN_TOKEN_NEVER_SKIP (ELIF);
	}
}

<HASH>else {
	if (!yyextra->in_define) {
		BEGIN INITIAL;
		yyextra->space_tokens = 0;
		RETURN_TOKEN_NEVER_SKIP (ELSE);
	}
}

<HASH>endif {
	if (!yyextra->in_define) {
		BEGIN INITIAL;
		yyextra->space_tokens = 0;
		RETURN_TOKEN_NEVER_SKIP (ENDIF);
	}
}

<HASH>error[^\r\n]* {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (ERROR_TOKEN);
}

	/* After we see a "#define" we enter the <DEFINE> start state
	 * for the lexer. Within <DEFINE> we are looking for the first
	 * identifier and specifically checking whether the identifier
	 * is followed by a '(' or not, (to lex either a
	 * FUNC_IDENTIFIER or an OBJ_IDENTIFIER token).
	 *
	 * While in the <DEFINE> state we also need to explicitly
	 * handle a few other things that may appear before the
	 * identifier:
	 *
	 * 	* Comments, (handled above with the main support for
	 * 	  comments).
	 *
	 *	* Whitespace (simply ignored)
	 *
	 *	* Anything else, (not an identifier, not a comment,
	 *	  and not whitespace). This will generate an error.
	 */
<HASH>define{HSPACE}* {
	yyextra->in_define = true;
	if (!parser->skipping) {
		BEGIN DEFINE;
		yyextra->space_tokens = 0;
		RETURN_TOKEN (DEFINE_TOKEN);
	}
}

<HASH>undef {
	BEGIN INITIAL;
	yyextra->space_tokens = 0;
	RETURN_TOKEN (UNDEF);
}

<HASH>{HSPACE}+ {
	/* Nothing to do here. Importantly, don't leave the <HASH>
	 * start condition, since it's legal to have space between the
	 * '#' and the directive. */
}

	/* This will catch any non-directive garbage after a HASH */
<HASH>{NONSPACE} {
	if (!parser->skipping) {
		BEGIN INITIAL;
		RETURN_TOKEN (GARBAGE);
	}
}

	/* An identifier immediately followed by '(' */
<DEFINE>{IDENTIFIER}/"(" {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (FUNC_IDENTIFIER);
}

	/* An identifier not immediately followed by '(' */
<DEFINE>{IDENTIFIER} {
	BEGIN INITIAL;
	RETURN_STRING_TOKEN (OBJ_IDENTIFIER);
}

	/* Whitespace */
<DEFINE>{HSPACE}+ {
	/* Just ignore it. Nothing to do here. */
}

	/* '/' not followed by '*', so not a comment. This is an error. */
<DEFINE>[/][^*]{NONSPACE}* {
	BEGIN INITIAL;
	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

	/* A character that can't start an identifier, comment, or
	 * space. This is an error. */
<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* {
	BEGIN INITIAL;
	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

{DECIMAL_INTEGER} {
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

{OCTAL_INTEGER} {
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

{HEXADECIMAL_INTEGER} {
	RETURN_STRING_TOKEN (INTEGER_STRING);
}

"<<"  {
	RETURN_TOKEN (LEFT_SHIFT);
}

">>" {
	RETURN_TOKEN (RIGHT_SHIFT);
}

"<=" {
	RETURN_TOKEN (LESS_OR_EQUAL);
}

">=" {
	RETURN_TOKEN (GREATER_OR_EQUAL);
}

"==" {
	RETURN_TOKEN (EQUAL);
}

"!=" {
	RETURN_TOKEN (NOT_EQUAL);
}

"&&" {
	RETURN_TOKEN (AND);
}

"||" {
	RETURN_TOKEN (OR);
}

"++" {
	RETURN_TOKEN (PLUS_PLUS);
}

"--" {
	RETURN_TOKEN (MINUS_MINUS);
}

"##" {
	if (! parser->skipping) {
		if (parser->is_gles)
			glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
		RETURN_TOKEN (PASTE);
	}
}

"defined" {
	RETURN_TOKEN (DEFINED);
}

{IDENTIFIER} {
	RETURN_STRING_TOKEN (IDENTIFIER);
}

{PP_NUMBER} {
	RETURN_STRING_TOKEN (OTHER);
}

{PUNCTUATION} {
	RETURN_TOKEN (yytext[0]);
}

{OTHER}+ {
	RETURN_STRING_TOKEN (OTHER);
}

{HSPACE} {
	if (yyextra->space_tokens) {
		RETURN_TOKEN (SPACE);
	}
}

{PATH} {
	RETURN_STRING_TOKEN (PATH);
}

	/* We preserve all newlines, even between #if 0..#endif, so no
	 * skipping. */
<*>{NEWLINE} {
	if (parser->commented_newlines) {
		BEGIN NEWLINE_CATCHUP;
	} else {
		BEGIN INITIAL;
	}
	yyextra->space_tokens = 1;
	yyextra->lexing_directive = 0;
	yyextra->lexing_version_directive = 0;
	yylineno++;
	yycolumn = 0;
	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}

<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
	if (YY_START == COMMENT)
		glcpp_error(yylloc, yyextra, "Unterminated comment");
	BEGIN DONE; /* Don't keep matching this rule forever. */
	yyextra->lexing_directive = 0;
	yyextra->lexing_version_directive = 0;
	if (! parser->last_token_was_newline)
		RETURN_TOKEN (NEWLINE);
}

	/* This is a catch-all to avoid the annoying default flex action which
	 * matches any character and prints it. If any input ever matches this
	 * rule, then we have made a mistake above and need to fix one or more
	 * of the preceding patterns to match that input. */

<*>. {
	glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);

	/* We don't actually use the UNREACHABLE start condition. We
	 * only have this block here so that we can pretend to call some
	 * generated functions, (to avoid "defined but not used"
	 * warnings). */
	if (YY_START == UNREACHABLE) {
		unput('.');
		/* The start-condition stack helpers take the scanner
		 * handle, not the parser stored in yyextra. */
		yy_top_state(yyscanner);
	}
}

%%

/* Make the scanner owned by 'parser' read its input from the
 * NUL-terminated string 'shader', by handing it to yy_scan_string()
 * together with parser->scanner. The buffer handle returned by
 * yy_scan_string() is not kept. */
void
glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader)
{
	yy_scan_string(shader, parser->scanner);
}