1 1.239 rillig /* $NetBSD: lex.c,v 1.239 2025/04/12 19:42:35 rillig Exp $ */ 2 1.1 rillig 3 1.1 rillig /* 4 1.1 rillig * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. 5 1.1 rillig * Copyright (c) 1994, 1995 Jochen Pohl 6 1.1 rillig * All Rights Reserved. 7 1.1 rillig * 8 1.1 rillig * Redistribution and use in source and binary forms, with or without 9 1.1 rillig * modification, are permitted provided that the following conditions 10 1.1 rillig * are met: 11 1.1 rillig * 1. Redistributions of source code must retain the above copyright 12 1.1 rillig * notice, this list of conditions and the following disclaimer. 13 1.1 rillig * 2. Redistributions in binary form must reproduce the above copyright 14 1.1 rillig * notice, this list of conditions and the following disclaimer in the 15 1.1 rillig * documentation and/or other materials provided with the distribution. 16 1.1 rillig * 3. All advertising materials mentioning features or use of this software 17 1.1 rillig * must display the following acknowledgement: 18 1.182 rillig * This product includes software developed by Jochen Pohl for 19 1.182 rillig * The NetBSD Project. 20 1.1 rillig * 4. The name of the author may not be used to endorse or promote products 21 1.1 rillig * derived from this software without specific prior written permission. 22 1.1 rillig * 23 1.1 rillig * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24 1.1 rillig * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 1.1 rillig * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 1.1 rillig * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27 1.1 rillig * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28 1.1 rillig * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 1.1 rillig * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 1.1 rillig * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 1.1 rillig * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 1.1 rillig * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 1.1 rillig */ 34 1.1 rillig 35 1.4 christos #if HAVE_NBTOOL_CONFIG_H 36 1.4 christos #include "nbtool_config.h" 37 1.4 christos #endif 38 1.4 christos 39 1.1 rillig #include <sys/cdefs.h> 40 1.131 rillig #if defined(__RCSID) 41 1.239 rillig __RCSID("$NetBSD: lex.c,v 1.239 2025/04/12 19:42:35 rillig Exp $"); 42 1.1 rillig #endif 43 1.1 rillig 44 1.1 rillig #include <ctype.h> 45 1.1 rillig #include <errno.h> 46 1.1 rillig #include <float.h> 47 1.1 rillig #include <limits.h> 48 1.1 rillig #include <math.h> 49 1.1 rillig #include <stdlib.h> 50 1.1 rillig #include <string.h> 51 1.1 rillig 52 1.1 rillig #include "lint1.h" 53 1.1 rillig #include "cgram.h" 54 1.1 rillig 55 1.72 rillig #define CHAR_MASK ((1U << CHAR_SIZE) - 1) 56 1.1 rillig 57 1.1 rillig 58 1.1 rillig /* Current position (it's also updated when an included file is parsed) */ 59 1.196 rillig pos_t curr_pos = { "", 1, 0 }; 60 1.1 rillig 61 1.1 rillig /* 62 1.1 rillig * Current position in C source (not updated when an included file is 63 1.1 rillig * parsed). 64 1.1 rillig */ 65 1.196 rillig pos_t csrc_pos = { "", 1, 0 }; 66 1.1 rillig 67 1.94 rillig bool in_gcc_attribute; 68 1.94 rillig bool in_system_header; 69 1.1 rillig 70 1.125 rillig /* 71 1.224 rillig * Define a keyword that cannot be overridden by identifiers. 72 1.224 rillig * 73 1.184 rillig * Valid values for 'since' are 78, 90, 99, 11, 23. 74 1.125 rillig * 75 1.184 rillig * The C11 keywords are all taken from the reserved namespace. They are added 76 1.184 rillig * in C99 mode as well, to make the parse error messages more useful. For 77 1.184 rillig * example, if the keyword '_Generic' were not defined, it would be interpreted 78 1.184 rillig * as an implicit function call, leading to a parse error. 79 1.184 rillig * 80 1.184 rillig * The C23 keywords are not made available in earlier modes, as they may 81 1.184 rillig * conflict with user-defined identifiers. 82 1.125 rillig */ 83 1.175 rillig #define kwdef(name, token, detail, since, gcc, deco) \ 84 1.236 rillig { \ 85 1.175 rillig name, token, detail, \ 86 1.122 rillig (since) == 90, \ 87 1.184 rillig (since) == 99 || (since) == 11, \ 88 1.184 rillig (since) == 23, \ 89 1.133 rillig (gcc) > 0, \ 90 1.66 rillig ((deco) & 1) != 0, ((deco) & 2) != 0, ((deco) & 4) != 0, \ 91 1.13 rillig } 92 1.122 rillig #define kwdef_token(name, token, since, gcc, deco) \ 93 1.175 rillig kwdef(name, token, {false}, since, gcc, deco) 94 1.122 rillig #define kwdef_sclass(name, sclass, since, gcc, deco) \ 95 1.175 rillig kwdef(name, T_SCLASS, .u.kw_scl = (sclass), since, gcc, deco) 96 1.132 rillig #define kwdef_type(name, tspec, since) \ 97 1.175 rillig kwdef(name, T_TYPE, .u.kw_tspec = (tspec), since, 0, 1) 98 1.122 rillig #define kwdef_tqual(name, tqual, since, gcc, deco) \ 99 1.185 rillig kwdef(name, T_QUAL, .u.kw_tqual = {.tqual = true}, since, gcc, deco) 100 1.225 rillig #define kwdef_const(name, named_constant, since, gcc, deco) \ 101 1.225 rillig kwdef(name, T_NAMED_CONSTANT, \ 102 1.225 rillig .u.kw_named_constant = (named_constant), since, gcc, deco) 103 1.35 rillig #define kwdef_keyword(name, token) \ 104 1.175 rillig kwdef(name, token, {false}, 78, 0, 1) 105 1.13 rillig 106 1.94 rillig /* During initialization, these keywords are written to the symbol table. */ 107 1.106 rillig static const struct keyword { 108 1.178 rillig const char kw_name[20]; 109 1.176 rillig int kw_token; /* token to be returned by yylex() */ 110 1.175 rillig union { 111 1.175 rillig bool kw_dummy; 112 1.175 rillig scl_t kw_scl; /* if kw_token is T_SCLASS */ 113 1.175 rillig tspec_t kw_tspec; /* if kw_token is T_TYPE or 114 1.175 rillig * T_STRUCT_OR_UNION */ 115 1.185 rillig type_qualifiers kw_tqual; /* if kw_token is T_QUAL */ 116 1.176 rillig function_specifier kw_fs; /* if kw_token is 117 1.176 rillig * T_FUNCTION_SPECIFIER */ 118 1.225 rillig named_constant kw_named_constant; 119 1.175 rillig } u; 120 1.184 rillig bool kw_added_in_c90:1; 121 1.184 rillig bool kw_added_in_c99_or_c11:1; 122 1.184 rillig bool kw_added_in_c23:1; 123 1.146 rillig bool kw_gcc:1; /* available in GCC mode */ 124 1.93 rillig bool kw_plain:1; /* 'name' */ 125 1.93 rillig bool kw_leading:1; /* '__name' */ 126 1.93 rillig bool kw_both:1; /* '__name__' */ 127 1.94 rillig } keywords[] = { 128 1.186 rillig // TODO: _Alignas is not available in C99. 129 1.35 rillig kwdef_keyword( "_Alignas", T_ALIGNAS), 130 1.186 rillig // TODO: _Alignof is not available in C99. 131 1.35 rillig kwdef_keyword( "_Alignof", T_ALIGNOF), 132 1.186 rillig // TODO: alignof is not available in C99. 133 1.130 rillig kwdef_token( "alignof", T_ALIGNOF, 78,0,6), 134 1.146 rillig kwdef_token( "asm", T_ASM, 78,1,7), 135 1.142 rillig kwdef_token( "_Atomic", T_ATOMIC, 11,0,1), 136 1.229 rillig kwdef("__auto_type", T_TYPE, .u.kw_tspec = AUTO_TYPE, 99,1,1), 137 1.122 rillig kwdef_token( "attribute", T_ATTRIBUTE, 78,1,6), 138 1.122 rillig kwdef_sclass( "auto", AUTO, 78,0,1), 139 1.132 rillig kwdef_type( "_Bool", BOOL, 99), 140 1.224 rillig kwdef_type( "bool", BOOL, 23), 141 1.35 rillig kwdef_keyword( "break", T_BREAK), 142 1.122 rillig kwdef_token( "__builtin_offsetof", T_BUILTIN_OFFSETOF, 78,1,1), 143 1.35 rillig kwdef_keyword( "case", T_CASE), 144 1.132 rillig kwdef_type( "char", CHAR, 78), 145 1.132 rillig kwdef_type( "_Complex", COMPLEX, 99), 146 1.185 rillig kwdef_tqual( "const", tq_const, 90,0,7), 147 1.35 rillig kwdef_keyword( "continue", T_CONTINUE), 148 1.35 rillig kwdef_keyword( "default", T_DEFAULT), 149 1.35 rillig kwdef_keyword( "do", T_DO), 150 1.132 rillig kwdef_type( "double", DOUBLE, 78), 151 1.35 rillig kwdef_keyword( "else", T_ELSE), 152 1.238 rillig // XXX: enum requires C90 or later. 153 1.35 rillig kwdef_keyword( "enum", T_ENUM), 154 1.122 rillig kwdef_token( "__extension__",T_EXTENSION, 78,1,1), 155 1.122 rillig kwdef_sclass( "extern", EXTERN, 78,0,1), 156 1.224 rillig kwdef_const( "false", NC_FALSE, 23,0,1), 157 1.132 rillig kwdef_type( "float", FLOAT, 78), 158 1.35 rillig kwdef_keyword( "for", T_FOR), 159 1.122 rillig kwdef_token( "_Generic", T_GENERIC, 11,0,1), 160 1.35 rillig kwdef_keyword( "goto", T_GOTO), 161 1.35 rillig kwdef_keyword( "if", T_IF), 162 1.122 rillig kwdef_token( "__imag__", T_IMAG, 78,1,1), 163 1.176 rillig kwdef("inline", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_INLINE, 99,0,7), 164 1.132 rillig kwdef_type( "int", INT, 78), 165 1.56 rillig #ifdef INT128_SIZE 166 1.132 rillig kwdef_type( "__int128_t", INT128, 99), 167 1.56 rillig #endif 168 1.132 rillig kwdef_type( "long", LONG, 78), 169 1.177 rillig kwdef("_Noreturn", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_NORETURN, 11,0,1), 170 1.225 rillig kwdef_const( "nullptr", NC_NULLPTR, 23,0,1), 171 1.186 rillig // XXX: __packed is GCC-specific. 172 1.124 rillig kwdef_token( "__packed", T_PACKED, 78,0,1), 173 1.124 rillig kwdef_token( "__real__", T_REAL, 78,1,1), 174 1.124 rillig kwdef_sclass( "register", REG, 78,0,1), 175 1.185 rillig kwdef_tqual( "restrict", tq_restrict, 99,0,7), 176 1.124 rillig kwdef_keyword( "return", T_RETURN), 177 1.132 rillig kwdef_type( "short", SHORT, 78), 178 1.175 rillig kwdef( "signed", T_TYPE, .u.kw_tspec = SIGNED, 90,0,3), 179 1.124 rillig kwdef_keyword( "sizeof", T_SIZEOF), 180 1.124 rillig kwdef_sclass( "static", STATIC, 78,0,1), 181 1.186 rillig // XXX: _Static_assert was added in C11. 182 1.124 rillig kwdef_keyword( "_Static_assert", T_STATIC_ASSERT), 183 1.175 rillig kwdef("struct", T_STRUCT_OR_UNION, .u.kw_tspec = STRUCT, 78,0,1), 184 1.124 rillig kwdef_keyword( "switch", T_SWITCH), 185 1.124 rillig kwdef_token( "__symbolrename", T_SYMBOLRENAME, 78,0,1), 186 1.183 rillig kwdef_sclass( "__thread", THREAD_LOCAL, 78,1,1), 187 1.183 rillig kwdef_sclass( "_Thread_local", THREAD_LOCAL, 11,0,1), 188 1.184 rillig kwdef_sclass( "thread_local", THREAD_LOCAL, 23,0,1), 189 1.224 rillig kwdef_const( "true", NC_TRUE, 23,0,1), 190 1.124 rillig kwdef_sclass( "typedef", TYPEDEF, 78,0,1), 191 1.124 rillig kwdef_token( "typeof", T_TYPEOF, 78,1,7), 192 1.124 rillig #ifdef INT128_SIZE 193 1.132 rillig kwdef_type( "__uint128_t", UINT128, 99), 194 1.124 rillig #endif 195 1.175 rillig kwdef("union", T_STRUCT_OR_UNION, .u.kw_tspec = UNION, 78,0,1), 196 1.132 rillig kwdef_type( "unsigned", UNSIGN, 78), 197 1.238 rillig // XXX: void requires C90 or later. 198 1.132 rillig kwdef_type( "void", VOID, 78), 199 1.185 rillig kwdef_tqual( "volatile", tq_volatile, 90,0,7), 200 1.124 rillig kwdef_keyword( "while", T_WHILE), 201 1.13 rillig #undef kwdef 202 1.13 rillig #undef kwdef_token 203 1.13 rillig #undef kwdef_sclass 204 1.13 rillig #undef kwdef_type 205 1.13 rillig #undef kwdef_tqual 206 1.36 rillig #undef kwdef_keyword 207 1.1 rillig }; 208 1.1 rillig 209 1.146 rillig /* 210 1.146 rillig * The symbol table containing all keywords, identifiers and labels. The hash 211 1.146 rillig * entries are linked via sym_t.s_symtab_next. 212 1.146 rillig */ 213 1.173 rillig static sym_t *symtab[503]; 214 1.1 rillig 215 1.146 rillig /* 216 1.146 rillig * The kind of the next expected symbol, to distinguish the namespaces of 217 1.146 rillig * members, labels, type tags and other identifiers. 218 1.146 rillig */ 219 1.200 rillig symbol_kind sym_kind; 220 1.1 rillig 221 1.1 rillig 222 1.104 rillig static unsigned int 223 1.104 rillig hash(const char *s) 224 1.104 rillig { 225 1.210 rillig unsigned int v = 0; 226 1.210 rillig for (const char *p = s; *p != '\0'; p++) { 227 1.104 rillig v = (v << 4) + (unsigned char)*p; 228 1.104 rillig v ^= v >> 28; 229 1.104 rillig } 230 1.173 rillig return v % (sizeof(symtab) / sizeof(symtab[0])); 231 1.104 rillig } 232 1.104 rillig 233 1.1 rillig static void 234 1.62 rillig symtab_add(sym_t *sym) 235 1.60 rillig { 236 1.210 rillig unsigned int h = hash(sym->s_name); 237 1.102 rillig if ((sym->s_symtab_next = symtab[h]) != NULL) 238 1.102 rillig symtab[h]->s_symtab_ref = &sym->s_symtab_next; 239 1.102 rillig sym->s_symtab_ref = &symtab[h]; 240 1.60 rillig symtab[h] = sym; 241 1.60 rillig } 242 1.60 rillig 243 1.104 rillig static sym_t * 244 1.143 rillig symtab_search(const char *name) 245 1.104 rillig { 246 1.104 rillig 247 1.143 rillig unsigned int h = hash(name); 248 1.104 rillig for (sym_t *sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) { 249 1.143 rillig if (strcmp(sym->s_name, name) != 0) 250 1.104 rillig continue; 251 1.148 rillig if (sym->s_keyword != NULL || 252 1.200 rillig sym->s_kind == sym_kind || 253 1.148 rillig in_gcc_attribute) 254 1.104 rillig return sym; 255 1.104 rillig } 256 1.104 rillig 257 1.104 rillig return NULL; 258 1.104 rillig } 259 1.104 rillig 260 1.60 rillig static void 261 1.61 rillig symtab_remove(sym_t *sym) 262 1.61 rillig { 263 1.61 rillig 264 1.102 rillig if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL) 265 1.102 rillig sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref; 266 1.102 rillig sym->s_symtab_next = NULL; 267 1.61 rillig } 268 1.61 rillig 269 1.104 rillig static void 270 1.104 rillig symtab_remove_locals(void) 271 1.104 rillig { 272 1.104 rillig 273 1.173 rillig for (size_t i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++) { 274 1.104 rillig for (sym_t *sym = symtab[i]; sym != NULL; ) { 275 1.104 rillig sym_t *next = sym->s_symtab_next; 276 1.104 rillig if (sym->s_block_level >= 1) 277 1.104 rillig symtab_remove(sym); 278 1.104 rillig sym = next; 279 1.104 rillig } 280 1.104 rillig } 281 1.104 rillig } 282 1.61 rillig 283 1.107 rillig #ifdef DEBUG 284 1.107 rillig static int 285 1.107 rillig sym_by_name(const void *va, const void *vb) 286 1.107 rillig { 287 1.107 rillig const sym_t *a = *(const sym_t *const *)va; 288 1.107 rillig const sym_t *b = *(const sym_t *const *)vb; 289 1.107 rillig 290 1.107 rillig return strcmp(a->s_name, b->s_name); 291 1.107 rillig } 292 1.107 rillig 293 1.107 rillig struct syms { 294 1.107 rillig const sym_t **items; 295 1.107 rillig size_t len; 296 1.107 rillig size_t cap; 297 1.107 rillig }; 298 1.107 rillig 299 1.107 rillig static void 300 1.107 rillig syms_add(struct syms *syms, const sym_t *sym) 301 1.107 rillig { 302 1.146 rillig if (syms->len >= syms->cap) { 303 1.107 rillig syms->cap *= 2; 304 1.107 rillig syms->items = xrealloc(syms->items, 305 1.107 rillig syms->cap * sizeof(syms->items[0])); 306 1.107 rillig } 307 1.107 rillig syms->items[syms->len++] = sym; 308 1.107 rillig } 309 1.107 rillig 310 1.107 rillig void 311 1.107 rillig debug_symtab(void) 312 1.107 rillig { 313 1.107 rillig struct syms syms = { xcalloc(64, sizeof(syms.items[0])), 0, 64 }; 314 1.107 rillig 315 1.187 rillig debug_enter(); 316 1.111 rillig for (int level = -1;; level++) { 317 1.107 rillig bool more = false; 318 1.107 rillig size_t n = sizeof(symtab) / sizeof(symtab[0]); 319 1.107 rillig 320 1.107 rillig syms.len = 0; 321 1.107 rillig for (size_t i = 0; i < n; i++) { 322 1.107 rillig for (sym_t *sym = symtab[i]; sym != NULL;) { 323 1.107 rillig if (sym->s_block_level == level && 324 1.107 rillig sym->s_keyword == NULL) 325 1.107 rillig syms_add(&syms, sym); 326 1.107 rillig if (sym->s_block_level > level) 327 1.107 rillig more = true; 328 1.107 rillig sym = sym->s_symtab_next; 329 1.107 rillig } 330 1.107 rillig } 331 1.107 rillig 332 1.111 rillig if (syms.len > 0) { 333 1.187 rillig debug_step("symbol table level %d", level); 334 1.111 rillig debug_indent_inc(); 335 1.111 rillig qsort(syms.items, syms.len, sizeof(syms.items[0]), 336 1.111 rillig sym_by_name); 337 1.111 rillig for (size_t i = 0; i < syms.len; i++) 338 1.114 rillig debug_sym("", syms.items[i], "\n"); 339 1.111 rillig debug_indent_dec(); 340 1.111 rillig 341 1.111 rillig lint_assert(level != -1); 342 1.111 rillig } 343 1.107 rillig 344 1.107 rillig if (!more) 345 1.107 rillig break; 346 1.107 rillig } 347 1.187 rillig debug_leave(); 348 1.107 rillig 349 1.107 rillig free(syms.items); 350 1.107 rillig } 351 1.107 rillig #endif 352 1.107 rillig 353 1.61 rillig static void 354 1.210 rillig register_keyword(const struct keyword *kw, bool leading, bool trailing) 355 1.1 rillig { 356 1.146 rillig 357 1.1 rillig const char *name; 358 1.66 rillig if (!leading && !trailing) { 359 1.1 rillig name = kw->kw_name; 360 1.66 rillig } else { 361 1.146 rillig char buf[256]; 362 1.73 rillig (void)snprintf(buf, sizeof(buf), "%s%s%s", 363 1.66 rillig leading ? "__" : "", kw->kw_name, trailing ? "__" : ""); 364 1.66 rillig name = xstrdup(buf); 365 1.1 rillig } 366 1.1 rillig 367 1.188 rillig sym_t *sym = block_zero_alloc(sizeof(*sym), "sym"); 368 1.1 rillig sym->s_name = name; 369 1.1 rillig sym->s_keyword = kw; 370 1.146 rillig int tok = kw->kw_token; 371 1.146 rillig sym->u.s_keyword.sk_token = tok; 372 1.146 rillig if (tok == T_TYPE || tok == T_STRUCT_OR_UNION) 373 1.176 rillig sym->u.s_keyword.u.sk_tspec = kw->u.kw_tspec; 374 1.146 rillig if (tok == T_SCLASS) 375 1.175 rillig sym->s_scl = kw->u.kw_scl; 376 1.146 rillig if (tok == T_QUAL) 377 1.185 rillig sym->u.s_keyword.u.sk_type_qualifier = kw->u.kw_tqual; 378 1.176 rillig if (tok == T_FUNCTION_SPECIFIER) 379 1.176 rillig sym->u.s_keyword.u.function_specifier = kw->u.kw_fs; 380 1.225 rillig if (tok == T_NAMED_CONSTANT) 381 1.225 rillig sym->u.s_keyword.u.named_constant = kw->u.kw_named_constant; 382 1.60 rillig 383 1.60 rillig symtab_add(sym); 384 1.1 rillig } 385 1.1 rillig 386 1.126 rillig static bool 387 1.126 rillig is_keyword_known(const struct keyword *kw) 388 1.126 rillig { 389 1.126 rillig 390 1.184 rillig if (kw->kw_added_in_c23 && !allow_c23) 391 1.184 rillig return false; 392 1.184 rillig if ((kw->kw_added_in_c90 || kw->kw_added_in_c99_or_c11) && !allow_c90) 393 1.126 rillig return false; 394 1.126 rillig 395 1.126 rillig /* 396 1.126 rillig * In the 1990s, GCC defined several keywords that were later 397 1.126 rillig * incorporated into C99, therefore in GCC mode, all C99 keywords are 398 1.126 rillig * made available. The C11 keywords are made available as well, but 399 1.126 rillig * there are so few that they don't matter practically. 400 1.126 rillig */ 401 1.126 rillig if (allow_gcc) 402 1.126 rillig return true; 403 1.126 rillig if (kw->kw_gcc) 404 1.126 rillig return false; 405 1.126 rillig 406 1.184 rillig if (kw->kw_added_in_c99_or_c11 && !allow_c99) 407 1.126 rillig return false; 408 1.126 rillig return true; 409 1.126 rillig } 410 1.126 rillig 411 1.146 rillig /* Write all keywords to the symbol table. */ 412 1.1 rillig void 413 1.210 rillig init_lex(void) 414 1.1 rillig { 415 1.1 rillig 416 1.146 rillig size_t n = sizeof(keywords) / sizeof(keywords[0]); 417 1.146 rillig for (size_t i = 0; i < n; i++) { 418 1.146 rillig const struct keyword *kw = keywords + i; 419 1.126 rillig if (!is_keyword_known(kw)) 420 1.1 rillig continue; 421 1.66 rillig if (kw->kw_plain) 422 1.210 rillig register_keyword(kw, false, false); 423 1.66 rillig if (kw->kw_leading) 424 1.210 rillig register_keyword(kw, true, false); 425 1.66 rillig if (kw->kw_both) 426 1.210 rillig register_keyword(kw, true, true); 427 1.1 rillig } 428 1.1 rillig } 429 1.1 rillig 430 1.1 rillig /* 431 1.135 rillig * When scanning the remainder of a long token (see lex_input), read a byte 432 1.135 rillig * and return it as an unsigned char or as EOF. 433 1.135 rillig * 434 1.135 rillig * Increment the line counts if necessary. 435 1.1 rillig */ 436 1.1 rillig static int 437 1.135 rillig read_byte(void) 438 1.1 rillig { 439 1.215 rillig int c = lex_input(); 440 1.1 rillig 441 1.41 rillig if (c == '\n') 442 1.6 rillig lex_next_line(); 443 1.215 rillig return c == '\0' ? EOF : c; /* lex returns 0 on EOF. */ 444 1.1 rillig } 445 1.1 rillig 446 1.105 rillig static int 447 1.105 rillig lex_keyword(sym_t *sym) 448 1.105 rillig { 449 1.144 rillig int tok = sym->u.s_keyword.sk_token; 450 1.105 rillig 451 1.144 rillig if (tok == T_SCLASS) 452 1.105 rillig yylval.y_scl = sym->s_scl; 453 1.144 rillig if (tok == T_TYPE || tok == T_STRUCT_OR_UNION) 454 1.176 rillig yylval.y_tspec = sym->u.s_keyword.u.sk_tspec; 455 1.144 rillig if (tok == T_QUAL) 456 1.185 rillig yylval.y_type_qualifiers = 457 1.185 rillig sym->u.s_keyword.u.sk_type_qualifier; 458 1.176 rillig if (tok == T_FUNCTION_SPECIFIER) 459 1.176 rillig yylval.y_function_specifier = 460 1.176 rillig sym->u.s_keyword.u.function_specifier; 461 1.225 rillig if (tok == T_NAMED_CONSTANT) 462 1.225 rillig yylval.y_named_constant = sym->u.s_keyword.u.named_constant; 463 1.144 rillig return tok; 464 1.105 rillig } 465 1.105 rillig 466 1.1 rillig /* 467 1.146 rillig * Look up the definition of a name in the symbol table. This symbol must 468 1.200 rillig * either be a keyword or a symbol of the type required by sym_kind (label, 469 1.146 rillig * member, tag, ...). 470 1.1 rillig */ 471 1.1 rillig extern int 472 1.226 rillig lex_name(const char *text, size_t len) 473 1.1 rillig { 474 1.1 rillig 475 1.226 rillig sym_t *sym = symtab_search(text); 476 1.143 rillig if (sym != NULL && sym->s_keyword != NULL) 477 1.105 rillig return lex_keyword(sym); 478 1.1 rillig 479 1.143 rillig sbuf_t *sb = xmalloc(sizeof(*sb)); 480 1.226 rillig sb->sb_len = len; 481 1.1 rillig sb->sb_sym = sym; 482 1.143 rillig yylval.y_name = sb; 483 1.1 rillig 484 1.1 rillig if (sym != NULL) { 485 1.12 rillig lint_assert(block_level >= sym->s_block_level); 486 1.1 rillig sb->sb_name = sym->s_name; 487 1.143 rillig return sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME; 488 1.1 rillig } 489 1.1 rillig 490 1.226 rillig char *name = block_zero_alloc(len + 1, "string"); 491 1.226 rillig (void)memcpy(name, text, len + 1); 492 1.144 rillig sb->sb_name = name; 493 1.143 rillig return T_NAME; 494 1.1 rillig } 495 1.1 rillig 496 1.202 rillig static tspec_t 497 1.218 rillig integer_constant_type_signed(unsigned ls, uint64_t ui, int base, bool warned) 498 1.202 rillig { 499 1.218 rillig if (ls == 0 && ui <= TARG_INT_MAX) 500 1.218 rillig return INT; 501 1.218 rillig if (ls == 0 && ui <= TARG_UINT_MAX && base != 10 && allow_c90) 502 1.218 rillig return UINT; 503 1.218 rillig if (ls == 0 && ui <= TARG_LONG_MAX) 504 1.218 rillig return LONG; 505 1.218 rillig 506 1.218 rillig if (ls <= 1 && ui <= TARG_LONG_MAX) 507 1.218 rillig return LONG; 508 1.218 rillig if (ls <= 1 && ui <= TARG_ULONG_MAX && base != 10) 509 1.218 rillig return allow_c90 ? ULONG : LONG; 510 1.218 rillig if (ls <= 1 && !allow_c99) { 511 1.203 rillig if (!warned) 512 1.203 rillig /* integer constant out of range */ 513 1.203 rillig warning(252); 514 1.218 rillig return allow_c90 ? ULONG : LONG; 515 1.218 rillig } 516 1.218 rillig 517 1.218 rillig if (ui <= TARG_LLONG_MAX) 518 1.218 rillig return LLONG; 519 1.218 rillig if (ui <= TARG_ULLONG_MAX && base != 10) 520 1.214 rillig return allow_c90 ? ULLONG : LLONG; 521 1.218 rillig if (!warned) 522 1.218 rillig /* integer constant out of range */ 523 1.218 rillig warning(252); 524 1.218 rillig return allow_c90 ? ULLONG : LLONG; 525 1.218 rillig } 526 1.218 rillig 527 1.218 rillig static tspec_t 528 1.218 rillig integer_constant_type_unsigned(unsigned l, uint64_t ui, bool warned) 529 1.218 rillig { 530 1.218 rillig if (l == 0 && ui <= TARG_UINT_MAX) 531 1.218 rillig return UINT; 532 1.218 rillig 533 1.218 rillig if (l <= 1 && ui <= TARG_ULONG_MAX) 534 1.218 rillig return ULONG; 535 1.218 rillig if (l <= 1 && !allow_c99) { 536 1.203 rillig if (!warned) 537 1.203 rillig /* integer constant out of range */ 538 1.203 rillig warning(252); 539 1.218 rillig return ULONG; 540 1.218 rillig } 541 1.218 rillig 542 1.218 rillig if (ui <= TARG_ULLONG_MAX) 543 1.203 rillig return ULLONG; 544 1.218 rillig if (!warned) 545 1.218 rillig /* integer constant out of range */ 546 1.218 rillig warning(252); 547 1.218 rillig return ULLONG; 548 1.202 rillig } 549 1.202 rillig 550 1.1 rillig int 551 1.226 rillig lex_integer_constant(const char *text, size_t len, int base) 552 1.1 rillig { 553 1.226 rillig const char *cp = text; 554 1.1 rillig 555 1.1 rillig /* skip 0[xX] or 0[bB] */ 556 1.1 rillig if (base == 16 || base == 2) { 557 1.1 rillig cp += 2; 558 1.1 rillig len -= 2; 559 1.1 rillig } 560 1.1 rillig 561 1.1 rillig /* read suffixes */ 562 1.151 rillig unsigned l_suffix = 0, u_suffix = 0; 563 1.143 rillig for (;; len--) { 564 1.151 rillig char c = cp[len - 1]; 565 1.151 rillig if (c == 'l' || c == 'L') 566 1.1 rillig l_suffix++; 567 1.143 rillig else if (c == 'u' || c == 'U') 568 1.1 rillig u_suffix++; 569 1.143 rillig else 570 1.1 rillig break; 571 1.1 rillig } 572 1.1 rillig if (l_suffix > 2 || u_suffix > 1) { 573 1.1 rillig /* malformed integer constant */ 574 1.1 rillig warning(251); 575 1.1 rillig if (l_suffix > 2) 576 1.1 rillig l_suffix = 2; 577 1.1 rillig if (u_suffix > 1) 578 1.1 rillig u_suffix = 1; 579 1.1 rillig } 580 1.216 rillig if (!allow_c90 && u_suffix > 0) 581 1.238 rillig /* suffix 'U' requires C90 or later */ 582 1.1 rillig warning(97); 583 1.1 rillig 584 1.151 rillig bool warned = false; 585 1.1 rillig errno = 0; 586 1.151 rillig char *eptr; 587 1.166 rillig uint64_t ui = (uint64_t)strtoull(cp, &eptr, base); 588 1.1 rillig lint_assert(eptr == cp + len); 589 1.11 rillig if (errno != 0) { 590 1.1 rillig /* integer constant out of range */ 591 1.1 rillig warning(252); 592 1.11 rillig warned = true; 593 1.11 rillig } 594 1.1 rillig 595 1.231 rillig if (base == 8 && len > 1) 596 1.155 rillig /* octal number '%.*s' */ 597 1.155 rillig query_message(8, (int)len, cp); 598 1.155 rillig 599 1.218 rillig bool unsigned_since_c90 = allow_trad && allow_c90 && u_suffix == 0 600 1.223 rillig && ui > TARG_INT_MAX 601 1.223 rillig && ((l_suffix == 0 && base != 10 && ui <= TARG_UINT_MAX) 602 1.223 rillig || (l_suffix <= 1 && ui > TARG_LONG_MAX)); 603 1.1 rillig 604 1.219 rillig tspec_t t = u_suffix > 0 605 1.218 rillig ? integer_constant_type_unsigned(l_suffix, ui, warned) 606 1.218 rillig : integer_constant_type_signed(l_suffix, ui, base, warned); 607 1.223 rillig ui = (uint64_t)convert_integer((int64_t)ui, t, size_in_bits(t)); 608 1.1 rillig 609 1.22 rillig yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 610 1.202 rillig yylval.y_val->v_tspec = t; 611 1.218 rillig yylval.y_val->v_unsigned_since_c90 = unsigned_since_c90; 612 1.166 rillig yylval.y_val->u.integer = (int64_t)ui; 613 1.1 rillig 614 1.1 rillig return T_CON; 615 1.1 rillig } 616 1.1 rillig 617 1.223 rillig /* Extend or truncate si to match t. If t is signed, sign-extend. */ 618 1.1 rillig int64_t 619 1.223 rillig convert_integer(int64_t si, tspec_t t, unsigned int bits) 620 1.1 rillig { 621 1.1 rillig 622 1.223 rillig uint64_t vbits = value_bits(bits); 623 1.172 rillig uint64_t ui = (uint64_t)si; 624 1.223 rillig return t == PTR || is_uinteger(t) || ((ui & bit(bits - 1)) == 0) 625 1.172 rillig ? (int64_t)(ui & vbits) 626 1.172 rillig : (int64_t)(ui | ~vbits); 627 1.1 rillig } 628 1.1 rillig 629 1.1 rillig int 630 1.226 rillig lex_floating_constant(const char *text, size_t len) 631 1.1 rillig { 632 1.226 rillig const char *cp = text; 633 1.1 rillig 634 1.158 rillig bool imaginary = cp[len - 1] == 'i'; 635 1.158 rillig if (imaginary) 636 1.158 rillig len--; 637 1.81 rillig 638 1.151 rillig char c = cp[len - 1]; 639 1.215 rillig tspec_t t; 640 1.151 rillig if (c == 'f' || c == 'F') { 641 1.215 rillig t = imaginary ? FCOMPLEX : FLOAT; 642 1.1 rillig len--; 643 1.1 rillig } else if (c == 'l' || c == 'L') { 644 1.215 rillig t = imaginary ? LCOMPLEX : LDOUBLE; 645 1.1 rillig len--; 646 1.152 rillig } else 647 1.215 rillig t = imaginary ? DCOMPLEX : DOUBLE; 648 1.1 rillig 649 1.216 rillig if (!allow_c90 && t != DOUBLE) 650 1.238 rillig /* suffixes 'F' or 'L' require C90 or later */ 651 1.1 rillig warning(98); 652 1.1 rillig 653 1.1 rillig errno = 0; 654 1.151 rillig char *eptr; 655 1.150 rillig long double ld = strtold(cp, &eptr); 656 1.149 rillig lint_assert(eptr == cp + len); 657 1.216 rillig if (errno != 0) 658 1.1 rillig /* floating-point constant out of range */ 659 1.1 rillig warning(248); 660 1.216 rillig else if (t == FLOAT) { 661 1.150 rillig ld = (float)ld; 662 1.150 rillig if (isfinite(ld) == 0) { 663 1.1 rillig /* floating-point constant out of range */ 664 1.1 rillig warning(248); 665 1.150 rillig ld = ld > 0 ? FLT_MAX : -FLT_MAX; 666 1.150 rillig } 667 1.215 rillig } else if (t == DOUBLE 668 1.236 rillig || LDOUBLE_SIZE == DOUBLE_SIZE) { 669 1.150 rillig ld = (double)ld; 670 1.150 rillig if (isfinite(ld) == 0) { 671 1.150 rillig /* floating-point constant out of range */ 672 1.150 rillig warning(248); 673 1.150 rillig ld = ld > 0 ? DBL_MAX : -DBL_MAX; 674 1.1 rillig } 675 1.1 rillig } 676 1.1 rillig 677 1.22 rillig yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 678 1.215 rillig yylval.y_val->v_tspec = t; 679 1.166 rillig yylval.y_val->u.floating = ld; 680 1.1 rillig 681 1.1 rillig return T_CON; 682 1.1 rillig } 683 1.1 rillig 684 1.1 rillig int 685 1.1 rillig lex_operator(int t, op_t o) 686 1.1 rillig { 687 1.1 rillig 688 1.1 rillig yylval.y_op = o; 689 1.1 rillig return t; 690 1.1 rillig } 691 1.1 rillig 692 1.232 rillig static buffer 693 1.217 rillig read_quoted(bool *complete, char delim, bool wide) 694 1.136 rillig { 695 1.232 rillig buffer buf; 696 1.232 rillig buf_init(&buf); 697 1.212 rillig if (wide) 698 1.232 rillig buf_add_char(&buf, 'L'); 699 1.232 rillig buf_add_char(&buf, delim); 700 1.212 rillig 701 1.212 rillig for (;;) { 702 1.212 rillig int c = read_byte(); 703 1.212 rillig if (c <= 0) 704 1.212 rillig break; 705 1.232 rillig buf_add_char(&buf, (char)c); 706 1.212 rillig if (c == '\n') 707 1.212 rillig break; 708 1.212 rillig if (c == delim) { 709 1.212 rillig *complete = true; 710 1.212 rillig return buf; 711 1.212 rillig } 712 1.212 rillig if (c == '\\') { 713 1.212 rillig c = read_byte(); 714 1.232 rillig buf_add_char(&buf, (char)(c <= 0 ? ' ' : c)); 715 1.212 rillig if (c <= 0) 716 1.212 rillig break; 717 1.136 rillig } 718 1.136 rillig } 719 1.212 rillig *complete = false; 720 1.232 rillig buf_add_char(&buf, delim); 721 1.212 rillig return buf; 722 1.136 rillig } 723 1.136 rillig 724 1.222 rillig /* 725 1.222 rillig * Analyze the lexical representation of the next character in the string 726 1.222 rillig * literal list. At the end, only update the position information. 727 1.222 rillig */ 728 1.212 rillig bool 729 1.212 rillig quoted_next(const buffer *lit, quoted_iterator *it) 730 1.137 rillig { 731 1.212 rillig const char *s = lit->data; 732 1.212 rillig 733 1.222 rillig *it = (quoted_iterator){ .start = it->end }; 734 1.212 rillig 735 1.212 rillig char delim = s[s[0] == 'L' ? 1 : 0]; 736 1.212 rillig 737 1.222 rillig bool in_the_middle = it->start > 0; 738 1.222 rillig if (!in_the_middle) { 739 1.212 rillig it->start = s[0] == 'L' ? 2 : 1; 740 1.222 rillig it->end = it->start; 741 1.212 rillig } 742 1.212 rillig 743 1.222 rillig while (s[it->start] == delim) { 744 1.222 rillig if (it->start + 1 == lit->len) { 745 1.222 rillig it->end = it->start; 746 1.212 rillig return false; 747 1.222 rillig } 748 1.212 rillig it->next_literal = in_the_middle; 749 1.212 rillig it->start += 2; 750 1.212 rillig } 751 1.222 rillig it->end = it->start; 752 1.137 rillig 753 1.212 rillig again: 754 1.222 rillig switch (s[it->end]) { 755 1.137 rillig case '\\': 756 1.222 rillig it->end++; 757 1.212 rillig goto backslash; 758 1.212 rillig case '\n': 759 1.212 rillig it->unescaped_newline = true; 760 1.212 rillig return false; 761 1.212 rillig default: 762 1.222 rillig it->value = (unsigned char)s[it->end++]; 763 1.212 rillig return true; 764 1.212 rillig } 765 1.212 rillig 766 1.212 rillig backslash: 767 1.212 rillig it->escaped = true; 768 1.222 rillig if ('0' <= s[it->end] && s[it->end] <= '7') 769 1.212 rillig goto octal_escape; 770 1.222 rillig switch (s[it->end++]) { 771 1.212 rillig case '\n': 772 1.212 rillig goto again; 773 1.137 rillig case 'a': 774 1.212 rillig it->named_escape = true; 775 1.212 rillig it->value = '\a'; 776 1.212 rillig it->invalid_escape = !allow_c90; 777 1.212 rillig return true; 778 1.137 rillig case 'b': 779 1.212 rillig it->named_escape = true; 780 1.212 rillig it->value = '\b'; 781 1.212 rillig return true; 782 1.199 rillig case 'e': 783 1.212 rillig it->named_escape = true; 784 1.212 rillig it->value = '\033'; 785 1.212 rillig it->invalid_escape = !allow_gcc; 786 1.212 rillig return true; 787 1.137 rillig case 'f': 788 1.212 rillig it->named_escape = true; 789 1.212 rillig it->value = '\f'; 790 1.212 rillig return true; 791 1.137 rillig case 'n': 792 1.212 rillig it->named_escape = true; 793 1.212 rillig it->value = '\n'; 794 1.212 rillig return true; 795 1.137 rillig case 'r': 796 1.212 rillig it->named_escape = true; 797 1.212 rillig it->value = '\r'; 798 1.212 rillig return true; 799 1.137 rillig case 't': 800 1.212 rillig it->named_escape = true; 801 1.212 rillig it->value = '\t'; 802 1.212 rillig return true; 803 1.137 rillig case 'v': 804 1.212 rillig it->named_escape = true; 805 1.212 rillig it->value = '\v'; 806 1.212 rillig it->invalid_escape = !allow_c90; 807 1.212 rillig return true; 808 1.137 rillig case 'x': 809 1.212 rillig goto hex_escape; 810 1.212 rillig case '"': 811 1.212 rillig it->literal_escape = true; 812 1.212 rillig it->value = '"'; 813 1.212 rillig it->invalid_escape = !allow_c90 && delim == '\''; 814 1.212 rillig return true; 815 1.212 rillig case '?': 816 1.212 rillig it->literal_escape = true; 817 1.212 rillig it->value = '?'; 818 1.212 rillig it->invalid_escape = !allow_c90; 819 1.212 rillig return true; 820 1.137 rillig default: 821 1.212 rillig it->invalid_escape = true; 822 1.212 rillig /* FALLTHROUGH */ 823 1.212 rillig case '\'': 824 1.212 rillig case '\\': 825 1.212 rillig it->literal_escape = true; 826 1.222 rillig it->value = (unsigned char)s[it->end - 1]; 827 1.212 rillig return true; 828 1.137 rillig } 829 1.212 rillig 830 1.212 rillig octal_escape: 831 1.212 rillig it->octal_digits++; 832 1.222 rillig it->value = s[it->end++] - '0'; 833 1.222 rillig if ('0' <= s[it->end] && s[it->end] <= '7') { 834 1.212 rillig it->octal_digits++; 835 1.222 rillig it->value = 8 * it->value + (s[it->end++] - '0'); 836 1.222 rillig if ('0' <= s[it->end] && s[it->end] <= '7') { 837 1.212 rillig it->octal_digits++; 838 1.222 rillig it->value = 8 * it->value + (s[it->end++] - '0'); 839 1.212 rillig it->overflow = it->value > TARG_UCHAR_MAX 840 1.212 rillig && s[0] != 'L'; 841 1.212 rillig } 842 1.212 rillig } 843 1.212 rillig return true; 844 1.212 rillig 845 1.212 rillig hex_escape: 846 1.212 rillig for (;;) { 847 1.222 rillig char ch = s[it->end]; 848 1.212 rillig unsigned digit_value; 849 1.212 rillig if ('0' <= ch && ch <= '9') 850 1.212 rillig digit_value = ch - '0'; 851 1.212 rillig else if ('A' <= ch && ch <= 'F') 852 1.212 rillig digit_value = 10 + (ch - 'A'); 853 1.212 rillig else if ('a' <= ch && ch <= 'f') 854 1.212 rillig digit_value = 10 + (ch - 'a'); 855 1.212 rillig else 856 1.212 rillig break; 857 1.212 rillig 858 1.222 rillig it->end++; 859 1.212 rillig it->value = 16 * it->value + digit_value; 860 1.212 rillig uint64_t limit = s[0] == 'L' ? TARG_UINT_MAX : TARG_UCHAR_MAX; 861 1.212 rillig if (it->value > limit) 862 1.212 rillig it->overflow = true; 863 1.212 rillig if (it->hex_digits < 3) 864 1.212 rillig it->hex_digits++; 865 1.212 rillig } 866 1.212 rillig it->missing_hex_digits = it->hex_digits == 0; 867 1.212 rillig return true; 868 1.137 rillig } 869 1.137 rillig 870 1.212 rillig static void 871 1.212 rillig check_quoted(const buffer *buf, bool complete, char delim) 872 1.138 rillig { 873 1.222 rillig quoted_iterator it = { .end = 0 }, prev = it; 874 1.213 rillig for (; quoted_next(buf, &it); prev = it) { 875 1.212 rillig if (it.missing_hex_digits) 876 1.212 rillig /* no hex digits follow \x */ 877 1.212 rillig error(74); 878 1.212 rillig if (it.hex_digits > 0 && !allow_c90) 879 1.238 rillig /* \x requires C90 or later */ 880 1.212 rillig warning(82); 881 1.212 rillig else if (!it.invalid_escape) 882 1.212 rillig ; 883 1.212 rillig else if (it.value == '8' || it.value == '9') 884 1.212 rillig /* bad octal digit '%c' */ 885 1.212 rillig warning(77, (int)it.value); 886 1.212 rillig else if (it.literal_escape && it.value == '?') 887 1.238 rillig /* \? requires C90 or later */ 888 1.212 rillig warning(263); 889 1.212 rillig else if (it.literal_escape && it.value == '"') 890 1.238 rillig /* \" inside a character constant requires C90 ... */ 891 1.212 rillig warning(262); 892 1.212 rillig else if (it.named_escape && it.value == '\a') 893 1.238 rillig /* \a requires C90 or later */ 894 1.212 rillig warning(81); 895 1.212 rillig else if (it.named_escape && it.value == '\v') 896 1.238 rillig /* \v requires C90 or later */ 897 1.212 rillig warning(264); 898 1.212 rillig else { 899 1.222 rillig unsigned char ch = buf->data[it.end - 1]; 900 1.228 rillig if (ch_isprint(ch)) 901 1.212 rillig /* dubious escape \%c */ 902 1.212 rillig warning(79, ch); 903 1.212 rillig else 904 1.212 rillig /* dubious escape \%o */ 905 1.212 rillig warning(80, ch); 906 1.212 rillig } 907 1.212 rillig if (it.overflow && it.hex_digits > 0) 908 1.212 rillig /* overflow in hex escape */ 909 1.212 rillig warning(75); 910 1.212 rillig if (it.overflow && it.octal_digits > 0) 911 1.212 rillig /* character escape does not fit in character */ 912 1.212 rillig warning(76); 913 1.212 rillig if (it.value < ' ' && !it.escaped && complete) 914 1.197 rillig /* invisible character U+%04X in %s */ 915 1.212 rillig query_message(17, (unsigned)it.value, delim == '"' 916 1.197 rillig ? "string literal" : "character constant"); 917 1.213 rillig if (prev.octal_digits > 0 && prev.octal_digits < 3 918 1.213 rillig && !it.escaped && it.value >= '8' && it.value <= '9') 919 1.213 rillig /* short octal escape '%.*s' followed by digit '%c' */ 920 1.222 rillig warning(356, (int)(prev.end - prev.start), 921 1.213 rillig buf->data + prev.start, buf->data[it.start]); 922 1.138 rillig } 923 1.212 rillig if (it.unescaped_newline) 924 1.212 rillig /* newline in string or char constant */ 925 1.212 rillig error(254); 926 1.212 rillig if (!complete && delim == '"') 927 1.212 rillig /* unterminated string constant */ 928 1.212 rillig error(258); 929 1.212 rillig if (!complete && delim == '\'') 930 1.212 rillig /* unterminated character constant */ 931 1.212 rillig error(253); 932 1.212 rillig } 933 1.212 rillig 934 1.232 rillig static buffer 935 1.212 rillig lex_quoted(char delim, bool wide) 936 1.212 rillig { 937 1.212 rillig bool complete; 938 1.232 rillig buffer buf = read_quoted(&complete, delim, wide); 939 1.232 rillig check_quoted(&buf, complete, delim); 940 1.212 rillig return buf; 941 1.138 rillig } 942 1.138 rillig 943 1.113 rillig /* Called if lex found a leading "'". */ 944 1.1 rillig int 945 1.6 rillig lex_character_constant(void) 946 1.1 rillig { 947 1.232 rillig buffer buf = lex_quoted('\'', false); 948 1.1 rillig 949 1.211 rillig size_t n = 0; 950 1.212 rillig uint64_t val = 0; 951 1.222 rillig quoted_iterator it = { .end = 0 }; 952 1.232 rillig while (quoted_next(&buf, &it)) { 953 1.212 rillig val = (val << CHAR_SIZE) + it.value; 954 1.1 rillig n++; 955 1.1 rillig } 956 1.212 rillig if (n > sizeof(int) || (n > 1 && (pflag || hflag))) { 957 1.145 rillig /* 958 1.145 rillig * XXX: ^^ should rather be sizeof(TARG_INT). Luckily, 959 1.145 rillig * sizeof(int) is the same on all supported platforms. 960 1.145 rillig */ 961 1.43 rillig /* too many characters in character constant */ 962 1.43 rillig error(71); 963 1.216 rillig } else if (n > 1) 964 1.43 rillig /* multi-character character constant */ 965 1.43 rillig warning(294); 966 1.216 rillig else if (n == 0 && !it.unescaped_newline) 967 1.43 rillig /* empty character constant */ 968 1.43 rillig error(73); 969 1.212 rillig 970 1.212 rillig int64_t cval = n == 1 971 1.212 rillig ? convert_integer((int64_t)val, CHAR, CHAR_SIZE) 972 1.212 rillig : (int64_t)val; 973 1.1 rillig 974 1.22 rillig yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 975 1.1 rillig yylval.y_val->v_tspec = INT; 976 1.161 rillig yylval.y_val->v_char_constant = true; 977 1.212 rillig yylval.y_val->u.integer = cval; 978 1.1 rillig 979 1.1 rillig return T_CON; 980 1.1 rillig } 981 1.1 rillig 982 1.223 rillig /* Called if lex found a leading "L'". */ 983 1.1 rillig int 984 1.6 rillig lex_wide_character_constant(void) 985 1.1 rillig { 986 1.232 rillig buffer buf = lex_quoted('\'', true); 987 1.212 rillig 988 1.212 rillig static char wbuf[MB_LEN_MAX + 1]; 989 1.212 rillig size_t n = 0, nmax = MB_CUR_MAX; 990 1.1 rillig 991 1.222 rillig quoted_iterator it = { .end = 0 }; 992 1.232 rillig while (quoted_next(&buf, &it)) { 993 1.44 rillig if (n < nmax) 994 1.212 rillig wbuf[n] = (char)it.value; 995 1.44 rillig n++; 996 1.1 rillig } 997 1.1 rillig 998 1.211 rillig wchar_t wc = 0; 999 1.216 rillig if (n == 0) 1000 1.1 rillig /* empty character constant */ 1001 1.1 rillig error(73); 1002 1.216 rillig else if (n > nmax) { 1003 1.44 rillig n = nmax; 1004 1.43 rillig /* too many characters in character constant */ 1005 1.43 rillig error(71); 1006 1.1 rillig } else { 1007 1.212 rillig wbuf[n] = '\0'; 1008 1.43 rillig (void)mbtowc(NULL, NULL, 0); 1009 1.212 rillig if (mbtowc(&wc, wbuf, nmax) < 0) 1010 1.43 rillig /* invalid multibyte character */ 1011 1.43 rillig error(291); 1012 1.1 rillig } 1013 1.1 rillig 1014 1.22 rillig yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 1015 1.173 rillig yylval.y_val->v_tspec = WCHAR_TSPEC; 1016 1.161 rillig yylval.y_val->v_char_constant = true; 1017 1.166 rillig yylval.y_val->u.integer = wc; 1018 1.1 rillig 1019 1.1 rillig return T_CON; 1020 1.1 rillig } 1021 1.1 rillig 1022 1.1 rillig /* See https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html */ 1023 1.1 rillig static void 1024 1.25 rillig parse_line_directive_flags(const char *p, 1025 1.25 rillig bool *is_begin, bool *is_end, bool *is_system) 1026 1.1 rillig { 1027 1.1 rillig 1028 1.25 rillig *is_begin = false; 1029 1.25 rillig *is_end = false; 1030 1.25 rillig *is_system = false; 1031 1.1 rillig 1032 1.8 rillig while (*p != '\0') { 1033 1.228 rillig while (ch_isspace(*p)) 1034 1.1 rillig p++; 1035 1.8 rillig 1036 1.157 rillig const char *word = p; 1037 1.228 rillig while (*p != '\0' && !ch_isspace(*p)) 1038 1.8 rillig p++; 1039 1.157 rillig size_t len = (size_t)(p - word); 1040 1.8 rillig 1041 1.157 rillig if (len == 1 && word[0] == '1') 1042 1.25 rillig *is_begin = true; 1043 1.157 rillig if (len == 1 && word[0] == '2') 1044 1.25 rillig *is_end = true; 1045 1.157 rillig if (len == 1 && word[0] == '3') 1046 1.25 rillig *is_system = true; 1047 1.94 rillig /* Flag '4' is only interesting for C++. */ 1048 1.1 rillig } 1049 1.1 rillig } 1050 1.1 rillig 1051 1.1 rillig /* 1052 1.191 rillig * The first directive of the preprocessed translation unit provides the name 1053 1.191 rillig * of the C source file as specified at the command line. 1054 1.191 rillig */ 1055 1.191 rillig static void 1056 1.191 rillig set_csrc_pos(void) 1057 1.191 rillig { 1058 1.191 rillig static bool done; 1059 1.191 rillig 1060 1.191 rillig if (done) 1061 1.191 rillig return; 1062 1.191 rillig done = true; 1063 1.191 rillig csrc_pos.p_file = curr_pos.p_file; 1064 1.191 rillig outsrc(transform_filename(curr_pos.p_file, strlen(curr_pos.p_file))); 1065 1.191 rillig } 1066 1.191 rillig 1067 1.233 rillig /* # lineno ["filename" [GCC-flag...]] */ 1068 1.232 rillig static void 1069 1.232 rillig set_location(const char *p) 1070 1.1 rillig { 1071 1.191 rillig char *end; 1072 1.191 rillig long ln = strtol(--p, &end, 10); 1073 1.191 rillig if (end == p) 1074 1.1 rillig goto error; 1075 1.191 rillig p = end; 1076 1.191 rillig 1077 1.232 rillig if (*p != ' ' && *p != '\0') 1078 1.1 rillig goto error; 1079 1.232 rillig while (*p == ' ') 1080 1.191 rillig p++; 1081 1.191 rillig 1082 1.191 rillig if (*p != '\0') { 1083 1.191 rillig if (*p != '"') 1084 1.1 rillig goto error; 1085 1.191 rillig const char *fn = ++p; 1086 1.191 rillig while (*p != '"' && *p != '\0') 1087 1.191 rillig p++; 1088 1.191 rillig if (*p != '"') 1089 1.1 rillig goto error; 1090 1.191 rillig size_t fn_len = p++ - fn; 1091 1.191 rillig if (fn_len > PATH_MAX) 1092 1.1 rillig goto error; 1093 1.191 rillig if (fn_len == 0) { 1094 1.1 rillig fn = "{standard input}"; 1095 1.191 rillig fn_len = strlen(fn); 1096 1.1 rillig } 1097 1.191 rillig curr_pos.p_file = record_filename(fn, fn_len); 1098 1.191 rillig set_csrc_pos(); 1099 1.25 rillig 1100 1.191 rillig bool is_begin, is_end, is_system; 1101 1.191 rillig parse_line_directive_flags(p, &is_begin, &is_end, &is_system); 1102 1.26 rillig update_location(curr_pos.p_file, (int)ln, is_begin, is_end); 1103 1.26 rillig in_system_header = is_system; 1104 1.1 rillig } 1105 1.1 rillig curr_pos.p_line = (int)ln - 1; 1106 1.1 rillig curr_pos.p_uniq = 0; 1107 1.1 rillig if (curr_pos.p_file == csrc_pos.p_file) { 1108 1.1 rillig csrc_pos.p_line = (int)ln - 1; 1109 1.1 rillig csrc_pos.p_uniq = 0; 1110 1.1 rillig } 1111 1.191 rillig return; 1112 1.191 rillig 1113 1.191 rillig error: 1114 1.191 rillig /* undefined or invalid '#' directive */ 1115 1.191 rillig warning(255); 1116 1.1 rillig } 1117 1.1 rillig 1118 1.232 rillig static void 1119 1.232 rillig check_stmt_macro(const char *text) 1120 1.232 rillig { 1121 1.232 rillig const char *p = text; 1122 1.232 rillig while (*p == ' ') 1123 1.232 rillig p++; 1124 1.232 rillig 1125 1.232 rillig const char *name_start = p; 1126 1.232 rillig while (ch_isalnum(*p) || *p == '_') 1127 1.232 rillig p++; 1128 1.232 rillig const char *name_end = p; 1129 1.232 rillig 1130 1.232 rillig if (*p == '(') { 1131 1.232 rillig while (*p != '\0' && *p != ')') 1132 1.232 rillig p++; 1133 1.232 rillig if (*p == ')') 1134 1.232 rillig p++; 1135 1.232 rillig } 1136 1.232 rillig 1137 1.232 rillig while (*p == ' ') 1138 1.232 rillig p++; 1139 1.232 rillig 1140 1.234 rillig if (strncmp(p, "do", 2) == 0 && !ch_isalnum(p[2]) && p[2] != '_') 1141 1.232 rillig /* do-while macro '%.*s' ends with semicolon */ 1142 1.232 rillig warning(385, (int)(name_end - name_start), name_start); 1143 1.232 rillig } 1144 1.232 rillig 1145 1.232 rillig // Between lex_pp_begin and lex_pp_end, the current preprocessing line, 1146 1.232 rillig // with comments and whitespace converted to a single space. 1147 1.232 rillig static buffer pp_line; 1148 1.232 rillig 1149 1.232 rillig void 1150 1.232 rillig lex_pp_begin(void) 1151 1.232 rillig { 1152 1.232 rillig if (pp_line.data == NULL) 1153 1.232 rillig buf_init(&pp_line); 1154 1.232 rillig debug_step("%s", __func__); 1155 1.232 rillig lint_assert(pp_line.len == 0); 1156 1.232 rillig } 1157 1.232 rillig 1158 1.232 rillig void 1159 1.232 rillig lex_pp_identifier(const char *text) 1160 1.232 rillig { 1161 1.232 rillig debug_step("%s '%s'", __func__, text); 1162 1.232 rillig buf_add(&pp_line, text); 1163 1.232 rillig } 1164 1.232 rillig 1165 1.232 rillig void 1166 1.232 rillig lex_pp_number(const char *text) 1167 1.232 rillig { 1168 1.232 rillig debug_step("%s '%s'", __func__, text); 1169 1.232 rillig buf_add(&pp_line, text); 1170 1.232 rillig } 1171 1.232 rillig 1172 1.232 rillig void 1173 1.232 rillig lex_pp_character_constant(void) 1174 1.232 rillig { 1175 1.232 rillig buffer buf = lex_quoted('\'', false); 1176 1.232 rillig debug_step("%s '%s'", __func__, buf.data); 1177 1.232 rillig buf_add(&pp_line, buf.data); 1178 1.232 rillig free(buf.data); 1179 1.232 rillig } 1180 1.232 rillig 1181 1.232 rillig void 1182 1.232 rillig lex_pp_string_literal(void) 1183 1.232 rillig { 1184 1.232 rillig buffer buf = lex_quoted('"', false); 1185 1.232 rillig debug_step("%s '%s'", __func__, buf.data); 1186 1.232 rillig buf_add(&pp_line, buf.data); 1187 1.232 rillig free(buf.data); 1188 1.232 rillig } 1189 1.232 rillig 1190 1.232 rillig void 1191 1.232 rillig lex_pp_punctuator(const char *text) 1192 1.232 rillig { 1193 1.232 rillig debug_step("%s '%s'", __func__, text); 1194 1.232 rillig buf_add(&pp_line, text); 1195 1.232 rillig } 1196 1.232 rillig 1197 1.232 rillig void 1198 1.232 rillig lex_pp_comment(void) 1199 1.232 rillig { 1200 1.232 rillig int lc = -1, c; 1201 1.232 rillig 1202 1.232 rillig for (;;) { 1203 1.232 rillig if ((c = read_byte()) == EOF) { 1204 1.232 rillig /* unterminated comment */ 1205 1.232 rillig error(256); 1206 1.232 rillig return; 1207 1.232 rillig } 1208 1.232 rillig if (lc == '*' && c == '/') 1209 1.232 rillig break; 1210 1.232 rillig lc = c; 1211 1.232 rillig } 1212 1.232 rillig 1213 1.232 rillig buf_add_char(&pp_line, ' '); 1214 1.232 rillig } 1215 1.232 rillig 1216 1.232 rillig void 1217 1.232 rillig lex_pp_whitespace(void) 1218 1.232 rillig { 1219 1.232 rillig buf_add_char(&pp_line, ' '); 1220 1.232 rillig } 1221 1.232 rillig 1222 1.232 rillig void 1223 1.232 rillig lex_pp_end(void) 1224 1.232 rillig { 1225 1.232 rillig const char *text = pp_line.data; 1226 1.232 rillig size_t len = pp_line.len; 1227 1.232 rillig while (len > 0 && text[len - 1] == ' ') 1228 1.232 rillig len--; 1229 1.232 rillig debug_step("%s '%.*s'", __func__, (int)len, text); 1230 1.232 rillig 1231 1.232 rillig const char *p = text; 1232 1.232 rillig while (*p == ' ') 1233 1.232 rillig p++; 1234 1.232 rillig 1235 1.232 rillig if (ch_isdigit(*p)) 1236 1.232 rillig set_location(p); 1237 1.232 rillig else if (strncmp(p, "pragma ", 7) == 0) 1238 1.232 rillig goto done; 1239 1.232 rillig else if (strncmp(p, "define ", 7) == 0) { 1240 1.232 rillig if (text[len - 1] == ';') 1241 1.232 rillig check_stmt_macro(p + 7); 1242 1.232 rillig } else if (strncmp(p, "undef ", 6) == 0) 1243 1.232 rillig goto done; 1244 1.232 rillig else 1245 1.232 rillig /* undefined or invalid '#' directive */ 1246 1.232 rillig warning(255); 1247 1.232 rillig 1248 1.232 rillig done: 1249 1.232 rillig pp_line.len = 0; 1250 1.232 rillig pp_line.data[0] = '\0'; 1251 1.232 rillig } 1252 1.232 rillig 1253 1.180 rillig /* Handle lint comments such as ARGSUSED. */ 1254 1.1 rillig void 1255 1.1 rillig lex_comment(void) 1256 1.1 rillig { 1257 1.146 rillig int c; 1258 1.1 rillig static const struct { 1259 1.239 rillig const char name[13]; 1260 1.3 rillig bool arg; 1261 1.180 rillig lint_comment comment; 1262 1.1 rillig } keywtab[] = { 1263 1.180 rillig { "ARGSUSED", true, LC_ARGSUSED }, 1264 1.180 rillig { "BITFIELDTYPE", false, LC_BITFIELDTYPE }, 1265 1.180 rillig { "FALLTHRU", false, LC_FALLTHROUGH }, 1266 1.180 rillig { "FALLTHROUGH", false, LC_FALLTHROUGH }, 1267 1.180 rillig { "FALL THROUGH", false, LC_FALLTHROUGH }, 1268 1.180 rillig { "fallthrough", false, LC_FALLTHROUGH }, 1269 1.180 rillig { "LINTLIBRARY", false, LC_LINTLIBRARY }, 1270 1.180 rillig { "LINTED", true, LC_LINTED }, 1271 1.180 rillig { "LONGLONG", false, LC_LONGLONG }, 1272 1.180 rillig { "NOSTRICT", true, LC_LINTED }, 1273 1.180 rillig { "NOTREACHED", false, LC_NOTREACHED }, 1274 1.180 rillig { "PRINTFLIKE", true, LC_PRINTFLIKE }, 1275 1.180 rillig { "PROTOLIB", true, LC_PROTOLIB }, 1276 1.180 rillig { "SCANFLIKE", true, LC_SCANFLIKE }, 1277 1.180 rillig { "VARARGS", true, LC_VARARGS }, 1278 1.1 rillig }; 1279 1.159 rillig char keywd[32]; 1280 1.1 rillig 1281 1.146 rillig bool seen_end_of_comment = false; 1282 1.1 rillig 1283 1.228 rillig while (c = read_byte(), isspace(c) != 0) 1284 1.1 rillig continue; 1285 1.1 rillig 1286 1.1 rillig /* Read the potential keyword to keywd */ 1287 1.223 rillig size_t l = 0; 1288 1.79 rillig while (c != EOF && l < sizeof(keywd) - 1 && 1289 1.228 rillig (isalpha(c) != 0 || isspace(c) != 0)) { 1290 1.228 rillig if (islower(c) != 0 && l > 0 && ch_isupper(keywd[0])) 1291 1.80 rillig break; 1292 1.1 rillig keywd[l++] = (char)c; 1293 1.135 rillig c = read_byte(); 1294 1.1 rillig } 1295 1.228 rillig while (l > 0 && ch_isspace(keywd[l - 1])) 1296 1.79 rillig l--; 1297 1.1 rillig keywd[l] = '\0'; 1298 1.1 rillig 1299 1.1 rillig /* look for the keyword */ 1300 1.223 rillig size_t i; 1301 1.181 rillig for (i = 0; i < sizeof(keywtab) / sizeof(keywtab[0]); i++) 1302 1.180 rillig if (strcmp(keywtab[i].name, keywd) == 0) 1303 1.181 rillig goto found_keyword; 1304 1.181 rillig goto skip_rest; 1305 1.1 rillig 1306 1.181 rillig found_keyword: 1307 1.228 rillig while (isspace(c) != 0) 1308 1.135 rillig c = read_byte(); 1309 1.1 rillig 1310 1.1 rillig /* read the argument, if the keyword accepts one and there is one */ 1311 1.223 rillig char arg[32]; 1312 1.1 rillig l = 0; 1313 1.1 rillig if (keywtab[i].arg) { 1314 1.228 rillig while (isdigit(c) != 0 && l < sizeof(arg) - 1) { 1315 1.1 rillig arg[l++] = (char)c; 1316 1.135 rillig c = read_byte(); 1317 1.1 rillig } 1318 1.1 rillig } 1319 1.1 rillig arg[l] = '\0'; 1320 1.223 rillig int a = l != 0 ? atoi(arg) : -1; 1321 1.1 rillig 1322 1.228 rillig while (isspace(c) != 0) 1323 1.135 rillig c = read_byte(); 1324 1.1 rillig 1325 1.146 rillig seen_end_of_comment = c == '*' && (c = read_byte()) == '/'; 1326 1.180 rillig if (!seen_end_of_comment && keywtab[i].comment != LC_LINTED) 1327 1.146 rillig /* extra characters in lint comment */ 1328 1.146 rillig warning(257); 1329 1.1 rillig 1330 1.180 rillig handle_lint_comment(keywtab[i].comment, a); 1331 1.1 rillig 1332 1.1 rillig skip_rest: 1333 1.146 rillig while (!seen_end_of_comment) { 1334 1.146 rillig int lc = c; 1335 1.135 rillig if ((c = read_byte()) == EOF) { 1336 1.1 rillig /* unterminated comment */ 1337 1.1 rillig error(256); 1338 1.1 rillig break; 1339 1.1 rillig } 1340 1.1 rillig if (lc == '*' && c == '/') 1341 1.146 rillig seen_end_of_comment = true; 1342 1.1 rillig } 1343 1.1 rillig } 1344 1.1 rillig 1345 1.1 rillig void 1346 1.6 rillig lex_slash_slash_comment(void) 1347 1.1 rillig { 1348 1.1 rillig 1349 1.121 rillig if (!allow_c99 && !allow_gcc) 1350 1.170 rillig /* %s does not support '//' comments */ 1351 1.121 rillig gnuism(312, allow_c90 ? "C90" : "traditional C"); 1352 1.1 rillig 1353 1.223 rillig for (int c; c = read_byte(), c != EOF && c != '\n';) 1354 1.1 rillig continue; 1355 1.1 rillig } 1356 1.1 rillig 1357 1.1 rillig void 1358 1.223 rillig reset_suppressions(void) 1359 1.1 rillig { 1360 1.1 rillig 1361 1.1 rillig lwarn = LWARN_ALL; 1362 1.179 rillig suppress_longlong = false; 1363 1.1 rillig } 1364 1.1 rillig 1365 1.1 rillig int 1366 1.1 rillig lex_string(void) 1367 1.1 rillig { 1368 1.232 rillig buffer *buf = xmalloc(sizeof(*buf)); 1369 1.232 rillig *buf = lex_quoted('"', false); 1370 1.232 rillig yylval.y_string = buf; 1371 1.1 rillig return T_STRING; 1372 1.1 rillig } 1373 1.1 rillig 1374 1.209 rillig static size_t 1375 1.209 rillig wide_length(const buffer *buf) 1376 1.209 rillig { 1377 1.209 rillig 1378 1.209 rillig (void)mblen(NULL, 0); 1379 1.209 rillig size_t len = 0, i = 0; 1380 1.209 rillig while (i < buf->len) { 1381 1.209 rillig int n = mblen(buf->data + i, MB_CUR_MAX); 1382 1.209 rillig if (n == -1) { 1383 1.209 rillig /* invalid multibyte character */ 1384 1.209 rillig error(291); 1385 1.209 rillig break; 1386 1.209 rillig } 1387 1.209 rillig i += n > 1 ? n : 1; 1388 1.209 rillig len++; 1389 1.209 rillig } 1390 1.209 rillig return len; 1391 1.209 rillig } 1392 1.209 rillig 1393 1.1 rillig int 1394 1.6 rillig lex_wide_string(void) 1395 1.1 rillig { 1396 1.232 rillig buffer buf = lex_quoted('"', true); 1397 1.1 rillig 1398 1.212 rillig buffer str; 1399 1.212 rillig buf_init(&str); 1400 1.222 rillig quoted_iterator it = { .end = 0 }; 1401 1.232 rillig while (quoted_next(&buf, &it)) 1402 1.212 rillig buf_add_char(&str, (char)it.value); 1403 1.1 rillig 1404 1.232 rillig free(buf.data); 1405 1.1 rillig 1406 1.232 rillig buffer *len_buf = xcalloc(1, sizeof(*len_buf)); 1407 1.232 rillig len_buf->len = wide_length(&str); 1408 1.232 rillig yylval.y_string = len_buf; 1409 1.1 rillig return T_STRING; 1410 1.1 rillig } 1411 1.1 rillig 1412 1.105 rillig void 1413 1.105 rillig lex_next_line(void) 1414 1.105 rillig { 1415 1.105 rillig curr_pos.p_line++; 1416 1.105 rillig curr_pos.p_uniq = 0; 1417 1.189 rillig debug_skip_indent(); 1418 1.187 rillig debug_printf("parsing %s:%d\n", curr_pos.p_file, curr_pos.p_line); 1419 1.105 rillig if (curr_pos.p_file == csrc_pos.p_file) { 1420 1.105 rillig csrc_pos.p_line++; 1421 1.105 rillig csrc_pos.p_uniq = 0; 1422 1.105 rillig } 1423 1.105 rillig } 1424 1.105 rillig 1425 1.105 rillig void 1426 1.105 rillig lex_unknown_character(int c) 1427 1.105 rillig { 1428 1.105 rillig 1429 1.105 rillig /* unknown character \%o */ 1430 1.105 rillig error(250, c); 1431 1.105 rillig } 1432 1.105 rillig 1433 1.1 rillig /* 1434 1.113 rillig * The scanner does not create new symbol table entries for symbols it cannot 1435 1.113 rillig * find in the symbol table. This is to avoid putting undeclared symbols into 1436 1.113 rillig * the symbol table if a syntax error occurs. 1437 1.1 rillig * 1438 1.113 rillig * getsym is called as soon as it is probably ok to put the symbol in the 1439 1.53 rillig * symbol table. It is still possible that symbols are put in the symbol 1440 1.53 rillig * table that are not completely declared due to syntax errors. To avoid too 1441 1.113 rillig * many problems in this case, symbols get type 'int' in getsym. 1442 1.1 rillig * 1443 1.113 rillig * XXX calls to getsym should be delayed until declare_1_* is called. 1444 1.1 rillig */ 1445 1.1 rillig sym_t * 1446 1.1 rillig getsym(sbuf_t *sb) 1447 1.1 rillig { 1448 1.1 rillig 1449 1.143 rillig sym_t *sym = sb->sb_sym; 1450 1.1 rillig 1451 1.1 rillig /* 1452 1.195 rillig * During member declaration it is possible that name() looked for 1453 1.200 rillig * symbols of type SK_VCFT, although it should have looked for symbols 1454 1.200 rillig * of type SK_TAG. Same can happen for labels. Both cases are 1455 1.200 rillig * compensated here. 1456 1.1 rillig */ 1457 1.200 rillig if (sym_kind == SK_MEMBER || sym_kind == SK_LABEL) { 1458 1.200 rillig if (sym == NULL || sym->s_kind == SK_VCFT) 1459 1.143 rillig sym = symtab_search(sb->sb_name); 1460 1.1 rillig } 1461 1.1 rillig 1462 1.1 rillig if (sym != NULL) { 1463 1.200 rillig lint_assert(sym->s_kind == sym_kind); 1464 1.200 rillig set_sym_kind(SK_VCFT); 1465 1.100 rillig free(sb); 1466 1.1 rillig return sym; 1467 1.1 rillig } 1468 1.1 rillig 1469 1.1 rillig /* create a new symbol table entry */ 1470 1.1 rillig 1471 1.164 rillig decl_level *dl; 1472 1.200 rillig if (sym_kind == SK_LABEL) { 1473 1.188 rillig sym = level_zero_alloc(1, sizeof(*sym), "sym"); 1474 1.188 rillig char *s = level_zero_alloc(1, sb->sb_len + 1, "string"); 1475 1.1 rillig (void)memcpy(s, sb->sb_name, sb->sb_len + 1); 1476 1.1 rillig sym->s_name = s; 1477 1.12 rillig sym->s_block_level = 1; 1478 1.164 rillig dl = dcs; 1479 1.164 rillig while (dl->d_enclosing != NULL && 1480 1.164 rillig dl->d_enclosing->d_enclosing != NULL) 1481 1.164 rillig dl = dl->d_enclosing; 1482 1.164 rillig lint_assert(dl->d_kind == DLK_AUTO); 1483 1.1 rillig } else { 1484 1.188 rillig sym = block_zero_alloc(sizeof(*sym), "sym"); 1485 1.1 rillig sym->s_name = sb->sb_name; 1486 1.12 rillig sym->s_block_level = block_level; 1487 1.164 rillig dl = dcs; 1488 1.1 rillig } 1489 1.1 rillig 1490 1.163 rillig sym->s_def_pos = unique_curr_pos(); 1491 1.200 rillig if ((sym->s_kind = sym_kind) != SK_LABEL) 1492 1.1 rillig sym->s_type = gettyp(INT); 1493 1.1 rillig 1494 1.200 rillig set_sym_kind(SK_VCFT); 1495 1.1 rillig 1496 1.133 rillig if (!in_gcc_attribute) { 1497 1.190 rillig debug_printf("%s: symtab_add ", __func__); 1498 1.190 rillig debug_sym("", sym, "\n"); 1499 1.133 rillig symtab_add(sym); 1500 1.1 rillig 1501 1.164 rillig *dl->d_last_dlsym = sym; 1502 1.164 rillig dl->d_last_dlsym = &sym->s_level_next; 1503 1.133 rillig } 1504 1.1 rillig 1505 1.100 rillig free(sb); 1506 1.1 rillig return sym; 1507 1.1 rillig } 1508 1.1 rillig 1509 1.1 rillig /* 1510 1.146 rillig * Construct a temporary symbol. The symbol name starts with a digit to avoid 1511 1.146 rillig * name clashes with other identifiers. 1512 1.1 rillig */ 1513 1.1 rillig sym_t * 1514 1.112 rillig mktempsym(type_t *tp) 1515 1.1 rillig { 1516 1.112 rillig static unsigned n = 0; 1517 1.188 rillig char *s = level_zero_alloc((size_t)block_level, 64, "string"); 1518 1.188 rillig sym_t *sym = block_zero_alloc(sizeof(*sym), "sym"); 1519 1.28 rillig scl_t scl; 1520 1.1 rillig 1521 1.112 rillig (void)snprintf(s, 64, "%.8u_tmp", n++); 1522 1.1 rillig 1523 1.28 rillig scl = dcs->d_scl; 1524 1.193 rillig if (scl == NO_SCL) 1525 1.28 rillig scl = block_level > 0 ? AUTO : EXTERN; 1526 1.28 rillig 1527 1.1 rillig sym->s_name = s; 1528 1.112 rillig sym->s_type = tp; 1529 1.12 rillig sym->s_block_level = block_level; 1530 1.28 rillig sym->s_scl = scl; 1531 1.200 rillig sym->s_kind = SK_VCFT; 1532 1.1 rillig sym->s_used = true; 1533 1.1 rillig sym->s_set = true; 1534 1.1 rillig 1535 1.60 rillig symtab_add(sym); 1536 1.1 rillig 1537 1.162 rillig *dcs->d_last_dlsym = sym; 1538 1.162 rillig dcs->d_last_dlsym = &sym->s_level_next; 1539 1.1 rillig 1540 1.1 rillig return sym; 1541 1.1 rillig } 1542 1.1 rillig 1543 1.1 rillig void 1544 1.223 rillig symtab_remove_forever(sym_t *sym) 1545 1.1 rillig { 1546 1.1 rillig 1547 1.223 rillig debug_step("%s '%s' %s '%s'", __func__, 1548 1.200 rillig sym->s_name, symbol_kind_name(sym->s_kind), 1549 1.200 rillig type_name(sym->s_type)); 1550 1.61 rillig symtab_remove(sym); 1551 1.61 rillig 1552 1.61 rillig /* avoid that the symbol will later be put back to the symbol table */ 1553 1.12 rillig sym->s_block_level = -1; 1554 1.1 rillig } 1555 1.1 rillig 1556 1.1 rillig /* 1557 1.113 rillig * Remove all symbols from the symbol table that have the same level as the 1558 1.113 rillig * given symbol. 1559 1.1 rillig */ 1560 1.1 rillig void 1561 1.165 rillig symtab_remove_level(sym_t *syms) 1562 1.1 rillig { 1563 1.1 rillig 1564 1.187 rillig if (syms != NULL) 1565 1.187 rillig debug_step("%s %d", __func__, syms->s_block_level); 1566 1.187 rillig 1567 1.113 rillig /* Note the use of s_level_next instead of s_symtab_next. */ 1568 1.165 rillig for (sym_t *sym = syms; sym != NULL; sym = sym->s_level_next) { 1569 1.12 rillig if (sym->s_block_level != -1) { 1570 1.187 rillig debug_step("%s '%s' %s '%s' %d", __func__, 1571 1.200 rillig sym->s_name, symbol_kind_name(sym->s_kind), 1572 1.200 rillig type_name(sym->s_type), sym->s_block_level); 1573 1.61 rillig symtab_remove(sym); 1574 1.102 rillig sym->s_symtab_ref = NULL; 1575 1.1 rillig } 1576 1.1 rillig } 1577 1.1 rillig } 1578 1.1 rillig 1579 1.146 rillig /* Put a symbol into the symbol table. */ 1580 1.1 rillig void 1581 1.113 rillig inssym(int level, sym_t *sym) 1582 1.1 rillig { 1583 1.1 rillig 1584 1.187 rillig debug_step("%s '%s' %s '%s' %d", __func__, 1585 1.200 rillig sym->s_name, symbol_kind_name(sym->s_kind), 1586 1.200 rillig type_name(sym->s_type), level); 1587 1.187 rillig sym->s_block_level = level; 1588 1.60 rillig symtab_add(sym); 1589 1.113 rillig 1590 1.146 rillig const sym_t *next = sym->s_symtab_next; 1591 1.146 rillig if (next != NULL) 1592 1.146 rillig lint_assert(sym->s_block_level >= next->s_block_level); 1593 1.1 rillig } 1594 1.1 rillig 1595 1.146 rillig /* Called at level 0 after syntax errors. */ 1596 1.1 rillig void 1597 1.109 rillig clean_up_after_error(void) 1598 1.1 rillig { 1599 1.1 rillig 1600 1.104 rillig symtab_remove_locals(); 1601 1.1 rillig 1602 1.143 rillig while (mem_block_level > 0) 1603 1.143 rillig level_free_all(mem_block_level--); 1604 1.1 rillig } 1605 1.1 rillig 1606 1.113 rillig /* Create a new symbol with the same name as an existing symbol. */ 1607 1.1 rillig sym_t * 1608 1.21 rillig pushdown(const sym_t *sym) 1609 1.1 rillig { 1610 1.1 rillig 1611 1.98 rillig debug_step("pushdown '%s' %s '%s'", 1612 1.200 rillig sym->s_name, symbol_kind_name(sym->s_kind), 1613 1.200 rillig type_name(sym->s_type)); 1614 1.223 rillig 1615 1.223 rillig sym_t *nsym = block_zero_alloc(sizeof(*nsym), "sym"); 1616 1.12 rillig lint_assert(sym->s_block_level <= block_level); 1617 1.1 rillig nsym->s_name = sym->s_name; 1618 1.163 rillig nsym->s_def_pos = unique_curr_pos(); 1619 1.1 rillig nsym->s_kind = sym->s_kind; 1620 1.12 rillig nsym->s_block_level = block_level; 1621 1.1 rillig 1622 1.60 rillig symtab_add(nsym); 1623 1.1 rillig 1624 1.162 rillig *dcs->d_last_dlsym = nsym; 1625 1.162 rillig dcs->d_last_dlsym = &nsym->s_level_next; 1626 1.1 rillig 1627 1.1 rillig return nsym; 1628 1.1 rillig } 1629 1.1 rillig 1630 1.226 rillig static void 1631 1.226 rillig fill_token(int tk, const char *text, token *tok) 1632 1.226 rillig { 1633 1.226 rillig switch (tk) { 1634 1.226 rillig case T_NAME: 1635 1.226 rillig case T_TYPENAME: 1636 1.226 rillig tok->kind = TK_IDENTIFIER; 1637 1.226 rillig tok->u.identifier = xstrdup(yylval.y_name->sb_name); 1638 1.226 rillig break; 1639 1.226 rillig case T_CON: 1640 1.226 rillig tok->kind = TK_CONSTANT; 1641 1.226 rillig tok->u.constant = *yylval.y_val; 1642 1.226 rillig break; 1643 1.226 rillig case T_NAMED_CONSTANT: 1644 1.226 rillig tok->kind = TK_IDENTIFIER; 1645 1.226 rillig tok->u.identifier = xstrdup(text); 1646 1.226 rillig break; 1647 1.226 rillig case T_STRING:; 1648 1.226 rillig tok->kind = TK_STRING_LITERALS; 1649 1.226 rillig tok->u.string_literals.len = yylval.y_string->len; 1650 1.226 rillig tok->u.string_literals.cap = yylval.y_string->cap; 1651 1.226 rillig tok->u.string_literals.data = xstrdup(yylval.y_string->data); 1652 1.226 rillig break; 1653 1.226 rillig default: 1654 1.226 rillig tok->kind = TK_PUNCTUATOR; 1655 1.226 rillig tok->u.punctuator = xstrdup(text); 1656 1.226 rillig } 1657 1.226 rillig } 1658 1.226 rillig 1659 1.226 rillig static void 1660 1.226 rillig seq_reserve(balanced_token_sequence *seq) 1661 1.226 rillig { 1662 1.226 rillig if (seq->len >= seq->cap) { 1663 1.226 rillig seq->cap = 16 + 2 * seq->cap; 1664 1.226 rillig const balanced_token *old_tokens = seq->tokens; 1665 1.226 rillig balanced_token *new_tokens = block_zero_alloc( 1666 1.227 rillig seq->cap * sizeof(*seq->tokens), "balanced_token[]"); 1667 1.227 rillig if (seq->len > 0) 1668 1.227 rillig memcpy(new_tokens, old_tokens, 1669 1.227 rillig seq->len * sizeof(*seq->tokens)); 1670 1.226 rillig seq->tokens = new_tokens; 1671 1.226 rillig } 1672 1.226 rillig } 1673 1.226 rillig 1674 1.226 rillig static balanced_token_sequence 1675 1.226 rillig read_balanced(int opening) 1676 1.226 rillig { 1677 1.226 rillig int closing = opening == T_LPAREN ? T_RPAREN 1678 1.226 rillig : opening == T_LBRACK ? T_RBRACK : T_RBRACE; 1679 1.226 rillig balanced_token_sequence seq = { NULL, 0, 0 }; 1680 1.226 rillig 1681 1.226 rillig int tok; 1682 1.226 rillig while (tok = yylex(), tok > 0 && tok != closing) { 1683 1.226 rillig seq_reserve(&seq); 1684 1.226 rillig if (tok == T_LPAREN || tok == T_LBRACK || tok == T_LBRACE) { 1685 1.226 rillig seq.tokens[seq.len].kind = tok == T_LPAREN ? '(' 1686 1.226 rillig : tok == T_LBRACK ? '[' : '{'; 1687 1.226 rillig seq.tokens[seq.len].u.tokens = read_balanced(tok); 1688 1.226 rillig } else { 1689 1.226 rillig fill_token(tok, yytext, &seq.tokens[seq.len].u.token); 1690 1.226 rillig freeyyv(&yylval, tok); 1691 1.226 rillig } 1692 1.226 rillig seq.len++; 1693 1.226 rillig } 1694 1.226 rillig return seq; 1695 1.226 rillig } 1696 1.226 rillig 1697 1.226 rillig balanced_token_sequence 1698 1.226 rillig lex_balanced(void) 1699 1.226 rillig { 1700 1.226 rillig return read_balanced(T_LPAREN); 1701 1.226 rillig } 1702 1.226 rillig 1703 1.1 rillig /* 1704 1.1 rillig * Free any dynamically allocated memory referenced by 1705 1.1 rillig * the value stack or yylval. 1706 1.1 rillig * The type of information in yylval is described by tok. 1707 1.1 rillig */ 1708 1.1 rillig void 1709 1.1 rillig freeyyv(void *sp, int tok) 1710 1.1 rillig { 1711 1.1 rillig if (tok == T_NAME || tok == T_TYPENAME) { 1712 1.1 rillig sbuf_t *sb = *(sbuf_t **)sp; 1713 1.100 rillig free(sb); 1714 1.1 rillig } else if (tok == T_CON) { 1715 1.1 rillig val_t *val = *(val_t **)sp; 1716 1.1 rillig free(val); 1717 1.1 rillig } else if (tok == T_STRING) { 1718 1.205 rillig buffer *str = *(buffer **)sp; 1719 1.205 rillig free(str->data); 1720 1.205 rillig free(str); 1721 1.1 rillig } 1722 1.1 rillig } 1723