1 /* $NetBSD: lex.c,v 1.240 2026/01/11 18:11:38 rillig Exp $ */ 2 3 /* 4 * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. 5 * Copyright (c) 1994, 1995 Jochen Pohl 6 * All Rights Reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Jochen Pohl for 19 * The NetBSD Project. 20 * 4. The name of the author may not be used to endorse or promote products 21 * derived from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 */ 34 35 #if HAVE_NBTOOL_CONFIG_H 36 #include "nbtool_config.h" 37 #endif 38 39 #include <sys/cdefs.h> 40 #if defined(__RCSID) 41 __RCSID("$NetBSD: lex.c,v 1.240 2026/01/11 18:11:38 rillig Exp $"); 42 #endif 43 44 #include <ctype.h> 45 #include <errno.h> 46 #include <float.h> 47 #include <limits.h> 48 #include <math.h> 49 #include <stdlib.h> 50 #include <string.h> 51 52 #include "lint1.h" 53 #include "cgram.h" 54 55 #define CHAR_MASK ((1U << CHAR_SIZE) - 1) 56 57 58 /* Current position (it's also updated when an included file is parsed) */ 59 pos_t curr_pos = { "", 1, 0 }; 60 61 /* 62 * Current position in C source (not updated when an included file is 63 * parsed). 64 */ 65 pos_t csrc_pos = { "", 1, 0 }; 66 67 bool in_gcc_attribute; 68 bool in_system_header; 69 70 /* 71 * Define a keyword that cannot be overridden by identifiers. 72 * 73 * Valid values for 'since' are 78, 90, 99, 11, 23. 74 * 75 * The C11 keywords are all taken from the reserved namespace. They are added 76 * in C99 mode as well, to make the parse error messages more useful. For 77 * example, if the keyword '_Generic' were not defined, it would be interpreted 78 * as an implicit function call, leading to a parse error. 79 * 80 * The C23 keywords are not made available in earlier modes, as they may 81 * conflict with user-defined identifiers. 
82 */ 83 #define kwdef(name, token, detail, since, gcc, deco) \ 84 { \ 85 name, token, detail, \ 86 (since) == 90, \ 87 (since) == 99 || (since) == 11, \ 88 (since) == 23, \ 89 (gcc) > 0, \ 90 ((deco) & 1) != 0, ((deco) & 2) != 0, ((deco) & 4) != 0, \ 91 } 92 #define kwdef_token(name, token, since, gcc, deco) \ 93 kwdef(name, token, {false}, since, gcc, deco) 94 #define kwdef_sclass(name, sclass, since, gcc, deco) \ 95 kwdef(name, T_SCLASS, .u.kw_scl = (sclass), since, gcc, deco) 96 #define kwdef_type(name, tspec, since) \ 97 kwdef(name, T_TYPE, .u.kw_tspec = (tspec), since, 0, 1) 98 #define kwdef_tqual(name, tqual, since, gcc, deco) \ 99 kwdef(name, T_QUAL, .u.kw_tqual = {.tqual = true}, since, gcc, deco) 100 #define kwdef_const(name, named_constant, since, gcc, deco) \ 101 kwdef(name, T_NAMED_CONSTANT, \ 102 .u.kw_named_constant = (named_constant), since, gcc, deco) 103 #define kwdef_keyword(name, token) \ 104 kwdef(name, token, {false}, 78, 0, 1) 105 106 /* During initialization, these keywords are written to the symbol table. */ 107 static const struct keyword { 108 const char kw_name[20]; 109 int kw_token; /* token to be returned by yylex() */ 110 union { 111 bool kw_dummy; 112 scl_t kw_scl; /* if kw_token is T_SCLASS */ 113 tspec_t kw_tspec; /* if kw_token is T_TYPE or 114 * T_STRUCT_OR_UNION */ 115 type_qualifiers kw_tqual; /* if kw_token is T_QUAL */ 116 function_specifier kw_fs; /* if kw_token is 117 * T_FUNCTION_SPECIFIER */ 118 named_constant kw_named_constant; 119 } u; 120 bool kw_added_in_c90:1; 121 bool kw_added_in_c99_or_c11:1; 122 bool kw_added_in_c23:1; 123 bool kw_gcc:1; /* available in GCC mode */ 124 bool kw_plain:1; /* 'name' */ 125 bool kw_leading:1; /* '__name' */ 126 bool kw_both:1; /* '__name__' */ 127 } keywords[] = { 128 // TODO: _Alignas is not available in C99. 129 kwdef_keyword( "_Alignas", T_ALIGNAS), 130 // TODO: _Alignof is not available in C99. 131 kwdef_keyword( "_Alignof", T_ALIGNOF), 132 // TODO: alignof is not available in C99. 
133 kwdef_token( "alignof", T_ALIGNOF, 78,0,6), 134 kwdef_token( "asm", T_ASM, 78,1,7), 135 kwdef_token( "_Atomic", T_ATOMIC, 11,0,1), 136 kwdef("__auto_type", T_TYPE, .u.kw_tspec = AUTO_TYPE, 99,1,1), 137 kwdef_token( "attribute", T_ATTRIBUTE, 78,1,6), 138 kwdef_sclass( "auto", AUTO, 78,0,1), 139 kwdef_type( "_Bool", BOOL, 99), 140 kwdef_type( "bool", BOOL, 23), 141 kwdef_keyword( "break", T_BREAK), 142 kwdef_token( "__builtin_offsetof", T_BUILTIN_OFFSETOF, 78,1,1), 143 kwdef_keyword( "case", T_CASE), 144 kwdef_type( "char", CHAR, 78), 145 kwdef_type( "_Complex", COMPLEX, 99), 146 kwdef_tqual( "const", tq_const, 90,0,7), 147 kwdef_keyword( "continue", T_CONTINUE), 148 kwdef_keyword( "default", T_DEFAULT), 149 kwdef_keyword( "do", T_DO), 150 kwdef_type( "double", DOUBLE, 78), 151 kwdef_keyword( "else", T_ELSE), 152 // XXX: enum requires C90 or later. 153 kwdef_keyword( "enum", T_ENUM), 154 kwdef_token( "__extension__",T_EXTENSION, 78,1,1), 155 kwdef_sclass( "extern", EXTERN, 78,0,1), 156 kwdef_const( "false", NC_FALSE, 23,0,1), 157 kwdef_type( "float", FLOAT, 78), 158 kwdef_keyword( "for", T_FOR), 159 kwdef_token( "_Generic", T_GENERIC, 11,0,1), 160 kwdef_keyword( "goto", T_GOTO), 161 kwdef_keyword( "if", T_IF), 162 kwdef_token( "__imag__", T_IMAG, 78,1,1), 163 kwdef("inline", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_INLINE, 99,0,7), 164 kwdef_type( "int", INT, 78), 165 kwdef_type( "__int128_t", INT128, 99), 166 kwdef_type( "long", LONG, 78), 167 kwdef("_Noreturn", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_NORETURN, 11,0,1), 168 kwdef_const( "nullptr", NC_NULLPTR, 23,0,1), 169 // XXX: __packed is GCC-specific. 
170 kwdef_token( "__packed", T_PACKED, 78,0,1), 171 kwdef_token( "__real__", T_REAL, 78,1,1), 172 kwdef_sclass( "register", REG, 78,0,1), 173 kwdef_tqual( "restrict", tq_restrict, 99,0,7), 174 kwdef_keyword( "return", T_RETURN), 175 kwdef_type( "short", SHORT, 78), 176 kwdef( "signed", T_TYPE, .u.kw_tspec = SIGNED, 90,0,3), 177 kwdef_keyword( "sizeof", T_SIZEOF), 178 kwdef_sclass( "static", STATIC, 78,0,1), 179 // XXX: _Static_assert was added in C11. 180 kwdef_keyword( "_Static_assert", T_STATIC_ASSERT), 181 kwdef("struct", T_STRUCT_OR_UNION, .u.kw_tspec = STRUCT, 78,0,1), 182 kwdef_keyword( "switch", T_SWITCH), 183 kwdef_token( "__symbolrename", T_SYMBOLRENAME, 78,0,1), 184 kwdef_sclass( "__thread", THREAD_LOCAL, 78,1,1), 185 kwdef_sclass( "_Thread_local", THREAD_LOCAL, 11,0,1), 186 kwdef_sclass( "thread_local", THREAD_LOCAL, 23,0,1), 187 kwdef_const( "true", NC_TRUE, 23,0,1), 188 kwdef_sclass( "typedef", TYPEDEF, 78,0,1), 189 kwdef_token( "typeof", T_TYPEOF, 78,1,7), 190 kwdef_type( "__uint128_t", UINT128, 99), 191 kwdef("union", T_STRUCT_OR_UNION, .u.kw_tspec = UNION, 78,0,1), 192 kwdef_type( "unsigned", UNSIGN, 78), 193 // XXX: void requires C90 or later. 194 kwdef_type( "void", VOID, 78), 195 kwdef_tqual( "volatile", tq_volatile, 90,0,7), 196 kwdef_keyword( "while", T_WHILE), 197 #undef kwdef 198 #undef kwdef_token 199 #undef kwdef_sclass 200 #undef kwdef_type 201 #undef kwdef_tqual 202 #undef kwdef_keyword 203 }; 204 205 /* 206 * The symbol table containing all keywords, identifiers and labels. The hash 207 * entries are linked via sym_t.s_symtab_next. 208 */ 209 static sym_t *symtab[503]; 210 211 /* 212 * The kind of the next expected symbol, to distinguish the namespaces of 213 * members, labels, type tags and other identifiers. 
214 */ 215 symbol_kind sym_kind; 216 217 218 static unsigned int 219 hash(const char *s) 220 { 221 unsigned int v = 0; 222 for (const char *p = s; *p != '\0'; p++) { 223 v = (v << 4) + (unsigned char)*p; 224 v ^= v >> 28; 225 } 226 return v % (sizeof(symtab) / sizeof(symtab[0])); 227 } 228 229 static void 230 symtab_add(sym_t *sym) 231 { 232 unsigned int h = hash(sym->s_name); 233 if ((sym->s_symtab_next = symtab[h]) != NULL) 234 symtab[h]->s_symtab_ref = &sym->s_symtab_next; 235 sym->s_symtab_ref = &symtab[h]; 236 symtab[h] = sym; 237 } 238 239 static sym_t * 240 symtab_search(const char *name) 241 { 242 243 unsigned int h = hash(name); 244 for (sym_t *sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) { 245 if (strcmp(sym->s_name, name) != 0) 246 continue; 247 if (sym->s_keyword != NULL || 248 sym->s_kind == sym_kind || 249 in_gcc_attribute) 250 return sym; 251 } 252 253 return NULL; 254 } 255 256 static void 257 symtab_remove(sym_t *sym) 258 { 259 260 if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL) 261 sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref; 262 sym->s_symtab_next = NULL; 263 } 264 265 static void 266 symtab_remove_locals(void) 267 { 268 269 for (size_t i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++) { 270 for (sym_t *sym = symtab[i]; sym != NULL; ) { 271 sym_t *next = sym->s_symtab_next; 272 if (sym->s_block_level >= 1) 273 symtab_remove(sym); 274 sym = next; 275 } 276 } 277 } 278 279 #ifdef DEBUG 280 static int 281 sym_by_name(const void *va, const void *vb) 282 { 283 const sym_t *a = *(const sym_t *const *)va; 284 const sym_t *b = *(const sym_t *const *)vb; 285 286 return strcmp(a->s_name, b->s_name); 287 } 288 289 struct syms { 290 const sym_t **items; 291 size_t len; 292 size_t cap; 293 }; 294 295 static void 296 syms_add(struct syms *syms, const sym_t *sym) 297 { 298 if (syms->len >= syms->cap) { 299 syms->cap *= 2; 300 syms->items = xrealloc(syms->items, 301 syms->cap * sizeof(syms->items[0])); 302 } 303 
syms->items[syms->len++] = sym; 304 } 305 306 void 307 debug_symtab(void) 308 { 309 struct syms syms = { xcalloc(64, sizeof(syms.items[0])), 0, 64 }; 310 311 debug_enter(); 312 for (int level = -1;; level++) { 313 bool more = false; 314 size_t n = sizeof(symtab) / sizeof(symtab[0]); 315 316 syms.len = 0; 317 for (size_t i = 0; i < n; i++) { 318 for (sym_t *sym = symtab[i]; sym != NULL;) { 319 if (sym->s_block_level == level && 320 sym->s_keyword == NULL) 321 syms_add(&syms, sym); 322 if (sym->s_block_level > level) 323 more = true; 324 sym = sym->s_symtab_next; 325 } 326 } 327 328 if (syms.len > 0) { 329 debug_step("symbol table level %d", level); 330 debug_indent_inc(); 331 qsort(syms.items, syms.len, sizeof(syms.items[0]), 332 sym_by_name); 333 for (size_t i = 0; i < syms.len; i++) 334 debug_sym("", syms.items[i], "\n"); 335 debug_indent_dec(); 336 337 lint_assert(level != -1); 338 } 339 340 if (!more) 341 break; 342 } 343 debug_leave(); 344 345 free(syms.items); 346 } 347 #endif 348 349 static void 350 register_keyword(const struct keyword *kw, bool leading, bool trailing) 351 { 352 353 const char *name; 354 if (!leading && !trailing) { 355 name = kw->kw_name; 356 } else { 357 char buf[256]; 358 (void)snprintf(buf, sizeof(buf), "%s%s%s", 359 leading ? "__" : "", kw->kw_name, trailing ? 
"__" : ""); 360 name = xstrdup(buf); 361 } 362 363 sym_t *sym = block_zero_alloc(sizeof(*sym), "sym"); 364 sym->s_name = name; 365 sym->s_keyword = kw; 366 int tok = kw->kw_token; 367 sym->u.s_keyword.sk_token = tok; 368 if (tok == T_TYPE || tok == T_STRUCT_OR_UNION) 369 sym->u.s_keyword.u.sk_tspec = kw->u.kw_tspec; 370 if (tok == T_SCLASS) 371 sym->s_scl = kw->u.kw_scl; 372 if (tok == T_QUAL) 373 sym->u.s_keyword.u.sk_type_qualifier = kw->u.kw_tqual; 374 if (tok == T_FUNCTION_SPECIFIER) 375 sym->u.s_keyword.u.function_specifier = kw->u.kw_fs; 376 if (tok == T_NAMED_CONSTANT) 377 sym->u.s_keyword.u.named_constant = kw->u.kw_named_constant; 378 379 symtab_add(sym); 380 } 381 382 static bool 383 is_keyword_known(const struct keyword *kw) 384 { 385 386 if (kw->kw_added_in_c23 && !allow_c23) 387 return false; 388 if ((kw->kw_added_in_c90 || kw->kw_added_in_c99_or_c11) && !allow_c90) 389 return false; 390 391 /* 392 * In the 1990s, GCC defined several keywords that were later 393 * incorporated into C99, therefore in GCC mode, all C99 keywords are 394 * made available. The C11 keywords are made available as well, but 395 * there are so few that they don't matter practically. 396 */ 397 if (allow_gcc) 398 return true; 399 if (kw->kw_gcc) 400 return false; 401 402 if (kw->kw_added_in_c99_or_c11 && !allow_c99) 403 return false; 404 return true; 405 } 406 407 /* Write all keywords to the symbol table. */ 408 void 409 init_lex(void) 410 { 411 412 size_t n = sizeof(keywords) / sizeof(keywords[0]); 413 for (size_t i = 0; i < n; i++) { 414 const struct keyword *kw = keywords + i; 415 if (!is_keyword_known(kw)) 416 continue; 417 if (kw->kw_plain) 418 register_keyword(kw, false, false); 419 if (kw->kw_leading) 420 register_keyword(kw, true, false); 421 if (kw->kw_both) 422 register_keyword(kw, true, true); 423 } 424 } 425 426 /* 427 * When scanning the remainder of a long token (see lex_input), read a byte 428 * and return it as an unsigned char or as EOF. 
 *
 * Increment the line counts if necessary.
 */
static int
read_byte(void)
{
	int c = lex_input();

	if (c == '\n')
		lex_next_line();
	return c == '\0' ? EOF : c;	/* lex returns 0 on EOF. */
}

/*
 * Copy the payload of a keyword symbol into yylval, for those token kinds
 * that carry one, and return the keyword's token.
 */
static int
lex_keyword(sym_t *sym)
{
	int tok = sym->u.s_keyword.sk_token;

	if (tok == T_SCLASS)
		yylval.y_scl = sym->s_scl;
	if (tok == T_TYPE || tok == T_STRUCT_OR_UNION)
		yylval.y_tspec = sym->u.s_keyword.u.sk_tspec;
	if (tok == T_QUAL)
		yylval.y_type_qualifiers =
		    sym->u.s_keyword.u.sk_type_qualifier;
	if (tok == T_FUNCTION_SPECIFIER)
		yylval.y_function_specifier =
		    sym->u.s_keyword.u.function_specifier;
	if (tok == T_NAMED_CONSTANT)
		yylval.y_named_constant = sym->u.s_keyword.u.named_constant;
	return tok;
}

/*
 * Look up the definition of a name in the symbol table. This symbol must
 * either be a keyword or a symbol of the type required by sym_kind (label,
 * member, tag, ...).
 *
 * Returns the keyword's token, or T_TYPENAME for a known typedef name, or
 * T_NAME otherwise.  For non-keywords, yylval.y_name is set to a newly
 * allocated sbuf_t that carries the name and the symbol found, if any.
 */
extern int
lex_name(const char *text, size_t len)
{

	sym_t *sym = symtab_search(text);
	if (sym != NULL && sym->s_keyword != NULL)
		return lex_keyword(sym);

	sbuf_t *sb = xmalloc(sizeof(*sb));
	sb->sb_len = len;
	sb->sb_sym = sym;	/* NULL if the name is not yet known */
	yylval.y_name = sb;

	if (sym != NULL) {
		lint_assert(block_level >= sym->s_block_level);
		/* Share the name that is already stored in the symbol. */
		sb->sb_name = sym->s_name;
		return sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME;
	}

	/*
	 * Copy len + 1 bytes; presumably text[len] is the terminating '\0',
	 * verify against the caller.
	 */
	char *name = block_zero_alloc(len + 1, "string");
	(void)memcpy(name, text, len + 1);
	sb->sb_name = name;
	return T_NAME;
}

/*
 * Determine the type of an integer constant that has no 'u' suffix.
 *
 * ls is the number of 'l' suffixes, ui the value, base the radix of the
 * constant.  If the value does not fit and 'warned' is false, warning 252 is
 * issued.
 */
static tspec_t
integer_constant_type_signed(unsigned ls, uint64_t ui, int base, bool warned)
{
	/* No suffix: int, then unsigned int (not for decimal), then long. */
	if (ls == 0 && ui <= TARG_INT_MAX)
		return INT;
	if (ls == 0 && ui <= TARG_UINT_MAX && base != 10 && allow_c90)
		return UINT;
	if (ls == 0 && ui <= TARG_LONG_MAX)
		return LONG;

	/* At most one 'l': long, then unsigned long (not for decimal). */
	if (ls <= 1 && ui <= TARG_LONG_MAX)
		return LONG;
	if (ls <= 1 && ui <= TARG_ULONG_MAX && base != 10)
		return allow_c90 ? ULONG : LONG;
	if (ls <= 1 && !allow_c99) {
		/* Without C99, there is no long long to fall back to. */
		if (!warned)
			/* integer constant out of range */
			warning(252);
		return allow_c90 ? ULONG : LONG;
	}

	if (ui <= TARG_LLONG_MAX)
		return LLONG;
	if (ui <= TARG_ULLONG_MAX && base != 10)
		return allow_c90 ? ULLONG : LLONG;
	if (!warned)
		/* integer constant out of range */
		warning(252);
	return allow_c90 ? ULLONG : LLONG;
}

/*
 * Determine the type of an integer constant that has a 'u' suffix.
 *
 * l is the number of 'l' suffixes and ui the value.  If the value does not
 * fit and 'warned' is false, warning 252 is issued.
 */
static tspec_t
integer_constant_type_unsigned(unsigned l, uint64_t ui, bool warned)
{
	if (l == 0 && ui <= TARG_UINT_MAX)
		return UINT;

	if (l <= 1 && ui <= TARG_ULONG_MAX)
		return ULONG;
	if (l <= 1 && !allow_c99) {
		if (!warned)
			/* integer constant out of range */
			warning(252);
		return ULONG;
	}

	if (ui <= TARG_ULLONG_MAX)
		return ULLONG;
	if (!warned)
		/* integer constant out of range */
		warning(252);
	return ULLONG;
}

/*
 * Convert the text of an integer constant in the given base (2, 8, 10 or
 * 16 are the bases handled here) to a value, determine its type from the
 * value and the suffixes, store the result in yylval.y_val and return T_CON.
 */
int
lex_integer_constant(const char *text, size_t len, int base)
{
	const char *cp = text;

	/* skip 0[xX] or 0[bB] */
	if (base == 16 || base == 2) {
		cp += 2;
		len -= 2;
	}

	/* read suffixes */
	unsigned l_suffix = 0, u_suffix = 0;
	/*
	 * Scan backwards from the end; the loop stops at the first character
	 * that is not a suffix letter, so it relies on the text containing
	 * at least one digit.
	 */
	for (;; len--) {
		char c = cp[len - 1];
		if (c == 'l' || c == 'L')
			l_suffix++;
		else if (c == 'u' || c == 'U')
			u_suffix++;
		else
			break;
	}
	if (l_suffix > 2 || u_suffix > 1) {
		/* malformed integer constant */
		warning(251);
		if (l_suffix > 2)
			l_suffix = 2;
		if (u_suffix > 1)
			u_suffix = 1;
	}
	if (!allow_c90 && u_suffix > 0)
		/* suffix 'U' requires C90 or later */
		warning(97);

	bool warned = false;
	errno = 0;
	char *eptr;
	uint64_t ui = (uint64_t)strtoull(cp, &eptr, base);
	/* The digits must end exactly where the suffixes begin. */
	lint_assert(eptr == cp + len);
	if (errno != 0) {
		/* integer constant out of range */
		warning(252);
		warned = true;
	}

	if (base == 8 && len > 1)
		/* octal number '%.*s' */
		query_message(8, (int)len, cp);

	bool unsigned_since_c90 = allow_trad && allow_c90 && u_suffix == 0
	    && ui > TARG_INT_MAX
	    && ((l_suffix == 0 && base != 10 && ui <= TARG_UINT_MAX)
		|| (l_suffix <= 1 && ui > TARG_LONG_MAX));

	tspec_t t = u_suffix > 0
	    ? integer_constant_type_unsigned(l_suffix, ui, warned)
	    : integer_constant_type_signed(l_suffix, ui, base, warned);
	/* Truncate or sign-extend the value to the width of its type. */
	ui = (uint64_t)convert_integer((int64_t)ui, t, size_in_bits(t));

	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
	yylval.y_val->v_tspec = t;
	yylval.y_val->v_unsigned_since_c90 = unsigned_since_c90;
	yylval.y_val->u.integer = (int64_t)ui;

	return T_CON;
}

/* Extend or truncate si to match t. If t is signed, sign-extend. */
int64_t
convert_integer(int64_t si, tspec_t t, unsigned int bits)
{

	uint64_t vbits = value_bits(bits);
	uint64_t ui = (uint64_t)si;
	/*
	 * Pointers, unsigned types and values whose bit (bits - 1) is clear
	 * keep only the value bits; otherwise all higher bits are set.
	 */
	return t == PTR || is_uinteger(t) || ((ui & bit(bits - 1)) == 0)
	    ? (int64_t)(ui & vbits)
	    : (int64_t)(ui | ~vbits);
}

/*
 * Convert the text of a floating-point constant to a value, determine its
 * type from the optional suffixes 'f', 'l' and a trailing 'i' for imaginary
 * constants, store the result in yylval.y_val and return T_CON.
 */
int
lex_floating_constant(const char *text, size_t len)
{
	const char *cp = text;

	bool imaginary = cp[len - 1] == 'i';
	if (imaginary)
		len--;

	char c = cp[len - 1];
	tspec_t t;
	if (c == 'f' || c == 'F') {
		t = imaginary ? FCOMPLEX : FLOAT;
		len--;
	} else if (c == 'l' || c == 'L') {
		t = imaginary ? LCOMPLEX : LDOUBLE;
		len--;
	} else
		t = imaginary ? DCOMPLEX : DOUBLE;

	if (!allow_c90 && t != DOUBLE)
		/* suffixes 'F' or 'L' require C90 or later */
		warning(98);

	errno = 0;
	char *eptr;
	long double ld = strtold(cp, &eptr);
	/* The number must end exactly where the suffixes begin. */
	lint_assert(eptr == cp + len);
	if (errno != 0)
		/* floating-point constant out of range */
		warning(248);
	else if (t == FLOAT) {
		/* Round to float; replace an infinite result by +-FLT_MAX. */
		ld = (float)ld;
		if (isfinite(ld) == 0) {
			/* floating-point constant out of range */
			warning(248);
			ld = ld > 0 ? FLT_MAX : -FLT_MAX;
		}
	} else if (t == DOUBLE
	    || LDOUBLE_SIZE == DOUBLE_SIZE) {
		/* Round to double; replace an infinite result by +-DBL_MAX. */
		ld = (double)ld;
		if (isfinite(ld) == 0) {
			/* floating-point constant out of range */
			warning(248);
			ld = ld > 0 ? DBL_MAX : -DBL_MAX;
		}
	}

	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
	yylval.y_val->v_tspec = t;
	yylval.y_val->u.floating = ld;

	return T_CON;
}

/* Store the operator in yylval.y_op and return the token unchanged. */
int
lex_operator(int t, op_t o)
{

	yylval.y_op = o;
	return t;
}

/*
 * Read the remainder of a character constant or string literal, after the
 * opening delimiter, into a new buffer.
 *
 * The buffer starts with an optional 'L' and the delimiter and always ends
 * with the delimiter.  *complete tells whether the closing delimiter was
 * actually found before a newline or the end of the input.
 */
static buffer
read_quoted(bool *complete, char delim, bool wide)
{
	buffer buf;
	buf_init(&buf);
	if (wide)
		buf_add_char(&buf, 'L');
	buf_add_char(&buf, delim);

	for (;;) {
		int c = read_byte();
		if (c <= 0)
			break;
		buf_add_char(&buf, (char)c);
		if (c == '\n')
			break;
		if (c == delim) {
			*complete = true;
			return buf;
		}
		if (c == '\\') {
			/*
			 * Keep the escaped byte together with its backslash,
			 * so that an escaped delimiter does not end the
			 * literal.  At the end of the input, a space is
			 * stored instead.
			 */
			c = read_byte();
			buf_add_char(&buf, (char)(c <= 0 ? ' ' : c));
			if (c <= 0)
				break;
		}
	}
	*complete = false;
	/* Add the missing closing delimiter. */
	buf_add_char(&buf, delim);
	return buf;
}

/*
 * Analyze the lexical representation of the next character in the string
 * literal list. At the end, only update the position information.
 *
 * On entry, it->end is the offset at which to continue; 0 means to start
 * from the beginning.  Returns false at the end of the text and at an
 * unescaped newline.
 */
bool
quoted_next(const buffer *lit, quoted_iterator *it)
{
	const char *s = lit->data;

	/* Reset all fields; continue where the previous call ended. */
	*it = (quoted_iterator){ .start = it->end };

	char delim = s[s[0] == 'L' ? 1 : 0];

	bool in_the_middle = it->start > 0;
	if (!in_the_middle) {
		/* Skip the optional 'L' and the opening delimiter. */
		it->start = s[0] == 'L' ? 2 : 1;
		it->end = it->start;
	}

	while (s[it->start] == delim) {
		/* The delimiter in the last byte ends the text. */
		if (it->start + 1 == lit->len) {
			it->end = it->start;
			return false;
		}
		it->next_literal = in_the_middle;
		/*
		 * Skip 2 bytes; presumably these are the closing delimiter
		 * and the opening delimiter of an adjacent literal, verify
		 * against the code that concatenates literals.
		 */
		it->start += 2;
	}
	it->end = it->start;

again:
	switch (s[it->end]) {
	case '\\':
		it->end++;
		goto backslash;
	case '\n':
		it->unescaped_newline = true;
		return false;
	default:
		it->value = (unsigned char)s[it->end++];
		return true;
	}

backslash:
	it->escaped = true;
	if ('0' <= s[it->end] && s[it->end] <= '7')
		goto octal_escape;
	switch (s[it->end++]) {
	case '\n':
		/* A backslash-newline produces no character. */
		goto again;
	case 'a':
		it->named_escape = true;
		it->value = '\a';
		it->invalid_escape = !allow_c90;
		return true;
	case 'b':
		it->named_escape = true;
		it->value = '\b';
		return true;
	case 'e':
		it->named_escape = true;
		it->value = '\033';
		it->invalid_escape = !allow_gcc;
		return true;
	case 'f':
		it->named_escape = true;
		it->value = '\f';
		return true;
	case 'n':
		it->named_escape = true;
		it->value = '\n';
		return true;
	case 'r':
		it->named_escape = true;
		it->value = '\r';
		return true;
	case 't':
		it->named_escape = true;
		it->value = '\t';
		return true;
	case 'v':
		it->named_escape = true;
		it->value = '\v';
		it->invalid_escape = !allow_c90;
		return true;
	case 'x':
		goto hex_escape;
	case '"':
		it->literal_escape = true;
		it->value = '"';
		it->invalid_escape = !allow_c90 && delim == '\'';
		return true;
	case '?':
		it->literal_escape = true;
		it->value = '?';
		it->invalid_escape = !allow_c90;
		return true;
	default:
		it->invalid_escape = true;
		/* FALLTHROUGH */
	case '\'':
	case '\\':
		/* The escaped character stands for itself. */
		it->literal_escape = true;
		it->value = (unsigned char)s[it->end - 1];
		return true;
	}

octal_escape:
	/* Up to 3 octal digits; only 3 digits can exceed the limit. */
	it->octal_digits++;
	it->value = s[it->end++] - '0';
	if ('0' <= s[it->end] && s[it->end] <= '7') {
		it->octal_digits++;
		it->value = 8 * it->value + (s[it->end++] - '0');
		if ('0' <= s[it->end] && s[it->end] <= '7') {
			it->octal_digits++;
			it->value = 8 * it->value + (s[it->end++] - '0');
			it->overflow = it->value > TARG_UCHAR_MAX
			    && s[0] != 'L';
		}
	}
	return true;

hex_escape:
	/* Any number of hex digits; hex_digits saturates at 3. */
	for (;;) {
		char ch = s[it->end];
		unsigned digit_value;
		if ('0' <= ch && ch <= '9')
			digit_value = ch - '0';
		else if ('A' <= ch && ch <= 'F')
			digit_value = 10 + (ch - 'A');
		else if ('a' <= ch && ch <= 'f')
			digit_value = 10 + (ch - 'a');
		else
			break;

		it->end++;
		it->value = 16 * it->value + digit_value;
		uint64_t limit = s[0] == 'L' ? TARG_UINT_MAX : TARG_UCHAR_MAX;
		if (it->value > limit)
			it->overflow = true;
		if (it->hex_digits < 3)
			it->hex_digits++;
	}
	it->missing_hex_digits = it->hex_digits == 0;
	return true;
}

/*
 * Iterate over the characters of a character constant or string literal and
 * report invalid, dubious or overflowing escape sequences, invisible
 * characters, unescaped newlines and a missing closing delimiter.
 */
static void
check_quoted(const buffer *buf, bool complete, char delim)
{
	quoted_iterator it = { .end = 0 }, prev = it;
	for (; quoted_next(buf, &it); prev = it) {
		if (it.missing_hex_digits)
			/* no hex digits follow \x */
			error(74);
		if (it.hex_digits > 0 && !allow_c90)
			/* \x requires C90 or later */
			warning(82);
		else if (!it.invalid_escape)
			;
		else if (it.value == '8' || it.value == '9')
			/* bad octal digit '%c' */
			warning(77, (int)it.value);
		else if (it.literal_escape && it.value == '?')
			/* \? requires C90 or later */
			warning(263);
		else if (it.literal_escape && it.value == '"')
			/* \" inside a character constant requires C90 ... */
			warning(262);
		else if (it.named_escape && it.value == '\a')
			/* \a requires C90 or later */
			warning(81);
		else if (it.named_escape && it.value == '\v')
			/* \v requires C90 or later */
			warning(264);
		else {
			unsigned char ch = buf->data[it.end - 1];
			if (ch_isprint(ch))
				/* dubious escape \%c */
				warning(79, ch);
			else
				/* dubious escape \%o */
				warning(80, ch);
		}
		if (it.overflow && it.hex_digits > 0)
			/* overflow in hex escape */
			warning(75);
		if (it.overflow && it.octal_digits > 0)
			/* character escape does not fit in character */
			warning(76);
		if (it.value < ' ' && !it.escaped && complete)
			/* invisible character U+%04X in %s */
			query_message(17, (unsigned)it.value, delim == '"'
			    ? "string literal" : "character constant");
		if (prev.octal_digits > 0 && prev.octal_digits < 3
		    && !it.escaped && it.value >= '8' && it.value <= '9')
			/* short octal escape '%.*s' followed by digit '%c' */
			warning(356, (int)(prev.end - prev.start),
			    buf->data + prev.start, buf->data[it.start]);
	}
	if (it.unescaped_newline)
		/* newline in string or char constant */
		error(254);
	if (!complete && delim == '"')
		/* unterminated string constant */
		error(258);
	if (!complete && delim == '\'')
		/* unterminated character constant */
		error(253);
}

/*
 * Read and check the remainder of a character constant or string literal.
 * The returned buffer is newly initialized by read_quoted; other code in
 * this file releases it with free(buf.data).
 */
static buffer
lex_quoted(char delim, bool wide)
{
	bool complete;
	buffer buf = read_quoted(&complete, delim, wide);
	check_quoted(&buf, complete, delim);
	return buf;
}

/* Called if lex found a leading "'".
*/
/*
 * Read the remainder of a character constant, combine its characters into a
 * single value, store the result in yylval.y_val and return T_CON.
 *
 * A single character is converted like a value of type char; several
 * characters are packed, CHAR_SIZE bits each, first character in the most
 * significant position.
 */
int
lex_character_constant(void)
{
	buffer buf = lex_quoted('\'', false);

	size_t n = 0;
	uint64_t val = 0;
	quoted_iterator it = { .end = 0 };
	while (quoted_next(&buf, &it)) {
		val = (val << CHAR_SIZE) + it.value;
		n++;
	}
	/*
	 * The text is no longer needed; the iterator keeps only offsets and
	 * flags.  Release it, like lex_pp_character_constant does.
	 */
	free(buf.data);

	if (n > sizeof(int) || (n > 1 && (pflag || hflag))) {
		/*
		 * XXX: ^^ should rather be sizeof(TARG_INT). Luckily,
		 * sizeof(int) is the same on all supported platforms.
		 */
		/* too many characters in character constant */
		error(71);
	} else if (n > 1)
		/* multi-character character constant */
		warning(294);
	else if (n == 0 && !it.unescaped_newline)
		/* empty character constant */
		error(73);

	int64_t cval = n == 1
	    ? convert_integer((int64_t)val, CHAR, CHAR_SIZE)
	    : (int64_t)val;

	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
	yylval.y_val->v_tspec = INT;
	yylval.y_val->v_char_constant = true;
	yylval.y_val->u.integer = cval;

	return T_CON;
}

/* Called if lex found a leading "L'".
*/ 979 int 980 lex_wide_character_constant(void) 981 { 982 buffer buf = lex_quoted('\'', true); 983 984 static char wbuf[MB_LEN_MAX + 1]; 985 size_t n = 0, nmax = MB_CUR_MAX; 986 987 quoted_iterator it = { .end = 0 }; 988 while (quoted_next(&buf, &it)) { 989 if (n < nmax) 990 wbuf[n] = (char)it.value; 991 n++; 992 } 993 994 wchar_t wc = 0; 995 if (n == 0) 996 /* empty character constant */ 997 error(73); 998 else if (n > nmax) { 999 n = nmax; 1000 /* too many characters in character constant */ 1001 error(71); 1002 } else { 1003 wbuf[n] = '\0'; 1004 (void)mbtowc(NULL, NULL, 0); 1005 if (mbtowc(&wc, wbuf, nmax) < 0) 1006 /* invalid multibyte character */ 1007 error(291); 1008 } 1009 1010 yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 1011 yylval.y_val->v_tspec = WCHAR_TSPEC; 1012 yylval.y_val->v_char_constant = true; 1013 yylval.y_val->u.integer = wc; 1014 1015 return T_CON; 1016 } 1017 1018 /* See https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html */ 1019 static void 1020 parse_line_directive_flags(const char *p, 1021 bool *is_begin, bool *is_end, bool *is_system) 1022 { 1023 1024 *is_begin = false; 1025 *is_end = false; 1026 *is_system = false; 1027 1028 while (*p != '\0') { 1029 while (ch_isspace(*p)) 1030 p++; 1031 1032 const char *word = p; 1033 while (*p != '\0' && !ch_isspace(*p)) 1034 p++; 1035 size_t len = (size_t)(p - word); 1036 1037 if (len == 1 && word[0] == '1') 1038 *is_begin = true; 1039 if (len == 1 && word[0] == '2') 1040 *is_end = true; 1041 if (len == 1 && word[0] == '3') 1042 *is_system = true; 1043 /* Flag '4' is only interesting for C++. */ 1044 } 1045 } 1046 1047 /* 1048 * The first directive of the preprocessed translation unit provides the name 1049 * of the C source file as specified at the command line. 
1050 */ 1051 static void 1052 set_csrc_pos(void) 1053 { 1054 static bool done; 1055 1056 if (done) 1057 return; 1058 done = true; 1059 csrc_pos.p_file = curr_pos.p_file; 1060 outsrc(transform_filename(curr_pos.p_file, strlen(curr_pos.p_file))); 1061 } 1062 1063 /* # lineno ["filename" [GCC-flag...]] */ 1064 static void 1065 set_location(const char *p) 1066 { 1067 char *end; 1068 long ln = strtol(--p, &end, 10); 1069 if (end == p) 1070 goto error; 1071 p = end; 1072 1073 if (*p != ' ' && *p != '\0') 1074 goto error; 1075 while (*p == ' ') 1076 p++; 1077 1078 if (*p != '\0') { 1079 if (*p != '"') 1080 goto error; 1081 const char *fn = ++p; 1082 while (*p != '"' && *p != '\0') 1083 p++; 1084 if (*p != '"') 1085 goto error; 1086 size_t fn_len = p++ - fn; 1087 if (fn_len > PATH_MAX) 1088 goto error; 1089 if (fn_len == 0) { 1090 fn = "{standard input}"; 1091 fn_len = strlen(fn); 1092 } 1093 curr_pos.p_file = record_filename(fn, fn_len); 1094 set_csrc_pos(); 1095 1096 bool is_begin, is_end, is_system; 1097 parse_line_directive_flags(p, &is_begin, &is_end, &is_system); 1098 update_location(curr_pos.p_file, (int)ln, is_begin, is_end); 1099 in_system_header = is_system; 1100 } 1101 curr_pos.p_line = (int)ln - 1; 1102 curr_pos.p_uniq = 0; 1103 if (curr_pos.p_file == csrc_pos.p_file) { 1104 csrc_pos.p_line = (int)ln - 1; 1105 csrc_pos.p_uniq = 0; 1106 } 1107 return; 1108 1109 error: 1110 /* undefined or invalid '#' directive */ 1111 warning(255); 1112 } 1113 1114 static void 1115 check_stmt_macro(const char *text) 1116 { 1117 const char *p = text; 1118 while (*p == ' ') 1119 p++; 1120 1121 const char *name_start = p; 1122 while (ch_isalnum(*p) || *p == '_') 1123 p++; 1124 const char *name_end = p; 1125 1126 if (*p == '(') { 1127 while (*p != '\0' && *p != ')') 1128 p++; 1129 if (*p == ')') 1130 p++; 1131 } 1132 1133 while (*p == ' ') 1134 p++; 1135 1136 if (strncmp(p, "do", 2) == 0 && !ch_isalnum(p[2]) && p[2] != '_') 1137 /* do-while macro '%.*s' ends with semicolon */ 1138 
warning(385, (int)(name_end - name_start), name_start); 1139 } 1140 1141 // Between lex_pp_begin and lex_pp_end, the current preprocessing line, 1142 // with comments and whitespace converted to a single space. 1143 static buffer pp_line; 1144 1145 void 1146 lex_pp_begin(void) 1147 { 1148 if (pp_line.data == NULL) 1149 buf_init(&pp_line); 1150 debug_step("%s", __func__); 1151 lint_assert(pp_line.len == 0); 1152 } 1153 1154 void 1155 lex_pp_identifier(const char *text) 1156 { 1157 debug_step("%s '%s'", __func__, text); 1158 buf_add(&pp_line, text); 1159 } 1160 1161 void 1162 lex_pp_number(const char *text) 1163 { 1164 debug_step("%s '%s'", __func__, text); 1165 buf_add(&pp_line, text); 1166 } 1167 1168 void 1169 lex_pp_character_constant(void) 1170 { 1171 buffer buf = lex_quoted('\'', false); 1172 debug_step("%s '%s'", __func__, buf.data); 1173 buf_add(&pp_line, buf.data); 1174 free(buf.data); 1175 } 1176 1177 void 1178 lex_pp_string_literal(void) 1179 { 1180 buffer buf = lex_quoted('"', false); 1181 debug_step("%s '%s'", __func__, buf.data); 1182 buf_add(&pp_line, buf.data); 1183 free(buf.data); 1184 } 1185 1186 void 1187 lex_pp_punctuator(const char *text) 1188 { 1189 debug_step("%s '%s'", __func__, text); 1190 buf_add(&pp_line, text); 1191 } 1192 1193 void 1194 lex_pp_comment(void) 1195 { 1196 int lc = -1, c; 1197 1198 for (;;) { 1199 if ((c = read_byte()) == EOF) { 1200 /* unterminated comment */ 1201 error(256); 1202 return; 1203 } 1204 if (lc == '*' && c == '/') 1205 break; 1206 lc = c; 1207 } 1208 1209 buf_add_char(&pp_line, ' '); 1210 } 1211 1212 void 1213 lex_pp_whitespace(void) 1214 { 1215 buf_add_char(&pp_line, ' '); 1216 } 1217 1218 void 1219 lex_pp_end(void) 1220 { 1221 const char *text = pp_line.data; 1222 size_t len = pp_line.len; 1223 while (len > 0 && text[len - 1] == ' ') 1224 len--; 1225 debug_step("%s '%.*s'", __func__, (int)len, text); 1226 1227 const char *p = text; 1228 while (*p == ' ') 1229 p++; 1230 1231 if (ch_isdigit(*p)) 1232 
set_location(p); 1233 else if (strncmp(p, "pragma ", 7) == 0) 1234 goto done; 1235 else if (strncmp(p, "define ", 7) == 0) { 1236 if (text[len - 1] == ';') 1237 check_stmt_macro(p + 7); 1238 } else if (strncmp(p, "undef ", 6) == 0) 1239 goto done; 1240 else 1241 /* undefined or invalid '#' directive */ 1242 warning(255); 1243 1244 done: 1245 pp_line.len = 0; 1246 pp_line.data[0] = '\0'; 1247 } 1248 1249 /* Handle lint comments such as ARGSUSED. */ 1250 void 1251 lex_comment(void) 1252 { 1253 int c; 1254 static const struct { 1255 const char name[13]; 1256 bool arg; 1257 lint_comment comment; 1258 } keywtab[] = { 1259 { "ARGSUSED", true, LC_ARGSUSED }, 1260 { "BITFIELDTYPE", false, LC_BITFIELDTYPE }, 1261 { "FALLTHRU", false, LC_FALLTHROUGH }, 1262 { "FALLTHROUGH", false, LC_FALLTHROUGH }, 1263 { "FALL THROUGH", false, LC_FALLTHROUGH }, 1264 { "fallthrough", false, LC_FALLTHROUGH }, 1265 { "LINTLIBRARY", false, LC_LINTLIBRARY }, 1266 { "LINTED", true, LC_LINTED }, 1267 { "LONGLONG", false, LC_LONGLONG }, 1268 { "NOSTRICT", true, LC_LINTED }, 1269 { "NOTREACHED", false, LC_NOTREACHED }, 1270 { "PRINTFLIKE", true, LC_PRINTFLIKE }, 1271 { "PROTOLIB", true, LC_PROTOLIB }, 1272 { "SCANFLIKE", true, LC_SCANFLIKE }, 1273 { "VARARGS", true, LC_VARARGS }, 1274 }; 1275 char keywd[32]; 1276 1277 bool seen_end_of_comment = false; 1278 1279 while (c = read_byte(), isspace(c) != 0) 1280 continue; 1281 1282 /* Read the potential keyword to keywd */ 1283 size_t l = 0; 1284 while (c != EOF && l < sizeof(keywd) - 1 && 1285 (isalpha(c) != 0 || isspace(c) != 0)) { 1286 if (islower(c) != 0 && l > 0 && ch_isupper(keywd[0])) 1287 break; 1288 keywd[l++] = (char)c; 1289 c = read_byte(); 1290 } 1291 while (l > 0 && ch_isspace(keywd[l - 1])) 1292 l--; 1293 keywd[l] = '\0'; 1294 1295 /* look for the keyword */ 1296 size_t i; 1297 for (i = 0; i < sizeof(keywtab) / sizeof(keywtab[0]); i++) 1298 if (strcmp(keywtab[i].name, keywd) == 0) 1299 goto found_keyword; 1300 goto skip_rest; 1301 1302 
found_keyword: 1303 while (isspace(c) != 0) 1304 c = read_byte(); 1305 1306 /* read the argument, if the keyword accepts one and there is one */ 1307 char arg[32]; 1308 l = 0; 1309 if (keywtab[i].arg) { 1310 while (isdigit(c) != 0 && l < sizeof(arg) - 1) { 1311 arg[l++] = (char)c; 1312 c = read_byte(); 1313 } 1314 } 1315 arg[l] = '\0'; 1316 int a = l != 0 ? atoi(arg) : -1; 1317 1318 while (isspace(c) != 0) 1319 c = read_byte(); 1320 1321 seen_end_of_comment = c == '*' && (c = read_byte()) == '/'; 1322 if (!seen_end_of_comment && keywtab[i].comment != LC_LINTED) 1323 /* extra characters in lint comment */ 1324 warning(257); 1325 1326 handle_lint_comment(keywtab[i].comment, a); 1327 1328 skip_rest: 1329 while (!seen_end_of_comment) { 1330 int lc = c; 1331 if ((c = read_byte()) == EOF) { 1332 /* unterminated comment */ 1333 error(256); 1334 break; 1335 } 1336 if (lc == '*' && c == '/') 1337 seen_end_of_comment = true; 1338 } 1339 } 1340 1341 void 1342 lex_slash_slash_comment(void) 1343 { 1344 1345 if (!allow_c99 && !allow_gcc) 1346 /* %s does not support '//' comments */ 1347 gnuism(312, allow_c90 ? "C90" : "traditional C"); 1348 1349 for (int c; c = read_byte(), c != EOF && c != '\n';) 1350 continue; 1351 } 1352 1353 void 1354 reset_suppressions(void) 1355 { 1356 1357 lwarn = LWARN_ALL; 1358 suppress_longlong = false; 1359 } 1360 1361 int 1362 lex_string(void) 1363 { 1364 buffer *buf = xmalloc(sizeof(*buf)); 1365 *buf = lex_quoted('"', false); 1366 yylval.y_string = buf; 1367 return T_STRING; 1368 } 1369 1370 static size_t 1371 wide_length(const buffer *buf) 1372 { 1373 1374 (void)mblen(NULL, 0); 1375 size_t len = 0, i = 0; 1376 while (i < buf->len) { 1377 int n = mblen(buf->data + i, MB_CUR_MAX); 1378 if (n == -1) { 1379 /* invalid multibyte character */ 1380 error(291); 1381 break; 1382 } 1383 i += n > 1 ? 
n : 1; 1384 len++; 1385 } 1386 return len; 1387 } 1388 1389 int 1390 lex_wide_string(void) 1391 { 1392 buffer buf = lex_quoted('"', true); 1393 1394 buffer str; 1395 buf_init(&str); 1396 quoted_iterator it = { .end = 0 }; 1397 while (quoted_next(&buf, &it)) 1398 buf_add_char(&str, (char)it.value); 1399 1400 free(buf.data); 1401 1402 buffer *len_buf = xcalloc(1, sizeof(*len_buf)); 1403 len_buf->len = wide_length(&str); 1404 yylval.y_string = len_buf; 1405 return T_STRING; 1406 } 1407 1408 void 1409 lex_next_line(void) 1410 { 1411 curr_pos.p_line++; 1412 curr_pos.p_uniq = 0; 1413 debug_skip_indent(); 1414 debug_printf("parsing %s:%d\n", curr_pos.p_file, curr_pos.p_line); 1415 if (curr_pos.p_file == csrc_pos.p_file) { 1416 csrc_pos.p_line++; 1417 csrc_pos.p_uniq = 0; 1418 } 1419 } 1420 1421 void 1422 lex_unknown_character(int c) 1423 { 1424 1425 /* unknown character \%o */ 1426 error(250, c); 1427 } 1428 1429 /* 1430 * The scanner does not create new symbol table entries for symbols it cannot 1431 * find in the symbol table. This is to avoid putting undeclared symbols into 1432 * the symbol table if a syntax error occurs. 1433 * 1434 * getsym is called as soon as it is probably ok to put the symbol in the 1435 * symbol table. It is still possible that symbols are put in the symbol 1436 * table that are not completely declared due to syntax errors. To avoid too 1437 * many problems in this case, symbols get type 'int' in getsym. 1438 * 1439 * XXX calls to getsym should be delayed until declare_1_* is called. 1440 */ 1441 sym_t * 1442 getsym(sbuf_t *sb) 1443 { 1444 1445 sym_t *sym = sb->sb_sym; 1446 1447 /* 1448 * During member declaration it is possible that name() looked for 1449 * symbols of type SK_VCFT, although it should have looked for symbols 1450 * of type SK_TAG. Same can happen for labels. Both cases are 1451 * compensated here. 
1452 */ 1453 if (sym_kind == SK_MEMBER || sym_kind == SK_LABEL) { 1454 if (sym == NULL || sym->s_kind == SK_VCFT) 1455 sym = symtab_search(sb->sb_name); 1456 } 1457 1458 if (sym != NULL) { 1459 lint_assert(sym->s_kind == sym_kind); 1460 set_sym_kind(SK_VCFT); 1461 free(sb); 1462 return sym; 1463 } 1464 1465 /* create a new symbol table entry */ 1466 1467 decl_level *dl; 1468 if (sym_kind == SK_LABEL) { 1469 sym = level_zero_alloc(1, sizeof(*sym), "sym"); 1470 char *s = level_zero_alloc(1, sb->sb_len + 1, "string"); 1471 (void)memcpy(s, sb->sb_name, sb->sb_len + 1); 1472 sym->s_name = s; 1473 sym->s_block_level = 1; 1474 dl = dcs; 1475 while (dl->d_enclosing != NULL && 1476 dl->d_enclosing->d_enclosing != NULL) 1477 dl = dl->d_enclosing; 1478 lint_assert(dl->d_kind == DLK_AUTO); 1479 } else { 1480 sym = block_zero_alloc(sizeof(*sym), "sym"); 1481 sym->s_name = sb->sb_name; 1482 sym->s_block_level = block_level; 1483 dl = dcs; 1484 } 1485 1486 sym->s_def_pos = unique_curr_pos(); 1487 if ((sym->s_kind = sym_kind) != SK_LABEL) 1488 sym->s_type = gettyp(INT); 1489 1490 set_sym_kind(SK_VCFT); 1491 1492 if (!in_gcc_attribute) { 1493 debug_printf("%s: symtab_add ", __func__); 1494 debug_sym("", sym, "\n"); 1495 symtab_add(sym); 1496 1497 *dl->d_last_dlsym = sym; 1498 dl->d_last_dlsym = &sym->s_level_next; 1499 } 1500 1501 free(sb); 1502 return sym; 1503 } 1504 1505 /* 1506 * Construct a temporary symbol. The symbol name starts with a digit to avoid 1507 * name clashes with other identifiers. 1508 */ 1509 sym_t * 1510 mktempsym(type_t *tp) 1511 { 1512 static unsigned n = 0; 1513 char *s = level_zero_alloc((size_t)block_level, 64, "string"); 1514 sym_t *sym = block_zero_alloc(sizeof(*sym), "sym"); 1515 scl_t scl; 1516 1517 (void)snprintf(s, 64, "%.8u_tmp", n++); 1518 1519 scl = dcs->d_scl; 1520 if (scl == NO_SCL) 1521 scl = block_level > 0 ? 
AUTO : EXTERN; 1522 1523 sym->s_name = s; 1524 sym->s_type = tp; 1525 sym->s_block_level = block_level; 1526 sym->s_scl = scl; 1527 sym->s_kind = SK_VCFT; 1528 sym->s_used = true; 1529 sym->s_set = true; 1530 1531 symtab_add(sym); 1532 1533 *dcs->d_last_dlsym = sym; 1534 dcs->d_last_dlsym = &sym->s_level_next; 1535 1536 return sym; 1537 } 1538 1539 void 1540 symtab_remove_forever(sym_t *sym) 1541 { 1542 1543 debug_step("%s '%s' %s '%s'", __func__, 1544 sym->s_name, symbol_kind_name(sym->s_kind), 1545 type_name(sym->s_type)); 1546 symtab_remove(sym); 1547 1548 /* avoid that the symbol will later be put back to the symbol table */ 1549 sym->s_block_level = -1; 1550 } 1551 1552 /* 1553 * Remove all symbols from the symbol table that have the same level as the 1554 * given symbol. 1555 */ 1556 void 1557 symtab_remove_level(sym_t *syms) 1558 { 1559 1560 if (syms != NULL) 1561 debug_step("%s %d", __func__, syms->s_block_level); 1562 1563 /* Note the use of s_level_next instead of s_symtab_next. */ 1564 for (sym_t *sym = syms; sym != NULL; sym = sym->s_level_next) { 1565 if (sym->s_block_level != -1) { 1566 debug_step("%s '%s' %s '%s' %d", __func__, 1567 sym->s_name, symbol_kind_name(sym->s_kind), 1568 type_name(sym->s_type), sym->s_block_level); 1569 symtab_remove(sym); 1570 sym->s_symtab_ref = NULL; 1571 } 1572 } 1573 } 1574 1575 /* Put a symbol into the symbol table. */ 1576 void 1577 inssym(int level, sym_t *sym) 1578 { 1579 1580 debug_step("%s '%s' %s '%s' %d", __func__, 1581 sym->s_name, symbol_kind_name(sym->s_kind), 1582 type_name(sym->s_type), level); 1583 sym->s_block_level = level; 1584 symtab_add(sym); 1585 1586 const sym_t *next = sym->s_symtab_next; 1587 if (next != NULL) 1588 lint_assert(sym->s_block_level >= next->s_block_level); 1589 } 1590 1591 /* Called at level 0 after syntax errors. 
*/ 1592 void 1593 clean_up_after_error(void) 1594 { 1595 1596 symtab_remove_locals(); 1597 1598 while (mem_block_level > 0) 1599 level_free_all(mem_block_level--); 1600 } 1601 1602 /* Create a new symbol with the same name as an existing symbol. */ 1603 sym_t * 1604 pushdown(const sym_t *sym) 1605 { 1606 1607 debug_step("pushdown '%s' %s '%s'", 1608 sym->s_name, symbol_kind_name(sym->s_kind), 1609 type_name(sym->s_type)); 1610 1611 sym_t *nsym = block_zero_alloc(sizeof(*nsym), "sym"); 1612 lint_assert(sym->s_block_level <= block_level); 1613 nsym->s_name = sym->s_name; 1614 nsym->s_def_pos = unique_curr_pos(); 1615 nsym->s_kind = sym->s_kind; 1616 nsym->s_block_level = block_level; 1617 1618 symtab_add(nsym); 1619 1620 *dcs->d_last_dlsym = nsym; 1621 dcs->d_last_dlsym = &nsym->s_level_next; 1622 1623 return nsym; 1624 } 1625 1626 static void 1627 fill_token(int tk, const char *text, token *tok) 1628 { 1629 switch (tk) { 1630 case T_NAME: 1631 case T_TYPENAME: 1632 tok->kind = TK_IDENTIFIER; 1633 tok->u.identifier = xstrdup(yylval.y_name->sb_name); 1634 break; 1635 case T_CON: 1636 tok->kind = TK_CONSTANT; 1637 tok->u.constant = *yylval.y_val; 1638 break; 1639 case T_NAMED_CONSTANT: 1640 tok->kind = TK_IDENTIFIER; 1641 tok->u.identifier = xstrdup(text); 1642 break; 1643 case T_STRING:; 1644 tok->kind = TK_STRING_LITERALS; 1645 tok->u.string_literals.len = yylval.y_string->len; 1646 tok->u.string_literals.cap = yylval.y_string->cap; 1647 tok->u.string_literals.data = xstrdup(yylval.y_string->data); 1648 break; 1649 default: 1650 tok->kind = TK_PUNCTUATOR; 1651 tok->u.punctuator = xstrdup(text); 1652 } 1653 } 1654 1655 static void 1656 seq_reserve(balanced_token_sequence *seq) 1657 { 1658 if (seq->len >= seq->cap) { 1659 seq->cap = 16 + 2 * seq->cap; 1660 const balanced_token *old_tokens = seq->tokens; 1661 balanced_token *new_tokens = block_zero_alloc( 1662 seq->cap * sizeof(*seq->tokens), "balanced_token[]"); 1663 if (seq->len > 0) 1664 memcpy(new_tokens, 
old_tokens, 1665 seq->len * sizeof(*seq->tokens)); 1666 seq->tokens = new_tokens; 1667 } 1668 } 1669 1670 static balanced_token_sequence 1671 read_balanced(int opening) 1672 { 1673 int closing = opening == T_LPAREN ? T_RPAREN 1674 : opening == T_LBRACK ? T_RBRACK : T_RBRACE; 1675 balanced_token_sequence seq = { NULL, 0, 0 }; 1676 1677 int tok; 1678 while (tok = yylex(), tok > 0 && tok != closing) { 1679 seq_reserve(&seq); 1680 if (tok == T_LPAREN || tok == T_LBRACK || tok == T_LBRACE) { 1681 seq.tokens[seq.len].kind = tok == T_LPAREN ? '(' 1682 : tok == T_LBRACK ? '[' : '{'; 1683 seq.tokens[seq.len].u.tokens = read_balanced(tok); 1684 } else { 1685 fill_token(tok, yytext, &seq.tokens[seq.len].u.token); 1686 freeyyv(&yylval, tok); 1687 } 1688 seq.len++; 1689 } 1690 return seq; 1691 } 1692 1693 balanced_token_sequence 1694 lex_balanced(void) 1695 { 1696 return read_balanced(T_LPAREN); 1697 } 1698 1699 /* 1700 * Free any dynamically allocated memory referenced by 1701 * the value stack or yylval. 1702 * The type of information in yylval is described by tok. 1703 */ 1704 void 1705 freeyyv(void *sp, int tok) 1706 { 1707 if (tok == T_NAME || tok == T_TYPENAME) { 1708 sbuf_t *sb = *(sbuf_t **)sp; 1709 free(sb); 1710 } else if (tok == T_CON) { 1711 val_t *val = *(val_t **)sp; 1712 free(val); 1713 } else if (tok == T_STRING) { 1714 buffer *str = *(buffer **)sp; 1715 free(str->data); 1716 free(str); 1717 } 1718 } 1719