scan.l revision 1.18 1 %{
2 /* $NetBSD: scan.l,v 1.18 2001/05/24 12:10:39 lukem Exp $ */
3
4 /*
5 * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
6 * Copyright (c) 1994, 1995 Jochen Pohl
7 * All Rights Reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by Jochen Pohl for
20 * The NetBSD Project.
21 * 4. The name of the author may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __RCSID("$NetBSD: scan.l,v 1.18 2001/05/24 12:10:39 lukem Exp $");
39 #endif
40
41 #include <stdlib.h>
42 #include <string.h>
43 #include <limits.h>
44 #include <float.h>
45 #include <ctype.h>
46 #include <errno.h>
47 #include <math.h>
48 #include <err.h>
49
50 #include "lint1.h"
51 #include "cgram.h"
52
53 #define CHAR_MASK (~(~0 << CHAR_BIT))
54 #define YY_NO_UNPUT
55
/* Current position (it is also updated when an included file is parsed) */
57 pos_t curr_pos = { 1, "", 0 };
58
59 /*
60 * Current position in C source (not updated when an included file is
61 * parsed).
62 */
63 pos_t csrc_pos = { 1, "", 0 };
64
65 static void incline __P((void));
66 static void badchar __P((int));
67 static sbuf_t *allocsb __P((void));
68 static void freesb __P((sbuf_t *));
69 static int inpc __P((void));
70 static int hash __P((const char *));
71 static sym_t *search __P((sbuf_t *));
72 static int name __P((void));
73 static int keyw __P((sym_t *));
74 static int icon __P((int));
75 static int fcon __P((void));
76 static int operator __P((int, op_t));
77 static int ccon __P((void));
78 static int wccon __P((void));
79 static int getescc __P((int));
80 static void directive __P((void));
81 static void comment __P((void));
82 static void slashslashcomment __P((void));
83 static int string __P((void));
84 static int wcstrg __P((void));
85
86 %}
87
88 L [_A-Za-z]
89 D [0-9]
90 NZD [1-9]
91 OD [0-7]
92 HD [0-9A-Fa-f]
93 EX ([eE][+-]?[0-9]+)
94
95 %%
96
97 {L}({L}|{D})* return (name());
98 0{OD}*[lLuU]* return (icon(8));
99 {NZD}{D}*[lLuU]* return (icon(10));
100 0[xX]{HD}+[lLuU]* return (icon(16));
101 {D}+\.{D}*{EX}?[fFlL]? |
102 {D}+{EX}[fFlL]? |
103 \.{D}+{EX}?[fFlL]? return (fcon());
104 "=" return (operator(T_ASSIGN, ASSIGN));
105 "*=" return (operator(T_OPASS, MULASS));
106 "/=" return (operator(T_OPASS, DIVASS));
107 "%=" return (operator(T_OPASS, MODASS));
108 "+=" return (operator(T_OPASS, ADDASS));
109 "-=" return (operator(T_OPASS, SUBASS));
110 "<<=" return (operator(T_OPASS, SHLASS));
111 ">>=" return (operator(T_OPASS, SHRASS));
112 "&=" return (operator(T_OPASS, ANDASS));
113 "^=" return (operator(T_OPASS, XORASS));
114 "|=" return (operator(T_OPASS, ORASS));
115 "||" return (operator(T_LOGOR, LOGOR));
116 "&&" return (operator(T_LOGAND, LOGAND));
117 "|" return (operator(T_OR, OR));
118 "&" return (operator(T_AND, AND));
119 "^" return (operator(T_XOR, XOR));
120 "==" return (operator(T_EQOP, EQ));
121 "!=" return (operator(T_EQOP, NE));
122 "<" return (operator(T_RELOP, LT));
123 ">" return (operator(T_RELOP, GT));
124 "<=" return (operator(T_RELOP, LE));
125 ">=" return (operator(T_RELOP, GE));
126 "<<" return (operator(T_SHFTOP, SHL));
127 ">>" return (operator(T_SHFTOP, SHR));
128 "++" return (operator(T_INCDEC, INC));
129 "--" return (operator(T_INCDEC, DEC));
130 "->" return (operator(T_STROP, ARROW));
131 "." return (operator(T_STROP, POINT));
132 "+" return (operator(T_ADDOP, PLUS));
133 "-" return (operator(T_ADDOP, MINUS));
134 "*" return (operator(T_MULT, MULT));
135 "/" return (operator(T_DIVOP, DIV));
136 "%" return (operator(T_DIVOP, MOD));
137 "!" return (operator(T_UNOP, NOT));
138 "~" return (operator(T_UNOP, COMPL));
139 "\"" return (string());
140 "L\"" return (wcstrg());
141 ";" return (T_SEMI);
142 "{" return (T_LBRACE);
143 "}" return (T_RBRACE);
144 "," return (T_COMMA);
145 ":" return (T_COLON);
146 "?" return (T_QUEST);
147 "[" return (T_LBRACK);
148 "]" return (T_RBRACK);
149 "(" return (T_LPARN);
150 ")" return (T_RPARN);
151 "..." return (T_ELLIPSE);
152 "'" return (ccon());
153 "L'" return (wccon());
154 ^#.*$ directive();
155 \n incline();
156 \t|" "|\f|\v ;
157 "/*" comment();
158 "//" slashslashcomment();
159 . badchar(yytext[0]);
160
161 %%
162
163 static void
164 incline()
165 {
166 curr_pos.p_line++;
167 curr_pos.p_uniq = 0;
168 if (curr_pos.p_file == csrc_pos.p_file) {
169 csrc_pos.p_line++;
170 csrc_pos.p_uniq = 0;
171 }
172 }
173
/*
 * Report a character that matched none of the scanner rules.
 * c is the offending character; it is passed on as the argument of
 * diagnostic 250.
 */
static void
badchar(c)
	int c;
{

	/* unknown character \%o */
	error(250, c);
}
181
182 /*
183 * Keywords.
184 * During initialisation they are written to the symbol table.
185 */
186 static struct kwtab {
187 const char *kw_name; /* keyword */
188 int kw_token; /* token returned by yylex() */
189 scl_t kw_scl; /* storage class if kw_token T_SCLASS */
190 tspec_t kw_tspec; /* type spec. if kw_token T_TYPE or T_SOU */
191 tqual_t kw_tqual; /* type qual. fi kw_token T_QUAL */
192 u_int kw_stdc : 1; /* STDC keyword */
193 u_int kw_gcc : 1; /* GCC keyword */
194 } kwtab[] = {
195 { "asm", T_ASM, 0, 0, 0, 0, 1 },
196 { "__asm", T_ASM, 0, 0, 0, 0, 0 },
197 { "__asm__", T_ASM, 0, 0, 0, 0, 0 },
198 { "auto", T_SCLASS, AUTO, 0, 0, 0, 0 },
199 { "break", T_BREAK, 0, 0, 0, 0, 0 },
200 { "case", T_CASE, 0, 0, 0, 0, 0 },
201 { "char", T_TYPE, 0, CHAR, 0, 0, 0 },
202 { "const", T_QUAL, 0, 0, CONST, 1, 0 },
203 { "__const__", T_QUAL, 0, 0, CONST, 0, 0 },
204 { "__const", T_QUAL, 0, 0, CONST, 0, 0 },
205 { "continue", T_CONTINUE, 0, 0, 0, 0, 0 },
206 { "default", T_DEFAULT, 0, 0, 0, 0, 0 },
207 { "do", T_DO, 0, 0, 0, 0, 0 },
208 { "double", T_TYPE, 0, DOUBLE, 0, 0, 0 },
209 { "else", T_ELSE, 0, 0, 0, 0, 0 },
210 { "enum", T_ENUM, 0, 0, 0, 0, 0 },
211 { "extern", T_SCLASS, EXTERN, 0, 0, 0, 0 },
212 { "float", T_TYPE, 0, FLOAT, 0, 0, 0 },
213 { "for", T_FOR, 0, 0, 0, 0, 0 },
214 { "goto", T_GOTO, 0, 0, 0, 0, 0 },
215 { "if", T_IF, 0, 0, 0, 0, 0 },
216 { "inline", T_SCLASS, INLINE, 0, 0, 0, 1 },
217 { "__inline__", T_SCLASS, INLINE, 0, 0, 0, 0 },
218 { "__inline", T_SCLASS, INLINE, 0, 0, 0, 0 },
219 { "int", T_TYPE, 0, INT, 0, 0, 0 },
220 { "__symbolrename", T_SYMBOLRENAME, 0, 0, 0, 0, 0 },
221 { "long", T_TYPE, 0, LONG, 0, 0, 0 },
222 { "register", T_SCLASS, REG, 0, 0, 0, 0 },
223 { "return", T_RETURN, 0, 0, 0, 0, 0 },
224 { "short", T_TYPE, 0, SHORT, 0, 0, 0 },
225 { "signed", T_TYPE, 0, SIGNED, 0, 1, 0 },
226 { "__signed__", T_TYPE, 0, SIGNED, 0, 0, 0 },
227 { "__signed", T_TYPE, 0, SIGNED, 0, 0, 0 },
228 { "sizeof", T_SIZEOF, 0, 0, 0, 0, 0 },
229 { "static", T_SCLASS, STATIC, 0, 0, 0, 0 },
230 { "struct", T_SOU, 0, STRUCT, 0, 0, 0 },
231 { "switch", T_SWITCH, 0, 0, 0, 0, 0 },
232 { "typedef", T_SCLASS, TYPEDEF, 0, 0, 0, 0 },
233 { "union", T_SOU, 0, UNION, 0, 0, 0 },
234 { "unsigned", T_TYPE, 0, UNSIGN, 0, 0, 0 },
235 { "void", T_TYPE, 0, VOID, 0, 0, 0 },
236 { "volatile", T_QUAL, 0, 0, VOLATILE, 1, 0 },
237 { "__volatile__", T_QUAL, 0, 0, VOLATILE, 0, 0 },
238 { "__volatile", T_QUAL, 0, 0, VOLATILE, 0, 0 },
239 { "while", T_WHILE, 0, 0, 0, 0, 0 },
240 { NULL, 0, 0, 0, 0, 0, 0 }
241 };
242
243 /* Symbol table */
244 static sym_t *symtab[HSHSIZ1];
245
246 /* bit i of the entry with index i is set */
247 u_quad_t qbmasks[sizeof(u_quad_t) * CHAR_BIT];
248
249 /* least significant i bits are set in the entry with index i */
250 u_quad_t qlmasks[sizeof(u_quad_t) * CHAR_BIT + 1];
251
252 /* least significant i bits are not set in the entry with index i */
253 u_quad_t qumasks[sizeof(u_quad_t) * CHAR_BIT + 1];
254
255 /* free list for sbuf structures */
256 static sbuf_t *sbfrlst;
257
/* Type of the next expected symbol */
259 symt_t symtyp;
260
261
262 /*
263 * All keywords are written to the symbol table. This saves us looking
264 * in a extra table for each name we found.
265 */
266 void
267 initscan()
268 {
269 struct kwtab *kw;
270 sym_t *sym;
271 int h, i;
272 u_quad_t uq;
273
274 for (kw = kwtab; kw->kw_name != NULL; kw++) {
275 if (kw->kw_stdc && tflag)
276 continue;
277 if (kw->kw_gcc && !gflag)
278 continue;
279 sym = getblk(sizeof (sym_t));
280 sym->s_name = kw->kw_name;
281 sym->s_keyw = 1;
282 sym->s_value.v_quad = kw->kw_token;
283 if (kw->kw_token == T_TYPE || kw->kw_token == T_SOU) {
284 sym->s_tspec = kw->kw_tspec;
285 } else if (kw->kw_token == T_SCLASS) {
286 sym->s_scl = kw->kw_scl;
287 } else if (kw->kw_token == T_QUAL) {
288 sym->s_tqual = kw->kw_tqual;
289 }
290 h = hash(sym->s_name);
291 if ((sym->s_link = symtab[h]) != NULL)
292 symtab[h]->s_rlink = &sym->s_link;
293 (symtab[h] = sym)->s_rlink = &symtab[h];
294 }
295
296 /* initialize bit-masks for quads */
297 for (i = 0; i < sizeof (u_quad_t) * CHAR_BIT; i++) {
298 qbmasks[i] = (u_quad_t)1 << i;
299 uq = ~(u_quad_t)0 << i;
300 qumasks[i] = uq;
301 qlmasks[i] = ~uq;
302 }
303 qumasks[i] = 0;
304 qlmasks[i] = ~(u_quad_t)0;
305 }
306
307 /*
308 * Get a free sbuf structure, if possible from the free list
309 */
310 static sbuf_t *
311 allocsb()
312 {
313 sbuf_t *sb;
314
315 if ((sb = sbfrlst) != NULL) {
316 sbfrlst = sb->sb_nxt;
317 } else {
318 sb = xmalloc(sizeof (sbuf_t));
319 }
320 (void)memset(sb, 0, sizeof (sb));
321 return (sb);
322 }
323
324 /*
325 * Put a sbuf structure to the free list
326 */
327 static void
328 freesb(sb)
329 sbuf_t *sb;
330 {
331 sb->sb_nxt = sbfrlst;
332 sbfrlst = sb;
333 }
334
335 /*
336 * Read a character and ensure that it is positive (except EOF).
337 * Increment line count(s) if necessary.
338 */
339 static int
340 inpc()
341 {
342 int c;
343
344 if ((c = input()) != EOF && (c &= CHAR_MASK) == '\n')
345 incline();
346 return (c);
347 }
348
349 static int
350 hash(s)
351 const char *s;
352 {
353 u_int v;
354 const u_char *us;
355
356 v = 0;
357 for (us = (const u_char *)s; *us != '\0'; us++) {
358 v = (v << sizeof (v)) + *us;
359 v ^= v >> (sizeof (v) * CHAR_BIT - sizeof (v));
360 }
361 return (v % HSHSIZ1);
362 }
363
364 /*
365 * Lex has found a letter followed by zero or more letters or digits.
366 * It looks for a symbol in the symbol table with the same name. This
367 * symbol must either be a keyword or a symbol of the type required by
368 * symtyp (label, member, tag, ...).
369 *
370 * If it is a keyword, the token is returned. In some cases it is described
371 * more deeply by data written to yylval.
372 *
373 * If it is a symbol, T_NAME is returned and the pointer to a sbuf struct
374 * is stored in yylval. This struct contains the name of the symbol, it's
375 * length and hash value. If there is already a symbol of the same name
376 * and type in the symbol table, the sbuf struct also contains a pointer
377 * to the symbol table entry.
378 */
379 static int
380 name()
381 {
382 char *s;
383 sbuf_t *sb;
384 sym_t *sym;
385 int tok;
386
387 sb = allocsb();
388 sb->sb_name = yytext;
389 sb->sb_len = yyleng;
390 sb->sb_hash = hash(yytext);
391
392 if ((sym = search(sb)) != NULL && sym->s_keyw) {
393 freesb(sb);
394 return (keyw(sym));
395 }
396
397 sb->sb_sym = sym;
398
399 if (sym != NULL) {
400 if (blklev < sym->s_blklev)
401 lerror("name() 1");
402 sb->sb_name = sym->s_name;
403 sb->sb_len = strlen(sym->s_name);
404 tok = sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME;
405 } else {
406 s = getblk(yyleng + 1);
407 (void)memcpy(s, yytext, yyleng + 1);
408 sb->sb_name = s;
409 sb->sb_len = yyleng;
410 tok = T_NAME;
411 }
412
413 yylval.y_sb = sb;
414 return (tok);
415 }
416
417 static sym_t *
418 search(sb)
419 sbuf_t *sb;
420 {
421 sym_t *sym;
422
423 for (sym = symtab[sb->sb_hash]; sym != NULL; sym = sym->s_link) {
424 if (strcmp(sym->s_name, sb->sb_name) == 0) {
425 if (sym->s_keyw || sym->s_kind == symtyp)
426 return (sym);
427 }
428 }
429
430 return (NULL);
431 }
432
433 static int
434 keyw(sym)
435 sym_t *sym;
436 {
437 int t;
438
439 if ((t = (int)sym->s_value.v_quad) == T_SCLASS) {
440 yylval.y_scl = sym->s_scl;
441 } else if (t == T_TYPE || t == T_SOU) {
442 yylval.y_tspec = sym->s_tspec;
443 } else if (t == T_QUAL) {
444 yylval.y_tqual = sym->s_tqual;
445 }
446 return (t);
447 }
448
449 /*
450 * Convert a string representing an integer into internal representation.
451 * The value is returned in yylval. icon() (and yylex()) returns T_CON.
452 */
453 static int
454 icon(base)
455 int base;
456 {
457 int l_suffix, u_suffix;
458 int len;
459 const char *cp;
460 char c, *eptr;
461 tspec_t typ;
462 u_long ul = 0;
463 u_quad_t uq = 0;
464 int ansiu;
465 static tspec_t contypes[2][3] = {
466 { INT, LONG, QUAD },
467 { UINT, ULONG, UQUAD }
468 };
469
470 cp = yytext;
471 len = yyleng;
472
473 /* skip 0x */
474 if (base == 16) {
475 cp += 2;
476 len -= 2;
477 }
478
479 /* read suffixes */
480 l_suffix = u_suffix = 0;
481 for ( ; ; ) {
482 if ((c = cp[len - 1]) == 'l' || c == 'L') {
483 l_suffix++;
484 } else if (c == 'u' || c == 'U') {
485 u_suffix++;
486 } else {
487 break;
488 }
489 len--;
490 }
491 if (l_suffix > 2 || u_suffix > 1) {
492 /* malformed integer constant */
493 warning(251);
494 if (l_suffix > 2)
495 l_suffix = 2;
496 if (u_suffix > 1)
497 u_suffix = 1;
498 }
499 if (tflag && u_suffix != 0) {
500 /* suffix U is illegal in traditional C */
501 warning(97);
502 }
503 typ = contypes[u_suffix][l_suffix];
504
505 errno = 0;
506 if (l_suffix < 2) {
507 ul = strtoul(cp, &eptr, base);
508 } else {
509 uq = strtouq(cp, &eptr, base);
510 }
511 if (eptr != cp + len)
512 lerror("icon() 1");
513 if (errno != 0)
514 /* integer constant out of range */
515 warning(252);
516
517 /*
518 * If the value is to big for the current type, we must choose
519 * another type.
520 */
521 ansiu = 0;
522 switch (typ) {
523 case INT:
524 if (ul <= INT_MAX) {
525 /* ok */
526 } else if (ul <= (unsigned)UINT_MAX && base != 10) {
527 typ = UINT;
528 } else if (ul <= LONG_MAX) {
529 typ = LONG;
530 } else {
531 typ = ULONG;
532 }
533 if (typ == UINT || typ == ULONG) {
534 if (tflag) {
535 typ = LONG;
536 } else if (!sflag) {
537 /*
538 * Remember that the constant is unsigned
539 * only in ANSI C
540 */
541 ansiu = 1;
542 }
543 }
544 break;
545 case UINT:
546 if (ul > (u_int)UINT_MAX)
547 typ = ULONG;
548 break;
549 case LONG:
550 if (ul > LONG_MAX && !tflag) {
551 typ = ULONG;
552 if (!sflag)
553 ansiu = 1;
554 }
555 break;
556 case QUAD:
557 if (uq > QUAD_MAX && !tflag) {
558 typ = UQUAD;
559 if (!sflag)
560 ansiu = 1;
561 }
562 break;
563 /* LINTED (enumeration values not handled in switch) */
564 case STRUCT:
565 case VOID:
566 case LDOUBLE:
567 case FUNC:
568 case ARRAY:
569 case PTR:
570 case ENUM:
571 case UNION:
572 case SIGNED:
573 case NOTSPEC:
574 case DOUBLE:
575 case FLOAT:
576 case UQUAD:
577 case ULONG:
578 case USHORT:
579 case SHORT:
580 case UCHAR:
581 case SCHAR:
582 case CHAR:
583 case UNSIGN:
584 break;
585 }
586
587 if (typ != QUAD && typ != UQUAD) {
588 if (isutyp(typ)) {
589 uq = ul;
590 } else {
591 uq = (quad_t)(long)ul;
592 }
593 }
594
595 uq = (u_quad_t)xsign((quad_t)uq, typ, -1);
596
597 (yylval.y_val = xcalloc(1, sizeof (val_t)))->v_tspec = typ;
598 yylval.y_val->v_ansiu = ansiu;
599 yylval.y_val->v_quad = (quad_t)uq;
600
601 return (T_CON);
602 }
603
604 /*
605 * Returns 1 if t is a signed type and the value is negative.
606 *
607 * len is the number of significant bits. If len is -1, len is set
608 * to the width of type t.
609 */
610 int
611 sign(q, t, len)
612 quad_t q;
613 tspec_t t;
614 int len;
615 {
616 if (t == PTR || isutyp(t))
617 return (0);
618 return (msb(q, t, len));
619 }
620
621 int
622 msb(q, t, len)
623 quad_t q;
624 tspec_t t;
625 int len;
626 {
627 if (len <= 0)
628 len = size(t);
629 return ((q & qbmasks[len - 1]) != 0);
630 }
631
632 /*
633 * Extends the sign of q.
634 */
635 quad_t
636 xsign(q, t, len)
637 quad_t q;
638 tspec_t t;
639 int len;
640 {
641 if (len <= 0)
642 len = size(t);
643
644 if (t == PTR || isutyp(t) || !sign(q, t, len)) {
645 q &= qlmasks[len];
646 } else {
647 q |= qumasks[len];
648 }
649 return (q);
650 }
651
652 /*
653 * Convert a string representing a floating point value into its interal
654 * representation. Type and value are returned in yylval. fcon()
655 * (and yylex()) returns T_CON.
656 * XXX Currently it is not possible to convert constants of type
657 * long double which are greater then DBL_MAX.
658 */
659 static int
660 fcon()
661 {
662 const char *cp;
663 int len;
664 tspec_t typ;
665 char c, *eptr;
666 double d;
667 float f = 0;
668
669 cp = yytext;
670 len = yyleng;
671
672 if ((c = cp[len - 1]) == 'f' || c == 'F') {
673 typ = FLOAT;
674 len--;
675 } else if (c == 'l' || c == 'L') {
676 typ = LDOUBLE;
677 len--;
678 } else {
679 typ = DOUBLE;
680 }
681
682 if (tflag && typ != DOUBLE) {
683 /* suffixes F and L are illegal in traditional C */
684 warning(98);
685 }
686
687 errno = 0;
688 d = strtod(cp, &eptr);
689 if (eptr != cp + len)
690 lerror("fcon() 1");
691 if (errno != 0)
692 /* floating-point constant out of range */
693 warning(248);
694
695 if (typ == FLOAT) {
696 f = (float)d;
697 if (isinf(f)) {
698 /* floating-point constant out of range */
699 warning(248);
700 f = f > 0 ? FLT_MAX : -FLT_MAX;
701 }
702 }
703
704 (yylval.y_val = xcalloc(1, sizeof (val_t)))->v_tspec = typ;
705 if (typ == FLOAT) {
706 yylval.y_val->v_ldbl = f;
707 } else {
708 yylval.y_val->v_ldbl = d;
709 }
710
711 return (T_CON);
712 }
713
714 static int
715 operator(t, o)
716 int t;
717 op_t o;
718 {
719 yylval.y_op = o;
720 return (t);
721 }
722
723 /*
724 * Called if lex found a leading \'.
725 */
726 static int
727 ccon()
728 {
729 int n, val, c;
730 char cv;
731
732 n = 0;
733 val = 0;
734 while ((c = getescc('\'')) >= 0) {
735 val = (val << CHAR_BIT) + c;
736 n++;
737 }
738 if (c == -2) {
739 /* unterminated character constant */
740 error(253);
741 } else {
742 if (n > sizeof (int) || (n > 1 && (pflag || hflag))) {
743 /* too many characters in character constant */
744 error(71);
745 } else if (n > 1) {
746 /* multi-character character constant */
747 warning(294);
748 } else if (n == 0) {
749 /* empty character constant */
750 error(73);
751 }
752 }
753 if (n == 1) {
754 cv = (char)val;
755 val = cv;
756 }
757
758 yylval.y_val = xcalloc(1, sizeof (val_t));
759 yylval.y_val->v_tspec = INT;
760 yylval.y_val->v_quad = val;
761
762 return (T_CON);
763 }
764
765 /*
766 * Called if lex found a leading L\'
767 */
768 static int
769 wccon()
770 {
771 static char buf[MB_LEN_MAX + 1];
772 int i, c;
773 wchar_t wc;
774
775 i = 0;
776 while ((c = getescc('\'')) >= 0) {
777 if (i < MB_CUR_MAX)
778 buf[i] = (char)c;
779 i++;
780 }
781
782 wc = 0;
783
784 if (c == -2) {
785 /* unterminated character constant */
786 error(253);
787 } else if (c == 0) {
788 /* empty character constant */
789 error(73);
790 } else {
791 if (i > MB_CUR_MAX) {
792 i = MB_CUR_MAX;
793 /* too many characters in character constant */
794 error(71);
795 } else {
796 buf[i] = '\0';
797 (void)mbtowc(NULL, NULL, 0);
798 if (mbtowc(&wc, buf, MB_CUR_MAX) < 0)
799 /* invalid multibyte character */
800 error(291);
801 }
802 }
803
804 yylval.y_val = xcalloc(1, sizeof (val_t));
805 yylval.y_val->v_tspec = WCHAR;
806 yylval.y_val->v_quad = wc;
807
808 return (T_CON);
809 }
810
811 /*
812 * Read a character which is part of a character constant or of a string
813 * and handle escapes.
814 *
815 * The Argument is the character which delimits the character constant or
816 * string.
817 *
818 * Returns -1 if the end of the character constant or string is reached,
819 * -2 if the EOF is reached, and the character otherwise.
820 */
821 static int
822 getescc(d)
823 int d;
824 {
825 static int pbc = -1;
826 int n, c, v;
827
828 if (pbc == -1) {
829 c = inpc();
830 } else {
831 c = pbc;
832 pbc = -1;
833 }
834 if (c == d)
835 return (-1);
836 switch (c) {
837 case '\n':
838 if (tflag) {
839 /* newline in string or char constant */
840 error(254);
841 return (-2);
842 }
843 return (c);
844 case EOF:
845 return (-2);
846 case '\\':
847 switch (c = inpc()) {
848 case '"':
849 if (tflag && d == '\'')
850 /* \" inside character constant undef. ... */
851 warning(262);
852 return ('"');
853 case '\'':
854 return ('\'');
855 case '?':
856 if (tflag)
857 /* \? undefined in traditional C */
858 warning(263);
859 return ('?');
860 case '\\':
861 return ('\\');
862 case 'a':
863 if (tflag)
864 /* \a undefined in traditional C */
865 warning(81);
866 #ifdef __STDC__
867 return ('\a');
868 #else
869 return ('\007');
870 #endif
871 case 'b':
872 return ('\b');
873 case 'f':
874 return ('\f');
875 case 'n':
876 return ('\n');
877 case 'r':
878 return ('\r');
879 case 't':
880 return ('\t');
881 case 'v':
882 if (tflag)
883 /* \v undefined in traditional C */
884 warning(264);
885 #ifdef __STDC__
886 return ('\v');
887 #else
888 return ('\013');
889 #endif
890 case '8': case '9':
891 /* bad octal digit %c */
892 warning(77, c);
893 /* FALLTHROUGH */
894 case '0': case '1': case '2': case '3':
895 case '4': case '5': case '6': case '7':
896 n = 3;
897 v = 0;
898 do {
899 v = (v << 3) + (c - '0');
900 c = inpc();
901 } while (--n && isdigit(c) && (tflag || c <= '7'));
902 if (tflag && n > 0 && isdigit(c))
903 /* bad octal digit %c */
904 warning(77, c);
905 pbc = c;
906 if (v > UCHAR_MAX) {
907 /* character escape does not fit in char. */
908 warning(76);
909 v &= CHAR_MASK;
910 }
911 return (v);
912 case 'x':
913 if (tflag)
914 /* \x undefined in traditional C */
915 warning(82);
916 v = 0;
917 n = 0;
918 while ((c = inpc()) >= 0 && isxdigit(c)) {
919 c = isdigit(c) ?
920 c - '0' : toupper(c) - 'A' + 10;
921 v = (v << 4) + c;
922 if (n >= 0) {
923 if ((v & ~CHAR_MASK) != 0) {
924 /* overflow in hex escape */
925 warning(75);
926 n = -1;
927 } else {
928 n++;
929 }
930 }
931 }
932 pbc = c;
933 if (n == 0) {
934 /* no hex digits follow \x */
935 error(74);
936 } if (n == -1) {
937 v &= CHAR_MASK;
938 }
939 return (v);
940 case '\n':
941 return (getescc(d));
942 case EOF:
943 return (-2);
944 default:
945 if (isprint(c)) {
946 /* dubious escape \%c */
947 warning(79, c);
948 } else {
949 /* dubious escape \%o */
950 warning(80, c);
951 }
952 }
953 }
954 return (c);
955 }
956
957 /*
958 * Called for preprocessor directives. Currently implemented are:
959 * # lineno
960 * # lineno "filename"
961 */
962 static void
963 directive()
964 {
965 const char *cp, *fn;
966 char c, *eptr;
967 size_t fnl;
968 long ln;
969 static int first = 1;
970
971 /* Go to first non-whitespace after # */
972 for (cp = yytext + 1; (c = *cp) == ' ' || c == '\t'; cp++) ;
973
974 if (!isdigit((unsigned char)c)) {
975 error:
976 /* undefined or invalid # directive */
977 warning(255);
978 return;
979 }
980 ln = strtol(--cp, &eptr, 10);
981 if (cp == eptr)
982 goto error;
983 if ((c = *(cp = eptr)) != ' ' && c != '\t' && c != '\0')
984 goto error;
985 while ((c = *cp++) == ' ' || c == '\t')
986 continue;
987 if (c != '\0') {
988 if (c != '"')
989 goto error;
990 fn = cp;
991 while ((c = *cp) != '"' && c != '\0')
992 cp++;
993 if (c != '"')
994 goto error;
995 if ((fnl = cp++ - fn) > PATH_MAX)
996 goto error;
997 while ((c = *cp++) == ' ' || c == '\t')
998 continue;
999 #if 0
1000 if (c != '\0')
1001 warning("extra character(s) after directive");
1002 #endif
1003
1004 /* empty string means stdin */
1005 if (fnl == 0) {
1006 fn = "{standard input}";
1007 fnl = 16; /* strlen (fn) */
1008 }
1009 curr_pos.p_file = fnnalloc(fn, fnl);
1010 /*
1011 * If this is the first directive, the name is the name
1012 * of the C source file as specified at the command line.
1013 * It is written to the output file.
1014 */
1015 if (first) {
1016 csrc_pos.p_file = curr_pos.p_file;
1017 outsrc(curr_pos.p_file);
1018 first = 0;
1019 }
1020 }
1021 curr_pos.p_line = (int)ln - 1;
1022 curr_pos.p_uniq = 0;
1023 if (curr_pos.p_file == csrc_pos.p_file) {
1024 csrc_pos.p_line = (int)ln - 1;
1025 csrc_pos.p_uniq = 0;
1026 }
1027 }
1028
1029 /*
1030 * Handle lint comments. Following comments are currently understood:
1031 * ARGSUSEDn
1032 * CONSTCOND CONSTANTCOND CONSTANTCONDITION
1033 * FALLTHRU FALLTHROUGH
1034 * LINTLIBRARY
1035 * LINTED NOSTRICT
1036 * LONGLONG
1037 * NOTREACHED
1038 * PRINTFLIKEn
1039 * PROTOLIB
1040 * SCANFLIKEn
1041 * VARARGSn
1042 * If one of this comments is recognized, the arguments, if any, are
1043 * parsed and a function which handles this comment is called.
1044 */
1045 static void
1046 comment()
1047 {
1048 int c, lc;
1049 static struct {
1050 const char *keywd;
1051 int arg;
1052 void (*func) __P((int));
1053 } keywtab[] = {
1054 { "ARGSUSED", 1, argsused },
1055 { "CONSTCOND", 0, constcond },
1056 { "CONSTANTCOND", 0, constcond },
1057 { "CONSTANTCONDITION", 0, constcond },
1058 { "FALLTHRU", 0, fallthru },
1059 { "FALLTHROUGH", 0, fallthru },
1060 { "LINTLIBRARY", 0, lintlib },
1061 { "LINTED", 0, linted },
1062 { "LONGLONG", 0, longlong },
1063 { "NOSTRICT", 0, linted },
1064 { "NOTREACHED", 0, notreach },
1065 { "PRINTFLIKE", 1, printflike },
1066 { "PROTOLIB", 1, protolib },
1067 { "SCANFLIKE", 1, scanflike },
1068 { "VARARGS", 1, varargs },
1069 };
1070 char keywd[32];
1071 char arg[32];
1072 int l, i, a;
1073 int eoc;
1074
1075 eoc = 0;
1076
1077 /* Skip white spaces after the start of the comment */
1078 while ((c = inpc()) != EOF && isspace(c))
1079 continue;
1080
1081 /* Read the potential keyword to keywd */
1082 l = 0;
1083 while (c != EOF && isupper(c) && l < sizeof (keywd) - 1) {
1084 keywd[l++] = (char)c;
1085 c = inpc();
1086 }
1087 keywd[l] = '\0';
1088
1089 /* look for the keyword */
1090 for (i = 0; i < sizeof (keywtab) / sizeof (keywtab[0]); i++) {
1091 if (strcmp(keywtab[i].keywd, keywd) == 0)
1092 break;
1093 }
1094 if (i == sizeof (keywtab) / sizeof (keywtab[0]))
1095 goto skip_rest;
1096
1097 /* skip white spaces after the keyword */
1098 while (c != EOF && isspace(c))
1099 c = inpc();
1100
1101 /* read the argument, if the keyword accepts one and there is one */
1102 l = 0;
1103 if (keywtab[i].arg) {
1104 while (c != EOF && isdigit(c) && l < sizeof (arg) - 1) {
1105 arg[l++] = (char)c;
1106 c = inpc();
1107 }
1108 }
1109 arg[l] = '\0';
1110 a = l != 0 ? atoi(arg) : -1;
1111
1112 /* skip white spaces after the argument */
1113 while (c != EOF && isspace(c))
1114 c = inpc();
1115
1116 if (c != '*' || (c = inpc()) != '/') {
1117 if (keywtab[i].func != linted)
1118 /* extra characters in lint comment */
1119 warning(257);
1120 } else {
1121 /*
1122 * remember that we have already found the end of the
1123 * comment
1124 */
1125 eoc = 1;
1126 }
1127
1128 if (keywtab[i].func != NULL)
1129 (*keywtab[i].func)(a);
1130
1131 skip_rest:
1132 while (!eoc) {
1133 lc = c;
1134 if ((c = inpc()) == EOF) {
1135 /* unterminated comment */
1136 error(256);
1137 break;
1138 }
1139 if (lc == '*' && c == '/')
1140 eoc = 1;
1141 }
1142 }
1143
1144 /*
1145 * Handle // style comments
1146 */
1147 static void
1148 slashslashcomment()
1149 {
1150 int c;
1151
1152 if (sflag < 2 && !gflag)
1153 /* // comments only support in C99 */
1154 (void)gnuism(312, tflag ? "traditional" : "ANSI");
1155
1156 while ((c = inpc()) != EOF && c != '\n')
1157 continue;
1158 }
1159
1160 /*
1161 * Clear flags for lint comments LINTED, LONGLONG and CONSTCOND.
1162 * clrwflgs() is called after function definitions and global and
1163 * local declarations and definitions. It is also called between
1164 * the controlling expression and the body of control statements
1165 * (if, switch, for, while).
1166 */
1167 void
1168 clrwflgs()
1169 {
1170 nowarn = 0;
1171 quadflg = 0;
1172 ccflg = 0;
1173 }
1174
1175 /*
1176 * Strings are stored in a dynamically alloceted buffer and passed
1177 * in yylval.y_xstrg to the parser. The parser or the routines called
1178 * by the parser are responsible for freeing this buffer.
1179 */
1180 static int
1181 string()
1182 {
1183 u_char *s;
1184 int c;
1185 size_t len, max;
1186 strg_t *strg;
1187
1188 s = xmalloc(max = 64);
1189
1190 len = 0;
1191 while ((c = getescc('"')) >= 0) {
1192 /* +1 to reserve space for a trailing NUL character */
1193 if (len + 1 == max)
1194 s = xrealloc(s, max *= 2);
1195 s[len++] = (char)c;
1196 }
1197 s[len] = '\0';
1198 if (c == -2)
1199 /* unterminated string constant */
1200 error(258);
1201
1202 strg = xcalloc(1, sizeof (strg_t));
1203 strg->st_tspec = CHAR;
1204 strg->st_len = len;
1205 strg->st_cp = s;
1206
1207 yylval.y_strg = strg;
1208 return (T_STRING);
1209 }
1210
1211 static int
1212 wcstrg()
1213 {
1214 char *s;
1215 int c, i, n, wi;
1216 size_t len, max, wlen;
1217 wchar_t *ws;
1218 strg_t *strg;
1219
1220 s = xmalloc(max = 64);
1221 len = 0;
1222 while ((c = getescc('"')) >= 0) {
1223 /* +1 to save space for a trailing NUL character */
1224 if (len + 1 >= max)
1225 s = xrealloc(s, max *= 2);
1226 s[len++] = (char)c;
1227 }
1228 s[len] = '\0';
1229 if (c == -2)
1230 /* unterminated string constant */
1231 error(258);
1232
1233 /* get length of wide character string */
1234 (void)mblen(NULL, 0);
1235 for (i = 0, wlen = 0; i < len; i += n, wlen++) {
1236 if ((n = mblen(&s[i], MB_CUR_MAX)) == -1) {
1237 /* invalid multibyte character */
1238 error(291);
1239 break;
1240 }
1241 if (n == 0)
1242 n = 1;
1243 }
1244
1245 ws = xmalloc((wlen + 1) * sizeof (wchar_t));
1246
1247 /* convert from multibyte to wide char */
1248 (void)mbtowc(NULL, NULL, 0);
1249 for (i = 0, wi = 0; i < len; i += n, wi++) {
1250 if ((n = mbtowc(&ws[wi], &s[i], MB_CUR_MAX)) == -1)
1251 break;
1252 if (n == 0)
1253 n = 1;
1254 }
1255 ws[wi] = 0;
1256 free(s);
1257
1258 strg = xcalloc(1, sizeof (strg_t));
1259 strg->st_tspec = WCHAR;
1260 strg->st_len = wlen;
1261 strg->st_wcp = ws;
1262
1263 yylval.y_strg = strg;
1264 return (T_STRING);
1265 }
1266
1267 /*
1268 * As noted above the scanner does not create new symbol table entries
1269 * for symbols it cannot find in the symbol table. This is to avoid
1270 * putting undeclared symbols into the symbol table if a syntax error
1271 * occurs.
1272 *
1273 * getsym() is called as soon as it is probably ok to put the symbol to
1274 * the symbol table. This does not mean that it is not possible that
1275 * symbols are put to the symbol table which are than not completely
1276 * declared due to syntax errors. To avoid too many problems in this
1277 * case symbols get type int in getsym().
1278 *
1279 * XXX calls to getsym() should be delayed until decl1*() is called
1280 */
1281 sym_t *
1282 getsym(sb)
1283 sbuf_t *sb;
1284 {
1285 dinfo_t *di;
1286 char *s;
1287 sym_t *sym;
1288
1289 sym = sb->sb_sym;
1290
1291 /*
1292 * During member declaration it is possible that name() looked
1293 * for symbols of type FVFT, although it should have looked for
1294 * symbols of type FTAG. Same can happen for labels. Both cases
1295 * are compensated here.
1296 */
1297 if (symtyp == FMOS || symtyp == FLAB) {
1298 if (sym == NULL || sym->s_kind == FVFT)
1299 sym = search(sb);
1300 }
1301
1302 if (sym != NULL) {
1303 if (sym->s_kind != symtyp)
1304 lerror("storesym() 1");
1305 symtyp = FVFT;
1306 freesb(sb);
1307 return (sym);
1308 }
1309
1310 /* create a new symbol table entry */
1311
1312 /* labels must always be allocated at level 1 (outhermost block) */
1313 if (symtyp == FLAB) {
1314 sym = getlblk(1, sizeof (sym_t));
1315 s = getlblk(1, sb->sb_len + 1);
1316 (void)memcpy(s, sb->sb_name, sb->sb_len + 1);
1317 sym->s_name = s;
1318 sym->s_blklev = 1;
1319 di = dcs;
1320 while (di->d_nxt != NULL && di->d_nxt->d_nxt != NULL)
1321 di = di->d_nxt;
1322 if (di->d_ctx != AUTO)
1323 lerror("storesym() 2");
1324 } else {
1325 sym = getblk(sizeof (sym_t));
1326 sym->s_name = sb->sb_name;
1327 sym->s_blklev = blklev;
1328 di = dcs;
1329 }
1330
1331 UNIQUE_CURR_POS(sym->s_dpos);
1332 if ((sym->s_kind = symtyp) != FLAB)
1333 sym->s_type = gettyp(INT);
1334
1335 symtyp = FVFT;
1336
1337 if ((sym->s_link = symtab[sb->sb_hash]) != NULL)
1338 symtab[sb->sb_hash]->s_rlink = &sym->s_link;
1339 (symtab[sb->sb_hash] = sym)->s_rlink = &symtab[sb->sb_hash];
1340
1341 *di->d_ldlsym = sym;
1342 di->d_ldlsym = &sym->s_dlnxt;
1343
1344 freesb(sb);
1345 return (sym);
1346 }
1347
1348 /*
1349 * Remove a symbol forever from the symbol table. s_blklev
1350 * is set to -1 to avoid that the symbol will later be put
1351 * back to the symbol table.
1352 */
1353 void
1354 rmsym(sym)
1355 sym_t *sym;
1356 {
1357 if ((*sym->s_rlink = sym->s_link) != NULL)
1358 sym->s_link->s_rlink = sym->s_rlink;
1359 sym->s_blklev = -1;
1360 sym->s_link = NULL;
1361 }
1362
1363 /*
1364 * Remove a list of symbols declared at one level from the symbol
1365 * table.
1366 */
1367 void
1368 rmsyms(syms)
1369 sym_t *syms;
1370 {
1371 sym_t *sym;
1372
1373 for (sym = syms; sym != NULL; sym = sym->s_dlnxt) {
1374 if (sym->s_blklev != -1) {
1375 if ((*sym->s_rlink = sym->s_link) != NULL)
1376 sym->s_link->s_rlink = sym->s_rlink;
1377 sym->s_link = NULL;
1378 sym->s_rlink = NULL;
1379 }
1380 }
1381 }
1382
1383 /*
1384 * Put a symbol into the symbol table
1385 */
1386 void
1387 inssym(bl, sym)
1388 int bl;
1389 sym_t *sym;
1390 {
1391 int h;
1392
1393 h = hash(sym->s_name);
1394 if ((sym->s_link = symtab[h]) != NULL)
1395 symtab[h]->s_rlink = &sym->s_link;
1396 (symtab[h] = sym)->s_rlink = &symtab[h];
1397 sym->s_blklev = bl;
1398 if (sym->s_link != NULL && sym->s_blklev < sym->s_link->s_blklev)
1399 lerror("inssym()");
1400 }
1401
1402 /*
1403 * Called at level 0 after syntax errors
1404 * Removes all symbols which are not declared at level 0 from the
1405 * symbol table. Also frees all memory which is not associated with
1406 * level 0.
1407 */
1408 void
1409 cleanup()
1410 {
1411 sym_t *sym, *nsym;
1412 int i;
1413
1414 for (i = 0; i < HSHSIZ1; i++) {
1415 for (sym = symtab[i]; sym != NULL; sym = nsym) {
1416 nsym = sym->s_link;
1417 if (sym->s_blklev >= 1) {
1418 if ((*sym->s_rlink = nsym) != NULL)
1419 nsym->s_rlink = sym->s_rlink;
1420 }
1421 }
1422 }
1423
1424 for (i = mblklev; i > 0; i--)
1425 freelblk(i);
1426 }
1427
1428 /*
1429 * Create a new symbol with the name of an existing symbol.
1430 */
1431 sym_t *
1432 pushdown(sym)
1433 sym_t *sym;
1434 {
1435 int h;
1436 sym_t *nsym;
1437
1438 h = hash(sym->s_name);
1439 nsym = getblk(sizeof (sym_t));
1440 if (sym->s_blklev > blklev)
1441 lerror("pushdown()");
1442 nsym->s_name = sym->s_name;
1443 UNIQUE_CURR_POS(nsym->s_dpos);
1444 nsym->s_kind = sym->s_kind;
1445 nsym->s_blklev = blklev;
1446
1447 if ((nsym->s_link = symtab[h]) != NULL)
1448 symtab[h]->s_rlink = &nsym->s_link;
1449 (symtab[h] = nsym)->s_rlink = &symtab[h];
1450
1451 *dcs->d_ldlsym = nsym;
1452 dcs->d_ldlsym = &nsym->s_dlnxt;
1453
1454 return (nsym);
1455 }
1456
1457 /*
1458 * Free any dynamically allocated memory referenced by
1459 * the value stack or yylval.
1460 * The type of information in yylval is described by tok.
1461 */
1462 void
1463 freeyyv(sp, tok)
1464 void *sp;
1465 int tok;
1466 {
1467 if (tok == T_NAME || tok == T_TYPENAME) {
1468 sbuf_t *sb = *(sbuf_t **)sp;
1469 freesb(sb);
1470 } else if (tok == T_CON) {
1471 val_t *val = *(val_t **)sp;
1472 free(val);
1473 } else if (tok == T_STRING) {
1474 strg_t *strg = *(strg_t **)sp;
1475 if (strg->st_tspec == CHAR) {
1476 free(strg->st_cp);
1477 } else if (strg->st_tspec == WCHAR) {
1478 free(strg->st_wcp);
1479 } else {
1480 lerror("fryylv() 1");
1481 }
1482 free(strg);
1483 }
1484 }
1485