emit1.c revision 1.45 1 /* $NetBSD: emit1.c,v 1.45 2021/07/05 19:39:12 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved.
5 * Copyright (c) 1994, 1995 Jochen Pohl
6 * All Rights Reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by Jochen Pohl for
19 * The NetBSD Project.
20 * 4. The name of the author may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
38
39 #include <sys/cdefs.h>
40 #if defined(__RCSID) && !defined(lint)
41 __RCSID("$NetBSD: emit1.c,v 1.45 2021/07/05 19:39:12 rillig Exp $");
42 #endif
43
44 #include "lint1.h"
45
46 static void outtt(sym_t *, sym_t *);
47 static void outfstrg(strg_t *);
48
49 /*
50 * Write type into the output buffer.
51 * The type is written as a sequence of substrings, each of which describes a
52 * node of type type_t
53 * a node is encoded as follows:
54 * _Bool B
55 * _Complex float s X
56 * _Complex double X
57 * _Complex long double l X
58 * char C
59 * signed char s C
60 * unsigned char u C
61 * short S
62 * unsigned short u S
63 * int I
64 * unsigned int u I
65 * long L
66 * unsigned long u L
67 * long long Q
68 * unsigned long long u Q
69 * float s D
70 * double D
71 * long double l D
72 * void V
73 * * P
74 * [n] A n
75 * () F
76 * (void) F 0
77 * (n parameters) F n arg1 arg2 ... argn
78 * (n parameters, ...) F n arg1 arg2 ... argn-1 E
79 * enum tag e T tag_or_typename
80 * struct tag s T tag_or_typename
81 * union tag u T tag_or_typename
82 *
83 * tag_or_typename 0 (obsolete) no tag or type name
84 * 1 n tag tagged type
85 * 2 n typename only type name
86 * 3 line.file.uniq anonymous types
87 *
88 * spaces are only for better readability
89 * additionally it is possible to prepend the characters 'c' (for const)
90 * and 'v' (for volatile)
91 */
92 void
93 outtype(const type_t *tp)
94 {
95 int t, s, na;
96 sym_t *arg;
97 tspec_t ts;
98
99 while (tp != NULL) {
100 if ((ts = tp->t_tspec) == INT && tp->t_is_enum)
101 ts = ENUM;
102 switch (ts) {
103 case BOOL: t = 'B'; s = '\0'; break;
104 case CHAR: t = 'C'; s = '\0'; break;
105 case SCHAR: t = 'C'; s = 's'; break;
106 case UCHAR: t = 'C'; s = 'u'; break;
107 case SHORT: t = 'S'; s = '\0'; break;
108 case USHORT: t = 'S'; s = 'u'; break;
109 case INT: t = 'I'; s = '\0'; break;
110 case UINT: t = 'I'; s = 'u'; break;
111 case LONG: t = 'L'; s = '\0'; break;
112 case ULONG: t = 'L'; s = 'u'; break;
113 case QUAD: t = 'Q'; s = '\0'; break;
114 case UQUAD: t = 'Q'; s = 'u'; break;
115 case FLOAT: t = 'D'; s = 's'; break;
116 case DOUBLE: t = 'D'; s = '\0'; break;
117 case LDOUBLE: t = 'D'; s = 'l'; break;
118 case VOID: t = 'V'; s = '\0'; break;
119 case PTR: t = 'P'; s = '\0'; break;
120 case ARRAY: t = 'A'; s = '\0'; break;
121 case FUNC: t = 'F'; s = '\0'; break;
122 case ENUM: t = 'T'; s = 'e'; break;
123 case STRUCT: t = 'T'; s = 's'; break;
124 case UNION: t = 'T'; s = 'u'; break;
125 case FCOMPLEX: t = 'X'; s = 's'; break;
126 case DCOMPLEX: t = 'X'; s = '\0'; break;
127 case LCOMPLEX: t = 'X'; s = 'l'; break;
128 default:
129 lint_assert(/*CONSTCOND*/false);
130 }
131 if (tp->t_const)
132 outchar('c');
133 if (tp->t_volatile)
134 outchar('v');
135 if (s != '\0')
136 outchar(s);
137 outchar(t);
138 if (ts == ARRAY) {
139 outint(tp->t_dim);
140 } else if (ts == ENUM) {
141 outtt(tp->t_enum->en_tag, tp->t_enum->en_first_typedef);
142 } else if (ts == STRUCT || ts == UNION) {
143 outtt(tp->t_str->sou_tag, tp->t_str->sou_first_typedef);
144 } else if (ts == FUNC && tp->t_proto) {
145 na = 0;
146 for (arg = tp->t_args; arg != NULL; arg = arg->s_next)
147 na++;
148 if (tp->t_vararg)
149 na++;
150 outint(na);
151 for (arg = tp->t_args; arg != NULL; arg = arg->s_next)
152 outtype(arg->s_type);
153 if (tp->t_vararg)
154 outchar('E');
155 }
156 tp = tp->t_subt;
157 }
158 }
159
160 /*
161 * type to string
162 * used for debugging output
163 *
164 * it uses its own output buffer for conversion
165 */
166 const char *
167 ttos(const type_t *tp)
168 {
169 static ob_t tob;
170 ob_t tmp;
171
172 if (tob.o_buf == NULL) {
173 tob.o_len = 64;
174 tob.o_buf = tob.o_next = xmalloc(tob.o_len);
175 tob.o_end = tob.o_buf + tob.o_len;
176 }
177
178 tmp = ob;
179 ob = tob;
180 ob.o_next = ob.o_buf;
181 outtype(tp);
182 outchar('\0');
183 tob = ob;
184 ob = tmp;
185
186 return tob.o_buf;
187 }
188
189 /*
190 * write the name of a tag or typename
191 *
192 * if the tag is named, the name of the tag is written,
193 * otherwise, if a typename exists which refers to this tag,
194 * this typename is written
195 */
196 static void
197 outtt(sym_t *tag, sym_t *tdef)
198 {
199
200 /* 0 is no longer used. */
201
202 if (tag->s_name != unnamed) {
203 outint(1);
204 outname(tag->s_name);
205 } else if (tdef != NULL) {
206 outint(2);
207 outname(tdef->s_name);
208 } else {
209 outint(3);
210 outint(tag->s_def_pos.p_line);
211 outchar('.');
212 outint(get_filename_id(tag->s_def_pos.p_file));
213 outchar('.');
214 outint(tag->s_def_pos.p_uniq);
215 }
216 }
217
218 /*
219 * write information about a globally declared/defined symbol
220 * with storage class extern
221 *
222 * information about function definitions are written in outfdef(),
223 * not here
224 */
225 void
226 outsym(const sym_t *sym, scl_t sc, def_t def)
227 {
228
229 /*
230 * Static function declarations must also be written to the output
231 * file. Compatibility of function declarations (for both static
232 * and extern functions) must be checked in lint2. Lint1 can't do
233 * this, especially not if functions are declared at block level
234 * before their first declaration at level 0.
235 */
236 if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
237 return;
238
239 /* reset buffer */
240 outclr();
241
242 /*
243 * line number of .c source, 'd' for declaration, Id of current
244 * source (.c or .h), and line in current source.
245 */
246 outint(csrc_pos.p_line);
247 outchar('d');
248 outint(get_filename_id(sym->s_def_pos.p_file));
249 outchar('.');
250 outint(sym->s_def_pos.p_line);
251
252 /* flags */
253
254 switch (def) {
255 case DEF:
256 /* defined */
257 outchar('d');
258 break;
259 case TDEF:
260 /* tentative defined */
261 outchar('t');
262 break;
263 case DECL:
264 /* declared */
265 outchar('e');
266 break;
267 default:
268 lint_assert(/*CONSTCOND*/false);
269 }
270 if (llibflg && def != DECL) {
271 /*
272 * mark it as used so we get no warnings from lint2 about
273 * unused symbols in libraries.
274 */
275 outchar('u');
276 }
277
278 if (sc == STATIC)
279 outchar('s');
280
281 /* name of the symbol */
282 outname(sym->s_name);
283
284 /* renamed name of symbol, if necessary */
285 if (sym->s_rename != NULL) {
286 outchar('r');
287 outname(sym->s_rename);
288 }
289
290 /* type of the symbol */
291 outtype(sym->s_type);
292 }
293
294 /*
295 * write information about function definition
296 *
297 * this is also done for static functions so we are able to check if
298 * they are called with proper argument types
299 */
300 void
301 outfdef(const sym_t *fsym, const pos_t *posp, bool rval, bool osdef,
302 const sym_t *args)
303 {
304 int narg;
305 const sym_t *arg;
306
307 /* reset the buffer */
308 outclr();
309
310 /*
311 * line number of .c source, 'd' for declaration, Id of current
312 * source (.c or .h), and line in current source
313 *
314 * we are already at the end of the function. If we are in the
315 * .c source, posp->p_line is correct, otherwise csrc_pos.p_line
316 * (for functions defined in header files).
317 */
318 if (posp->p_file == csrc_pos.p_file) {
319 outint(posp->p_line);
320 } else {
321 outint(csrc_pos.p_line);
322 }
323 outchar('d');
324 outint(get_filename_id(posp->p_file));
325 outchar('.');
326 outint(posp->p_line);
327
328 /* flags */
329
330 /* both SCANFLIKE and PRINTFLIKE imply VARARGS */
331 if (printflike_argnum != -1) {
332 nvararg = printflike_argnum;
333 } else if (scanflike_argnum != -1) {
334 nvararg = scanflike_argnum;
335 }
336
337 if (nvararg != -1) {
338 outchar('v');
339 outint(nvararg);
340 }
341 if (scanflike_argnum != -1) {
342 outchar('S');
343 outint(scanflike_argnum);
344 }
345 if (printflike_argnum != -1) {
346 outchar('P');
347 outint(printflike_argnum);
348 }
349 nvararg = printflike_argnum = scanflike_argnum = -1;
350
351 outchar('d');
352
353 if (rval)
354 /* has return value */
355 outchar('r');
356
357 if (llibflg)
358 /*
359 * mark it as used so lint2 does not complain about
360 * unused symbols in libraries
361 */
362 outchar('u');
363
364 if (osdef)
365 /* old style function definition */
366 outchar('o');
367
368 if (fsym->s_inline)
369 outchar('i');
370
371 if (fsym->s_scl == STATIC)
372 outchar('s');
373
374 /* name of function */
375 outname(fsym->s_name);
376
377 /* renamed name of function, if necessary */
378 if (fsym->s_rename != NULL) {
379 outchar('r');
380 outname(fsym->s_rename);
381 }
382
383 /* argument types and return value */
384 if (osdef) {
385 narg = 0;
386 for (arg = args; arg != NULL; arg = arg->s_next)
387 narg++;
388 outchar('f');
389 outint(narg);
390 for (arg = args; arg != NULL; arg = arg->s_next)
391 outtype(arg->s_type);
392 outtype(fsym->s_type->t_subt);
393 } else {
394 outtype(fsym->s_type);
395 }
396 }
397
398 /*
399 * write out all information necessary for lint2 to check function
400 * calls
401 *
402 * rvused is set if the return value is used (assigned to a variable)
403 * rvdisc is set if the return value is not used and not ignored
404 * (casted to void)
405 */
406 void
407 outcall(const tnode_t *tn, bool rvused, bool rvdisc)
408 {
409 tnode_t *args, *arg;
410 int narg, n, i;
411 int64_t q;
412 tspec_t t;
413
414 /* reset buffer */
415 outclr();
416
417 /*
418 * line number of .c source, 'c' for function call, Id of current
419 * source (.c or .h), and line in current source
420 */
421 outint(csrc_pos.p_line);
422 outchar('c');
423 outint(get_filename_id(curr_pos.p_file));
424 outchar('.');
425 outint(curr_pos.p_line);
426
427 /*
428 * flags; 'u' and 'i' must be last to make sure a letter
429 * is between the numeric argument of a flag and the name of
430 * the function
431 */
432 narg = 0;
433 args = tn->tn_right;
434 for (arg = args; arg != NULL; arg = arg->tn_right)
435 narg++;
436 /* information about arguments */
437 for (n = 1; n <= narg; n++) {
438 /* the last argument is the top one in the tree */
439 for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
440 continue;
441 arg = arg->tn_left;
442 if (arg->tn_op == CON) {
443 if (is_integer(t = arg->tn_type->t_tspec)) {
444 /*
445 * XXX it would probably be better to
446 * explicitly test the sign
447 */
448 if ((q = arg->tn_val->v_quad) == 0) {
449 /* zero constant */
450 outchar('z');
451 } else if (msb(q, t, 0) == 0) {
452 /* positive if casted to signed */
453 outchar('p');
454 } else {
455 /* negative if casted to signed */
456 outchar('n');
457 }
458 outint(n);
459 }
460 } else if (arg->tn_op == ADDR &&
461 arg->tn_left->tn_op == STRING &&
462 arg->tn_left->tn_string->st_tspec == CHAR) {
463 /* constant string, write all format specifiers */
464 outchar('s');
465 outint(n);
466 outfstrg(arg->tn_left->tn_string);
467 }
468
469 }
470 /* return value discarded/used/ignored */
471 outchar(rvdisc ? 'd' : (rvused ? 'u' : 'i'));
472
473 /* name of the called function */
474 outname(tn->tn_left->tn_left->tn_sym->s_name);
475
476 /* types of arguments */
477 outchar('f');
478 outint(narg);
479 for (n = 1; n <= narg; n++) {
480 /* the last argument is the top one in the tree */
481 for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
482 continue;
483 outtype(arg->tn_left->tn_type);
484 }
485 /* expected type of return value */
486 outtype(tn->tn_type);
487 }
488
489 /*
490 * extracts potential format specifiers for printf() and scanf() and
491 * writes them, enclosed in "" and quoted if necessary, to the output buffer
492 */
493 static void
494 outfstrg(strg_t *strg)
495 {
496 unsigned char c, oc;
497 bool first;
498 u_char *cp;
499
500 lint_assert(strg->st_tspec == CHAR);
501
502 cp = strg->st_cp;
503
504 outchar('"');
505
506 c = *cp++;
507
508 while (c != '\0') {
509
510 if (c != '%') {
511 c = *cp++;
512 continue;
513 }
514
515 outqchar('%');
516 c = *cp++;
517
518 /* flags for printf and scanf and *-fieldwidth for printf */
519 while (c != '\0' && (c == '-' || c == '+' || c == ' ' ||
520 c == '#' || c == '0' || c == '*')) {
521 outqchar(c);
522 c = *cp++;
523 }
524
525 /* numeric field width */
526 while (c != '\0' && ch_isdigit((char)c)) {
527 outqchar(c);
528 c = *cp++;
529 }
530
531 /* precision for printf */
532 if (c == '.') {
533 outqchar(c);
534 if ((c = *cp++) == '*') {
535 outqchar(c);
536 c = *cp++;
537 } else {
538 while (c != '\0' && ch_isdigit((char)c)) {
539 outqchar(c);
540 c = *cp++;
541 }
542 }
543 }
544
545 /* h, l, L and q flags fpr printf and scanf */
546 if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
547 outqchar(c);
548 c = *cp++;
549 }
550
551 /*
552 * The last character. It is always written so we can detect
553 * invalid format specifiers.
554 */
555 if (c != '\0') {
556 outqchar(c);
557 oc = c;
558 c = *cp++;
559 /*
560 * handle [ for scanf. [-] means that a minus sign
561 * was found at an undefined position.
562 */
563 if (oc == '[') {
564 if (c == '^')
565 c = *cp++;
566 if (c == ']')
567 c = *cp++;
568 first = true;
569 while (c != '\0' && c != ']') {
570 if (c == '-') {
571 if (!first && *cp != ']')
572 outqchar(c);
573 }
574 first = false;
575 c = *cp++;
576 }
577 if (c == ']') {
578 outqchar(c);
579 c = *cp++;
580 }
581 }
582 }
583
584 }
585
586 outchar('"');
587 }
588
589 /*
590 * writes a record if sym was used
591 */
592 void
593 outusg(const sym_t *sym)
594 {
595 /* reset buffer */
596 outclr();
597
598 /*
599 * line number of .c source, 'u' for used, Id of current
600 * source (.c or .h), and line in current source
601 */
602 outint(csrc_pos.p_line);
603 outchar('u');
604 outint(get_filename_id(curr_pos.p_file));
605 outchar('.');
606 outint(curr_pos.p_line);
607
608 /* necessary to delimit both numbers */
609 outchar('x');
610
611 outname(sym->s_name);
612 }
613