Home | History | Annotate | Line # | Download | only in lint1
emit1.c revision 1.73
      1 /* $NetBSD: emit1.c,v 1.73 2023/08/02 18:51:25 rillig Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
      5  * Copyright (c) 1994, 1995 Jochen Pohl
      6  * All Rights Reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by Jochen Pohl for
     19  *	The NetBSD Project.
     20  * 4. The name of the author may not be used to endorse or promote products
     21  *    derived from this software without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33  */
     34 
     35 #if HAVE_NBTOOL_CONFIG_H
     36 #include "nbtool_config.h"
     37 #endif
     38 
     39 #include <sys/cdefs.h>
     40 #if defined(__RCSID)
     41 __RCSID("$NetBSD: emit1.c,v 1.73 2023/08/02 18:51:25 rillig Exp $");
     42 #endif
     43 
     44 #include "lint1.h"
     45 
     46 static	void	outtt(sym_t *, sym_t *);
     47 static	void	outfstrg(strg_t *);
     48 
     49 /*
     50  * Write type into the output buffer.
     51  * The type is written as a sequence of substrings, each of which describes a
     52  * node of type type_t
     53  * a node is encoded as follows:
     54  *	_Bool			B
     55  *	_Complex float		s X
     56  *	_Complex double		X
     57  *	_Complex long double	l X
     58  *	char			C
     59  *	signed char		s C
     60  *	unsigned char		u C
     61  *	short			S
     62  *	unsigned short		u S
     63  *	int			I
     64  *	unsigned int		u I
     65  *	long			L
     66  *	unsigned long		u L
     67  *	long long		Q
     68  *	unsigned long long	u Q
     69  *	float			s D
     70  *	double			D
     71  *	long double		l D
     72  *	void			V
     73  *	*			P
     74  *	[n]			A n
     75  *	()			F
     76  *	(void)			F 0
     77  *	(n parameters)		F n arg1 arg2 ... argn
     78  *	(n parameters, ...)	F n arg1 arg2 ... argn E
     79  *	enum tag		e T tag_or_typename
     80  *	struct tag		s T tag_or_typename
     81  *	union tag		u T tag_or_typename
     82  *
     83  *	tag_or_typename		0 (obsolete)		no tag or type name
     84  *				1 n tag			tagged type
     85  *				2 n typename		only typedef name
     86  *				3 line.file.uniq	anonymous types
     87  *
     88  * spaces are only for better readability
     89  * additionally it is possible to prepend the characters 'c' (for const)
     90  * and 'v' (for volatile)
     91  */
     92 void
     93 outtype(const type_t *tp)
     94 {
     95 	/* Available letters: ------GH--K-MNO--R--U-W-YZ */
     96 #ifdef INT128_SIZE
     97 	static const char tt[NTSPEC] = "???BCCCSSIILLQQJJDDD?XXXVTTTPAF";
     98 	static const char ss[NTSPEC] = "???  su u u u u us l?s l sue   ";
     99 #else
    100 	static const char tt[NTSPEC] = "???BCCCSSIILLQQDDD?XXXVTTTPAF";
    101 	static const char ss[NTSPEC] = "???  su u u u us l?s l sue   ";
    102 #endif
    103 	int na;
    104 	tspec_t ts;
    105 
    106 	while (tp != NULL) {
    107 		if ((ts = tp->t_tspec) == INT && tp->t_is_enum)
    108 			ts = ENUM;
    109 		lint_assert(tt[ts] != '?' && ss[ts] != '?');
    110 		if (tp->t_const)
    111 			outchar('c');
    112 		if (tp->t_volatile)
    113 			outchar('v');
    114 		if (ss[ts] != ' ')
    115 			outchar(ss[ts]);
    116 		outchar(tt[ts]);
    117 
    118 		if (ts == ARRAY) {
    119 			outint(tp->t_dim);
    120 		} else if (ts == ENUM) {
    121 			outtt(tp->t_enum->en_tag, tp->t_enum->en_first_typedef);
    122 		} else if (is_struct_or_union(ts)) {
    123 			outtt(tp->t_sou->sou_tag, tp->t_sou->sou_first_typedef);
    124 		} else if (ts == FUNC && tp->t_proto) {
    125 			na = 0;
    126 			for (const sym_t *param = tp->t_params;
    127 			     param != NULL; param = param->s_next)
    128 				na++;
    129 			if (tp->t_vararg)
    130 				na++;
    131 			outint(na);
    132 			for (const sym_t *param = tp->t_params;
    133 			     param != NULL; param = param->s_next)
    134 				outtype(param->s_type);
    135 			if (tp->t_vararg)
    136 				outchar('E');
    137 		}
    138 		tp = tp->t_subt;
    139 	}
    140 }
    141 
    142 /*
    143  * write the name of a tag or typename
    144  *
    145  * if the tag is named, the name of the tag is written,
    146  * otherwise, if a typename exists which refers to this tag,
    147  * this typename is written
    148  */
    149 static void
    150 outtt(sym_t *tag, sym_t *tdef)
    151 {
    152 
    153 	/* 0 is no longer used. */
    154 
    155 	if (tag->s_name != unnamed) {
    156 		outint(1);
    157 		outname(tag->s_name);
    158 	} else if (tdef != NULL) {
    159 		outint(2);
    160 		outname(tdef->s_name);
    161 	} else {
    162 		outint(3);
    163 		outint(tag->s_def_pos.p_line);
    164 		outchar('.');
    165 		outint(get_filename_id(tag->s_def_pos.p_file));
    166 		outchar('.');
    167 		outint(tag->s_def_pos.p_uniq);
    168 	}
    169 }
    170 
    171 /*
    172  * write information about a globally declared/defined symbol
    173  * with storage class extern
    174  *
    175  * information about function definitions are written in outfdef(),
    176  * not here
    177  */
    178 void
    179 outsym(const sym_t *sym, scl_t sc, def_t def)
    180 {
    181 
    182 	/*
    183 	 * Static function declarations must also be written to the output
    184 	 * file. Compatibility of function declarations (for both static
    185 	 * and extern functions) must be checked in lint2. Lint1 can't do
    186 	 * this, especially not if functions are declared at block level
    187 	 * before their first declaration at level 0.
    188 	 */
    189 	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
    190 		return;
    191 	if (ch_isdigit(sym->s_name[0]))	/* 00000000_tmp */
    192 		return;
    193 
    194 	/* reset buffer */
    195 	outclr();
    196 
    197 	outint(csrc_pos.p_line);
    198 	outchar('d');		/* declaration */
    199 	outint(get_filename_id(sym->s_def_pos.p_file));
    200 	outchar('.');
    201 	outint(sym->s_def_pos.p_line);
    202 
    203 	/* flags */
    204 
    205 	if (def == DEF)
    206 		outchar('d');	/* defined */
    207 	else if (def == TDEF)
    208 		outchar('t');	/* tentative defined */
    209 	else {
    210 		lint_assert(def == DECL);
    211 		outchar('e');	/* declared */
    212 	}
    213 
    214 	if (llibflg && def != DECL) {
    215 		/*
    216 		 * mark it as used so lint2 does not complain about
    217 		 * unused symbols in libraries
    218 		 */
    219 		outchar('u');
    220 	}
    221 
    222 	if (sc == STATIC)
    223 		outchar('s');
    224 
    225 	/* name of the symbol */
    226 	outname(sym->s_name);
    227 
    228 	/* renamed name of symbol, if necessary */
    229 	if (sym->s_rename != NULL) {
    230 		outchar('r');
    231 		outname(sym->s_rename);
    232 	}
    233 
    234 	/* type of the symbol */
    235 	outtype(sym->s_type);
    236 }
    237 
    238 /*
    239  * Write information about a function definition. This is also done for static
    240  * functions, to later check if they are called with proper argument types.
    241  */
    242 void
    243 outfdef(const sym_t *fsym, const pos_t *posp, bool rval, bool osdef,
    244 	const sym_t *args)
    245 {
    246 	int narg;
    247 	const sym_t *arg;
    248 
    249 	/* reset the buffer */
    250 	outclr();
    251 
    252 	if (posp->p_file == csrc_pos.p_file) {
    253 		outint(posp->p_line);
    254 	} else {
    255 		outint(csrc_pos.p_line);
    256 	}
    257 	outchar('d');		/* declaration */
    258 	outint(get_filename_id(posp->p_file));
    259 	outchar('.');
    260 	outint(posp->p_line);
    261 
    262 	/* flags */
    263 
    264 	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
    265 	if (printflike_argnum != -1) {
    266 		nvararg = printflike_argnum;
    267 	} else if (scanflike_argnum != -1) {
    268 		nvararg = scanflike_argnum;
    269 	}
    270 
    271 	if (nvararg != -1) {
    272 		outchar('v');
    273 		outint(nvararg);
    274 	}
    275 	if (scanflike_argnum != -1) {
    276 		outchar('S');
    277 		outint(scanflike_argnum);
    278 	}
    279 	if (printflike_argnum != -1) {
    280 		outchar('P');
    281 		outint(printflike_argnum);
    282 	}
    283 	nvararg = printflike_argnum = scanflike_argnum = -1;
    284 
    285 	outchar('d');
    286 
    287 	if (rval)
    288 		outchar('r');	/* has return value */
    289 
    290 	if (llibflg)
    291 		/*
    292 		 * mark it as used so lint2 does not complain about
    293 		 * unused symbols in libraries
    294 		 */
    295 		outchar('u');
    296 
    297 	if (osdef)
    298 		outchar('o');	/* old-style function definition */
    299 
    300 	if (fsym->s_inline)
    301 		outchar('i');
    302 
    303 	if (fsym->s_scl == STATIC)
    304 		outchar('s');
    305 
    306 	/* name of function */
    307 	outname(fsym->s_name);
    308 
    309 	/* renamed name of function, if necessary */
    310 	if (fsym->s_rename != NULL) {
    311 		outchar('r');
    312 		outname(fsym->s_rename);
    313 	}
    314 
    315 	/* parameter types and return value */
    316 	if (osdef) {
    317 		narg = 0;
    318 		for (arg = args; arg != NULL; arg = arg->s_next)
    319 			narg++;
    320 		outchar('f');
    321 		outint(narg);
    322 		for (arg = args; arg != NULL; arg = arg->s_next)
    323 			outtype(arg->s_type);
    324 		outtype(fsym->s_type->t_subt);
    325 	} else {
    326 		outtype(fsym->s_type);
    327 	}
    328 }
    329 
    330 /*
    331  * write out all information necessary for lint2 to check function
    332  * calls
    333  *
    334  * retval_used is set if the return value is used (assigned to a variable)
    335  * retval_discarded is set if the return value is neither used nor ignored
    336  * (that is, cast to void)
    337  */
    338 void
    339 outcall(const tnode_t *tn, bool retval_used, bool retval_discarded)
    340 {
    341 	tnode_t *args, *arg;
    342 	int narg, n, i;
    343 	tspec_t t;
    344 
    345 	/* reset buffer */
    346 	outclr();
    347 
    348 	outint(csrc_pos.p_line);
    349 	outchar('c');		/* function call */
    350 	outint(get_filename_id(curr_pos.p_file));
    351 	outchar('.');
    352 	outint(curr_pos.p_line);
    353 
    354 	/*
    355 	 * flags; 'u' and 'i' must be last to make sure a letter
    356 	 * is between the numeric argument of a flag and the name of
    357 	 * the function
    358 	 */
    359 	narg = 0;
    360 	args = tn->tn_right;
    361 	for (arg = args; arg != NULL; arg = arg->tn_right)
    362 		narg++;
    363 	/* information about arguments */
    364 	for (n = 1; n <= narg; n++) {
    365 		/* the last argument is the top one in the tree */
    366 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
    367 			continue;
    368 		arg = arg->tn_left;
    369 		if (arg->tn_op == CON) {
    370 			if (is_integer(t = arg->tn_type->t_tspec)) {
    371 				/*
    372 				 * XXX it would probably be better to
    373 				 * explicitly test the sign
    374 				 */
    375 				int64_t si = arg->tn_val.u.integer;
    376 				if (si == 0) {
    377 					/* zero constant */
    378 					outchar('z');
    379 				} else if (!msb(si, t)) {
    380 					/* positive if cast to signed */
    381 					outchar('p');
    382 				} else {
    383 					/* negative if cast to signed */
    384 					outchar('n');
    385 				}
    386 				outint(n);
    387 			}
    388 		} else if (arg->tn_op == ADDR &&
    389 			   arg->tn_left->tn_op == STRING &&
    390 			   arg->tn_left->tn_string->st_char) {
    391 			/* constant string, write all format specifiers */
    392 			outchar('s');
    393 			outint(n);
    394 			outfstrg(arg->tn_left->tn_string);
    395 		}
    396 
    397 	}
    398 	/* return value discarded/used/ignored */
    399 	outchar((char)(retval_discarded ? 'd' : (retval_used ? 'u' : 'i')));
    400 
    401 	/* name of the called function */
    402 	outname(tn->tn_left->tn_left->tn_sym->s_name);
    403 
    404 	/* types of arguments */
    405 	outchar('f');
    406 	outint(narg);
    407 	for (n = 1; n <= narg; n++) {
    408 		/* the last argument is the top one in the tree */
    409 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
    410 			continue;
    411 		outtype(arg->tn_left->tn_type);
    412 	}
    413 	/* expected type of return value */
    414 	outtype(tn->tn_type);
    415 }
    416 
/*
 * Write a single character to the output buffer, quoting it if necessary.
 *
 * Printable characters other than backslash, double quote and single quote
 * are written as they are.  Everything else is written as a backslash
 * followed either by the usual escape letter or, if there is none, by three
 * octal digits.
 */
static void
outqchar(char c)
{
	char esc;

	if (ch_isprint(c) && c != '\\' && c != '"' && c != '\'') {
		outchar(c);
		return;
	}

	/* determine what follows the backslash */
	switch (c) {
	case '\\':
	case '"':
	case '\'':
		esc = c;
		break;
	case '\b':
		esc = 'b';
		break;
	case '\t':
		esc = 't';
		break;
	case '\n':
		esc = 'n';
		break;
	case '\f':
		esc = 'f';
		break;
	case '\r':
		esc = 'r';
		break;
	case '\v':
		esc = 'v';
		break;
	case '\a':
		esc = 'a';
		break;
	default: {
		/* no escape letter available, write three octal digits */
		unsigned char uc = (unsigned char)c;

		outchar('\\');
		outchar((char)(((uc >> 6) & 07) + '0'));
		outchar((char)(((uc >> 3) & 07) + '0'));
		outchar((char)((uc & 07) + '0'));
		return;
	}
	}

	outchar('\\');
	outchar(esc);
}
    466 
    467 /*
    468  * extracts potential format specifiers for printf() and scanf() and
    469  * writes them, enclosed in "" and quoted if necessary, to the output buffer
    470  */
    471 static void
    472 outfstrg(strg_t *strg)
    473 {
    474 	char c, oc;
    475 	bool first;
    476 	const char *cp;
    477 
    478 	lint_assert(strg->st_char);
    479 	cp = strg->st_mem;
    480 
    481 	outchar('"');
    482 
    483 	c = *cp++;
    484 
    485 	while (c != '\0') {
    486 
    487 		if (c != '%') {
    488 			c = *cp++;
    489 			continue;
    490 		}
    491 
    492 		outchar('%');
    493 		c = *cp++;
    494 
    495 		/* flags for printf and scanf and *-fieldwidth for printf */
    496 		while (c == '-' || c == '+' || c == ' ' ||
    497 		       c == '#' || c == '0' || c == '*') {
    498 			outchar(c);
    499 			c = *cp++;
    500 		}
    501 
    502 		/* numeric field width */
    503 		while (ch_isdigit(c)) {
    504 			outchar(c);
    505 			c = *cp++;
    506 		}
    507 
    508 		/* precision for printf */
    509 		if (c == '.') {
    510 			outchar(c);
    511 			c = *cp++;
    512 			if (c == '*') {
    513 				outchar(c);
    514 				c = *cp++;
    515 			} else {
    516 				while (ch_isdigit(c)) {
    517 					outchar(c);
    518 					c = *cp++;
    519 				}
    520 			}
    521 		}
    522 
    523 		/* h, l, L and q flags for printf and scanf */
    524 		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
    525 			outchar(c);
    526 			c = *cp++;
    527 		}
    528 
    529 		/*
    530 		 * The last character. It is always written, so we can detect
    531 		 * invalid format specifiers.
    532 		 */
    533 		if (c != '\0') {
    534 			outqchar(c);
    535 			oc = c;
    536 			c = *cp++;
    537 			/*
    538 			 * handle [ for scanf. [-] means that a minus sign
    539 			 * was found at an undefined position.
    540 			 */
    541 			if (oc == '[') {
    542 				if (c == '^')
    543 					c = *cp++;
    544 				if (c == ']')
    545 					c = *cp++;
    546 				first = true;
    547 				while (c != '\0' && c != ']') {
    548 					if (c == '-') {
    549 						if (!first && *cp != ']')
    550 							outchar(c);
    551 					}
    552 					first = false;
    553 					c = *cp++;
    554 				}
    555 				if (c == ']') {
    556 					outchar(c);
    557 					c = *cp++;
    558 				}
    559 			}
    560 		}
    561 
    562 	}
    563 
    564 	outchar('"');
    565 }
    566 
    567 /*
    568  * writes a record if sym was used
    569  */
    570 void
    571 outusg(const sym_t *sym)
    572 {
    573 	if (ch_isdigit(sym->s_name[0]))	/* 00000000_tmp, from mktempsym */
    574 		return;
    575 
    576 	/* reset buffer */
    577 	outclr();
    578 
    579 	outint(csrc_pos.p_line);
    580 	outchar('u');		/* used */
    581 	outint(get_filename_id(curr_pos.p_file));
    582 	outchar('.');
    583 	outint(curr_pos.p_line);
    584 
    585 	/* necessary to delimit both numbers */
    586 	outchar('x');
    587 
    588 	outname(sym->s_name);
    589 }
    590