Home | History | Annotate | Line # | Download | only in lint1
emit1.c revision 1.47
      1 /* $NetBSD: emit1.c,v 1.47 2021/07/31 19:52:44 rillig Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
      5  * Copyright (c) 1994, 1995 Jochen Pohl
      6  * All Rights Reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *      This product includes software developed by Jochen Pohl for
     19  *	The NetBSD Project.
     20  * 4. The name of the author may not be used to endorse or promote products
     21  *    derived from this software without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33  */
     34 
     35 #if HAVE_NBTOOL_CONFIG_H
     36 #include "nbtool_config.h"
     37 #endif
     38 
     39 #include <sys/cdefs.h>
     40 #if defined(__RCSID) && !defined(lint)
     41 __RCSID("$NetBSD: emit1.c,v 1.47 2021/07/31 19:52:44 rillig Exp $");
     42 #endif
     43 
     44 #include "lint1.h"
     45 
     46 static	void	outtt(sym_t *, sym_t *);
     47 static	void	outfstrg(strg_t *);
     48 
     49 /*
     50  * Write type into the output buffer.
     51  * The type is written as a sequence of substrings, each of which describes a
     52  * node of type type_t
     53  * a node is encoded as follows:
     54  *	_Bool			B
     55  *	_Complex float		s X
     56  *	_Complex double		X
     57  *	_Complex long double	l X
     58  *	char			C
     59  *	signed char		s C
     60  *	unsigned char		u C
     61  *	short			S
     62  *	unsigned short		u S
     63  *	int			I
     64  *	unsigned int		u I
     65  *	long			L
     66  *	unsigned long		u L
     67  *	long long		Q
     68  *	unsigned long long	u Q
     69  *	float			s D
     70  *	double			D
     71  *	long double		l D
     72  *	void			V
     73  *	*			P
     74  *	[n]			A n
     75  *	()			F
     76  *	(void)			F 0
     77  *	(n parameters)		F n arg1 arg2 ... argn
     78  *	(n parameters, ...)	F n arg1 arg2 ... argn-1 E
     79  *	enum tag		e T tag_or_typename
     80  *	struct tag		s T tag_or_typename
     81  *	union tag		u T tag_or_typename
     82  *
     83  *	tag_or_typename		0 (obsolete)		no tag or type name
     84  *				1 n tag			tagged type
     85  *				2 n typename		only type name
     86  *				3 line.file.uniq	anonymous types
     87  *
     88  * spaces are only for better readability
     89  * additionally it is possible to prepend the characters 'c' (for const)
     90  * and 'v' (for volatile)
     91  */
     92 void
     93 outtype(const type_t *tp)
     94 {
     95 	int	t, s, na;
     96 	sym_t	*arg;
     97 	tspec_t	ts;
     98 
     99 	while (tp != NULL) {
    100 		if ((ts = tp->t_tspec) == INT && tp->t_is_enum)
    101 			ts = ENUM;
    102 		/* Available letters: ----E-GH--K-MNO--R--U-W-YZ */
    103 		switch (ts) {
    104 		case BOOL:	t = 'B';	s = '\0';	break;
    105 		case CHAR:	t = 'C';	s = '\0';	break;
    106 		case SCHAR:	t = 'C';	s = 's';	break;
    107 		case UCHAR:	t = 'C';	s = 'u';	break;
    108 		case SHORT:	t = 'S';	s = '\0';	break;
    109 		case USHORT:	t = 'S';	s = 'u';	break;
    110 		case INT:	t = 'I';	s = '\0';	break;
    111 		case UINT:	t = 'I';	s = 'u';	break;
    112 		case LONG:	t = 'L';	s = '\0';	break;
    113 		case ULONG:	t = 'L';	s = 'u';	break;
    114 		case QUAD:	t = 'Q';	s = '\0';	break;
    115 		case UQUAD:	t = 'Q';	s = 'u';	break;
    116 #ifdef INT128_SIZE
    117 		case INT128:	t = 'J';	s = '\0';	break;
    118 		case UINT128:	t = 'J';	s = 'u';	break;
    119 #endif
    120 		case FLOAT:	t = 'D';	s = 's';	break;
    121 		case DOUBLE:	t = 'D';	s = '\0';	break;
    122 		case LDOUBLE:	t = 'D';	s = 'l';	break;
    123 		case VOID:	t = 'V';	s = '\0';	break;
    124 		case STRUCT:	t = 'T';	s = 's';	break;
    125 		case UNION:	t = 'T';	s = 'u';	break;
    126 		case ENUM:	t = 'T';	s = 'e';	break;
    127 		case PTR:	t = 'P';	s = '\0';	break;
    128 		case ARRAY:	t = 'A';	s = '\0';	break;
    129 		case FUNC:	t = 'F';	s = '\0';	break;
    130 		case FCOMPLEX:	t = 'X';	s = 's';	break;
    131 		case DCOMPLEX:	t = 'X';	s = '\0';	break;
    132 		case LCOMPLEX:	t = 'X';	s = 'l';	break;
    133 		default:
    134 			lint_assert(/*CONSTCOND*/false);
    135 		}
    136 		if (tp->t_const)
    137 			outchar('c');
    138 		if (tp->t_volatile)
    139 			outchar('v');
    140 		if (s != '\0')
    141 			outchar(s);
    142 		outchar(t);
    143 		if (ts == ARRAY) {
    144 			outint(tp->t_dim);
    145 		} else if (ts == ENUM) {
    146 			outtt(tp->t_enum->en_tag, tp->t_enum->en_first_typedef);
    147 		} else if (ts == STRUCT || ts == UNION) {
    148 			outtt(tp->t_str->sou_tag, tp->t_str->sou_first_typedef);
    149 		} else if (ts == FUNC && tp->t_proto) {
    150 			na = 0;
    151 			for (arg = tp->t_args; arg != NULL; arg = arg->s_next)
    152 				na++;
    153 			if (tp->t_vararg)
    154 				na++;
    155 			outint(na);
    156 			for (arg = tp->t_args; arg != NULL; arg = arg->s_next)
    157 				outtype(arg->s_type);
    158 			if (tp->t_vararg)
    159 				outchar('E');
    160 		}
    161 		tp = tp->t_subt;
    162 	}
    163 }
    164 
    165 /*
    166  * write the name of a tag or typename
    167  *
    168  * if the tag is named, the name of the tag is written,
    169  * otherwise, if a typename exists which refers to this tag,
    170  * this typename is written
    171  */
    172 static void
    173 outtt(sym_t *tag, sym_t *tdef)
    174 {
    175 
    176 	/* 0 is no longer used. */
    177 
    178 	if (tag->s_name != unnamed) {
    179 		outint(1);
    180 		outname(tag->s_name);
    181 	} else if (tdef != NULL) {
    182 		outint(2);
    183 		outname(tdef->s_name);
    184 	} else {
    185 		outint(3);
    186 		outint(tag->s_def_pos.p_line);
    187 		outchar('.');
    188 		outint(get_filename_id(tag->s_def_pos.p_file));
    189 		outchar('.');
    190 		outint(tag->s_def_pos.p_uniq);
    191 	}
    192 }
    193 
    194 /*
    195  * write information about a globally declared/defined symbol
    196  * with storage class extern
    197  *
    198  * information about function definitions are written in outfdef(),
    199  * not here
    200  */
    201 void
    202 outsym(const sym_t *sym, scl_t sc, def_t def)
    203 {
    204 
    205 	/*
    206 	 * Static function declarations must also be written to the output
    207 	 * file. Compatibility of function declarations (for both static
    208 	 * and extern functions) must be checked in lint2. Lint1 can't do
    209 	 * this, especially not if functions are declared at block level
    210 	 * before their first declaration at level 0.
    211 	 */
    212 	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
    213 		return;
    214 
    215 	/* reset buffer */
    216 	outclr();
    217 
    218 	/*
    219 	 * line number of .c source, 'd' for declaration, Id of current
    220 	 * source (.c or .h), and line in current source.
    221 	 */
    222 	outint(csrc_pos.p_line);
    223 	outchar('d');
    224 	outint(get_filename_id(sym->s_def_pos.p_file));
    225 	outchar('.');
    226 	outint(sym->s_def_pos.p_line);
    227 
    228 	/* flags */
    229 
    230 	switch (def) {
    231 	case DEF:
    232 		/* defined */
    233 		outchar('d');
    234 		break;
    235 	case TDEF:
    236 		/* tentative defined */
    237 		outchar('t');
    238 		break;
    239 	case DECL:
    240 		/* declared */
    241 		outchar('e');
    242 		break;
    243 	default:
    244 		lint_assert(/*CONSTCOND*/false);
    245 	}
    246 	if (llibflg && def != DECL) {
    247 		/*
    248 		 * mark it as used so we get no warnings from lint2 about
    249 		 * unused symbols in libraries.
    250 		 */
    251 		outchar('u');
    252 	}
    253 
    254 	if (sc == STATIC)
    255 		outchar('s');
    256 
    257 	/* name of the symbol */
    258 	outname(sym->s_name);
    259 
    260 	/* renamed name of symbol, if necessary */
    261 	if (sym->s_rename != NULL) {
    262 		outchar('r');
    263 		outname(sym->s_rename);
    264 	}
    265 
    266 	/* type of the symbol */
    267 	outtype(sym->s_type);
    268 }
    269 
    270 /*
    271  * write information about function definition
    272  *
    273  * this is also done for static functions so we are able to check if
    274  * they are called with proper argument types
    275  */
    276 void
    277 outfdef(const sym_t *fsym, const pos_t *posp, bool rval, bool osdef,
    278 	const sym_t *args)
    279 {
    280 	int narg;
    281 	const sym_t *arg;
    282 
    283 	/* reset the buffer */
    284 	outclr();
    285 
    286 	/*
    287 	 * line number of .c source, 'd' for declaration, Id of current
    288 	 * source (.c or .h), and line in current source
    289 	 *
    290 	 * we are already at the end of the function. If we are in the
    291 	 * .c source, posp->p_line is correct, otherwise csrc_pos.p_line
    292 	 * (for functions defined in header files).
    293 	 */
    294 	if (posp->p_file == csrc_pos.p_file) {
    295 		outint(posp->p_line);
    296 	} else {
    297 		outint(csrc_pos.p_line);
    298 	}
    299 	outchar('d');
    300 	outint(get_filename_id(posp->p_file));
    301 	outchar('.');
    302 	outint(posp->p_line);
    303 
    304 	/* flags */
    305 
    306 	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
    307 	if (printflike_argnum != -1) {
    308 		nvararg = printflike_argnum;
    309 	} else if (scanflike_argnum != -1) {
    310 		nvararg = scanflike_argnum;
    311 	}
    312 
    313 	if (nvararg != -1) {
    314 		outchar('v');
    315 		outint(nvararg);
    316 	}
    317 	if (scanflike_argnum != -1) {
    318 		outchar('S');
    319 		outint(scanflike_argnum);
    320 	}
    321 	if (printflike_argnum != -1) {
    322 		outchar('P');
    323 		outint(printflike_argnum);
    324 	}
    325 	nvararg = printflike_argnum = scanflike_argnum = -1;
    326 
    327 	outchar('d');
    328 
    329 	if (rval)
    330 		/* has return value */
    331 		outchar('r');
    332 
    333 	if (llibflg)
    334 		/*
    335 		 * mark it as used so lint2 does not complain about
    336 		 * unused symbols in libraries
    337 		 */
    338 		outchar('u');
    339 
    340 	if (osdef)
    341 		/* old style function definition */
    342 		outchar('o');
    343 
    344 	if (fsym->s_inline)
    345 		outchar('i');
    346 
    347 	if (fsym->s_scl == STATIC)
    348 		outchar('s');
    349 
    350 	/* name of function */
    351 	outname(fsym->s_name);
    352 
    353 	/* renamed name of function, if necessary */
    354 	if (fsym->s_rename != NULL) {
    355 		outchar('r');
    356 		outname(fsym->s_rename);
    357 	}
    358 
    359 	/* argument types and return value */
    360 	if (osdef) {
    361 		narg = 0;
    362 		for (arg = args; arg != NULL; arg = arg->s_next)
    363 			narg++;
    364 		outchar('f');
    365 		outint(narg);
    366 		for (arg = args; arg != NULL; arg = arg->s_next)
    367 			outtype(arg->s_type);
    368 		outtype(fsym->s_type->t_subt);
    369 	} else {
    370 		outtype(fsym->s_type);
    371 	}
    372 }
    373 
    374 /*
    375  * write out all information necessary for lint2 to check function
    376  * calls
    377  *
    378  * rvused is set if the return value is used (assigned to a variable)
    379  * rvdisc is set if the return value is not used and not ignored
    380  * (casted to void)
    381  */
    382 void
    383 outcall(const tnode_t *tn, bool rvused, bool rvdisc)
    384 {
    385 	tnode_t	*args, *arg;
    386 	int	narg, n, i;
    387 	int64_t	q;
    388 	tspec_t	t;
    389 
    390 	/* reset buffer */
    391 	outclr();
    392 
    393 	/*
    394 	 * line number of .c source, 'c' for function call, Id of current
    395 	 * source (.c or .h), and line in current source
    396 	 */
    397 	outint(csrc_pos.p_line);
    398 	outchar('c');
    399 	outint(get_filename_id(curr_pos.p_file));
    400 	outchar('.');
    401 	outint(curr_pos.p_line);
    402 
    403 	/*
    404 	 * flags; 'u' and 'i' must be last to make sure a letter
    405 	 * is between the numeric argument of a flag and the name of
    406 	 * the function
    407 	 */
    408 	narg = 0;
    409 	args = tn->tn_right;
    410 	for (arg = args; arg != NULL; arg = arg->tn_right)
    411 		narg++;
    412 	/* information about arguments */
    413 	for (n = 1; n <= narg; n++) {
    414 		/* the last argument is the top one in the tree */
    415 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
    416 			continue;
    417 		arg = arg->tn_left;
    418 		if (arg->tn_op == CON) {
    419 			if (is_integer(t = arg->tn_type->t_tspec)) {
    420 				/*
    421 				 * XXX it would probably be better to
    422 				 * explicitly test the sign
    423 				 */
    424 				if ((q = arg->tn_val->v_quad) == 0) {
    425 					/* zero constant */
    426 					outchar('z');
    427 				} else if (msb(q, t, 0) == 0) {
    428 					/* positive if casted to signed */
    429 					outchar('p');
    430 				} else {
    431 					/* negative if casted to signed */
    432 					outchar('n');
    433 				}
    434 				outint(n);
    435 			}
    436 		} else if (arg->tn_op == ADDR &&
    437 			   arg->tn_left->tn_op == STRING &&
    438 			   arg->tn_left->tn_string->st_tspec == CHAR) {
    439 			/* constant string, write all format specifiers */
    440 			outchar('s');
    441 			outint(n);
    442 			outfstrg(arg->tn_left->tn_string);
    443 		}
    444 
    445 	}
    446 	/* return value discarded/used/ignored */
    447 	outchar(rvdisc ? 'd' : (rvused ? 'u' : 'i'));
    448 
    449 	/* name of the called function */
    450 	outname(tn->tn_left->tn_left->tn_sym->s_name);
    451 
    452 	/* types of arguments */
    453 	outchar('f');
    454 	outint(narg);
    455 	for (n = 1; n <= narg; n++) {
    456 		/* the last argument is the top one in the tree */
    457 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
    458 			continue;
    459 		outtype(arg->tn_left->tn_type);
    460 	}
    461 	/* expected type of return value */
    462 	outtype(tn->tn_type);
    463 }
    464 
    465 /*
    466  * extracts potential format specifiers for printf() and scanf() and
    467  * writes them, enclosed in "" and quoted if necessary, to the output buffer
    468  */
    469 static void
    470 outfstrg(strg_t *strg)
    471 {
    472 	unsigned char c, oc;
    473 	bool	first;
    474 	u_char	*cp;
    475 
    476 	lint_assert(strg->st_tspec == CHAR);
    477 
    478 	cp = strg->st_cp;
    479 
    480 	outchar('"');
    481 
    482 	c = *cp++;
    483 
    484 	while (c != '\0') {
    485 
    486 		if (c != '%') {
    487 			c = *cp++;
    488 			continue;
    489 		}
    490 
    491 		outqchar('%');
    492 		c = *cp++;
    493 
    494 		/* flags for printf and scanf and *-fieldwidth for printf */
    495 		while (c != '\0' && (c == '-' || c == '+' || c == ' ' ||
    496 				     c == '#' || c == '0' || c == '*')) {
    497 			outqchar(c);
    498 			c = *cp++;
    499 		}
    500 
    501 		/* numeric field width */
    502 		while (c != '\0' && ch_isdigit((char)c)) {
    503 			outqchar(c);
    504 			c = *cp++;
    505 		}
    506 
    507 		/* precision for printf */
    508 		if (c == '.') {
    509 			outqchar(c);
    510 			if ((c = *cp++) == '*') {
    511 				outqchar(c);
    512 				c = *cp++;
    513 			} else {
    514 				while (c != '\0' && ch_isdigit((char)c)) {
    515 					outqchar(c);
    516 					c = *cp++;
    517 				}
    518 			}
    519 		}
    520 
    521 		/* h, l, L and q flags fpr printf and scanf */
    522 		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
    523 			outqchar(c);
    524 			c = *cp++;
    525 		}
    526 
    527 		/*
    528 		 * The last character. It is always written so we can detect
    529 		 * invalid format specifiers.
    530 		 */
    531 		if (c != '\0') {
    532 			outqchar(c);
    533 			oc = c;
    534 			c = *cp++;
    535 			/*
    536 			 * handle [ for scanf. [-] means that a minus sign
    537 			 * was found at an undefined position.
    538 			 */
    539 			if (oc == '[') {
    540 				if (c == '^')
    541 					c = *cp++;
    542 				if (c == ']')
    543 					c = *cp++;
    544 				first = true;
    545 				while (c != '\0' && c != ']') {
    546 					if (c == '-') {
    547 						if (!first && *cp != ']')
    548 							outqchar(c);
    549 					}
    550 					first = false;
    551 					c = *cp++;
    552 				}
    553 				if (c == ']') {
    554 					outqchar(c);
    555 					c = *cp++;
    556 				}
    557 			}
    558 		}
    559 
    560 	}
    561 
    562 	outchar('"');
    563 }
    564 
    565 /*
    566  * writes a record if sym was used
    567  */
    568 void
    569 outusg(const sym_t *sym)
    570 {
    571 	/* reset buffer */
    572 	outclr();
    573 
    574 	/*
    575 	 * line number of .c source, 'u' for used, Id of current
    576 	 * source (.c or .h), and line in current source
    577 	 */
    578 	outint(csrc_pos.p_line);
    579 	outchar('u');
    580 	outint(get_filename_id(curr_pos.p_file));
    581 	outchar('.');
    582 	outint(curr_pos.p_line);
    583 
    584 	/* necessary to delimit both numbers */
    585 	outchar('x');
    586 
    587 	outname(sym->s_name);
    588 }
    589