Home | History | Annotate | Line # | Download | only in ctags
      1 /*	$NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1987, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #if HAVE_NBTOOL_CONFIG_H
     33 #include "nbtool_config.h"
     34 #endif
     35 
     36 #include <sys/cdefs.h>
     37 #if defined(__RCSID) && !defined(lint)
     38 #if 0
     39 static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
     40 #else
     41 __RCSID("$NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $");
     42 #endif
     43 #endif /* not lint */
     44 
     45 #include <limits.h>
     46 #include <stddef.h>
     47 #include <stdio.h>
     48 #include <string.h>
     49 
     50 #include "ctags.h"
     51 
     52 static int	func_entry(void);
     53 static void	hash_entry(void);
     54 static void	skip_string(int);
     55 static int	str_entry(int);
     56 
     57 /*
     58  * c_entries --
     59  *	read .c and .h files and call appropriate routines
     60  */
     61 void
     62 c_entries(void)
     63 {
     64 	int	c;			/* current character */
     65 	int	level;			/* brace level */
     66 	int	token;			/* if reading a token */
     67 	int	t_def;			/* if reading a typedef */
     68 	int	t_level;		/* typedef's brace level */
     69 	char	*sp;			/* buffer pointer */
     70 	char	tok[MAXTOKEN];		/* token buffer */
     71 
     72 	lineftell = ftell(inf);
     73 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
     74 	while (GETC(!=, EOF)) {
     75 		switch (c) {
     76 		/*
     77 		 * Here's where it DOESN'T handle: {
     78 		 *	foo(a)
     79 		 *	{
     80 		 *	#ifdef notdef
     81 		 *		}
     82 		 *	#endif
     83 		 *		if (a)
     84 		 *			puts("hello, world");
     85 		 *	}
     86 		 */
     87 		case '{':
     88 			++level;
     89 			goto endtok;
     90 		case '}':
     91 			/*
     92 			 * if level goes below zero, try and fix
     93 			 * it, even though we've already messed up
     94 			 */
     95 			if (--level < 0)
     96 				level = 0;
     97 			goto endtok;
     98 
     99 		case '\n':
    100 			SETLINE;
    101 			/*
    102 			 * the above 3 cases are similar in that they
    103 			 * are special characters that also end tokens.
    104 			 */
    105 	endtok:			if (sp > tok) {
    106 				*sp = EOS;
    107 				token = YES;
    108 				sp = tok;
    109 			}
    110 			else
    111 				token = NO;
    112 			continue;
    113 
    114 		/*
    115 		 * We ignore quoted strings and character constants
    116 		 * completely.
    117 		 */
    118 		case '"':
    119 		case '\'':
    120 			(void)skip_string(c);
    121 			break;
    122 
    123 		/*
    124 		 * comments can be fun; note the state is unchanged after
    125 		 * return, in case we found:
    126 		 *	"foo() XX comment XX { int bar; }"
    127 		 */
    128 		case '/':
    129 			if (GETC(==, '*')) {
    130 				skip_comment(c);
    131 				continue;
    132 			} else if (c == '/') {
    133 				skip_comment(c);
    134 				continue;
    135 			}
    136 			(void)ungetc(c, inf);
    137 			c = '/';
    138 			goto storec;
    139 
    140 		/* hash marks flag #define's. */
    141 		case '#':
    142 			if (sp == tok) {
    143 				hash_entry();
    144 				break;
    145 			}
    146 			goto storec;
    147 
    148 		/*
    149 		 * if we have a current token, parenthesis on
    150 		 * level zero indicates a function.
    151 		 */
    152 		case '(':
    153 			do c = getc(inf);
    154 			while (c != EOF && iswhite(c));
    155 			if (c == '*')
    156 				break;
    157 			if (c != EOF)
    158 				ungetc(c, inf);
    159 			if (!level && token) {
    160 				int	curline;
    161 
    162 				if (sp != tok)
    163 					*sp = EOS;
    164 				/*
    165 				 * grab the line immediately, we may
    166 				 * already be wrong, for example,
    167 				 *	foo\n
    168 				 *	(arg1,
    169 				 */
    170 				get_line();
    171 				curline = lineno;
    172 				if (func_entry()) {
    173 					++level;
    174 					pfnote(tok, curline);
    175 				}
    176 				break;
    177 			}
    178 			goto storec;
    179 
    180 		/*
    181 		 * semi-colons indicate the end of a typedef; if we find a
    182 		 * typedef we search for the next semi-colon of the same
    183 		 * level as the typedef.  Ignoring "structs", they are
    184 		 * tricky, since you can find:
    185 		 *
    186 		 *	"typedef long time_t;"
    187 		 *	"typedef unsigned int u_int;"
    188 		 *	"typedef unsigned int u_int [10];"
    189 		 *
    190 		 * If looking at a typedef, we save a copy of the last token
    191 		 * found.  Then, when we find the ';' we take the current
    192 		 * token if it starts with a valid token name, else we take
    193 		 * the one we saved.  There's probably some reasonable
    194 		 * alternative to this...
    195 		 */
    196 		case ';':
    197 			if (t_def && level == t_level) {
    198 				t_def = NO;
    199 				get_line();
    200 				if (sp != tok)
    201 					*sp = EOS;
    202 				pfnote(tok, lineno);
    203 				break;
    204 			}
    205 			goto storec;
    206 
    207 		/*
    208 		 * store characters until one that can't be part of a token
    209 		 * comes along; check the current token against certain
    210 		 * reserved words.
    211 		 */
    212 		default:
    213 	storec:		if (c == EOF)
    214 				break;
    215 			if (!intoken(c)) {
    216 				if (sp == tok)
    217 					break;
    218 				*sp = EOS;
    219 				if (tflag) {
    220 					/* no typedefs inside typedefs */
    221 					if (!t_def &&
    222 						   !memcmp(tok, "typedef",8)) {
    223 						t_def = YES;
    224 						t_level = level;
    225 						break;
    226 					}
    227 					/* catch "typedef struct" */
    228 					if ((!t_def || t_level <= level)
    229 					    && (!memcmp(tok, "struct", 7)
    230 					    || !memcmp(tok, "union", 6)
    231 					    || !memcmp(tok, "enum", 5))) {
    232 						/*
    233 						 * get line immediately;
    234 						 * may change before '{'
    235 						 */
    236 						get_line();
    237 						if (str_entry(c))
    238 							++level;
    239 						break;
    240 						/* } */
    241 					}
    242 				}
    243 				sp = tok;
    244 			}
    245 			else if (sp != tok || begtoken(c)) {
    246 				if (sp < tok + sizeof tok)
    247 					*sp++ = c;
    248 				token = YES;
    249 			}
    250 			continue;
    251 		}
    252 
    253 		sp = tok;
    254 		token = NO;
    255 	}
    256 }
    257 
    258 /*
    259  * func_entry --
    260  *	handle a function reference
    261  */
    262 static int
    263 func_entry(void)
    264 {
    265 	int	c;			/* current character */
    266 	int	level = 0;		/* for matching '()' */
    267 	static char attribute[] = "__attribute__";
    268 	char	maybe_attribute[sizeof attribute + 1],
    269 		*anext;
    270 
    271 	/*
    272 	 * Find the end of the assumed function declaration.
    273 	 * Note that ANSI C functions can have type definitions so keep
    274 	 * track of the parentheses nesting level.
    275 	 */
    276 	while (GETC(!=, EOF)) {
    277 		switch (c) {
    278 		case '\'':
    279 		case '"':
    280 			/* skip strings and character constants */
    281 			skip_string(c);
    282 			break;
    283 		case '/':
    284 			/* skip comments */
    285 			if (GETC(==, '*'))
    286 				skip_comment(c);
    287 			else if (c == '/')
    288 				skip_comment(c);
    289 			break;
    290 		case '(':
    291 			level++;
    292 			break;
    293 		case ')':
    294 			if (level == 0)
    295 				goto fnd;
    296 			level--;
    297 			break;
    298 		case '\n':
    299 			SETLINE;
    300 		}
    301 	}
    302 	return (NO);
    303 fnd:
    304 	/*
    305 	 * we assume that the character after a function's right paren
    306 	 * is a token character if it's a function and a non-token
    307 	 * character if it's a declaration.  Comments don't count...
    308 	 */
    309 	for (anext = maybe_attribute;;) {
    310 		while (GETC(!=, EOF) && iswhite(c))
    311 			if (c == '\n')
    312 				SETLINE;
    313 		if (c == EOF)
    314 			return NO;
    315 		/*
    316 		 * Recognize the gnu __attribute__ extension, which would
    317 		 * otherwise make the heuristic test DTWT
    318 		 */
    319 		if (anext == maybe_attribute) {
    320 			if (intoken(c)) {
    321 				*anext++ = c;
    322 				continue;
    323 			}
    324 		} else {
    325 			if (intoken(c)) {
    326 				if (anext - maybe_attribute
    327 				 < (ptrdiff_t)(sizeof attribute - 1))
    328 					*anext++ = c;
    329 				else	break;
    330 				continue;
    331 			} else {
    332 				*anext++ = '\0';
    333 				if (strcmp(maybe_attribute, attribute) == 0) {
    334 					(void)ungetc(c, inf);
    335 					return NO;
    336 				}
    337 				break;
    338 			}
    339 		}
    340 		if (intoken(c) || c == '{')
    341 			break;
    342 		if (c == '/' && GETC(==, '*'))
    343 			skip_comment(c);
    344 		else if (c == '/')
    345 			skip_comment(c);
    346 		else {				/* don't ever "read" '/' */
    347 			(void)ungetc(c, inf);
    348 			return (NO);
    349 		}
    350 	}
    351 	if (c != '{')
    352 		(void)skip_key('{');
    353 	return (YES);
    354 }
    355 
    356 /*
    357  * hash_entry --
    358  *	handle a line starting with a '#'
    359  */
    360 static void
    361 hash_entry(void)
    362 {
    363 	int	c;			/* character read */
    364 	int	curline;		/* line started on */
    365 	char	*sp;			/* buffer pointer */
    366 	char	tok[MAXTOKEN];		/* storage buffer */
    367 
    368 	curline = lineno;
    369 	do if (GETC(==, EOF))
    370 		return;
    371 	while(c != '\n' && iswhite(c));
    372 	ungetc(c, inf);
    373 	for (sp = tok;;) {		/* get next token */
    374 		if (GETC(==, EOF))
    375 			return;
    376 		if (iswhite(c))
    377 			break;
    378 		if (sp < tok + sizeof tok)
    379 			*sp++ = c;
    380 	}
    381 	if(sp >= tok + sizeof tok)
    382 		--sp;
    383 	*sp = EOS;
    384 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
    385 		goto skip;
    386 	for (;;) {			/* this doesn't handle "#define \n" */
    387 		if (GETC(==, EOF))
    388 			return;
    389 		if (!iswhite(c))
    390 			break;
    391 	}
    392 	for (sp = tok;;) {		/* get next token */
    393 		if(sp < tok + sizeof tok)
    394 			*sp++ = c;
    395 		if (GETC(==, EOF))
    396 			return;
    397 		/*
    398 		 * this is where it DOESN'T handle
    399 		 * "#define \n"
    400 		 */
    401 		if (!intoken(c))
    402 			break;
    403 	}
    404 	if(sp >= tok + sizeof tok)
    405 		--sp;
    406 	*sp = EOS;
    407 	if (dflag || c == '(') {	/* only want macros */
    408 		get_line();
    409 		pfnote(tok, curline);
    410 	}
    411 skip:	if (c == '\n') {		/* get rid of rest of define */
    412 		SETLINE
    413 		if (*(sp - 1) != '\\')
    414 			return;
    415 	}
    416 	(void)skip_key('\n');
    417 }
    418 
    419 /*
    420  * str_entry --
    421  *	handle a struct, union or enum entry
    422  */
    423 static int
    424 str_entry(int c /* current character */)
    425 {
    426 	int	curline;		/* line started on */
    427 	char	*sp;			/* buffer pointer */
    428 	char	tok[LINE_MAX];		/* storage buffer */
    429 
    430 	curline = lineno;
    431 	while (iswhite(c))
    432 		if (GETC(==, EOF))
    433 			return (NO);
    434 	if (c == '{')		/* it was "struct {" */
    435 		return (YES);
    436 	for (sp = tok;;) {		/* get next token */
    437 		*sp++ = c;
    438 		if (GETC(==, EOF))
    439 			return (NO);
    440 		if (!intoken(c))
    441 			break;
    442 	}
    443 	switch (c) {
    444 		case '{':		/* it was "struct foo{" */
    445 			--sp;
    446 			break;
    447 		case '\n':		/* it was "struct foo\n" */
    448 			SETLINE;
    449 			/*FALLTHROUGH*/
    450 		default:		/* probably "struct foo " */
    451 			while (GETC(!=, EOF))
    452 				if (!iswhite(c))
    453 					break;
    454 			if (c != '{') {
    455 				(void)ungetc(c, inf);
    456 				return (NO);
    457 			}
    458 	}
    459 	*sp = EOS;
    460 	pfnote(tok, curline);
    461 	return (YES);
    462 }
    463 
    464 /*
    465  * skip_comment --
    466  *	skip over comment
    467  */
    468 void
    469 skip_comment(int commenttype)
    470 {
    471 	int	c;			/* character read */
    472 	int	star;			/* '*' flag */
    473 
    474 	for (star = 0; GETC(!=, EOF);)
    475 		switch(c) {
    476 		/* comments don't nest, nor can they be escaped. */
    477 		case '*':
    478 			star = YES;
    479 			break;
    480 		case '/':
    481 			if (commenttype == '*' && star)
    482 				return;
    483 			break;
    484 		case '\n':
    485 			if (commenttype == '/') {
    486 				/*
    487 				 * we don't really parse C, so sometimes it
    488 				 * is necessary to see the newline
    489 				 */
    490 				ungetc(c, inf);
    491 				return;
    492 			}
    493 			SETLINE;
    494 			/*FALLTHROUGH*/
    495 		default:
    496 			star = NO;
    497 			break;
    498 		}
    499 }
    500 
    501 /*
    502  * skip_string --
    503  *	skip to the end of a string or character constant.
    504  */
    505 void
    506 skip_string(int key)
    507 {
    508 	int	c,
    509 		skip;
    510 
    511 	for (skip = NO; GETC(!=, EOF); )
    512 		switch (c) {
    513 		case '\\':		/* a backslash escapes anything */
    514 			skip = !skip;	/* we toggle in case it's "\\" */
    515 			break;
    516 		case '\n':
    517 			SETLINE;
    518 			/*FALLTHROUGH*/
    519 		default:
    520 			if (c == key && !skip)
    521 				return;
    522 			skip = NO;
    523 		}
    524 }
    525 
    526 /*
    527  * skip_key --
    528  *	skip to next char "key"
    529  */
    530 int
    531 skip_key(int key)
    532 {
    533 	int	c,
    534 		skip,
    535 		retval;
    536 
    537 	for (skip = retval = NO; GETC(!=, EOF);)
    538 		switch(c) {
    539 		case '\\':		/* a backslash escapes anything */
    540 			skip = !skip;	/* we toggle in case it's "\\" */
    541 			break;
    542 		case ';':		/* special case for yacc; if one */
    543 		case '|':		/* of these chars occurs, we may */
    544 			retval = YES;	/* have moved out of the rule */
    545 			break;		/* not used by C */
    546 		case '\'':
    547 		case '"':
    548 			/* skip strings and character constants */
    549 			skip_string(c);
    550 			break;
    551 		case '/':
    552 			/* skip comments */
    553 			if (GETC(==, '*')) {
    554 				skip_comment(c);
    555 				break;
    556 			} else if (c == '/') {
    557 				skip_comment(c);
    558 				break;
    559 			}
    560 			(void)ungetc(c, inf);
    561 			c = '/';
    562 			goto norm;
    563 		case '\n':
    564 			SETLINE;
    565 			/*FALLTHROUGH*/
    566 		default:
    567 		norm:
    568 			if (c == key && !skip)
    569 				return (retval);
    570 			skip = NO;
    571 		}
    572 	return (retval);
    573 }
    574