Home | History | Annotate | Line # | Download | only in indent
lexi.c revision 1.6
      1 /*	$NetBSD: lexi.c,v 1.6 1997/10/19 03:17:25 lukem Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
      7  * Copyright (c) 1985 Sun Microsystems, Inc.
      8  * All rights reserved.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the University of
     21  *	California, Berkeley and its contributors.
     22  * 4. Neither the name of the University nor the names of its contributors
     23  *    may be used to endorse or promote products derived from this software
     24  *    without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  * SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 #ifndef lint
     41 #if 0
     42 static char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
     43 #else
     44 __RCSID("$NetBSD: lexi.c,v 1.6 1997/10/19 03:17:25 lukem Exp $");
     45 #endif
     46 #endif				/* not lint */
     47 
     48 /*
     49  * Here we have the token scanner for indent.  It scans off one token and puts
     50  * it in the global variable "token".  It returns a code, indicating the type
     51  * of token scanned.
     52  */
     53 
     54 #include <stdio.h>
     55 #include <ctype.h>
     56 #include <stdlib.h>
     57 #include <string.h>
     58 #include "indent_globs.h"
     59 #include "indent_codes.h"
     60 
/*
 * Character class codes.  These are the values stored in the chartype[]
 * table below and tested by the scanner.
 */
#define alphanum 1		/* class of characters that can form identifiers and numbers */
#define opchar 3		/* class of operator characters */
     63 
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on once the word has been scanned.
 */
struct templ {
	char   *rwd;		/* keyword text; a null pointer ends the table */
	int     rwcode;		/* keyword class, see the list below */
};

/*
 * Reserved words known to the scanner.  Class codes as used by lexi():
 *
 *	0	no special treatment (returned as a plain identifier)
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keyword
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 *
 * The array is deliberately much larger than the initial list so that
 * addkey() can append further words at run time; the list is terminated by
 * an entry whose rwd is a null pointer.
 */
struct templ specials[1000] =
{
	{"switch", 1},
	{"case", 2},
	{"break", 0},
	{"struct", 3},
	{"union", 3},
	{"enum", 3},
	{"default", 2},
	{"int", 4},
	{"char", 4},
	{"float", 4},
	{"double", 4},
	{"long", 4},
	{"short", 4},
	{"typedef", 4},		/* was misspelled "typdef", so typedef was
				 * never recognized as a keyword */
	{"unsigned", 4},
	{"register", 4},
	{"static", 4},
	{"global", 4},
	{"extern", 4},
	{"void", 4},
	{"goto", 0},
	{"return", 0},
	{"if", 5},
	{"while", 5},
	{"for", 5},
	{"else", 6},
	{"do", 6},
	{"sizeof", 7},
	{0, 0}
};
    101 
/*
 * Character class table, indexed by 7-bit ASCII code.  An entry is 1 for a
 * character that may appear in an identifier or number (letters, digits,
 * '_' and '$'), 3 for an operator character
 * (! % & * + - / < = > ? ^ | ~) and 0 for everything else.
 */
char    chartype[128] =
{
	/* 0x00 - 0x0f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* sp ! " # $ % & '   ( ) * + , - . / */
	0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
	/* 0 1 2 3 4 5 6 7   8 9 : ; < = > ? */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
	/* @ A B C D E F G   H I J K L M N O */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* P Q R S T U V W   X Y Z [ \ ] ^ _ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
	/* ` a b c d e f g   h i j k l m n o */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* p q r s t u v w   x y z { | } ~ DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
    123 
    124 
    125 
    126 
    127 int
    128 lexi()
    129 {
    130 	int     unary_delim;	/* this is set to 1 if the current token
    131 				 *
    132 				 * forces a following operator to be unary */
    133 	static int last_code;	/* the last token type returned */
    134 	static int l_struct;	/* set to 1 if the last token was 'struct' */
    135 	int     code;		/* internal code to be returned */
    136 	char    qchar;		/* the delimiter character for a string */
    137 
    138 	e_token = s_token;	/* point to start of place to save token */
    139 	unary_delim = false;
    140 	ps.col_1 = ps.last_nl;	/* tell world that this token started in
    141 				 * column 1 iff the last thing scanned was nl */
    142 	ps.last_nl = false;
    143 
    144 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    145 		ps.col_1 = false;	/* leading blanks imply token is not
    146 					 * in column 1 */
    147 		if (++buf_ptr >= buf_end)
    148 			fill_buffer();
    149 	}
    150 
    151 	/* Scan an alphanumeric token */
    152 	if (chartype[(int) *buf_ptr] == alphanum ||
    153 	    (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
    154 		/*
    155 		 * we have a character or number
    156 		 */
    157 		char   *j;	/* used for searching thru list of
    158 				 *
    159 				 * reserved words */
    160 		struct templ *p;
    161 
    162 		if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
    163 			int     seendot = 0, seenexp = 0;
    164 			if (*buf_ptr == '0' &&
    165 			    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
    166 				*e_token++ = *buf_ptr++;
    167 				*e_token++ = *buf_ptr++;
    168 				while (isxdigit(*buf_ptr)) {
    169 					CHECK_SIZE_TOKEN;
    170 					*e_token++ = *buf_ptr++;
    171 				}
    172 			} else
    173 				while (1) {
    174 					if (*buf_ptr == '.')
    175 						if (seendot)
    176 							break;
    177 						else
    178 							seendot++;
    179 					CHECK_SIZE_TOKEN;
    180 					*e_token++ = *buf_ptr++;
    181 					if (!isdigit(*buf_ptr) && *buf_ptr != '.')
    182 						if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
    183 							break;
    184 						else {
    185 							seenexp++;
    186 							seendot++;
    187 							CHECK_SIZE_TOKEN;
    188 							*e_token++ = *buf_ptr++;
    189 							if (*buf_ptr == '+' || *buf_ptr == '-')
    190 								*e_token++ = *buf_ptr++;
    191 						}
    192 				}
    193 			if (*buf_ptr == 'L' || *buf_ptr == 'l')
    194 				*e_token++ = *buf_ptr++;
    195 		} else
    196 			while (chartype[(int) *buf_ptr] == alphanum) {	/* copy it over */
    197 				CHECK_SIZE_TOKEN;
    198 				*e_token++ = *buf_ptr++;
    199 				if (buf_ptr >= buf_end)
    200 					fill_buffer();
    201 			}
    202 		*e_token++ = '\0';
    203 		while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    204 			if (++buf_ptr >= buf_end)
    205 				fill_buffer();
    206 		}
    207 		ps.its_a_keyword = false;
    208 		ps.sizeof_keyword = false;
    209 		if (l_struct) {	/* if last token was 'struct', then this token
    210 				 * should be treated as a declaration */
    211 			l_struct = false;
    212 			last_code = ident;
    213 			ps.last_u_d = true;
    214 			return (decl);
    215 		}
    216 		ps.last_u_d = false;	/* Operator after indentifier is
    217 					 * binary */
    218 		last_code = ident;	/* Remember that this is the code we
    219 					 * will return */
    220 
    221 		/*
    222 		 * This loop will check if the token is a keyword.
    223 		 */
    224 		for (p = specials; (j = p->rwd) != 0; p++) {
    225 			char   *p = s_token;	/* point at scanned token */
    226 			if (*j++ != *p++ || *j++ != *p++)
    227 				continue;	/* This test depends on the
    228 						 * fact that identifiers are
    229 						 * always at least 1 character
    230 						 * long (ie. the first two
    231 						 * bytes of the identifier are
    232 						 * always meaningful) */
    233 			if (p[-1] == 0)
    234 				break;	/* If its a one-character identifier */
    235 			while (*p++ == *j)
    236 				if (*j++ == 0)
    237 					goto found_keyword;	/* I wish that C had a
    238 								 * multi-level break... */
    239 		}
    240 		if (p->rwd) {	/* we have a keyword */
    241 	found_keyword:
    242 			ps.its_a_keyword = true;
    243 			ps.last_u_d = true;
    244 			switch (p->rwcode) {
    245 			case 1:/* it is a switch */
    246 				return (swstmt);
    247 			case 2:/* a case or default */
    248 				return (casestmt);
    249 
    250 			case 3:/* a "struct" */
    251 				if (ps.p_l_follow)
    252 					break;	/* inside parens: cast */
    253 				l_struct = true;
    254 
    255 				/*
    256 				 * Next time around, we will want to know that we have had a
    257 				 * 'struct'
    258 				 */
    259 			case 4:/* one of the declaration keywords */
    260 				if (ps.p_l_follow) {
    261 					ps.cast_mask |= 1 << ps.p_l_follow;
    262 					break;	/* inside parens: cast */
    263 				}
    264 				last_code = decl;
    265 				return (decl);
    266 
    267 			case 5:/* if, while, for */
    268 				return (sp_paren);
    269 
    270 			case 6:/* do, else */
    271 				return (sp_nparen);
    272 
    273 			case 7:
    274 				ps.sizeof_keyword = true;
    275 			default:	/* all others are treated like any
    276 					 * other identifier */
    277 				return (ident);
    278 			}	/* end of switch */
    279 		}		/* end of if (found_it) */
    280 		if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
    281 			char   *tp = buf_ptr;
    282 			while (tp < buf_end)
    283 				if (*tp++ == ')' && (*tp == ';' || *tp == ','))
    284 					goto not_proc;
    285 			strncpy(ps.procname, token, sizeof ps.procname - 1);
    286 			ps.in_parameter_declaration = 1;
    287 			rparen_count = 1;
    288 	not_proc:	;
    289 		}
    290 		/*
    291 		 * The following hack attempts to guess whether or not the current
    292 		 * token is in fact a declaration keyword -- one that has been
    293 		 * typedefd
    294 		 */
    295 		if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
    296 		    && !ps.p_l_follow
    297 		    && !ps.block_init
    298 		    && (ps.last_token == rparen || ps.last_token == semicolon ||
    299 			ps.last_token == decl ||
    300 			ps.last_token == lbrace || ps.last_token == rbrace)) {
    301 			ps.its_a_keyword = true;
    302 			ps.last_u_d = true;
    303 			last_code = decl;
    304 			return decl;
    305 		}
    306 		if (last_code == decl)	/* if this is a declared variable,
    307 					 * then following sign is unary */
    308 			ps.last_u_d = true;	/* will make "int a -1" work */
    309 		last_code = ident;
    310 		return (ident);	/* the ident is not in the list */
    311 	}			/* end of procesing for alpanum character */
    312 	/* Scan a non-alphanumeric token */
    313 	*e_token++ = *buf_ptr;	/* if it is only a one-character token, it is
    314 				 * moved here */
    315 	*e_token = '\0';
    316 	if (++buf_ptr >= buf_end)
    317 		fill_buffer();
    318 
    319 	switch (*token) {
    320 	case '\n':
    321 		unary_delim = ps.last_u_d;
    322 		ps.last_nl = true;	/* remember that we just had a newline */
    323 		code = (had_eof ? 0 : newline);
    324 
    325 		/*
    326 		 * if data has been exausted, the newline is a dummy, and we should
    327 		 * return code to stop
    328 		 */
    329 		break;
    330 
    331 	case '\'':		/* start of quoted character */
    332 	case '"':		/* start of string */
    333 		qchar = *token;
    334 		if (troff) {
    335 			e_token[-1] = '`';
    336 			if (qchar == '"')
    337 				*e_token++ = '`';
    338 			e_token = chfont(&bodyf, &stringf, e_token);
    339 		}
    340 		do {		/* copy the string */
    341 			while (1) {	/* move one character or
    342 					 * [/<char>]<char> */
    343 				if (*buf_ptr == '\n') {
    344 					printf("%d: Unterminated literal\n", line_no);
    345 					goto stop_lit;
    346 				}
    347 				CHECK_SIZE_TOKEN;	/* Only have to do this
    348 							 * once in this loop,
    349 							 * since CHECK_SIZE
    350 							 * guarantees that there
    351 							 * are at least 5
    352 							 * entries left */
    353 				*e_token = *buf_ptr++;
    354 				if (buf_ptr >= buf_end)
    355 					fill_buffer();
    356 				if (*e_token == BACKSLASH) {	/* if escape, copy extra
    357 								 * char */
    358 					if (*buf_ptr == '\n')	/* check for escaped
    359 								 * newline */
    360 						++line_no;
    361 					if (troff) {
    362 						*++e_token = BACKSLASH;
    363 						if (*buf_ptr == BACKSLASH)
    364 							*++e_token = BACKSLASH;
    365 					}
    366 					*++e_token = *buf_ptr++;
    367 					++e_token;	/* we must increment
    368 							 * this again because we
    369 							 * copied two chars */
    370 					if (buf_ptr >= buf_end)
    371 						fill_buffer();
    372 				} else
    373 					break;	/* we copied one character */
    374 			}	/* end of while (1) */
    375 		} while (*e_token++ != qchar);
    376 		if (troff) {
    377 			e_token = chfont(&stringf, &bodyf, e_token - 1);
    378 			if (qchar == '"')
    379 				*e_token++ = '\'';
    380 		}
    381 stop_lit:
    382 		code = ident;
    383 		break;
    384 
    385 	case ('('):
    386 	case ('['):
    387 		unary_delim = true;
    388 		code = lparen;
    389 		break;
    390 
    391 	case (')'):
    392 	case (']'):
    393 		code = rparen;
    394 		break;
    395 
    396 	case '#':
    397 		unary_delim = ps.last_u_d;
    398 		code = preesc;
    399 		break;
    400 
    401 	case '?':
    402 		unary_delim = true;
    403 		code = question;
    404 		break;
    405 
    406 	case (':'):
    407 		code = colon;
    408 		unary_delim = true;
    409 		break;
    410 
    411 	case (';'):
    412 		unary_delim = true;
    413 		code = semicolon;
    414 		break;
    415 
    416 	case ('{'):
    417 		unary_delim = true;
    418 
    419 		/*
    420 		 * if (ps.in_or_st) ps.block_init = 1;
    421 		 */
    422 		/* ?	code = ps.block_init ? lparen : lbrace; */
    423 		code = lbrace;
    424 		break;
    425 
    426 	case ('}'):
    427 		unary_delim = true;
    428 		/* ?	code = ps.block_init ? rparen : rbrace; */
    429 		code = rbrace;
    430 		break;
    431 
    432 	case 014:		/* a form feed */
    433 		unary_delim = ps.last_u_d;
    434 		ps.last_nl = true;	/* remember this so we can set
    435 					 * 'ps.col_1' right */
    436 		code = form_feed;
    437 		break;
    438 
    439 	case (','):
    440 		unary_delim = true;
    441 		code = comma;
    442 		break;
    443 
    444 	case '.':
    445 		unary_delim = false;
    446 		code = period;
    447 		break;
    448 
    449 	case '-':
    450 	case '+':		/* check for -, +, --, ++ */
    451 		code = (ps.last_u_d ? unary_op : binary_op);
    452 		unary_delim = true;
    453 
    454 		if (*buf_ptr == token[0]) {
    455 			/* check for doubled character */
    456 			*e_token++ = *buf_ptr++;
    457 			/* buffer overflow will be checked at end of loop */
    458 			if (last_code == ident || last_code == rparen) {
    459 				code = (ps.last_u_d ? unary_op : postop);
    460 				/* check for following ++ or -- */
    461 				unary_delim = false;
    462 			}
    463 		} else
    464 			if (*buf_ptr == '=')
    465 				/* check for operator += */
    466 				*e_token++ = *buf_ptr++;
    467 			else
    468 				if (*buf_ptr == '>') {
    469 					/* check for operator -> */
    470 					*e_token++ = *buf_ptr++;
    471 					if (!pointer_as_binop) {
    472 						unary_delim = false;
    473 						code = unary_op;
    474 						ps.want_blank = false;
    475 					}
    476 				}
    477 		break;		/* buffer overflow will be checked at end of
    478 				 * switch */
    479 
    480 	case '=':
    481 		if (ps.in_or_st)
    482 			ps.block_init = 1;
    483 #ifdef undef
    484 		if (chartype[*buf_ptr] == opchar) {	/* we have two char
    485 							 * assignment */
    486 			e_token[-1] = *buf_ptr++;
    487 			if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
    488 				*e_token++ = *buf_ptr++;
    489 			*e_token++ = '=';	/* Flip =+ to += */
    490 			*e_token = 0;
    491 		}
    492 #else
    493 		if (*buf_ptr == '=') {	/* == */
    494 			*e_token++ = '=';	/* Flip =+ to += */
    495 			buf_ptr++;
    496 			*e_token = 0;
    497 		}
    498 #endif
    499 		code = binary_op;
    500 		unary_delim = true;
    501 		break;
    502 		/* can drop thru!!! */
    503 
    504 	case '>':
    505 	case '<':
    506 	case '!':		/* ops like <, <<, <=, !=, etc */
    507 		if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
    508 			*e_token++ = *buf_ptr;
    509 			if (++buf_ptr >= buf_end)
    510 				fill_buffer();
    511 		}
    512 		if (*buf_ptr == '=')
    513 			*e_token++ = *buf_ptr++;
    514 		code = (ps.last_u_d ? unary_op : binary_op);
    515 		unary_delim = true;
    516 		break;
    517 
    518 	default:
    519 		if (token[0] == '/' && *buf_ptr == '*') {
    520 			/* it is start of comment */
    521 			*e_token++ = '*';
    522 
    523 			if (++buf_ptr >= buf_end)
    524 				fill_buffer();
    525 
    526 			code = comment;
    527 			unary_delim = ps.last_u_d;
    528 			break;
    529 		}
    530 		while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
    531 			/*
    532 		         * handle ||, &&, etc, and also things as in int *****i
    533 		         */
    534 			*e_token++ = *buf_ptr;
    535 			if (++buf_ptr >= buf_end)
    536 				fill_buffer();
    537 		}
    538 		code = (ps.last_u_d ? unary_op : binary_op);
    539 		unary_delim = true;
    540 
    541 
    542 	}			/* end of switch */
    543 	if (code != newline) {
    544 		l_struct = false;
    545 		last_code = code;
    546 	}
    547 	if (buf_ptr >= buf_end)	/* check for input buffer empty */
    548 		fill_buffer();
    549 	ps.last_u_d = unary_delim;
    550 	*e_token = '\0';	/* null terminate the token */
    551 	return (code);
    552 }
    553 /*
    554  * Add the given keyword to the keyword table, using val as the keyword type
    555  */
    556 void
    557 addkey(key, val)
    558 	char   *key;
    559 	int     val;
    560 {
    561 	struct templ *p = specials;
    562 	while (p->rwd)
    563 		if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
    564 			return;
    565 		else
    566 			p++;
    567 	if (p >= specials + sizeof specials / sizeof specials[0])
    568 		return;		/* For now, table overflows are silently
    569 				 * ignored */
    570 	p->rwd = key;
    571 	p->rwcode = val;
    572 	p[1].rwd = 0;
    573 	p[1].rwcode = 0;
    574 }
    575