Home | History | Annotate | Line # | Download | only in indent
lexi.c revision 1.3
      1 /*	$NetBSD: lexi.c,v 1.3 1997/01/09 20:20:17 tls Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1985 Sun Microsystems, Inc.
      5  * Copyright (c) 1980 The Regents of the University of California.
      6  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
      7  * All rights reserved.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *	This product includes software developed by the University of
     20  *	California, Berkeley and its contributors.
     21  * 4. Neither the name of the University nor the names of its contributors
     22  *    may be used to endorse or promote products derived from this software
     23  *    without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     35  * SUCH DAMAGE.
     36  */
     37 
     38 #ifndef lint
     39 /*static char sccsid[] = "from: @(#)lexi.c	5.16 (Berkeley) 2/26/91";*/
     40 static char rcsid[] = "$NetBSD: lexi.c,v 1.3 1997/01/09 20:20:17 tls Exp $";
     41 #endif /* not lint */
     42 
     43 /*
     44  * Here we have the token scanner for indent.  It scans off one token and puts
     45  * it in the global variable "token".  It returns a code, indicating the type
     46  * of token scanned.
     47  */
     48 
     49 #include <stdio.h>
     50 #include <ctype.h>
     51 #include <stdlib.h>
     52 #include <string.h>
     53 #include "indent_globs.h"
     54 #include "indent_codes.h"
     55 
/* Character classes stored in the chartype[] table and tested by lexi(). */
#define alphanum 1		/* character may appear in an identifier or
				 * number */
#define opchar 3		/* character may appear in an operator */
     58 
/*
 * One entry of the reserved-word table.  rwd is the spelling of the word and
 * rwcode selects how lexi() treats it:
 *	0  ordinary identifier (break, goto, return)
 *	1  switch
 *	2  case / default
 *	3  struct, union, enum
 *	4  declaration keyword
 *	5  if, while, for
 *	6  else, do
 *	7  sizeof
 * An entry whose rwd is 0 terminates the table.
 */
struct templ {
    char       *rwd;
    int         rwcode;
};

/*
 * The reserved-word table.  It is sized larger than the built-in list so that
 * addkey() can append further words at run time.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so the real
				 * keyword was never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
     96 
/*
 * Character classification table, indexed by 7-bit character code.  Each
 * entry is 0 (no class), 1 (alphanum) or 3 (opchar).  Rows hold eight
 * consecutive codes; the comment on each row lists the characters covered.
 */
char        chartype[128] =
{				/* this is used to facilitate the decision of
				 * what type (alphanumeric, operator) each
				 * character is */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 000-007: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 010-017: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 020-027: control characters */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 030-037: control characters */
    0, 3, 0, 0, 1, 3, 3, 0,	/* space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) star + , - . slash */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ backslash ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
};
    118 
    119 
    120 
    121 
    122 int
    123 lexi()
    124 {
    125     int         unary_delim;	/* this is set to 1 if the current token
    126 				 *
    127 				 * forces a following operator to be unary */
    128     static int  last_code;	/* the last token type returned */
    129     static int  l_struct;	/* set to 1 if the last token was 'struct' */
    130     int         code;		/* internal code to be returned */
    131     char        qchar;		/* the delimiter character for a string */
    132 
    133     e_token = s_token;		/* point to start of place to save token */
    134     unary_delim = false;
    135     ps.col_1 = ps.last_nl;	/* tell world that this token started in
    136 				 * column 1 iff the last thing scanned was nl */
    137     ps.last_nl = false;
    138 
    139     while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    140 	ps.col_1 = false;	/* leading blanks imply token is not in column
    141 				 * 1 */
    142 	if (++buf_ptr >= buf_end)
    143 	    fill_buffer();
    144     }
    145 
    146     /* Scan an alphanumeric token */
    147     if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
    148 	/*
    149 	 * we have a character or number
    150 	 */
    151 	register char *j;	/* used for searching thru list of
    152 				 *
    153 				 * reserved words */
    154 	register struct templ *p;
    155 
    156 	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
    157 	    int         seendot = 0,
    158 	                seenexp = 0;
    159 	    if (*buf_ptr == '0' &&
    160 		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
    161 		*e_token++ = *buf_ptr++;
    162 		*e_token++ = *buf_ptr++;
    163 		while (isxdigit(*buf_ptr)) {
    164 		    CHECK_SIZE_TOKEN;
    165 		    *e_token++ = *buf_ptr++;
    166 		}
    167 	    }
    168 	    else
    169 		while (1) {
    170 		    if (*buf_ptr == '.')
    171 			if (seendot)
    172 			    break;
    173 			else
    174 			    seendot++;
    175 		    CHECK_SIZE_TOKEN;
    176 		    *e_token++ = *buf_ptr++;
    177 		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
    178 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
    179 			    break;
    180 			else {
    181 			    seenexp++;
    182 			    seendot++;
    183 			    CHECK_SIZE_TOKEN;
    184 			    *e_token++ = *buf_ptr++;
    185 			    if (*buf_ptr == '+' || *buf_ptr == '-')
    186 				*e_token++ = *buf_ptr++;
    187 			}
    188 		}
    189 	    if (*buf_ptr == 'L' || *buf_ptr == 'l')
    190 		*e_token++ = *buf_ptr++;
    191 	}
    192 	else
    193 	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */
    194 		CHECK_SIZE_TOKEN;
    195 		*e_token++ = *buf_ptr++;
    196 		if (buf_ptr >= buf_end)
    197 		    fill_buffer();
    198 	    }
    199 	*e_token++ = '\0';
    200 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    201 	    if (++buf_ptr >= buf_end)
    202 		fill_buffer();
    203 	}
    204 	ps.its_a_keyword = false;
    205 	ps.sizeof_keyword = false;
    206 	if (l_struct) {		/* if last token was 'struct', then this token
    207 				 * should be treated as a declaration */
    208 	    l_struct = false;
    209 	    last_code = ident;
    210 	    ps.last_u_d = true;
    211 	    return (decl);
    212 	}
    213 	ps.last_u_d = false;	/* Operator after indentifier is binary */
    214 	last_code = ident;	/* Remember that this is the code we will
    215 				 * return */
    216 
    217 	/*
    218 	 * This loop will check if the token is a keyword.
    219 	 */
    220 	for (p = specials; (j = p->rwd) != 0; p++) {
    221 	    register char *p = s_token;	/* point at scanned token */
    222 	    if (*j++ != *p++ || *j++ != *p++)
    223 		continue;	/* This test depends on the fact that
    224 				 * identifiers are always at least 1 character
    225 				 * long (ie. the first two bytes of the
    226 				 * identifier are always meaningful) */
    227 	    if (p[-1] == 0)
    228 		break;		/* If its a one-character identifier */
    229 	    while (*p++ == *j)
    230 		if (*j++ == 0)
    231 		    goto found_keyword;	/* I wish that C had a multi-level
    232 					 * break... */
    233 	}
    234 	if (p->rwd) {		/* we have a keyword */
    235     found_keyword:
    236 	    ps.its_a_keyword = true;
    237 	    ps.last_u_d = true;
    238 	    switch (p->rwcode) {
    239 	    case 1:		/* it is a switch */
    240 		return (swstmt);
    241 	    case 2:		/* a case or default */
    242 		return (casestmt);
    243 
    244 	    case 3:		/* a "struct" */
    245 		if (ps.p_l_follow)
    246 		    break;	/* inside parens: cast */
    247 		l_struct = true;
    248 
    249 		/*
    250 		 * Next time around, we will want to know that we have had a
    251 		 * 'struct'
    252 		 */
    253 	    case 4:		/* one of the declaration keywords */
    254 		if (ps.p_l_follow) {
    255 		    ps.cast_mask |= 1 << ps.p_l_follow;
    256 		    break;	/* inside parens: cast */
    257 		}
    258 		last_code = decl;
    259 		return (decl);
    260 
    261 	    case 5:		/* if, while, for */
    262 		return (sp_paren);
    263 
    264 	    case 6:		/* do, else */
    265 		return (sp_nparen);
    266 
    267 	    case 7:
    268 		ps.sizeof_keyword = true;
    269 	    default:		/* all others are treated like any other
    270 				 * identifier */
    271 		return (ident);
    272 	    }			/* end of switch */
    273 	}			/* end of if (found_it) */
    274 	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
    275 	    register char *tp = buf_ptr;
    276 	    while (tp < buf_end)
    277 		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
    278 		    goto not_proc;
    279 	    strncpy(ps.procname, token, sizeof ps.procname - 1);
    280 	    ps.in_parameter_declaration = 1;
    281 	    rparen_count = 1;
    282     not_proc:;
    283 	}
    284 	/*
    285 	 * The following hack attempts to guess whether or not the current
    286 	 * token is in fact a declaration keyword -- one that has been
    287 	 * typedefd
    288 	 */
    289 	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
    290 		&& !ps.p_l_follow
    291 	        && !ps.block_init
    292 		&& (ps.last_token == rparen || ps.last_token == semicolon ||
    293 		    ps.last_token == decl ||
    294 		    ps.last_token == lbrace || ps.last_token == rbrace)) {
    295 	    ps.its_a_keyword = true;
    296 	    ps.last_u_d = true;
    297 	    last_code = decl;
    298 	    return decl;
    299 	}
    300 	if (last_code == decl)	/* if this is a declared variable, then
    301 				 * following sign is unary */
    302 	    ps.last_u_d = true;	/* will make "int a -1" work */
    303 	last_code = ident;
    304 	return (ident);		/* the ident is not in the list */
    305     }				/* end of procesing for alpanum character */
    306 
    307     /* Scan a non-alphanumeric token */
    308 
    309     *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
    310 				 * moved here */
    311     *e_token = '\0';
    312     if (++buf_ptr >= buf_end)
    313 	fill_buffer();
    314 
    315     switch (*token) {
    316     case '\n':
    317 	unary_delim = ps.last_u_d;
    318 	ps.last_nl = true;	/* remember that we just had a newline */
    319 	code = (had_eof ? 0 : newline);
    320 
    321 	/*
    322 	 * if data has been exausted, the newline is a dummy, and we should
    323 	 * return code to stop
    324 	 */
    325 	break;
    326 
    327     case '\'':			/* start of quoted character */
    328     case '"':			/* start of string */
    329 	qchar = *token;
    330 	if (troff) {
    331 	    e_token[-1] = '`';
    332 	    if (qchar == '"')
    333 		*e_token++ = '`';
    334 	    e_token = chfont(&bodyf, &stringf, e_token);
    335 	}
    336 	do {			/* copy the string */
    337 	    while (1) {		/* move one character or [/<char>]<char> */
    338 		if (*buf_ptr == '\n') {
    339 		    printf("%d: Unterminated literal\n", line_no);
    340 		    goto stop_lit;
    341 		}
    342 		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
    343 					 * since CHECK_SIZE guarantees that there
    344 					 * are at least 5 entries left */
    345 		*e_token = *buf_ptr++;
    346 		if (buf_ptr >= buf_end)
    347 		    fill_buffer();
    348 		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
    349 		    if (*buf_ptr == '\n')	/* check for escaped newline */
    350 			++line_no;
    351 		    if (troff) {
    352 			*++e_token = BACKSLASH;
    353 			if (*buf_ptr == BACKSLASH)
    354 			    *++e_token = BACKSLASH;
    355 		    }
    356 		    *++e_token = *buf_ptr++;
    357 		    ++e_token;	/* we must increment this again because we
    358 				 * copied two chars */
    359 		    if (buf_ptr >= buf_end)
    360 			fill_buffer();
    361 		}
    362 		else
    363 		    break;	/* we copied one character */
    364 	    }			/* end of while (1) */
    365 	} while (*e_token++ != qchar);
    366 	if (troff) {
    367 	    e_token = chfont(&stringf, &bodyf, e_token - 1);
    368 	    if (qchar == '"')
    369 		*e_token++ = '\'';
    370 	}
    371 stop_lit:
    372 	code = ident;
    373 	break;
    374 
    375     case ('('):
    376     case ('['):
    377 	unary_delim = true;
    378 	code = lparen;
    379 	break;
    380 
    381     case (')'):
    382     case (']'):
    383 	code = rparen;
    384 	break;
    385 
    386     case '#':
    387 	unary_delim = ps.last_u_d;
    388 	code = preesc;
    389 	break;
    390 
    391     case '?':
    392 	unary_delim = true;
    393 	code = question;
    394 	break;
    395 
    396     case (':'):
    397 	code = colon;
    398 	unary_delim = true;
    399 	break;
    400 
    401     case (';'):
    402 	unary_delim = true;
    403 	code = semicolon;
    404 	break;
    405 
    406     case ('{'):
    407 	unary_delim = true;
    408 
    409 	/*
    410 	 * if (ps.in_or_st) ps.block_init = 1;
    411 	 */
    412 	/* ?	code = ps.block_init ? lparen : lbrace; */
    413 	code = lbrace;
    414 	break;
    415 
    416     case ('}'):
    417 	unary_delim = true;
    418 	/* ?	code = ps.block_init ? rparen : rbrace; */
    419 	code = rbrace;
    420 	break;
    421 
    422     case 014:			/* a form feed */
    423 	unary_delim = ps.last_u_d;
    424 	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
    425 				 * right */
    426 	code = form_feed;
    427 	break;
    428 
    429     case (','):
    430 	unary_delim = true;
    431 	code = comma;
    432 	break;
    433 
    434     case '.':
    435 	unary_delim = false;
    436 	code = period;
    437 	break;
    438 
    439     case '-':
    440     case '+':			/* check for -, +, --, ++ */
    441 	code = (ps.last_u_d ? unary_op : binary_op);
    442 	unary_delim = true;
    443 
    444 	if (*buf_ptr == token[0]) {
    445 	    /* check for doubled character */
    446 	    *e_token++ = *buf_ptr++;
    447 	    /* buffer overflow will be checked at end of loop */
    448 	    if (last_code == ident || last_code == rparen) {
    449 		code = (ps.last_u_d ? unary_op : postop);
    450 		/* check for following ++ or -- */
    451 		unary_delim = false;
    452 	    }
    453 	}
    454 	else if (*buf_ptr == '=')
    455 	    /* check for operator += */
    456 	    *e_token++ = *buf_ptr++;
    457 	else if (*buf_ptr == '>') {
    458 	    /* check for operator -> */
    459 	    *e_token++ = *buf_ptr++;
    460 	    if (!pointer_as_binop) {
    461 		unary_delim = false;
    462 		code = unary_op;
    463 		ps.want_blank = false;
    464 	    }
    465 	}
    466 	break;			/* buffer overflow will be checked at end of
    467 				 * switch */
    468 
    469     case '=':
    470 	if (ps.in_or_st)
    471 	    ps.block_init = 1;
    472 #ifdef undef
    473 	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
    474 	    e_token[-1] = *buf_ptr++;
    475 	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
    476 		*e_token++ = *buf_ptr++;
    477 	    *e_token++ = '=';	/* Flip =+ to += */
    478 	    *e_token = 0;
    479 	}
    480 #else
    481 	if (*buf_ptr == '=') {/* == */
    482 	    *e_token++ = '=';	/* Flip =+ to += */
    483 	    buf_ptr++;
    484 	    *e_token = 0;
    485 	}
    486 #endif
    487 	code = binary_op;
    488 	unary_delim = true;
    489 	break;
    490 	/* can drop thru!!! */
    491 
    492     case '>':
    493     case '<':
    494     case '!':			/* ops like <, <<, <=, !=, etc */
    495 	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
    496 	    *e_token++ = *buf_ptr;
    497 	    if (++buf_ptr >= buf_end)
    498 		fill_buffer();
    499 	}
    500 	if (*buf_ptr == '=')
    501 	    *e_token++ = *buf_ptr++;
    502 	code = (ps.last_u_d ? unary_op : binary_op);
    503 	unary_delim = true;
    504 	break;
    505 
    506     default:
    507 	if (token[0] == '/' && *buf_ptr == '*') {
    508 	    /* it is start of comment */
    509 	    *e_token++ = '*';
    510 
    511 	    if (++buf_ptr >= buf_end)
    512 		fill_buffer();
    513 
    514 	    code = comment;
    515 	    unary_delim = ps.last_u_d;
    516 	    break;
    517 	}
    518 	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
    519 	    /*
    520 	     * handle ||, &&, etc, and also things as in int *****i
    521 	     */
    522 	    *e_token++ = *buf_ptr;
    523 	    if (++buf_ptr >= buf_end)
    524 		fill_buffer();
    525 	}
    526 	code = (ps.last_u_d ? unary_op : binary_op);
    527 	unary_delim = true;
    528 
    529 
    530     }				/* end of switch */
    531     if (code != newline) {
    532 	l_struct = false;
    533 	last_code = code;
    534     }
    535     if (buf_ptr >= buf_end)	/* check for input buffer empty */
    536 	fill_buffer();
    537     ps.last_u_d = unary_delim;
    538     *e_token = '\0';		/* null terminate the token */
    539     return (code);
    540 }
    541 
    542 /*
    543  * Add the given keyword to the keyword table, using val as the keyword type
    544  */
    545 addkey(key, val)
    546     char       *key;
    547 {
    548     register struct templ *p = specials;
    549     while (p->rwd)
    550 	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
    551 	    return;
    552 	else
    553 	    p++;
    554     if (p >= specials + sizeof specials / sizeof specials[0])
    555 	return;			/* For now, table overflows are silently
    556 				 * ignored */
    557     p->rwd = key;
    558     p->rwcode = val;
    559     p[1].rwd = 0;
    560     p[1].rwcode = 0;
    561     return;
    562 }
    563