Home | History | Annotate | Line # | Download | only in indent
lexi.c revision 1.2
      1 /*
      2  * Copyright (c) 1985 Sun Microsystems, Inc.
      3  * Copyright (c) 1980 The Regents of the University of California.
      4  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #ifndef lint
     37 /*static char sccsid[] = "from: @(#)lexi.c	5.16 (Berkeley) 2/26/91";*/
     38 static char rcsid[] = "$Id: lexi.c,v 1.2 1993/08/01 18:14:31 mycroft Exp $";
     39 #endif /* not lint */
     40 
     41 /*
     42  * Here we have the token scanner for indent.  It scans off one token and puts
     43  * it in the global variable "token".  It returns a code, indicating the type
     44  * of token scanned.
     45  */
     46 
     47 #include <stdio.h>
     48 #include <ctype.h>
     49 #include <stdlib.h>
     50 #include <string.h>
     51 #include "indent_globs.h"
     52 #include "indent_codes.h"
     53 
/* Character classes stored in chartype[] and tested by lexi(). */
#define alphanum 1
#define opchar 3

/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class that lexi() switches on when the word is recognised.
 */
struct templ {
    char       *rwd;		/* the reserved word; 0 marks end of table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Reserved words known to the scanner.  The table is terminated by an entry
 * whose rwd is 0; the unused tail of the 100 slots is available to addkey().
 *
 * rwcode values as interpreted by lexi():
 *   1  switch                      -> swstmt
 *   2  case, default               -> casestmt
 *   3  struct, union, enum         -> decl, and the next word is a decl too
 *   4  declaration keywords        -> decl
 *   5  if, while, for              -> sp_paren
 *   6  else, do                    -> sp_nparen
 *   7  sizeof                      -> ident, with ps.sizeof_keyword set
 *   0  anything else               -> ident
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
     94 
/*
 * Character classification table, indexed by 7-bit character code.
 * Each entry is 1 for a character that may appear in an identifier or
 * number (letters, digits, '_' and '$'), 3 for a character that may be
 * part of an operator, and 0 for everything else.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation up to the slash */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at-sign, then upper case A to O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: upper case P to Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: backquote, then lower case a to o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: lower case p to z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
    116 
    117 
    118 
    119 
    120 int
    121 lexi()
    122 {
    123     int         unary_delim;	/* this is set to 1 if the current token
    124 				 *
    125 				 * forces a following operator to be unary */
    126     static int  last_code;	/* the last token type returned */
    127     static int  l_struct;	/* set to 1 if the last token was 'struct' */
    128     int         code;		/* internal code to be returned */
    129     char        qchar;		/* the delimiter character for a string */
    130 
    131     e_token = s_token;		/* point to start of place to save token */
    132     unary_delim = false;
    133     ps.col_1 = ps.last_nl;	/* tell world that this token started in
    134 				 * column 1 iff the last thing scanned was nl */
    135     ps.last_nl = false;
    136 
    137     while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    138 	ps.col_1 = false;	/* leading blanks imply token is not in column
    139 				 * 1 */
    140 	if (++buf_ptr >= buf_end)
    141 	    fill_buffer();
    142     }
    143 
    144     /* Scan an alphanumeric token */
    145     if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
    146 	/*
    147 	 * we have a character or number
    148 	 */
    149 	register char *j;	/* used for searching thru list of
    150 				 *
    151 				 * reserved words */
    152 	register struct templ *p;
    153 
    154 	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
    155 	    int         seendot = 0,
    156 	                seenexp = 0;
    157 	    if (*buf_ptr == '0' &&
    158 		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
    159 		*e_token++ = *buf_ptr++;
    160 		*e_token++ = *buf_ptr++;
    161 		while (isxdigit(*buf_ptr)) {
    162 		    CHECK_SIZE_TOKEN;
    163 		    *e_token++ = *buf_ptr++;
    164 		}
    165 	    }
    166 	    else
    167 		while (1) {
    168 		    if (*buf_ptr == '.')
    169 			if (seendot)
    170 			    break;
    171 			else
    172 			    seendot++;
    173 		    CHECK_SIZE_TOKEN;
    174 		    *e_token++ = *buf_ptr++;
    175 		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
    176 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
    177 			    break;
    178 			else {
    179 			    seenexp++;
    180 			    seendot++;
    181 			    CHECK_SIZE_TOKEN;
    182 			    *e_token++ = *buf_ptr++;
    183 			    if (*buf_ptr == '+' || *buf_ptr == '-')
    184 				*e_token++ = *buf_ptr++;
    185 			}
    186 		}
    187 	    if (*buf_ptr == 'L' || *buf_ptr == 'l')
    188 		*e_token++ = *buf_ptr++;
    189 	}
    190 	else
    191 	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */
    192 		CHECK_SIZE_TOKEN;
    193 		*e_token++ = *buf_ptr++;
    194 		if (buf_ptr >= buf_end)
    195 		    fill_buffer();
    196 	    }
    197 	*e_token++ = '\0';
    198 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    199 	    if (++buf_ptr >= buf_end)
    200 		fill_buffer();
    201 	}
    202 	ps.its_a_keyword = false;
    203 	ps.sizeof_keyword = false;
    204 	if (l_struct) {		/* if last token was 'struct', then this token
    205 				 * should be treated as a declaration */
    206 	    l_struct = false;
    207 	    last_code = ident;
    208 	    ps.last_u_d = true;
    209 	    return (decl);
    210 	}
    211 	ps.last_u_d = false;	/* Operator after indentifier is binary */
    212 	last_code = ident;	/* Remember that this is the code we will
    213 				 * return */
    214 
    215 	/*
    216 	 * This loop will check if the token is a keyword.
    217 	 */
    218 	for (p = specials; (j = p->rwd) != 0; p++) {
    219 	    register char *p = s_token;	/* point at scanned token */
    220 	    if (*j++ != *p++ || *j++ != *p++)
    221 		continue;	/* This test depends on the fact that
    222 				 * identifiers are always at least 1 character
    223 				 * long (ie. the first two bytes of the
    224 				 * identifier are always meaningful) */
    225 	    if (p[-1] == 0)
    226 		break;		/* If its a one-character identifier */
    227 	    while (*p++ == *j)
    228 		if (*j++ == 0)
    229 		    goto found_keyword;	/* I wish that C had a multi-level
    230 					 * break... */
    231 	}
    232 	if (p->rwd) {		/* we have a keyword */
    233     found_keyword:
    234 	    ps.its_a_keyword = true;
    235 	    ps.last_u_d = true;
    236 	    switch (p->rwcode) {
    237 	    case 1:		/* it is a switch */
    238 		return (swstmt);
    239 	    case 2:		/* a case or default */
    240 		return (casestmt);
    241 
    242 	    case 3:		/* a "struct" */
    243 		if (ps.p_l_follow)
    244 		    break;	/* inside parens: cast */
    245 		l_struct = true;
    246 
    247 		/*
    248 		 * Next time around, we will want to know that we have had a
    249 		 * 'struct'
    250 		 */
    251 	    case 4:		/* one of the declaration keywords */
    252 		if (ps.p_l_follow) {
    253 		    ps.cast_mask |= 1 << ps.p_l_follow;
    254 		    break;	/* inside parens: cast */
    255 		}
    256 		last_code = decl;
    257 		return (decl);
    258 
    259 	    case 5:		/* if, while, for */
    260 		return (sp_paren);
    261 
    262 	    case 6:		/* do, else */
    263 		return (sp_nparen);
    264 
    265 	    case 7:
    266 		ps.sizeof_keyword = true;
    267 	    default:		/* all others are treated like any other
    268 				 * identifier */
    269 		return (ident);
    270 	    }			/* end of switch */
    271 	}			/* end of if (found_it) */
    272 	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
    273 	    register char *tp = buf_ptr;
    274 	    while (tp < buf_end)
    275 		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
    276 		    goto not_proc;
    277 	    strncpy(ps.procname, token, sizeof ps.procname - 1);
    278 	    ps.in_parameter_declaration = 1;
    279 	    rparen_count = 1;
    280     not_proc:;
    281 	}
    282 	/*
    283 	 * The following hack attempts to guess whether or not the current
    284 	 * token is in fact a declaration keyword -- one that has been
    285 	 * typedefd
    286 	 */
    287 	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
    288 		&& !ps.p_l_follow
    289 	        && !ps.block_init
    290 		&& (ps.last_token == rparen || ps.last_token == semicolon ||
    291 		    ps.last_token == decl ||
    292 		    ps.last_token == lbrace || ps.last_token == rbrace)) {
    293 	    ps.its_a_keyword = true;
    294 	    ps.last_u_d = true;
    295 	    last_code = decl;
    296 	    return decl;
    297 	}
    298 	if (last_code == decl)	/* if this is a declared variable, then
    299 				 * following sign is unary */
    300 	    ps.last_u_d = true;	/* will make "int a -1" work */
    301 	last_code = ident;
    302 	return (ident);		/* the ident is not in the list */
    303     }				/* end of procesing for alpanum character */
    304 
    305     /* Scan a non-alphanumeric token */
    306 
    307     *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
    308 				 * moved here */
    309     *e_token = '\0';
    310     if (++buf_ptr >= buf_end)
    311 	fill_buffer();
    312 
    313     switch (*token) {
    314     case '\n':
    315 	unary_delim = ps.last_u_d;
    316 	ps.last_nl = true;	/* remember that we just had a newline */
    317 	code = (had_eof ? 0 : newline);
    318 
    319 	/*
    320 	 * if data has been exausted, the newline is a dummy, and we should
    321 	 * return code to stop
    322 	 */
    323 	break;
    324 
    325     case '\'':			/* start of quoted character */
    326     case '"':			/* start of string */
    327 	qchar = *token;
    328 	if (troff) {
    329 	    e_token[-1] = '`';
    330 	    if (qchar == '"')
    331 		*e_token++ = '`';
    332 	    e_token = chfont(&bodyf, &stringf, e_token);
    333 	}
    334 	do {			/* copy the string */
    335 	    while (1) {		/* move one character or [/<char>]<char> */
    336 		if (*buf_ptr == '\n') {
    337 		    printf("%d: Unterminated literal\n", line_no);
    338 		    goto stop_lit;
    339 		}
    340 		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
    341 					 * since CHECK_SIZE guarantees that there
    342 					 * are at least 5 entries left */
    343 		*e_token = *buf_ptr++;
    344 		if (buf_ptr >= buf_end)
    345 		    fill_buffer();
    346 		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
    347 		    if (*buf_ptr == '\n')	/* check for escaped newline */
    348 			++line_no;
    349 		    if (troff) {
    350 			*++e_token = BACKSLASH;
    351 			if (*buf_ptr == BACKSLASH)
    352 			    *++e_token = BACKSLASH;
    353 		    }
    354 		    *++e_token = *buf_ptr++;
    355 		    ++e_token;	/* we must increment this again because we
    356 				 * copied two chars */
    357 		    if (buf_ptr >= buf_end)
    358 			fill_buffer();
    359 		}
    360 		else
    361 		    break;	/* we copied one character */
    362 	    }			/* end of while (1) */
    363 	} while (*e_token++ != qchar);
    364 	if (troff) {
    365 	    e_token = chfont(&stringf, &bodyf, e_token - 1);
    366 	    if (qchar == '"')
    367 		*e_token++ = '\'';
    368 	}
    369 stop_lit:
    370 	code = ident;
    371 	break;
    372 
    373     case ('('):
    374     case ('['):
    375 	unary_delim = true;
    376 	code = lparen;
    377 	break;
    378 
    379     case (')'):
    380     case (']'):
    381 	code = rparen;
    382 	break;
    383 
    384     case '#':
    385 	unary_delim = ps.last_u_d;
    386 	code = preesc;
    387 	break;
    388 
    389     case '?':
    390 	unary_delim = true;
    391 	code = question;
    392 	break;
    393 
    394     case (':'):
    395 	code = colon;
    396 	unary_delim = true;
    397 	break;
    398 
    399     case (';'):
    400 	unary_delim = true;
    401 	code = semicolon;
    402 	break;
    403 
    404     case ('{'):
    405 	unary_delim = true;
    406 
    407 	/*
    408 	 * if (ps.in_or_st) ps.block_init = 1;
    409 	 */
    410 	/* ?	code = ps.block_init ? lparen : lbrace; */
    411 	code = lbrace;
    412 	break;
    413 
    414     case ('}'):
    415 	unary_delim = true;
    416 	/* ?	code = ps.block_init ? rparen : rbrace; */
    417 	code = rbrace;
    418 	break;
    419 
    420     case 014:			/* a form feed */
    421 	unary_delim = ps.last_u_d;
    422 	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
    423 				 * right */
    424 	code = form_feed;
    425 	break;
    426 
    427     case (','):
    428 	unary_delim = true;
    429 	code = comma;
    430 	break;
    431 
    432     case '.':
    433 	unary_delim = false;
    434 	code = period;
    435 	break;
    436 
    437     case '-':
    438     case '+':			/* check for -, +, --, ++ */
    439 	code = (ps.last_u_d ? unary_op : binary_op);
    440 	unary_delim = true;
    441 
    442 	if (*buf_ptr == token[0]) {
    443 	    /* check for doubled character */
    444 	    *e_token++ = *buf_ptr++;
    445 	    /* buffer overflow will be checked at end of loop */
    446 	    if (last_code == ident || last_code == rparen) {
    447 		code = (ps.last_u_d ? unary_op : postop);
    448 		/* check for following ++ or -- */
    449 		unary_delim = false;
    450 	    }
    451 	}
    452 	else if (*buf_ptr == '=')
    453 	    /* check for operator += */
    454 	    *e_token++ = *buf_ptr++;
    455 	else if (*buf_ptr == '>') {
    456 	    /* check for operator -> */
    457 	    *e_token++ = *buf_ptr++;
    458 	    if (!pointer_as_binop) {
    459 		unary_delim = false;
    460 		code = unary_op;
    461 		ps.want_blank = false;
    462 	    }
    463 	}
    464 	break;			/* buffer overflow will be checked at end of
    465 				 * switch */
    466 
    467     case '=':
    468 	if (ps.in_or_st)
    469 	    ps.block_init = 1;
    470 #ifdef undef
    471 	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
    472 	    e_token[-1] = *buf_ptr++;
    473 	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
    474 		*e_token++ = *buf_ptr++;
    475 	    *e_token++ = '=';	/* Flip =+ to += */
    476 	    *e_token = 0;
    477 	}
    478 #else
    479 	if (*buf_ptr == '=') {/* == */
    480 	    *e_token++ = '=';	/* Flip =+ to += */
    481 	    buf_ptr++;
    482 	    *e_token = 0;
    483 	}
    484 #endif
    485 	code = binary_op;
    486 	unary_delim = true;
    487 	break;
    488 	/* can drop thru!!! */
    489 
    490     case '>':
    491     case '<':
    492     case '!':			/* ops like <, <<, <=, !=, etc */
    493 	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
    494 	    *e_token++ = *buf_ptr;
    495 	    if (++buf_ptr >= buf_end)
    496 		fill_buffer();
    497 	}
    498 	if (*buf_ptr == '=')
    499 	    *e_token++ = *buf_ptr++;
    500 	code = (ps.last_u_d ? unary_op : binary_op);
    501 	unary_delim = true;
    502 	break;
    503 
    504     default:
    505 	if (token[0] == '/' && *buf_ptr == '*') {
    506 	    /* it is start of comment */
    507 	    *e_token++ = '*';
    508 
    509 	    if (++buf_ptr >= buf_end)
    510 		fill_buffer();
    511 
    512 	    code = comment;
    513 	    unary_delim = ps.last_u_d;
    514 	    break;
    515 	}
    516 	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
    517 	    /*
    518 	     * handle ||, &&, etc, and also things as in int *****i
    519 	     */
    520 	    *e_token++ = *buf_ptr;
    521 	    if (++buf_ptr >= buf_end)
    522 		fill_buffer();
    523 	}
    524 	code = (ps.last_u_d ? unary_op : binary_op);
    525 	unary_delim = true;
    526 
    527 
    528     }				/* end of switch */
    529     if (code != newline) {
    530 	l_struct = false;
    531 	last_code = code;
    532     }
    533     if (buf_ptr >= buf_end)	/* check for input buffer empty */
    534 	fill_buffer();
    535     ps.last_u_d = unary_delim;
    536     *e_token = '\0';		/* null terminate the token */
    537     return (code);
    538 }
    539 
    540 /*
    541  * Add the given keyword to the keyword table, using val as the keyword type
    542  */
    543 addkey(key, val)
    544     char       *key;
    545 {
    546     register struct templ *p = specials;
    547     while (p->rwd)
    548 	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
    549 	    return;
    550 	else
    551 	    p++;
    552     if (p >= specials + sizeof specials / sizeof specials[0])
    553 	return;			/* For now, table overflows are silently
    554 				 * ignored */
    555     p->rwd = key;
    556     p->rwcode = val;
    557     p[1].rwd = 0;
    558     p[1].rwcode = 0;
    559     return;
    560 }
    561