Home | History | Annotate | Line # | Download | only in indent
lexi.c revision 1.14
      1  1.14  dholland /*	$NetBSD: lexi.c,v 1.14 2016/06/05 18:35:32 dholland Exp $	*/
      2   1.3       tls 
      3   1.1       cgd /*
      4   1.5       mrg  * Copyright (c) 1980, 1993
      5   1.5       mrg  *	The Regents of the University of California.  All rights reserved.
      6  1.12       agc  *
      7  1.12       agc  * Redistribution and use in source and binary forms, with or without
      8  1.12       agc  * modification, are permitted provided that the following conditions
      9  1.12       agc  * are met:
     10  1.12       agc  * 1. Redistributions of source code must retain the above copyright
     11  1.12       agc  *    notice, this list of conditions and the following disclaimer.
     12  1.12       agc  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.12       agc  *    notice, this list of conditions and the following disclaimer in the
     14  1.12       agc  *    documentation and/or other materials provided with the distribution.
     15  1.12       agc  * 3. Neither the name of the University nor the names of its contributors
     16  1.12       agc  *    may be used to endorse or promote products derived from this software
     17  1.12       agc  *    without specific prior written permission.
     18  1.12       agc  *
     19  1.12       agc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  1.12       agc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  1.12       agc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  1.12       agc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  1.12       agc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  1.12       agc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  1.12       agc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  1.12       agc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  1.12       agc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  1.12       agc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  1.12       agc  * SUCH DAMAGE.
     30  1.12       agc  */
     31  1.12       agc 
     32  1.12       agc /*
     33   1.5       mrg  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
     34   1.1       cgd  * Copyright (c) 1985 Sun Microsystems, Inc.
     35   1.1       cgd  * All rights reserved.
     36   1.1       cgd  *
     37   1.1       cgd  * Redistribution and use in source and binary forms, with or without
     38   1.1       cgd  * modification, are permitted provided that the following conditions
     39   1.1       cgd  * are met:
     40   1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     41   1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     42   1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     43   1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     44   1.1       cgd  *    documentation and/or other materials provided with the distribution.
     45   1.1       cgd  * 3. All advertising materials mentioning features or use of this software
     46   1.1       cgd  *    must display the following acknowledgement:
     47   1.1       cgd  *	This product includes software developed by the University of
     48   1.1       cgd  *	California, Berkeley and its contributors.
     49   1.1       cgd  * 4. Neither the name of the University nor the names of its contributors
     50   1.1       cgd  *    may be used to endorse or promote products derived from this software
     51   1.1       cgd  *    without specific prior written permission.
     52   1.1       cgd  *
     53   1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     54   1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     55   1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     56   1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     57   1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     58   1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     59   1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     60   1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     61   1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     62   1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     63   1.1       cgd  * SUCH DAMAGE.
     64   1.1       cgd  */
     65   1.1       cgd 
     66   1.6     lukem #include <sys/cdefs.h>
     67   1.1       cgd #ifndef lint
     68   1.5       mrg #if 0
     69   1.5       mrg static char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
     70   1.5       mrg #else
     71  1.14  dholland __RCSID("$NetBSD: lexi.c,v 1.14 2016/06/05 18:35:32 dholland Exp $");
     72   1.5       mrg #endif
     73   1.6     lukem #endif				/* not lint */
     74   1.1       cgd 
     75   1.1       cgd /*
     76   1.1       cgd  * Here we have the token scanner for indent.  It scans off one token and puts
     77   1.1       cgd  * it in the global variable "token".  It returns a code, indicating the type
     78   1.1       cgd  * of token scanned.
     79   1.1       cgd  */
     80   1.1       cgd 
     81   1.1       cgd #include <stdio.h>
     82   1.1       cgd #include <ctype.h>
     83   1.1       cgd #include <stdlib.h>
     84   1.1       cgd #include <string.h>
     85   1.1       cgd #include "indent_globs.h"
     86   1.1       cgd #include "indent_codes.h"
     87   1.1       cgd 
     88   1.1       cgd #define alphanum 1	/* chartype[] class: character may be part of an identifier or number */
     89   1.1       cgd #define opchar 3	/* chartype[] class: operator character */
     90   1.1       cgd 
     91   1.1       cgd struct templ {		/* one entry of the reserved-word table */
     92  1.13     lukem 	const char	*rwd;	/* keyword spelling; a null pointer marks the end of the table */
     93  1.13     lukem 	int		rwcode;	/* keyword class, switched on in lexi() */
     94   1.1       cgd };
     95   1.1       cgd 
     96   1.4       agc struct templ specials[1000] =	/* reserved words, searched linearly by lexi(); addkey() appends */
     97   1.1       cgd {
     98   1.6     lukem 	{"switch", 1},		/* 1: lexi() returns swstmt */
     99   1.6     lukem 	{"case", 2},		/* 2: lexi() returns casestmt */
    100   1.6     lukem 	{"break", 0},		/* 0: flagged as keyword but returned as ident */
    101   1.6     lukem 	{"struct", 3},		/* 3: like 4, and outside parens the next
                        				 * alphanumeric token is returned as decl */
    102   1.6     lukem 	{"union", 3},
    103   1.6     lukem 	{"enum", 3},
    104   1.6     lukem 	{"default", 2},
    105   1.6     lukem 	{"int", 4},		/* 4: declaration keyword; returned as decl
                        				 * unless it appears inside parentheses */
    106   1.6     lukem 	{"char", 4},
    107   1.6     lukem 	{"float", 4},
    108   1.6     lukem 	{"double", 4},
    109   1.6     lukem 	{"long", 4},
    110   1.6     lukem 	{"short", 4},
    111  1.14  dholland 	{"typedef", 4},
    112   1.6     lukem 	{"unsigned", 4},
    113   1.6     lukem 	{"register", 4},
    114   1.6     lukem 	{"static", 4},
    115   1.6     lukem 	{"global", 4},
    116   1.6     lukem 	{"extern", 4},
    117   1.6     lukem 	{"void", 4},
    118   1.6     lukem 	{"goto", 0},
    119   1.6     lukem 	{"return", 0},
    120   1.6     lukem 	{"if", 5},		/* 5: lexi() returns sp_paren */
    121   1.6     lukem 	{"while", 5},
    122   1.6     lukem 	{"for", 5},
    123   1.6     lukem 	{"else", 6},		/* 6: lexi() returns sp_nparen */
    124   1.6     lukem 	{"do", 6},
    125   1.6     lukem 	{"sizeof", 7},		/* 7: sets ps.sizeof_keyword, returned as ident */
    126   1.6     lukem 	{0, 0}			/* terminator: rwd == 0 ends every table scan */
    127   1.1       cgd };
    128   1.1       cgd 
    129   1.6     lukem char    chartype[128] =
    130   1.1       cgd {				/* character class of each 7-bit code, indexed
    131   1.1       cgd 				 * by character value: 1 (alphanum) for letters,
    132   1.1       cgd 				 * digits, underscore and dollar sign; 3 (opchar)
                        				 * for operator characters; 0 for anything else */
    133   1.6     lukem 	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
    134   1.6     lukem 	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f */
    135   1.6     lukem 	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    136   1.6     lukem 	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f */
    137   1.6     lukem 	0, 3, 0, 0, 1, 3, 3, 0,	/* 0x20 - 0x27 */
    138   1.6     lukem 	0, 0, 3, 3, 0, 3, 0, 3,	/* 0x28 - 0x2f */
    139   1.6     lukem 	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37: digits 0-7 */
    140   1.6     lukem 	1, 1, 0, 0, 3, 3, 3, 3,	/* 0x38 - 0x3f: digits 8-9, then punctuation */
    141   1.6     lukem 	0, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47: at-sign, A-G */
    142   1.6     lukem 	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4f: H-O */
    143   1.6     lukem 	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57: P-W */
    144   1.6     lukem 	1, 1, 1, 0, 0, 0, 3, 1,	/* 0x58 - 0x5f: X-Z, punctuation, underscore */
    145   1.6     lukem 	0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67: backquote, a-g */
    146   1.6     lukem 	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6f: h-o */
    147   1.6     lukem 	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77: p-w */
    148   1.6     lukem 	1, 1, 1, 0, 3, 0, 3, 0	/* 0x78 - 0x7f: x-z, punctuation, DEL */
    149   1.1       cgd };
    150   1.1       cgd 
    151   1.1       cgd 
    152   1.1       cgd 
    153   1.1       cgd 
                        /*
                         * lexi -- scan the next token of the input.
                         *
                         * Skips leading blanks and tabs, then copies one token from buf_ptr into
                         * the token buffer that starts at s_token and NUL-terminates it.  The
                         * input is refilled with fill_buffer() at the points where the code
                         * compares buf_ptr against buf_end.
                         *
                         * Returns the token code: ident, decl, swstmt, casestmt, sp_paren,
                         * sp_nparen, newline, lparen, rparen, preesc, question, colon, semicolon,
                         * lbrace, rbrace, form_feed, comma, period, unary_op, binary_op, postop
                         * or comment.  A newline seen while had_eof is set returns 0.  Quoted
                         * literals are reported as ident; numeric constants take the same path
                         * as identifiers.
                         *
                         * Besides the token text, the call updates scanner state in ps (col_1,
                         * last_nl, last_u_d, its_a_keyword, sizeof_keyword and, in some branches,
                         * cast_mask, block_init, procname, in_parameter_declaration, want_blank)
                         * and the static last_code / l_struct values kept between calls.
                         */
    154   1.1       cgd int
    155  1.11       wiz lexi(void)
    156   1.1       cgd {
    157   1.6     lukem 	int     unary_delim;	/* this is set to 1 if the current token
    158   1.1       cgd 				 *
    159   1.1       cgd 				 * forces a following operator to be unary */
    160   1.6     lukem 	static int last_code;	/* the last token type returned */
    161   1.6     lukem 	static int l_struct;	/* set to 1 if the last token was 'struct' */
    162   1.6     lukem 	int     code;		/* internal code to be returned */
    163   1.6     lukem 	char    qchar;		/* the delimiter character for a string */
    164   1.6     lukem 
    165   1.6     lukem 	e_token = s_token;	/* point to start of place to save token */
    166   1.6     lukem 	unary_delim = false;
    167   1.6     lukem 	ps.col_1 = ps.last_nl;	/* tell world that this token started in
    168   1.1       cgd 				 * column 1 iff the last thing scanned was nl */
    169   1.6     lukem 	ps.last_nl = false;
    170   1.1       cgd 
    171   1.6     lukem 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    172   1.6     lukem 		ps.col_1 = false;	/* leading blanks imply token is not
    173   1.6     lukem 					 * in column 1 */
    174   1.6     lukem 		if (++buf_ptr >= buf_end)
    175   1.6     lukem 			fill_buffer();
    176   1.6     lukem 	}
    177   1.1       cgd 
    178   1.6     lukem 	/* Scan an alphanumeric token */
                        	/*
                        	 * NOTE(review): chartype[] has 128 entries but is indexed with
                        	 * *buf_ptr converted to int, here and in the identifier copy loop
                        	 * below.  A byte outside 0..127 (negative where char is signed)
                        	 * would index outside the table -- confirm that the input is
                        	 * guaranteed to be 7-bit.
                        	 */
    179   1.6     lukem 	if (chartype[(int) *buf_ptr] == alphanum ||
    180   1.8  christos 	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
    181   1.6     lukem 		/*
    182   1.6     lukem 		 * we have a character or number
    183   1.6     lukem 		 */
    184  1.13     lukem 		const char *j;	/* used for searching thru list of
    185   1.1       cgd 				 * reserved words */
    186   1.6     lukem 		struct templ *p;
    187   1.1       cgd 
    188   1.8  christos 		if (isdigit((unsigned char)*buf_ptr) ||
    189   1.8  christos 		    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
                        			/* numeric constant: hex, or decimal/float with
                        			 * at most one dot and one exponent */
    190  1.10  kristerw 			int     seendot = 0, seenexp = 0, seensfx = 0;
    191   1.6     lukem 			if (*buf_ptr == '0' &&
    192   1.6     lukem 			    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
    193   1.6     lukem 				*e_token++ = *buf_ptr++;
    194   1.6     lukem 				*e_token++ = *buf_ptr++;
    195   1.8  christos 				while (isxdigit((unsigned char)*buf_ptr)) {
    196   1.6     lukem 					CHECK_SIZE_TOKEN;
    197   1.6     lukem 					*e_token++ = *buf_ptr++;
    198   1.6     lukem 				}
    199   1.7      ross 			} else {
    200   1.6     lukem 				while (1) {
    201   1.7      ross 					if (*buf_ptr == '.') {
    202   1.6     lukem 						if (seendot)
    203   1.6     lukem 							break;
    204   1.6     lukem 						else
    205   1.6     lukem 							seendot++;
    206   1.7      ross 					}
    207   1.6     lukem 					CHECK_SIZE_TOKEN;
    208   1.6     lukem 					*e_token++ = *buf_ptr++;
    209   1.8  christos 					if (!isdigit((unsigned char)*buf_ptr)
    210   1.7      ross 					&& *buf_ptr != '.') {
    211   1.7      ross 						if ((*buf_ptr != 'E'
    212   1.7      ross 						&& *buf_ptr != 'e') || seenexp)
    213   1.6     lukem 							break;
    214   1.6     lukem 						else {
                        							/* exponent: no dot may
                        							 * follow it, so mark the
                        							 * dot as seen too */
    215   1.6     lukem 							seenexp++;
    216   1.6     lukem 							seendot++;
    217   1.6     lukem 							CHECK_SIZE_TOKEN;
    218   1.6     lukem 							*e_token++ = *buf_ptr++;
    219   1.6     lukem 							if (*buf_ptr == '+' || *buf_ptr == '-')
    220   1.6     lukem 								*e_token++ = *buf_ptr++;
    221   1.6     lukem 						}
    222   1.7      ross 					}
    223   1.6     lukem 				}
    224   1.7      ross 			}
    225   1.9  kristerw 			if (*buf_ptr == 'F' || *buf_ptr == 'f') {
    226   1.9  kristerw 				/* float constant */
    227   1.6     lukem 				*e_token++ = *buf_ptr++;
    228   1.9  kristerw 			} else {
    229  1.10  kristerw 				/* integer constant */
                        				/* seensfx bit 1: a U suffix was taken;
                        				 * bit 2: an L or LL suffix was taken */
    230  1.10  kristerw 				while (1) {
    231  1.10  kristerw 					if (!(seensfx & 1) &&
    232  1.10  kristerw 					    (*buf_ptr == 'U' ||
    233  1.10  kristerw 					     *buf_ptr == 'u')) {
    234  1.10  kristerw 						CHECK_SIZE_TOKEN;
    235  1.10  kristerw 						*e_token++ = *buf_ptr++;
    236  1.10  kristerw 						seensfx |= 1;
    237  1.10  kristerw 						continue;
    238  1.10  kristerw 					}
    239  1.10  kristerw 					if (!(seensfx & 2) &&
    240  1.10  kristerw 					    (*buf_ptr == 'L' ||
    241  1.10  kristerw 					     *buf_ptr == 'l')) {
    242  1.10  kristerw 						CHECK_SIZE_TOKEN;
    243  1.10  kristerw 						if (buf_ptr[1] == buf_ptr[0])
    244  1.10  kristerw 							*e_token++ = *buf_ptr++;
    245  1.10  kristerw 						*e_token++ = *buf_ptr++;
    246  1.10  kristerw 						seensfx |= 2;
    247  1.10  kristerw 						continue;
    248  1.10  kristerw 					}
    249  1.10  kristerw 					break;
    250  1.10  kristerw 				}
    251   1.9  kristerw 			}
    252   1.6     lukem 		} else
    253   1.6     lukem 			while (chartype[(int) *buf_ptr] == alphanum) {	/* copy it over */
    254   1.6     lukem 				CHECK_SIZE_TOKEN;
    255   1.1       cgd 				*e_token++ = *buf_ptr++;
    256   1.6     lukem 				if (buf_ptr >= buf_end)
    257   1.6     lukem 					fill_buffer();
    258   1.1       cgd 			}
    259   1.6     lukem 		*e_token++ = '\0';
    260   1.6     lukem 		while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
    261   1.6     lukem 			if (++buf_ptr >= buf_end)
    262   1.6     lukem 				fill_buffer();
    263   1.1       cgd 		}
    264   1.6     lukem 		ps.its_a_keyword = false;
    265   1.6     lukem 		ps.sizeof_keyword = false;
    266   1.6     lukem 		if (l_struct) {	/* if last token was 'struct', then this token
    267   1.1       cgd 				 * should be treated as a declaration */
    268   1.6     lukem 			l_struct = false;
    269   1.6     lukem 			last_code = ident;
    270   1.6     lukem 			ps.last_u_d = true;
    271   1.6     lukem 			return (decl);
    272   1.6     lukem 		}
    273   1.6     lukem 		ps.last_u_d = false;	/* Operator after identifier is
    274   1.6     lukem 					 * binary */
    275   1.6     lukem 		last_code = ident;	/* Remember that this is the code we
    276   1.6     lukem 					 * will return */
    277   1.1       cgd 
    278   1.1       cgd 		/*
    279   1.6     lukem 		 * This loop will check if the token is a keyword.
    280   1.6     lukem 		 */
    281   1.6     lukem 		for (p = specials; (j = p->rwd) != 0; p++) {
    282  1.13     lukem 			char   *pt = s_token;	/* point at scanned token */
    283  1.13     lukem 			if (*j++ != *pt++ || *j++ != *pt++)
    284   1.6     lukem 				continue;	/* This test depends on the
    285   1.6     lukem 						 * fact that identifiers are
    286   1.6     lukem 						 * always at least 1 character
    287   1.6     lukem 						 * long (ie. the first two
    288   1.6     lukem 						 * bytes of the identifier are
    289   1.6     lukem 						 * always meaningful) */
    290  1.13     lukem 			if (pt[-1] == 0)
    291   1.6     lukem 				break;	/* both strings ended after one matching
                        					 * character: one-character keyword found,
                        					 * p still points at it */
    292  1.13     lukem 			while (*pt++ == *j)
    293   1.6     lukem 				if (*j++ == 0)
    294   1.6     lukem 					goto found_keyword;	/* I wish that C had a
    295   1.6     lukem 								 * multi-level break... */
    296   1.6     lukem 		}
    297   1.6     lukem 		if (p->rwd) {	/* we have a keyword */
    298   1.6     lukem 	found_keyword:
    299   1.6     lukem 			ps.its_a_keyword = true;
    300   1.6     lukem 			ps.last_u_d = true;
    301   1.6     lukem 			switch (p->rwcode) {
    302   1.6     lukem 			case 1:/* it is a switch */
    303   1.6     lukem 				return (swstmt);
    304   1.6     lukem 			case 2:/* a case or default */
    305   1.6     lukem 				return (casestmt);
    306   1.6     lukem 
    307   1.6     lukem 			case 3:/* a "struct" */
    308   1.6     lukem 				if (ps.p_l_follow)
    309   1.6     lukem 					break;	/* inside parens: cast */
    310   1.6     lukem 				l_struct = true;
    311   1.6     lukem 
    312   1.6     lukem 				/*
    313   1.6     lukem 				 * Next time around, we will want to know that we have had a
    314   1.6     lukem 				 * 'struct'
    315   1.6     lukem 				 */
                        				/* FALLTHROUGH */
    316   1.6     lukem 			case 4:/* one of the declaration keywords */
    317   1.6     lukem 				if (ps.p_l_follow) {
    318   1.6     lukem 					ps.cast_mask |= 1 << ps.p_l_follow;
    319   1.6     lukem 					break;	/* inside parens: cast */
    320   1.6     lukem 				}
    321   1.6     lukem 				last_code = decl;
    322   1.6     lukem 				return (decl);
    323   1.6     lukem 
    324   1.6     lukem 			case 5:/* if, while, for */
    325   1.6     lukem 				return (sp_paren);
    326   1.6     lukem 
    327   1.6     lukem 			case 6:/* do, else */
    328   1.6     lukem 				return (sp_nparen);
    329   1.6     lukem 
    330   1.6     lukem 			case 7:
    331   1.6     lukem 				ps.sizeof_keyword = true;
                        				/* FALLTHROUGH */
    332   1.6     lukem 			default:	/* all others are treated like any
    333   1.6     lukem 					 * other identifier */
    334   1.6     lukem 				return (ident);
    335   1.6     lukem 			}	/* end of switch */
    336   1.6     lukem 		}		/* end of if (found_it) */
                        		/*
                        		 * An identifier followed by a parenthesis at the outermost
                        		 * level is taken to start a function definition, unless a
                        		 * closing parenthesis directly followed by a semicolon or
                        		 * comma is found in the rest of the buffer.
                        		 * NOTE(review): strncpy() does not NUL-terminate when token
                        		 * is longer than the limit; this presumably relies on the
                        		 * last byte of ps.procname already being zero -- confirm.
                        		 */
    337   1.6     lukem 		if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
    338   1.6     lukem 			char   *tp = buf_ptr;
    339   1.6     lukem 			while (tp < buf_end)
    340   1.6     lukem 				if (*tp++ == ')' && (*tp == ';' || *tp == ','))
    341   1.6     lukem 					goto not_proc;
    342   1.6     lukem 			strncpy(ps.procname, token, sizeof ps.procname - 1);
    343   1.6     lukem 			ps.in_parameter_declaration = 1;
    344   1.6     lukem 			rparen_count = 1;
    345   1.6     lukem 	not_proc:	;
    346   1.6     lukem 		}
    347   1.6     lukem 		/*
    348   1.6     lukem 		 * The following hack attempts to guess whether or not the current
    349   1.6     lukem 		 * token is in fact a declaration keyword -- one that has been
    350   1.6     lukem 		 * typedefd
    351   1.1       cgd 		 */
    352   1.8  christos 		if (((*buf_ptr == '*' && buf_ptr[1] != '=') ||
    353   1.8  christos 		    isalpha((unsigned char)*buf_ptr) || *buf_ptr == '_')
    354   1.6     lukem 		    && !ps.p_l_follow
    355   1.6     lukem 		    && !ps.block_init
    356   1.6     lukem 		    && (ps.last_token == rparen || ps.last_token == semicolon ||
    357   1.6     lukem 			ps.last_token == decl ||
    358   1.6     lukem 			ps.last_token == lbrace || ps.last_token == rbrace)) {
    359   1.6     lukem 			ps.its_a_keyword = true;
    360   1.6     lukem 			ps.last_u_d = true;
    361   1.6     lukem 			last_code = decl;
    362   1.6     lukem 			return decl;
    363   1.6     lukem 		}
    364   1.6     lukem 		if (last_code == decl)	/* if this is a declared variable,
    365   1.6     lukem 					 * then following sign is unary */
    366   1.6     lukem 			ps.last_u_d = true;	/* will make "int a -1" work */
    367   1.6     lukem 		last_code = ident;
    368   1.6     lukem 		return (ident);	/* the ident is not in the list */
    369   1.6     lukem 	}			/* end of processing for alphanum character */
    370   1.6     lukem 	/* Scan a non-alphanumeric token */
    371   1.6     lukem 	*e_token++ = *buf_ptr;	/* if it is only a one-character token, it is
    372   1.6     lukem 				 * moved here */
    373   1.6     lukem 	*e_token = '\0';
    374   1.6     lukem 	if (++buf_ptr >= buf_end)
    375   1.6     lukem 		fill_buffer();
    376   1.6     lukem 
    377   1.6     lukem 	switch (*token) {
    378   1.6     lukem 	case '\n':
    379   1.6     lukem 		unary_delim = ps.last_u_d;
    380   1.6     lukem 		ps.last_nl = true;	/* remember that we just had a newline */
    381   1.6     lukem 		code = (had_eof ? 0 : newline);
    382   1.1       cgd 
    383   1.6     lukem 		/*
    384   1.6     lukem 		 * if data has been exhausted, the newline is a dummy, and we should
    385   1.6     lukem 		 * return code to stop
    386   1.6     lukem 		 */
    387   1.6     lukem 		break;
    388   1.1       cgd 
    389   1.6     lukem 	case '\'':		/* start of quoted character */
    390   1.6     lukem 	case '"':		/* start of string */
    391   1.6     lukem 		qchar = *token;
    392   1.6     lukem 		if (troff) {
    393   1.6     lukem 			e_token[-1] = '`';
    394   1.6     lukem 			if (qchar == '"')
    395   1.6     lukem 				*e_token++ = '`';
    396   1.6     lukem 			e_token = chfont(&bodyf, &stringf, e_token);
    397   1.6     lukem 		}
    398   1.6     lukem 		do {		/* copy the string */
    399   1.6     lukem 			while (1) {	/* move one character or
    400   1.6     lukem 					 * [/<char>]<char> */
    401   1.6     lukem 				if (*buf_ptr == '\n') {
    402   1.6     lukem 					printf("%d: Unterminated literal\n", line_no);
    403   1.6     lukem 					goto stop_lit;
    404   1.6     lukem 				}
    405   1.6     lukem 				CHECK_SIZE_TOKEN;	/* Only have to do this
    406   1.6     lukem 							 * once in this loop,
    407   1.6     lukem 							 * since CHECK_SIZE
    408   1.6     lukem 							 * guarantees that there
    409   1.6     lukem 							 * are at least 5
    410   1.6     lukem 							 * entries left */
    411   1.6     lukem 				*e_token = *buf_ptr++;
    412   1.6     lukem 				if (buf_ptr >= buf_end)
    413   1.6     lukem 					fill_buffer();
    414   1.6     lukem 				if (*e_token == BACKSLASH) {	/* if escape, copy extra
    415   1.6     lukem 								 * char */
    416   1.6     lukem 					if (*buf_ptr == '\n')	/* check for escaped
    417   1.6     lukem 								 * newline */
    418   1.6     lukem 						++line_no;
    419   1.6     lukem 					if (troff) {
    420   1.6     lukem 						*++e_token = BACKSLASH;
    421   1.6     lukem 						if (*buf_ptr == BACKSLASH)
    422   1.6     lukem 							*++e_token = BACKSLASH;
    423   1.6     lukem 					}
    424   1.6     lukem 					*++e_token = *buf_ptr++;
    425   1.6     lukem 					++e_token;	/* we must increment
    426   1.6     lukem 							 * this again because we
    427   1.6     lukem 							 * copied two chars */
    428   1.6     lukem 					if (buf_ptr >= buf_end)
    429   1.6     lukem 						fill_buffer();
    430   1.6     lukem 				} else
    431   1.6     lukem 					break;	/* we copied one character */
    432   1.6     lukem 			}	/* end of while (1) */
    433   1.6     lukem 		} while (*e_token++ != qchar);
    434   1.6     lukem 		if (troff) {
    435   1.6     lukem 			e_token = chfont(&stringf, &bodyf, e_token - 1);
    436   1.6     lukem 			if (qchar == '"')
    437   1.6     lukem 				*e_token++ = '\'';
    438   1.1       cgd 		}
    439   1.1       cgd stop_lit:
    440   1.6     lukem 		code = ident;
    441   1.6     lukem 		break;
    442   1.6     lukem 
    443   1.6     lukem 	case ('('):
    444   1.6     lukem 	case ('['):
    445   1.6     lukem 		unary_delim = true;
    446   1.6     lukem 		code = lparen;
    447   1.6     lukem 		break;
    448   1.6     lukem 
    449   1.6     lukem 	case (')'):
    450   1.6     lukem 	case (']'):
    451   1.6     lukem 		code = rparen;
    452   1.6     lukem 		break;
    453   1.6     lukem 
    454   1.6     lukem 	case '#':
    455   1.6     lukem 		unary_delim = ps.last_u_d;
    456   1.6     lukem 		code = preesc;
    457   1.6     lukem 		break;
    458   1.6     lukem 
    459   1.6     lukem 	case '?':
    460   1.6     lukem 		unary_delim = true;
    461   1.6     lukem 		code = question;
    462   1.6     lukem 		break;
    463   1.6     lukem 
    464   1.6     lukem 	case (':'):
    465   1.6     lukem 		code = colon;
    466   1.6     lukem 		unary_delim = true;
    467   1.6     lukem 		break;
    468   1.6     lukem 
    469   1.6     lukem 	case (';'):
    470   1.6     lukem 		unary_delim = true;
    471   1.6     lukem 		code = semicolon;
    472   1.6     lukem 		break;
    473   1.1       cgd 
    474   1.6     lukem 	case ('{'):
    475   1.6     lukem 		unary_delim = true;
    476   1.1       cgd 
    477   1.6     lukem 		/*
    478   1.6     lukem 		 * if (ps.in_or_st) ps.block_init = 1;
    479   1.6     lukem 		 */
    480   1.6     lukem 		/* ?	code = ps.block_init ? lparen : lbrace; */
    481   1.6     lukem 		code = lbrace;
    482   1.6     lukem 		break;
    483   1.6     lukem 
    484   1.6     lukem 	case ('}'):
    485   1.6     lukem 		unary_delim = true;
    486   1.6     lukem 		/* ?	code = ps.block_init ? rparen : rbrace; */
    487   1.6     lukem 		code = rbrace;
    488   1.6     lukem 		break;
    489   1.6     lukem 
    490   1.6     lukem 	case 014:		/* a form feed */
    491   1.6     lukem 		unary_delim = ps.last_u_d;
    492   1.6     lukem 		ps.last_nl = true;	/* remember this so we can set
    493   1.6     lukem 					 * 'ps.col_1' right */
    494   1.6     lukem 		code = form_feed;
    495   1.6     lukem 		break;
    496   1.6     lukem 
    497   1.6     lukem 	case (','):
    498   1.6     lukem 		unary_delim = true;
    499   1.6     lukem 		code = comma;
    500   1.6     lukem 		break;
    501   1.1       cgd 
    502   1.6     lukem 	case '.':
    503   1.1       cgd 		unary_delim = false;
    504   1.6     lukem 		code = period;
    505   1.6     lukem 		break;
    506   1.6     lukem 
    507   1.6     lukem 	case '-':
    508   1.6     lukem 	case '+':		/* check for -, +, --, ++ */
    509   1.6     lukem 		code = (ps.last_u_d ? unary_op : binary_op);
    510   1.6     lukem 		unary_delim = true;
    511   1.6     lukem 
    512   1.6     lukem 		if (*buf_ptr == token[0]) {
    513   1.6     lukem 			/* check for doubled character */
    514   1.6     lukem 			*e_token++ = *buf_ptr++;
    515   1.6     lukem 			/* buffer overflow will be checked at end of loop */
    516   1.6     lukem 			if (last_code == ident || last_code == rparen) {
    517   1.6     lukem 				code = (ps.last_u_d ? unary_op : postop);
    518   1.6     lukem 				/* check for following ++ or -- */
    519   1.6     lukem 				unary_delim = false;
    520   1.6     lukem 			}
    521   1.6     lukem 		} else
    522   1.6     lukem 			if (*buf_ptr == '=')
    523   1.6     lukem 				/* check for operator += */
    524   1.6     lukem 				*e_token++ = *buf_ptr++;
    525   1.6     lukem 			else
    526   1.6     lukem 				if (*buf_ptr == '>') {
    527   1.6     lukem 					/* check for operator -> */
    528   1.6     lukem 					*e_token++ = *buf_ptr++;
    529   1.6     lukem 					if (!pointer_as_binop) {
    530   1.6     lukem 						unary_delim = false;
    531   1.6     lukem 						code = unary_op;
    532   1.6     lukem 						ps.want_blank = false;
    533   1.6     lukem 					}
    534   1.6     lukem 				}
    535   1.6     lukem 		break;		/* buffer overflow will be checked at end of
    536   1.1       cgd 				 * switch */
    537   1.1       cgd 
    538   1.6     lukem 	case '=':
    539   1.6     lukem 		if (ps.in_or_st)
    540   1.6     lukem 			ps.block_init = 1;
    541   1.1       cgd #ifdef undef
    542   1.6     lukem 		if (chartype[*buf_ptr] == opchar) {	/* we have two char
    543   1.6     lukem 							 * assignment */
    544   1.6     lukem 			e_token[-1] = *buf_ptr++;
    545   1.6     lukem 			if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
    546   1.6     lukem 				*e_token++ = *buf_ptr++;
    547   1.6     lukem 			*e_token++ = '=';	/* Flip =+ to += */
    548   1.6     lukem 			*e_token = 0;
    549   1.6     lukem 		}
    550   1.1       cgd #else
    551   1.6     lukem 		if (*buf_ptr == '=') {	/* == */
    552   1.6     lukem 			*e_token++ = '=';	/* append the second = of the == operator */
    553   1.6     lukem 			buf_ptr++;
    554   1.6     lukem 			*e_token = 0;
    555   1.6     lukem 		}
    556   1.1       cgd #endif
    557   1.6     lukem 		code = binary_op;
    558   1.6     lukem 		unary_delim = true;
    559   1.6     lukem 		break;
    560   1.6     lukem 		/* not reached: the break above always executes, nothing drops thru */
    561   1.6     lukem 
    562   1.6     lukem 	case '>':
    563   1.6     lukem 	case '<':
    564   1.6     lukem 	case '!':		/* ops like <, <<, <=, !=, etc */
    565   1.6     lukem 		if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
    566   1.6     lukem 			*e_token++ = *buf_ptr;
    567   1.6     lukem 			if (++buf_ptr >= buf_end)
    568   1.6     lukem 				fill_buffer();
    569   1.6     lukem 		}
    570   1.6     lukem 		if (*buf_ptr == '=')
    571   1.6     lukem 			*e_token++ = *buf_ptr++;
    572   1.6     lukem 		code = (ps.last_u_d ? unary_op : binary_op);
    573   1.6     lukem 		unary_delim = true;
    574   1.6     lukem 		break;
    575   1.6     lukem 
    576   1.6     lukem 	default:
    577   1.6     lukem 		if (token[0] == '/' && *buf_ptr == '*') {
    578   1.6     lukem 			/* it is start of comment */
    579   1.6     lukem 			*e_token++ = '*';
    580   1.6     lukem 
    581   1.6     lukem 			if (++buf_ptr >= buf_end)
    582   1.6     lukem 				fill_buffer();
    583   1.6     lukem 
    584   1.6     lukem 			code = comment;
    585   1.6     lukem 			unary_delim = ps.last_u_d;
    586   1.6     lukem 			break;
    587   1.6     lukem 		}
    588   1.6     lukem 		while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
    589   1.6     lukem 			/*
    590   1.6     lukem 		         * handle ||, &&, etc, and also things as in int *****i
    591   1.6     lukem 		         */
    592   1.6     lukem 			*e_token++ = *buf_ptr;
    593   1.6     lukem 			if (++buf_ptr >= buf_end)
    594   1.6     lukem 				fill_buffer();
    595   1.6     lukem 		}
    596   1.6     lukem 		code = (ps.last_u_d ? unary_op : binary_op);
    597   1.6     lukem 		unary_delim = true;
    598   1.1       cgd 
    599   1.1       cgd 
    600   1.6     lukem 	}			/* end of switch */
                        	/* a newline neither cancels a pending 'struct' nor
                        	 * replaces the remembered token code */
    601   1.6     lukem 	if (code != newline) {
    602   1.6     lukem 		l_struct = false;
    603   1.6     lukem 		last_code = code;
    604   1.1       cgd 	}
    605   1.6     lukem 	if (buf_ptr >= buf_end)	/* check for input buffer empty */
    606   1.1       cgd 		fill_buffer();
    607   1.6     lukem 	ps.last_u_d = unary_delim;
    608   1.6     lukem 	*e_token = '\0';	/* null terminate the token */
    609   1.6     lukem 	return (code);
    610   1.1       cgd }
    611   1.1       cgd /*
    612   1.1       cgd  * Add the given keyword to the keyword table, using val as the keyword type
    613   1.1       cgd  */
    614   1.6     lukem void
    615  1.11       wiz addkey(char *key, int val)
    616   1.1       cgd {
    617   1.6     lukem 	struct templ *p = specials;
    618   1.6     lukem 	while (p->rwd)
    619   1.6     lukem 		if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
    620   1.6     lukem 			return;
    621   1.6     lukem 		else
    622   1.6     lukem 			p++;
    623   1.6     lukem 	if (p >= specials + sizeof specials / sizeof specials[0])
    624   1.6     lukem 		return;		/* For now, table overflows are silently
    625   1.1       cgd 				 * ignored */
    626   1.6     lukem 	p->rwd = key;
    627   1.6     lukem 	p->rwcode = val;
    628   1.6     lukem 	p[1].rwd = 0;
    629   1.6     lukem 	p[1].rwcode = 0;
    630   1.1       cgd }
    631