Home | History | Annotate | Line # | Download | only in ksh
lex.c revision 1.6
      1 /*	$NetBSD: lex.c,v 1.6 1999/10/20 15:09:59 hubertf Exp $	*/
      2 
      3 /*
      4  * lexical analysis and source input
      5  */
      6 
      7 #include "sh.h"
      8 #include <ctype.h>
      9 
     10 
     11 /* Structure to keep track of the lexing state and the various pieces of info
     12  * needed for each particular state.
         *
         * yylex() keeps an array of these and uses it as a stack: entering a
         * nested construct pushes an entry, leaving it pops the entry.  Which
         * union member is meaningful depends on ls_state.  The ls_* macros
         * defined inside the union are shorthands for reaching a member through
         * ls_info, e.g. statep->ls_scsparen.nparen.
     13  */
     14 typedef struct lex_state Lex_state;
     15 struct lex_state {
        	/* lexer state of this entry (SBASE, SDQUOTE, ...); yylex() stores
        	 * -1 in the sentinel slot at the bottom of its state array
        	 */
     16 	int ls_state;
     17 	union {
     18 	    /* $(...) */
     19 	    struct scsparen_info {
     20 		    int nparen;		/* count open parenthesis */
        		    /* csstate is the quoting sub-state used by the SCSPAREN
        		     * case of yylex(): 0 normal, 1 backslash in normal mode,
        		     * 2 double quotes, 3 backslash in double quotes,
        		     * 4 single quotes
        		     */
     21 		    int csstate; /* XXX remove */
     22 #define ls_scsparen ls_info.u_scsparen
     23 	    } u_scsparen;
     24 
     25 	    /* $((...)) */
     26 	    struct sasparen_info {
     27 		    int nparen;		/* count open parenthesis */
     28 		    int start;		/* marks start of $(( in output str */
     29 #define ls_sasparen ls_info.u_sasparen
     30 	    } u_sasparen;
     31 
     32 	    /* ((...)) */
     33 	    struct sletparen_info {
     34 		    int nparen;		/* count open parenthesis */
     35 #define ls_sletparen ls_info.u_sletparen
     36 	    } u_sletparen;
     37 
     38 	    /* `...` */
     39 	    struct sbquote_info {
     40 		    int indquotes;	/* true if in double quotes: "`...`" */
     41 #define ls_sbquote ls_info.u_sbquote
     42 	    } u_sbquote;
     43 
        	    /* Only meaningful in slot 0 of a block of states: yylex() stores
        	     * a null pointer here for its on-stack block, and the scan for
        	     * an enclosing SDQUOTE follows this link to continue in another
        	     * block.
        	     */
     44 	    Lex_state *base;		/* used to point to next state block */
     45 	} ls_info;
     46 };
     47 
     48 typedef struct State_info State_info;
        /* Bounds of the block of Lex_state entries currently in use; handed to
         * push_state_()/pop_state_() by the PUSH_STATE/POP_STATE macros.
         */
     49 struct State_info {
     50 	Lex_state	*base;	/* slot 0 of the block (yylex starts with states) */
     51 	Lex_state	*end;	/* one past the last slot (&states[STATE_BSIZE]) */
     52 };
     53 
     54 
     55 static void	readhere ARGS((struct ioword *iop));
     56 static int	getsc__ ARGS((void));
     57 static void	getsc_line ARGS((Source *s));
     58 static int	getsc_bn ARGS((void));
     59 static char	*get_brace_var ARGS((XString *wsp, char *wp));
     60 static int	arraysub ARGS((char **strp));
     61 static const char *ungetsc ARGS((int c));
     62 static void	gethere ARGS((void));
        /* called by PUSH_STATE/POP_STATE when statep runs off either end of the
         * current block of states
         */
     63 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
     64 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
     65 
        /* When non-zero, the getsc() macro skips its fast path and always calls
         * getsc_bn().  Reset to 0 each time yylex() (re)starts a token.
         */
     66 static int backslash_skip;
        /* Counter raised while lexing # comments, single-quoted text and here
         * documents without IOEVAL, and lowered again afterwards.
         * NOTE(review): presumably consulted by getsc_bn() (not shown here) to
         * leave backslash-newline alone in those contexts -- confirm.
         */
     67 static int ignore_backslash_newline;
     68 
     69 /* optimized getsc_bn() */
        /* Takes the next character straight from source->str unless the buffer is
         * exhausted, the character is a backslash, or backslash_skip is set; in
         * those cases getsc_bn() is called instead.
         */
     70 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
     71 			 && !backslash_skip ? *source->str++ : getsc_bn())
     72 /* optimized getsc__() */
        /* Same idea without the backslash checks: only an exhausted buffer falls
         * back to getsc__().
         */
     73 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
     74 
        /* number of Lex_state slots in the array yylex() keeps on its stack */
     75 #define STATE_BSIZE	32
     76 
        /* PUSH_STATE/POP_STATE work on the locals statep, state and state_info
         * of yylex().
         *
         * PUSH_STATE(s): move statep to the next slot (push_state_() supplies the
         * slot when the end of the block is reached) and record s both in that
         * slot and in the local `state'.
         */
     77 #define PUSH_STATE(s)	do { \
     78 			    if (++statep == state_info.end) \
     79 				statep = push_state_(&state_info, statep); \
     80 			    state = statep->ls_state = (s); \
     81 			} while (0)
     82 
        /* POP_STATE(): step statep back one slot (pop_state_() supplies the slot
         * when the base slot of the block is reached) and reload `state' from it.
         */
     83 #define POP_STATE()	do { \
     84 			    if (--statep == state_info.base) \
     85 				statep = pop_state_(&state_info, statep); \
     86 			    state = statep->ls_state; \
     87 			} while (0)
     88 
     89 
     90 
     91 /*
     92  * Lexical analyzer
     93  *
     94  * tokens are not regular expressions, they are LL(1).
     95  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
     96  * hence the state stack.
         *
         * cf is a mask of lexer flags supplied by the caller (ONEWORD, LETEXPR,
         * HEREDELIM, CONTIN, KEYWORD, ALIAS, CMDWORD, ESACONLY, VARASN,
         * ARRAYVAR are tested below).
         *
         * Returns one of:
         *   REDIR    - a redirection operator; yylval.iop describes it
         *   LWORD    - a word; yylval.cp is the encoded word (CHAR, QCHAR,
         *              OQUOTE, ... markers followed by data, ended by EOS)
         *   a keyword token (the val.i of the matching keywords entry)
         *   BREAK, LOGOR, LOGAND, COPROC, MDPAREN for ;; || && |& ((
         *   otherwise the terminating character itself (0 at end of input)
     97  */
     98 
     99 int
    100 yylex(cf)
    101 	int cf;
    102 {
    103 	Lex_state states[STATE_BSIZE], *statep;
    104 	State_info state_info;
    105 	register int c, state;
    106 	XString ws;		/* expandable output word */
    107 	register char *wp;	/* output word pointer */
    108 	char *sp, *dp;
    109 	int c2;
    110 
    111 
    112   Again:
        	/* (Re)start a token with an empty state stack: states[0] is a
        	 * sentinel, the first real entry is states[1].  Control comes back
        	 * here after a newline when CONTIN is set and after an alias
        	 * expansion has been pushed onto the source stack.
        	 */
    113 	states[0].ls_state = -1;
    114 	states[0].ls_info.base = (Lex_state *) 0;
    115 	statep = &states[1];
    116 	state_info.base = states;
    117 	state_info.end = &states[STATE_BSIZE];
    118 
    119 	Xinit(ws, wp, 64, ATEMP);
    120 
    121 	backslash_skip = 0;
    122 	ignore_backslash_newline = 0;
    123 
    124 	if (cf&ONEWORD)
    125 		state = SWORD;
    126 #ifdef KSH
    127 	else if (cf&LETEXPR) {
    128 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
    129 		state = SLETPAREN;
    130 		statep->ls_sletparen.nparen = 0;
    131 	}
    132 #endif /* KSH */
    133 	else {		/* normal lexing */
    134 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
        		/* skip leading blanks, then a # comment up to (not
        		 * including) the newline
        		 */
    135 		while ((c = getsc()) == ' ' || c == '\t')
    136 			;
    137 		if (c == '#') {
    138 			ignore_backslash_newline++;
    139 			while ((c = getsc()) != '\0' && c != '\n')
    140 				;
    141 			ignore_backslash_newline--;
    142 		}
    143 		ungetsc(c);
    144 	}
    145 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
    146 		source->flags &= ~SF_ALIAS;
    147 		/* In POSIX mode, a trailing space only counts if we are
    148 		 * parsing a simple command
    149 		 */
    150 		if (!Flag(FPOSIX) || (cf & CMDWORD))
    151 			cf |= ALIAS;
    152 	}
    153 
    154 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
    155 	statep->ls_state = state;
    156 
    157 	/* collect non-special or quoted characters to form word */
        	/* the loop ends at end of input, or at a C_LEX1 character seen
        	 * while in SBASE or SHEREDELIM; c then holds the terminator
        	 */
    158 	while (!((c = getsc()) == 0
    159 		 || ((state == SBASE || state == SHEREDELIM)
    160 		     && ctype(c, C_LEX1))))
    161 	{
    162 		Xcheck(ws, wp);
    163 		switch (state) {
    164 		  case SBASE:
    165 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
    166 				*wp = EOS; /* temporary */
    167 				if (is_wdvarname(Xstring(ws, wp), FALSE))
    168 				{
    169 					char *p, *tmp;
    170 
        					/* arraysub() hands back text in tmp:
        					 * on success it is copied to the word
        					 * as CHAR pairs, otherwise it is pushed
        					 * back as an SREREAD source to be lexed
        					 * again
        					 */
    171 					if (arraysub(&tmp)) {
    172 						*wp++ = CHAR;
    173 						*wp++ = c;
    174 						for (p = tmp; *p; ) {
    175 							Xcheck(ws, wp);
    176 							*wp++ = CHAR;
    177 							*wp++ = *p++;
    178 						}
    179 						afree(tmp, ATEMP);
    180 						break;
    181 					} else {
    182 						Source *s;
    183 
    184 						s = pushs(SREREAD,
    185 							  source->areap);
    186 						s->start = s->str
    187 							= s->u.freeme = tmp;
    188 						s->next = source;
    189 						source = s;
    190 					}
    191 				}
    192 				*wp++ = CHAR;
    193 				*wp++ = c;
    194 				break;
    195 			}
    196 			/* fall through.. */
    197 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
    198 #ifdef KSH
    199 			if (c == '*' || c == '@' || c == '+' || c == '?'
    200 			    || c == '!')
    201 			{
    202 				c2 = getsc();
    203 				if (c2 == '(' /*)*/ ) {
    204 					*wp++ = OPAT;
    205 					*wp++ = c;
    206 					PUSH_STATE(SPATTERN);
    207 					break;
    208 				}
    209 				ungetsc(c2);
    210 			}
    211 #endif /* KSH */
    212 			/* fall through.. */
    213 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
    214 			switch (c) {
    215 			  case '\\':
    216 				c = getsc();
    217 #ifdef OS2
    218 				if (isalnum(c)) {
    219 					*wp++ = CHAR, *wp++ = '\\';
    220 					*wp++ = CHAR, *wp++ = c;
    221 				} else
    222 #endif
    223 				if (c) /* trailing \ is lost */
    224 					*wp++ = QCHAR, *wp++ = c;
    225 				break;
    226 			  case '\'':
    227 				*wp++ = OQUOTE;
    228 				ignore_backslash_newline++;
    229 				PUSH_STATE(SSQUOTE);
    230 				break;
    231 			  case '"':
    232 				*wp++ = OQUOTE;
    233 				PUSH_STATE(SDQUOTE);
    234 				break;
    235 			  default:
    236 				goto Subst;
    237 			}
    238 			break;
    239 
        		  /* Subst: backslash, $ and ` handling shared by unquoted
        		   * text, double quotes (SDQUOTE) and SWORD
        		   */
    240 		  Subst:
    241 			switch (c) {
    242 			  case '\\':
    243 				c = getsc();
    244 				switch (c) {
    245 				  case '"': case '\\':
    246 				  case '$': case '`':
    247 					*wp++ = QCHAR, *wp++ = c;
    248 					break;
    249 				  default:
    250 					Xcheck(ws, wp);
    251 					if (c) { /* trailing \ is lost */
    252 						*wp++ = CHAR, *wp++ = '\\';
    253 						*wp++ = CHAR, *wp++ = c;
    254 					}
    255 					break;
    256 				}
    257 				break;
    258 			  case '$':
    259 				c = getsc();
    260 				if (c == '(') /*)*/ {
    261 					c = getsc();
    262 					if (c == '(') /*)*/ {
    263 						PUSH_STATE(SASPAREN);
    264 						statep->ls_sasparen.nparen = 2;
    265 						statep->ls_sasparen.start =
    266 							Xsavepos(ws, wp);
    267 						*wp++ = EXPRSUB;
    268 					} else {
    269 						ungetsc(c);
    270 						PUSH_STATE(SCSPAREN);
    271 						statep->ls_scsparen.nparen = 1;
    272 						statep->ls_scsparen.csstate = 0;
    273 						*wp++ = COMSUB;
    274 					}
    275 				} else if (c == '{') /*}*/ {
    276 					*wp++ = OSUBST;
    277 					*wp++ = '{'; /*}*/
    278 					wp = get_brace_var(&ws, wp);
    279 					c = getsc();
    280 					/* allow :# and :% (ksh88 compat) */
    281 					if (c == ':') {
    282 						*wp++ = CHAR, *wp++ = c;
    283 						c = getsc();
    284 					}
    285 					/* If this is a trim operation,
    286 					 * treat (,|,) specially in STBRACE.
    287 					 */
    288 					if (c == '#' || c == '%') {
    289 						ungetsc(c);
    290 						PUSH_STATE(STBRACE);
    291 					} else {
    292 						ungetsc(c);
    293 						PUSH_STATE(SBRACE);
    294 					}
    295 				} else if (ctype(c, C_ALPHA)) {
        					/* $name: emitted as OSUBST 'X' name
        					 * '\0' CSUBST 'X'
        					 */
    296 					*wp++ = OSUBST;
    297 					*wp++ = 'X';
    298 					do {
    299 						Xcheck(ws, wp);
    300 						*wp++ = c;
    301 						c = getsc();
    302 					} while (ctype(c, C_ALPHA|C_DIGIT));
    303 					*wp++ = '\0';
    304 					*wp++ = CSUBST;
    305 					*wp++ = 'X';
    306 					ungetsc(c);
    307 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
    308 					Xcheck(ws, wp);
    309 					*wp++ = OSUBST;
    310 					*wp++ = 'X';
    311 					*wp++ = c;
    312 					*wp++ = '\0';
    313 					*wp++ = CSUBST;
    314 					*wp++ = 'X';
    315 				} else {
    316 					*wp++ = CHAR, *wp++ = '$';
    317 					ungetsc(c);
    318 				}
    319 				break;
    320 			  case '`':
    321 				PUSH_STATE(SBQUOTE);
    322 				*wp++ = COMSUB;
    323 				/* Need to know if we are inside double quotes
    324 				 * since sh/at&t-ksh translate the \" to " in
    325 				 * "`..\"..`".
    326 				 * This is not done in posix mode (section
    327 				 * 3.2.3, Double Quotes: "The backquote shall
    328 				 * retain its special meaning introducing the
    329 				 * other form of command substitution (see
    330 				 * 3.6.3). The portion of the quoted string
    331 				 * from the initial backquote and the
    332 				 * characters up to the next backquote that
    333 				 * is not preceded by a backslash (having
    334 				 * escape characters removed) defines that
    335 				 * command whose output replaces `...` when
    336 				 * the word is expanded."
    337 				 * Section 3.6.3, Command Substitution:
    338 				 * "Within the backquoted style of command
    339 				 * substitution, backslash shall retain its
    340 				 * literal meaning, except when followed by
    341 				 * $ ` \.").
    342 				 */
    343 				statep->ls_sbquote.indquotes = 0;
    344 				if (!Flag(FPOSIX)) {
        					/* Walk the state stack downwards from
        					 * the new entry looking for an SDQUOTE
        					 * entry; on reaching slot 0 of a block,
        					 * continue through its base link until
        					 * that link is null.
        					 */
    345 					Lex_state *s = statep;
    346 					Lex_state *base = state_info.base;
    347 					while (1) {
    348 						for (; s != base; s--) {
    349 							if (s->ls_state == SDQUOTE) {
    350 								statep->ls_sbquote.indquotes = 1;
    351 								break;
    352 							}
    353 						}
    354 						if (s != base)
    355 							break;
    356 						if (!(s = s->ls_info.base))
    357 							break;
    358 						base = s-- - STATE_BSIZE;
    359 					}
    360 				}
    361 				break;
    362 			  default:
    363 				*wp++ = CHAR, *wp++ = c;
    364 			}
    365 			break;
    366 
    367 		  case SSQUOTE:
    368 			if (c == '\'') {
    369 				POP_STATE();
    370 				*wp++ = CQUOTE;
    371 				ignore_backslash_newline--;
    372 			} else
    373 				*wp++ = QCHAR, *wp++ = c;
    374 			break;
    375 
    376 		  case SDQUOTE:
    377 			if (c == '"') {
    378 				POP_STATE();
    379 				*wp++ = CQUOTE;
    380 			} else
    381 				goto Subst;
    382 			break;
    383 
    384 		  case SCSPAREN: /* $( .. ) */
    385 			/* todo: deal with $(...) quoting properly
    386 			 * kludge to partly fake quoting inside $(..): doesn't
    387 			 * really work because nested $(..) or ${..} inside
    388 			 * double quotes aren't dealt with.
    389 			 */
    390 			switch (statep->ls_scsparen.csstate) {
    391 			  case 0: /* normal */
    392 				switch (c) {
    393 				  case '(':
    394 					statep->ls_scsparen.nparen++;
    395 					break;
    396 				  case ')':
    397 					statep->ls_scsparen.nparen--;
    398 					break;
    399 				  case '\\':
    400 					statep->ls_scsparen.csstate = 1;
    401 					break;
    402 				  case '"':
    403 					statep->ls_scsparen.csstate = 2;
    404 					break;
    405 				  case '\'':
    406 					statep->ls_scsparen.csstate = 4;
    407 					ignore_backslash_newline++;
    408 					break;
    409 				}
    410 				break;
    411 
    412 			  case 1: /* backslash in normal mode */
    413 			  case 3: /* backslash in double quotes */
        				/* the escaped character is consumed; go back
        				 * to state 0 or 2 respectively
        				 */
    414 				--statep->ls_scsparen.csstate;
    415 				break;
    416 
    417 			  case 2: /* double quotes */
    418 				if (c == '"')
    419 					statep->ls_scsparen.csstate = 0;
    420 				else if (c == '\\')
    421 					statep->ls_scsparen.csstate = 3;
    422 				break;
    423 
    424 			  case 4: /* single quotes */
    425 				if (c == '\'') {
    426 					statep->ls_scsparen.csstate = 0;
    427 					ignore_backslash_newline--;
    428 				}
    429 				break;
    430 			}
    431 			if (statep->ls_scsparen.nparen == 0) {
    432 				POP_STATE();
    433 				*wp++ = 0; /* end of COMSUB */
    434 			} else
    435 				*wp++ = c;
    436 			break;
    437 
    438 		  case SASPAREN: /* $(( .. )) */
    439 			/* todo: deal with $((...); (...)) properly */
    440 			/* XXX should nest using existing state machine
    441 			 *     (embed "..", $(...), etc.) */
    442 			if (c == '(')
    443 				statep->ls_sasparen.nparen++;
    444 			else if (c == ')') {
    445 				statep->ls_sasparen.nparen--;
    446 				if (statep->ls_sasparen.nparen == 1) {
    447 					/*(*/
    448 					if ((c2 = getsc()) == ')') {
    449 						POP_STATE();
    450 						*wp++ = 0; /* end of EXPRSUB */
    451 						break;
    452 					} else {
    453 						char *s;
    454 
    455 						ungetsc(c2);
    456 						/* mismatched parenthesis -
    457 						 * assume we were really
    458 						 * parsing a $(..) expression
    459 						 */
        						/* rewrite the output from the
        						 * saved start: shift it up one
        						 * byte, replace EXPRSUB with
        						 * COMSUB followed by '(' and
        						 * turn this entry into SCSPAREN
        						 */
    460 						s = Xrestpos(ws, wp,
    461 						     statep->ls_sasparen.start);
    462 						memmove(s + 1, s, wp - s);
    463 						*s++ = COMSUB;
    464 						*s = '('; /*)*/
    465 						wp++;
    466 						statep->ls_scsparen.nparen = 1;
    467 						statep->ls_scsparen.csstate = 0;
    468 						state = statep->ls_state
    469 							= SCSPAREN;
    470 
    471 					}
    472 				}
    473 			}
    474 			*wp++ = c;
    475 			break;
    476 
    477 		  case SBRACE:
    478 			/*{*/
    479 			if (c == '}') {
    480 				POP_STATE();
    481 				*wp++ = CSUBST;
    482 				*wp++ = /*{*/ '}';
    483 			} else
    484 				goto Sbase1;
    485 			break;
    486 
    487 		  case STBRACE:
    488 			/* Same as SBRACE, except (,|,) treated specially */
    489 			/*{*/
    490 			if (c == '}') {
    491 				POP_STATE();
    492 				*wp++ = CSUBST;
    493 				*wp++ = /*{*/ '}';
    494 			} else if (c == '|') {
    495 				*wp++ = SPAT;
    496 			} else if (c == '(') {
    497 				*wp++ = OPAT;
    498 				*wp++ = ' ';	/* simile for @ */
    499 				PUSH_STATE(SPATTERN);
    500 			} else
    501 				goto Sbase1;
    502 			break;
    503 
    504 		  case SBQUOTE:
    505 			if (c == '`') {
    506 				*wp++ = 0;
    507 				POP_STATE();
    508 			} else if (c == '\\') {
    509 				switch (c = getsc()) {
    510 				  case '\\':
    511 				  case '$': case '`':
    512 					*wp++ = c;
    513 					break;
    514 				  case '"':
    515 					if (statep->ls_sbquote.indquotes) {
    516 						*wp++ = c;
    517 						break;
    518 					}
    519 					/* fall through.. */
    520 				  default:
    521 					if (c) { /* trailing \ is lost */
    522 						*wp++ = '\\';
    523 						*wp++ = c;
    524 					}
    525 					break;
    526 				}
    527 			} else
    528 				*wp++ = c;
    529 			break;
    530 
    531 		  case SWORD:	/* ONEWORD */
    532 			goto Subst;
    533 
    534 #ifdef KSH
    535 		  case SLETPAREN:	/* LETEXPR: (( ... )) */
    536 			/*(*/
    537 			if (c == ')') {
    538 				if (statep->ls_sletparen.nparen > 0)
    539 				    --statep->ls_sletparen.nparen;
    540 				/*(*/
    541 				else if ((c2 = getsc()) == ')') {
    542 					c = 0;
    543 					*wp++ = CQUOTE;
    544 					goto Done;
    545 				} else
    546 					ungetsc(c2);
    547 			} else if (c == '(')
    548 				/* parenthesis inside quotes and backslashes
    549 				 * are lost, but at&t ksh doesn't count them
    550 				 * either
    551 				 */
    552 				++statep->ls_sletparen.nparen;
    553 			goto Sbase2;
    554 #endif /* KSH */
    555 
    556 		  case SHEREDELIM:	/* <<,<<- delimiter */
    557 			/* XXX chuck this state (and the next) - use
    558 			 * the existing states ($ and \`..` should be
    559 			 * stripped of their specialness after the
    560 			 * fact).
    561 			 */
    562 			/* here delimiters need a special case since
    563 			 * $ and `..` are not to be treated specially
    564 			 */
    565 			if (c == '\\') {
    566 				c = getsc();
    567 				if (c) { /* trailing \ is lost */
    568 					*wp++ = QCHAR;
    569 					*wp++ = c;
    570 				}
    571 			} else if (c == '\'') {
    572 				PUSH_STATE(SSQUOTE);
    573 				*wp++ = OQUOTE;
    574 				ignore_backslash_newline++;
    575 			} else if (c == '"') {
        				/* switches the current entry in place
        				 * instead of pushing a new one
        				 */
    576 				state = statep->ls_state = SHEREDQUOTE;
    577 				*wp++ = OQUOTE;
    578 			} else {
    579 				*wp++ = CHAR;
    580 				*wp++ = c;
    581 			}
    582 			break;
    583 
    584 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
    585 			if (c == '"') {
    586 				*wp++ = CQUOTE;
    587 				state = statep->ls_state = SHEREDELIM;
    588 			} else {
    589 				if (c == '\\') {
    590 					switch (c = getsc()) {
    591 					  case '\\': case '"':
    592 					  case '$': case '`':
    593 						break;
    594 					  default:
    595 						if (c) { /* trailing \ lost */
    596 							*wp++ = CHAR;
    597 							*wp++ = '\\';
    598 						}
    599 						break;
    600 					}
    601 				}
    602 				*wp++ = CHAR;
    603 				*wp++ = c;
    604 			}
    605 			break;
    606 
    607 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
    608 			if ( /*(*/ c == ')') {
    609 				*wp++ = CPAT;
    610 				POP_STATE();
    611 			} else if (c == '|') {
    612 				*wp++ = SPAT;
    613 			} else if (c == '(') {
    614 				*wp++ = OPAT;
    615 				*wp++ = ' ';	/* simile for @ */
    616 				PUSH_STATE(SPATTERN);
    617 			} else
    618 				goto Sbase1;
    619 			break;
    620 		}
    621 	}
    622 Done:
    623 	Xcheck(ws, wp);
        	/* anything still on the state stack is a construct that was
        	 * opened but never closed
        	 */
    624 	if (statep != &states[1])
    625 		/* XXX figure out what is missing */
    626 		yyerror("no closing quote\n");
    627 
    628 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    629 	if (state == SHEREDELIM)
    630 		state = SBASE;
    631 
        	/* A < or > terminator is a redirection when nothing was
        	 * collected before it, or exactly one unquoted digit (the unit
        	 * number).
        	 */
    632 	dp = Xstring(ws, wp);
    633 	if ((c == '<' || c == '>') && state == SBASE
    634 	    && ((c2 = Xlength(ws, wp)) == 0
    635 	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
    636 	{
    637 		struct ioword *iop =
    638 				(struct ioword *) alloc(sizeof(*iop), ATEMP);
    639 
    640 		if (c2 == 2)
    641 			iop->unit = dp[1] - '0';
    642 		else
    643 			iop->unit = c == '>'; /* 0 for <, 1 for > */
    644 
    645 		c2 = getsc();
    646 		/* <<, >>, <> are ok, >< is not */
    647 		if (c == c2 || (c == '<' && c2 == '>')) {
    648 			iop->flag = c == c2 ?
    649 				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
    650 			if (iop->flag == IOHERE)
    651 				if ((c2 = getsc()) == '-')
    652 					iop->flag |= IOSKIP;
    653 				else
    654 					ungetsc(c2);
    655 		} else if (c2 == '&')
    656 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
    657 		else {
    658 			iop->flag = c == '>' ? IOWRITE : IOREAD;
    659 			if (c == '>' && c2 == '|')
    660 				iop->flag |= IOCLOB;
    661 			else
    662 				ungetsc(c2);
    663 		}
    664 
    665 		iop->name = (char *) 0;
    666 		iop->delim = (char *) 0;
    667 		iop->heredoc = (char *) 0;
    668 		Xfree(ws, wp);	/* free word */
    669 		yylval.iop = iop;
    670 		return REDIR;
    671 	}
    672 
    673 	if (wp == dp && state == SBASE) {
    674 		Xfree(ws, wp);	/* free word */
    675 		/* no word, process LEX1 character */
    676 		switch (c) {
    677 		  default:
    678 			return c;
    679 
    680 		  case '|':
    681 		  case '&':
    682 		  case ';':
    683 			if ((c2 = getsc()) == c)
    684 				c = (c == ';') ? BREAK :
    685 				    (c == '|') ? LOGOR :
    686 				    (c == '&') ? LOGAND :
    687 				    YYERRCODE;
    688 #ifdef KSH
    689 			else if (c == '|' && c2 == '&')
    690 				c = COPROC;
    691 #endif /* KSH */
    692 			else
    693 				ungetsc(c2);
    694 			return c;
    695 
    696 		  case '\n':
        			/* pending here documents are read at the newline */
    697 			gethere();
    698 			if (cf & CONTIN)
    699 				goto Again;
    700 			return c;
    701 
    702 		  case '(':  /*)*/
    703 #ifdef KSH
    704 			if ((c2 = getsc()) == '(') /*)*/
    705 				/* XXX need to handle ((...); (...)) */
    706 				c = MDPAREN;
    707 			else
    708 				ungetsc(c2);
    709 #endif /* KSH */
    710 			return c;
    711 		  /*(*/
    712 		  case ')':
    713 			return c;
    714 		}
    715 	}
    716 
    717 	*wp++ = EOS;		/* terminate word */
    718 	yylval.cp = Xclose(ws, wp);
    719 	if (state == SWORD
    720 #ifdef KSH
    721 		|| state == SLETPAREN
    722 #endif /* KSH */
    723 		)	/* ONEWORD? */
    724 		return LWORD;
    725 	ungetsc(c);		/* unget terminator */
    726 
    727 	/* copy word to unprefixed string ident */
        	/* Only CHAR-prefixed (unquoted) characters are copied, at most
        	 * IDENT of them.  c ends up as EOS only when the whole word was
        	 * consumed that way; otherwise ident is emptied below.
        	 */
    728 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
    729 		*dp++ = *sp++;
    730 	/* Make sure the ident array stays '\0' padded */
    731 	memset(dp, 0, (ident+IDENT) - dp + 1);
    732 	if (c != EOS)
    733 		*ident = '\0';	/* word is not unquoted */
    734 
    735 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
    736 		struct tbl *p;
    737 		int h = hash(ident);
    738 
    739 		/* { */
    740 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
    741 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
    742 		{
    743 			afree(yylval.cp, ATEMP);
    744 			return p->val.i;
    745 		}
    746 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
    747 		    && (p->flag & ISSET))
    748 		{
    749 			register Source *s;
    750 
        			/* an alias that is already being expanded on
        			 * the source stack is returned as a plain word
        			 */
    751 			for (s = source; s->type == SALIAS; s = s->next)
    752 				if (s->u.tblp == p)
    753 					return LWORD;
    754 			/* push alias expansion */
    755 			s = pushs(SALIAS, source->areap);
    756 			s->start = s->str = p->val.s;
    757 			s->u.tblp = p;
    758 			s->next = source;
    759 			source = s;
    760 			afree(yylval.cp, ATEMP);
    761 			goto Again;
    762 		}
    763 	}
    764 
    765 	return LWORD;
    766 }
    767 
    768 static void
    769 gethere()
    770 {
    771 	register struct ioword **p;
    772 
    773 	for (p = heres; p < herep; p++)
    774 		readhere(*p);
    775 	herep = heres;
    776 }
    777 
    778 /*
    779  * collect the text of a "<<word" here document into iop->heredoc
         *
         * The delimiter is evalstr(iop->delim, 0).  Input lines are appended to
         * an expandable string until a line consisting of the delimiter (after
         * leading tabs are dropped when IOSKIP is set) is seen; that line is
         * not stored.  End of input directly after the delimiter is accepted
         * as well; end of input anywhere else is reported through yyerror().
    780  */
    781 
    782 static void
    783 readhere(iop)
    784 	struct ioword *iop;
    785 {
    786 	register int c;
    787 	char *volatile eof;
    788 	char *eofp;
    789 	int skiptabs;
    790 	XString xs;
    791 	char *xp;
    792 	int xpos;
    793 
    794 	eof = evalstr(iop->delim, 0);
    795 
    796 	if (!(iop->flag & IOEVAL))
    797 		ignore_backslash_newline++;
    798 
    799 	Xinit(xs, xp, 256, ATEMP);
    800 
        	/* one iteration per input line */
    801 	for (;;) {
    802 		eofp = eof;
    803 		skiptabs = iop->flag & IOSKIP;
        		/* remember where this line starts so that a matching
        		 * delimiter line can be dropped again
        		 */
    804 		xpos = Xsavepos(xs, xp);
        		/* copy characters for as long as they match the delimiter */
    805 		while ((c = getsc()) != 0) {
    806 			if (skiptabs) {
    807 				if (c == '\t')
    808 					continue;
    809 				skiptabs = 0;
    810 			}
    811 			if (c != *eofp)
    812 				break;
    813 			Xcheck(xs, xp);
    814 			Xput(xs, xp, c);
    815 			eofp++;
    816 		}
    817 		/* Allow EOF here so commands with out trailing newlines
    818 		 * will work (eg, ksh -c '...', $(...), etc).
    819 		 */
    820 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
    821 			xp = Xrestpos(xs, xp, xpos);
    822 			break;
    823 		}
        		/* not the delimiter: keep the rest of the line, newline
        		 * included
        		 */
    824 		ungetsc(c);
    825 		while ((c = getsc()) != '\n') {
    826 			if (c == 0)
    827 				yyerror("here document `%s' unclosed\n", eof);
    828 			Xcheck(xs, xp);
    829 			Xput(xs, xp, c);
    830 		}
    831 		Xcheck(xs, xp);
    832 		Xput(xs, xp, c);
    833 	}
    834 	Xput(xs, xp, '\0');
    835 	iop->heredoc = Xclose(xs, xp);
    836 
    837 	if (!(iop->flag & IOEVAL))
    838 		ignore_backslash_newline--;
    839 }
    840 
    841 void
    842 #ifdef HAVE_PROTOTYPES
    843 yyerror(const char *fmt, ...)
    844 #else
    845 yyerror(fmt, va_alist)
    846 	const char *fmt;
    847 	va_dcl
    848 #endif
    849 {
        	/* Report a lexer/parser error.  fmt and the following arguments
        	 * are formatted printf-style onto shl_out after the prefix written
        	 * by error_prefix(TRUE); errorf(null) is called last.
        	 * NOTE(review): errorf() presumably does not return -- confirm.
        	 */
    850 	va_list va;
    851 
    852 	/* pop aliases and re-reads */
    853 	while (source->type == SALIAS || source->type == SREREAD)
    854 		source = source->next;
    855 	source->str = null;	/* zap pending input */
    856 
    857 	error_prefix(TRUE);
    858 	SH_VA_START(va, fmt);
    859 	shf_vfprintf(shl_out, fmt, va);
    860 	va_end(va);
    861 	errorf(null);
    862 }
    863 
    864 /*
    865  * input for yylex with alias expansion
    866  */
    867 
    868 Source *
    869 pushs(type, areap)
    870 	int type;
    871 	Area *areap;
    872 {
    873 	register Source *s;
    874 
    875 	s = (Source *) alloc(sizeof(Source), areap);
    876 	s->type = type;
    877 	s->str = null;
    878 	s->start = NULL;
    879 	s->line = 0;
    880 	s->errline = 0;
    881 	s->file = NULL;
    882 	s->flags = 0;
    883 	s->next = NULL;
    884 	s->areap = areap;
    885 	if (type == SFILE || type == SSTDIN) {
    886 		char *dummy;
    887 		Xinit(s->xs, dummy, 256, s->areap);
    888 	} else
    889 		memset(&s->xs, 0, sizeof(s->xs));
    890 	return s;
    891 }
    892 
    893 static int
    894 getsc__()
    895 {
        	/* Slow path behind the getsc_() macro: return the next character
        	 * of the current source, refilling or popping sources as they run
        	 * out.  Returns 0 at end of input.  The global `source' is changed
        	 * here when an SALIAS or SREREAD source is popped.
        	 */
    896 	register Source *s = source;
    897 	register int c;
    898 
    899 	while ((c = *s->str++) == 0) {
    900 		s->str = NULL;		/* return 0 for EOF by default */
    901 		switch (s->type) {
    902 		  case SEOF:
    903 			s->str = null;
    904 			return 0;
    905 
    906 		  case SSTDIN:
    907 		  case SFILE:
    908 			getsc_line(s);
    909 			break;
    910 
    911 		  case SWSTR:
    912 			break;
    913 
    914 		  case SSTRING:
    915 			break;
    916 
        		  /* SWORDS/SWORDSEP alternate: a word from u.strv, then a
        		   * separator (space) or, once the next entry is NULL, a
        		   * final newline followed by EOF
        		   */
    917 		  case SWORDS:
    918 			s->start = s->str = *s->u.strv++;
    919 			s->type = SWORDSEP;
    920 			break;
    921 
    922 		  case SWORDSEP:
    923 			if (*s->u.strv == NULL) {
    924 				s->start = s->str = newline;
    925 				s->type = SEOF;
    926 			} else {
    927 				s->start = s->str = space;
    928 				s->type = SWORDS;
    929 			}
    930 			break;
    931 
    932 		  case SALIAS:
    933 			if (s->flags & SF_ALIASEND) {
    934 				/* pass on an unused SF_ALIAS flag */
    935 				source = s->next;
    936 				source->flags |= s->flags & SF_ALIAS;
    937 				s = source;
    938 			} else if (*s->u.tblp->val.s
    939 				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
    940 			{
        				/* the test above looks at the last character
        				 * of the (non-empty) alias value
        				 */
    941 				source = s = s->next;	/* pop source stack */
    942 				/* Note that this alias ended with a space,
    943 				 * enabling alias expansion on the following
    944 				 * word.
    945 				 */
    946 				s->flags |= SF_ALIAS;
    947 			} else {
    948 				/* At this point, we need to keep the current
    949 				 * alias in the source list so recursive
    950 				 * aliases can be detected and we also need
    951 				 * to return the next character.  Do this
    952 				 * by temporarily popping the alias to get
    953 				 * the next character and then put it back
    954 				 * in the source list with the SF_ALIASEND
    955 				 * flag set.
    956 				 */
    957 				source = s->next;	/* pop source stack */
    958 				source->flags |= s->flags & SF_ALIAS;
    959 				c = getsc__();
    960 				if (c) {
        					/* re-push the alias holding just the
        					 * character read ahead, kept in ugbuf
        					 */
    961 					s->flags |= SF_ALIASEND;
    962 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
    963 					s->start = s->str = s->ugbuf;
    964 					s->next = source;
    965 					source = s;
    966 				} else {
    967 					s = source;
    968 					/* avoid reading eof twice */
    969 					s->str = NULL;
    970 					break;
    971 				}
    972 			}
    973 			continue;
    974 
    975 		  case SREREAD:
    976 			if (s->start != s->ugbuf) /* yuck */
    977 				afree(s->u.freeme, ATEMP);
    978 			source = s = s->next;
    979 			continue;
    980 		}
        		/* nothing new was supplied: turn the source into SEOF */
    981 		if (s->str == NULL) {
    982 			s->type = SEOF;
    983 			s->start = s->str = null;
    984 			return '\0';
    985 		}
        		/* echo the freshly obtained input when SF_ECHO is set */
    986 		if (s->flags & SF_ECHO) {
    987 			shf_puts(s->str, shl_out);
    988 			shf_flush(shl_out);
    989 		}
    990 	}
    991 	return c;
    992 }
    993 
    994 static void
    995 getsc_line(s)
    996 	Source *s;
    997 {
        	/* Read the next line for source s (called by getsc__() for SSTDIN
        	 * and SFILE sources) into s->xs and point s->start/s->str at it.
        	 * s->str is set to NULL when nothing could be read (EOF).  For an
        	 * interactive source this also issues the prompt, records the line
        	 * in the history (when HISTORY is defined) and switches the prompt
        	 * to PS2 afterwards.
        	 */
    998 	char *xp = Xstring(s->xs, xp);
    999 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
   1000 	int have_tty = interactive && (s->flags & SF_TTY);
   1001 
   1002 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1003 	XcheckN(s->xs, xp, LINE);
   1004 	*xp = '\0';
   1005 	s->start = s->str = xp;
   1006 
   1007 #ifdef KSH
        	/* arm the input timeout while waiting on a terminal */
   1008 	if (have_tty && ksh_tmout) {
   1009 		ksh_tmout_state = TMOUT_READING;
   1010 		alarm(ksh_tmout);
   1011 	}
   1012 #endif /* KSH */
   1013 #ifdef EDIT
        	/* with a line-editing mode enabled on a terminal the line comes
        	 * from x_read()
        	 */
   1014 	if (have_tty && (0
   1015 # ifdef VI
   1016 			 || Flag(FVI)
   1017 # endif /* VI */
   1018 # ifdef EMACS
   1019 			 || Flag(FEMACS) || Flag(FGMACS)
   1020 # endif /* EMACS */
   1021 		))
   1022 	{
   1023 		int nread;
   1024 
   1025 		nread = x_read(xp, LINE);
   1026 		if (nread < 0)	/* read error */
   1027 			nread = 0;
   1028 		xp[nread] = '\0';
   1029 		xp += nread;
   1030 	}
   1031 	else
   1032 #endif /* EDIT */
   1033 	{
   1034 		if (interactive) {
   1035 			pprompt(prompt, 0);
   1036 		} else
   1037 			s->line++;
   1038 
        		/* read until a newline or EOF/error, growing the buffer
        		 * whenever a partial line fills it; reads interrupted by
        		 * a signal (EINTR) are retried after running traps
        		 */
   1039 		while (1) {
   1040 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1041 
   1042 			if (!p && shf_error(s->u.shf)
   1043 			    && shf_errno(s->u.shf) == EINTR)
   1044 			{
   1045 				shf_clearerr(s->u.shf);
   1046 				if (trap)
   1047 					runtraps(0);
   1048 				continue;
   1049 			}
   1050 			if (!p || (xp = p, xp[-1] == '\n'))
   1051 				break;
   1052 			/* double buffer size */
   1053 			xp++; /* move past null so doubling works... */
   1054 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1055 			xp--; /* ...and move back again */
   1056 		}
   1057 		/* flush any unwanted input so other programs/builtins
   1058 		 * can read it.  Not very optimal, but less error prone
   1059 		 * than flushing else where, dealing with redirections,
   1060 		 * etc..
   1061 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
   1062 		 */
   1063 		if (s->type == SSTDIN)
   1064 			shf_flush(s->u.shf);
   1065 	}
   1066 	/* XXX: temporary kludge to restore source after a
   1067 	 * trap may have been executed.
   1068 	 */
   1069 	source = s;
   1070 #ifdef KSH
   1071 	if (have_tty && ksh_tmout)
   1072 	{
   1073 		ksh_tmout_state = TMOUT_EXECUTING;
   1074 		alarm(0);
   1075 	}
   1076 #endif /* KSH */
   1077 	s->start = s->str = Xstring(s->xs, xp);
   1078 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1079 	/* Note: if input is all nulls, this is not eof */
   1080 	if (Xlength(s->xs, xp) == 0) { /* EOF */
   1081 		if (s->type == SFILE)
   1082 			shf_fdclose(s->u.shf);
   1083 		s->str = NULL;
   1084 	} else if (interactive) {
   1085 #ifdef HISTORY
        		/* at the PS1 prompt, a line made up only of IFS white
        		 * space is not entered into the history
        		 */
   1086 		char *p = Xstring(s->xs, xp);
   1087 		if (cur_prompt == PS1)
   1088 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
   1089 				p++;
   1090 		if (*p) {
   1091 # ifdef EASY_HISTORY
   1092 			if (cur_prompt == PS2)
   1093 				histappend(Xstring(s->xs, xp), 1);
   1094 			else
   1095 # endif /* EASY_HISTORY */
   1096 			{
   1097 				s->line++;
   1098 				histsave(s->line, s->str, 1);
   1099 			}
   1100 		}
   1101 #endif /* HISTORY */
   1102 	}
   1103 	if (interactive)
   1104 		set_prompt(PS2, (Source *) 0);
   1105 }
   1106 
   1107 void
   1108 set_prompt(to, s)
   1109 	int to;
   1110 	Source *s;
   1111 {
   1112 	cur_prompt = to;
   1113 
   1114 	switch (to) {
   1115 	case PS1: /* command */
   1116 #ifdef KSH
   1117 		/* Substitute ! and !! here, before substitutions are done
   1118 		 * so ! in expanded variables are not expanded.
   1119 		 * NOTE: this is not what at&t ksh does (it does it after
   1120 		 * substitutions, POSIX doesn't say which is to be done.
   1121 		 */
   1122 		{
   1123 			struct shf *shf;
   1124 			char *ps1;
   1125 			Area *saved_atemp;
   1126 #ifdef __GNUC__
   1127 			(void) &ps1;
   1128 #endif
   1129 
   1130 			ps1 = str_val(global("PS1"));
   1131 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
   1132 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
   1133 			while (*ps1) {
   1134 				if (*ps1 != '!' || *++ps1 == '!')
   1135 					shf_putchar(*ps1++, shf);
   1136 				else
   1137 					shf_fprintf(shf, "%d",
   1138 						s ? s->line + 1 : 0);
   1139 			}
   1140 			ps1 = shf_sclose(shf);
   1141 			saved_atemp = ATEMP;
   1142 			newenv(E_ERRH);
   1143 			if (ksh_sigsetjmp(e->jbuf, 0)) {
   1144 				prompt = safe_prompt;
   1145 				/* Don't print an error - assume it has already
   1146 				 * been printed.  Reason is we may have forked
   1147 				 * to run a command and the child may be
   1148 				 * unwinding its stack through this code as it
   1149 				 * exits.
   1150 				 */
   1151 			} else
   1152 				prompt = str_save(substitute(ps1, 0),
   1153 						 saved_atemp);
   1154 			quitenv();
   1155 		}
   1156 #else /* KSH */
   1157 		prompt = str_val(global("PS1"));
   1158 #endif /* KSH */
   1159 		break;
   1160 
   1161 	case PS2: /* command continuation */
   1162 		prompt = str_val(global("PS2"));
   1163 		break;
   1164 	}
   1165 }
   1166 
   1167 /* See also related routine, promptlen() in edit.c */
   1168 void
   1169 pprompt(cp, ntruncate)
   1170 	const char *cp;
   1171 	int ntruncate;
   1172 {
   1173 #if 0
   1174 	char nbuf[32];
   1175 	int c;
   1176 
   1177 	while (*cp != 0) {
   1178 		if (*cp != '!')
   1179 			c = *cp++;
   1180 		else if (*++cp == '!')
   1181 			c = *cp++;
   1182 		else {
   1183 			int len;
   1184 			char *p;
   1185 
   1186 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
   1187 				source->line + 1);
   1188 			len = strlen(nbuf);
   1189 			if (ntruncate) {
   1190 				if (ntruncate >= len) {
   1191 					ntruncate -= len;
   1192 					continue;
   1193 				}
   1194 				p += ntruncate;
   1195 				len -= ntruncate;
   1196 				ntruncate = 0;
   1197 			}
   1198 			shf_write(p, len, shl_out);
   1199 			continue;
   1200 		}
   1201 		if (ntruncate)
   1202 			--ntruncate;
   1203 		else
   1204 			shf_putc(c, shl_out);
   1205 	}
   1206 #endif /* 0 */
   1207 	shf_puts(cp + ntruncate, shl_out);
   1208 	shf_flush(shl_out);
   1209 }
   1210 
   1211 /* Read the variable part of a ${...} expression (ie, up to but not including
   1212  * the :[-+?=#%] or close-brace.
   1213  */
   1214 static char *
   1215 get_brace_var(wsp, wp)
   1216 	XString *wsp;
   1217 	char *wp;
   1218 {
   1219 	enum parse_state {
   1220 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1221 			   PS_NUMBER, PS_VAR1, PS_END
   1222 			 }
   1223 		state;
   1224 	char c;
   1225 
   1226 	state = PS_INITIAL;
   1227 	while (1) {
   1228 		c = getsc();
   1229 		/* State machine to figure out where the variable part ends. */
   1230 		switch (state) {
   1231 		  case PS_INITIAL:
   1232 			if (c == '#') {
   1233 				state = PS_SAW_HASH;
   1234 				break;
   1235 			}
   1236 			/* fall through.. */
   1237 		  case PS_SAW_HASH:
   1238 			if (letter(c))
   1239 				state = PS_IDENT;
   1240 			else if (digit(c))
   1241 				state = PS_NUMBER;
   1242 			else if (ctype(c, C_VAR1))
   1243 				state = PS_VAR1;
   1244 			else
   1245 				state = PS_END;
   1246 			break;
   1247 		  case PS_IDENT:
   1248 			if (!letnum(c)) {
   1249 				state = PS_END;
   1250 				if (c == '[') {
   1251 					char *tmp, *p;
   1252 
   1253 					if (!arraysub(&tmp))
   1254 						yyerror("missing ]\n");
   1255 					*wp++ = c;
   1256 					for (p = tmp; *p; ) {
   1257 						Xcheck(*wsp, wp);
   1258 						*wp++ = *p++;
   1259 					}
   1260 					afree(tmp, ATEMP);
   1261 					c = getsc(); /* the ] */
   1262 				}
   1263 			}
   1264 			break;
   1265 		  case PS_NUMBER:
   1266 			if (!digit(c))
   1267 				state = PS_END;
   1268 			break;
   1269 		  case PS_VAR1:
   1270 			state = PS_END;
   1271 			break;
   1272 		  case PS_END: /* keep gcc happy */
   1273 			break;
   1274 		}
   1275 		if (state == PS_END) {
   1276 			*wp++ = '\0';	/* end of variable part */
   1277 			ungetsc(c);
   1278 			break;
   1279 		}
   1280 		Xcheck(*wsp, wp);
   1281 		*wp++ = c;
   1282 	}
   1283 	return wp;
   1284 }
   1285 
   1286 /*
   1287  * Save an array subscript - returns true if matching bracket found, false
   1288  * if eof or newline was found.
   1289  * (Returned string double null terminated)
   1290  */
   1291 static int
   1292 arraysub(strp)
   1293 	char **strp;
   1294 {
   1295 	XString ws;
   1296 	char	*wp;
   1297 	char	c;
   1298 	int 	depth = 1;	/* we are just past the initial [ */
   1299 
   1300 	Xinit(ws, wp, 32, ATEMP);
   1301 
   1302 	do {
   1303 		c = getsc();
   1304 		Xcheck(ws, wp);
   1305 		*wp++ = c;
   1306 		if (c == '[')
   1307 			depth++;
   1308 		else if (c == ']')
   1309 			depth--;
   1310 	} while (depth > 0 && c && c != '\n');
   1311 
   1312 	*wp++ = '\0';
   1313 	*strp = Xclose(ws, wp);
   1314 
   1315 	return depth == 0 ? 1 : 0;
   1316 }
   1317 
   1318 /* Unget a char: handles case when we are already at the start of the buffer */
   1319 static const char *
   1320 ungetsc(c)
   1321 	int c;
   1322 {
   1323 	if (backslash_skip)
   1324 		backslash_skip--;
   1325 	/* Don't unget eof... */
   1326 	if (source->str == null && c == '\0')
   1327 		return source->str;
   1328 	if (source->str > source->start)
   1329 		source->str--;
   1330 	else {
   1331 		Source *s;
   1332 
   1333 		s = pushs(SREREAD, source->areap);
   1334 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1335 		s->start = s->str = s->ugbuf;
   1336 		s->next = source;
   1337 		source = s;
   1338 	}
   1339 	return source->str;
   1340 }
   1341 
   1342 
   1343 /* Called to get a char that isn't a \newline sequence. */
   1344 static int
   1345 getsc_bn ARGS((void))
   1346 {
   1347 	int c, c2;
   1348 
   1349 	if (ignore_backslash_newline)
   1350 		return getsc_();
   1351 
   1352 	if (backslash_skip == 1) {
   1353 		backslash_skip = 2;
   1354 		return getsc_();
   1355 	}
   1356 
   1357 	backslash_skip = 0;
   1358 
   1359 	while (1) {
   1360 		c = getsc_();
   1361 		if (c == '\\') {
   1362 			if ((c2 = getsc_()) == '\n')
   1363 				/* ignore the \newline; get the next char... */
   1364 				continue;
   1365 			ungetsc(c2);
   1366 			backslash_skip = 1;
   1367 		}
   1368 		return c;
   1369 	}
   1370 }
   1371 
   1372 static Lex_state *
   1373 push_state_(si, old_end)
   1374 	State_info *si;
   1375 	Lex_state *old_end;
   1376 {
   1377 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
   1378 
   1379 	new[0].ls_info.base = old_end;
   1380 	si->base = &new[0];
   1381 	si->end = &new[STATE_BSIZE];
   1382 	return &new[1];
   1383 }
   1384 
   1385 static Lex_state *
   1386 pop_state_(si, old_end)
   1387 	State_info *si;
   1388 	Lex_state *old_end;
   1389 {
   1390 	Lex_state *old_base = si->base;
   1391 
   1392 	si->base = old_end->ls_info.base - STATE_BSIZE;
   1393 	si->end = old_end->ls_info.base;
   1394 
   1395 	afree(old_base, ATEMP);
   1396 
   1397 	return si->base + STATE_BSIZE - 1;;
   1398 }
   1399