Home | History | Annotate | Line # | Download | only in ksh
lex.c revision 1.12
      1 /*	$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $	*/
      2 
      3 /*
      4  * lexical analysis and source input
      5  */
      6 #include <sys/cdefs.h>
      7 
      8 #ifndef lint
      9 __RCSID("$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $");
     10 #endif
     11 
     12 
     13 #include "sh.h"
     14 #include <ctype.h>
     15 
     16 
     17 /* Structure to keep track of the lexing state and the various pieces of info
     18  * needed for each particular state.
     19  */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;		/* lexing state code for this stack entry
				 * (yylex stores SBASE, SDQUOTE, ... here and
				 * -1 in the sentinel entry states[0])
				 */
	/* Per-state bookkeeping; which member is valid depends on ls_state.
	 * The ls_* macros defined next to each member are shorthands so
	 * that code can write statep->ls_scsparen.nparen and so on.
	 */
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    int csstate; /* XXX remove */
					/* quoting sub-state used by yylex:
					 * 0 normal, 1 after backslash,
					 * 2 in "...", 3 after backslash
					 * in "...", 4 in '...'
					 */
#define ls_scsparen ls_info.u_scsparen
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
	    } u_sbquote;

	    Lex_state *base;		/* used to point to next state block */
					/* (yylex sets this to NULL in
					 * states[0] and follows it when
					 * scanning the stack for SDQUOTE)
					 */
	} ls_info;
};
     53 
/* Bounds of the block of Lex_state entries currently in use.  PUSH_STATE
 * compares the stack pointer against end, POP_STATE against base.
 */
typedef struct State_info State_info;
struct State_info {
	Lex_state	*base;	/* first entry of the current block (yylex
				 * starts with its local states[] array)
				 */
	Lex_state	*end;	/* one past the last entry of the block */
};
     59 
     60 
/* Forward declarations of the file-local helpers.  ARGS() wraps the
 * parameter lists; its definition is not in this file (NOTE(review):
 * presumably it drops the prototype for pre-ANSI compilers - confirm in
 * the project headers).
 */
static void	readhere ARGS((struct ioword *iop));	/* collect one here document */
static int	getsc__ ARGS((void));			/* next raw char from source */
static void	getsc_line ARGS((Source *s));		/* refill a file/stdin source */
static int	getsc_bn ARGS((void));			/* slow path of getsc() */
static char	*get_brace_var ARGS((XString *wsp, char *wp));
static int	arraysub ARGS((char **strp));
static const char *ungetsc ARGS((int c));
static void	gethere ARGS((void));			/* read all pending here documents */
/* called by PUSH_STATE / POP_STATE when the current state block is
 * exhausted in the respective direction
 */
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
     71 
/* Both are reset to 0 at the start of every yylex() call.
 * ignore_backslash_newline is used as a nesting counter: it is bumped
 * while lexing comments, single quoted text and here documents without
 * IOEVAL.  NOTE(review): presumably both are consulted by getsc_bn(),
 * which is not shown here - confirm.
 */
static int backslash_skip;
static int ignore_backslash_newline;

/* optimized getsc_bn() */
/* Takes the next character straight from source->str unless it is the
 * terminating NUL, a backslash, or backslash_skip is set; in those cases
 * getsc_bn() is called instead.
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__() */
/* Same idea without the backslash handling: only the NUL at the end of
 * the current buffer falls through to getsc__().
 */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* number of Lex_state entries in the block yylex() keeps on its stack */
#define STATE_BSIZE	32

/* Enter lexing state (s).  Must be used where the locals statep,
 * state_info and state are in scope: advances statep (calling
 * push_state_() when the end of the current block is reached) and
 * records the new state in both the stack entry and `state'.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Leave the current lexing state: steps statep back (calling
 * pop_state_() when the base entry of the current block is reached) and
 * reloads `state' from the entry that is now on top.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
     94 
     95 
     96 
     97 /*
     98  * Lexical analyzer
     99  *
    100  * tokens are not regular expressions, they are LL(1).
    101  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
    102  * hence the state stack.
    103  */
    104 
/*
 * Read the next token from the current input source.
 *
 * cf is a set of flag bits; the ones tested here are ONEWORD, LETEXPR,
 * HEREDELIM, VARASN, ARRAYVAR, CMDWORD, HEREDOC, CONTIN, KEYWORD, ALIAS
 * and ESACONLY.
 *
 * Returns REDIR (with yylval.iop set), LWORD (with yylval.cp set to the
 * encoded word), a keyword's token value taken from the keywords table,
 * one of the operator tokens BREAK, LOGOR, LOGAND, COPROC or MDPAREN, or
 * the terminating character itself (0 at end of input).
 *
 * Words are built in ws as a sequence of tagged items (CHAR c, QCHAR c,
 * OQUOTE/CQUOTE, OSUBST ... CSUBST, COMSUB text NUL, EXPRSUB text NUL,
 * OPAT/SPAT/CPAT) terminated by EOS.
 */
int
yylex(cf)
	int cf;
{
	Lex_state states[STATE_BSIZE], *statep;
	State_info state_info;
	register int c, state;
	XString ws;		/* expandable output word */
	register char *wp;	/* output word pointer */
	char *sp, *dp;
	int c2;


  Again:
	/* states[0] is a sentinel; the live stack starts at states[1] */
	states[0].ls_state = -1;
	states[0].ls_info.base = (Lex_state *) 0;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &states[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf&ONEWORD)
		state = SWORD;
#ifdef KSH
	else if (cf&LETEXPR) {
		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
		state = SLETPAREN;
		statep->ls_sletparen.nparen = 0;
	}
#endif /* KSH */
	else {		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		while ((c = getsc()) == ' ' || c == '\t')
			;
		/* skip a comment up to (not including) the newline */
		if (c == '#') {
			ignore_backslash_newline++;
			while ((c = getsc()) != '\0' && c != '\n')
				;
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* In POSIX mode, a trailing space only counts if we are
		 * parsing a simple command
		 */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
	statep->ls_state = state;

	/* collect non-special or quoted characters to form word */
	/* (the loop stops at end of input, or at a C_LEX1 character while
	 * in one of the two outermost states)
	 */
	while (!((c = getsc()) == 0
		 || ((state == SBASE || state == SHEREDELIM)
		     && ctype(c, C_LEX1))))
	{
		Xcheck(ws, wp);
		switch (state) {
		  case SBASE:
			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
				*wp = EOS; /* temporary */
				if (is_wdvarname(Xstring(ws, wp), FALSE))
				{
					char *p, *tmp;

					if (arraysub(&tmp)) {
						/* copy "[subscript" as plain
						 * characters
						 */
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					} else {
						Source *s;

						/* push the text back as a
						 * new source so it is lexed
						 * again
						 */
						s = pushs(SREREAD,
							  source->areap);
						s->start = s->str
							= s->u.freeme = tmp;
						s->next = source;
						source = s;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* fall through.. */
		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
#ifdef KSH
			if (c == '*' || c == '@' || c == '+' || c == '?'
			    || c == '!')
			{
				c2 = getsc();
				if (c2 == '(' /*)*/ ) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
#endif /* KSH */
			/* fall through.. */
		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			  case '\\':
				c = getsc();
#ifdef OS2
				if (isalnum((unsigned char)c)) {
					*wp++ = CHAR, *wp++ = '\\';
					*wp++ = CHAR, *wp++ = c;
				} else
#endif
				if (c) /* trailing \ is lost */
					*wp++ = QCHAR, *wp++ = c;
				break;
			  case '\'':
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			  case '"':
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			  default:
				goto Subst;
			}
			break;

		  /* Substitution handling shared by the unquoted states,
		   * SDQUOTE and SWORD.
		   */
		  Subst:
			switch (c) {
			  case '\\':
				c = getsc();
				switch (c) {
				  case '\\':
				  case '$': case '`':
					*wp++ = QCHAR, *wp++ = c;
					break;
				  case '"':
					if ((cf & HEREDOC) == 0) {
						*wp++ = QCHAR, *wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				  default:
					Xcheck(ws, wp);
					if (c) { /* trailing \ is lost */
						*wp++ = CHAR, *wp++ = '\\';
						*wp++ = CHAR, *wp++ = c;
					}
					break;
				}
				break;
			  case '$':
				c = getsc();
				if (c == '(') /*)*/ {
					c = getsc();
					if (c == '(') /*)*/ {
						PUSH_STATE(SASPAREN);
						statep->ls_sasparen.nparen = 2;
						statep->ls_sasparen.start =
							Xsavepos(ws, wp);
						*wp++ = EXPRSUB;
					} else {
						ungetsc(c);
						PUSH_STATE(SCSPAREN);
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						*wp++ = COMSUB;
					}
				} else if (c == '{') /*}*/ {
					*wp++ = OSUBST;
					*wp++ = '{'; /*}*/
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if (c == ':') {
						*wp++ = CHAR, *wp++ = c;
						c = getsc();
					}
					/* If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (c == '#' || c == '%') {
						ungetsc(c);
						PUSH_STATE(STBRACE);
					} else {
						ungetsc(c);
						PUSH_STATE(SBRACE);
					}
				} else if (ctype(c, C_ALPHA)) {
					/* $name: emitted as
					 * OSUBST 'X' name NUL CSUBST 'X'
					 */
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALPHA|C_DIGIT));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_DIGIT|C_VAR1)) {
					/* single character parameter */
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* lone '$' is an ordinary character */
					*wp++ = CHAR, *wp++ = '$';
					ungetsc(c);
				}
				break;
			  case '`':
				PUSH_STATE(SBQUOTE);
				*wp++ = COMSUB;
				/* Need to know if we are inside double quotes
				 * since sh/at&t-ksh translate the \" to " in
				 * "`..\"..`".
				 * This is not done in posix mode (section
				 * 3.2.3, Double Quotes: "The backquote shall
				 * retain its special meaning introducing the
				 * other form of command substitution (see
				 * 3.6.3). The portion of the quoted string
				 * from the initial backquote and the
				 * characters up to the next backquote that
				 * is not preceded by a backslash (having
				 * escape characters removed) defines that
				 * command whose output replaces `...` when
				 * the word is expanded."
				 * Section 3.6.3, Command Substitution:
				 * "Within the backquoted style of command
				 * substitution, backslash shall retain its
				 * literal meaning, except when followed by
				 * $ ` \.").
				 */
				statep->ls_sbquote.indquotes = 0;
				if (!Flag(FPOSIX)) {
					/* walk down the state stack, block
					 * by block, looking for an enclosing
					 * SDQUOTE
					 */
					Lex_state *s = statep;
					Lex_state *base = state_info.base;
					while (1) {
						for (; s != base; s--) {
							if (s->ls_state == SDQUOTE) {
								statep->ls_sbquote.indquotes = 1;
								break;
							}
						}
						if (s != base)
							break;
						if (!(s = s->ls_info.base))
							break;
						base = s-- - STATE_BSIZE;
					}
				}
				break;
			  default:
				*wp++ = CHAR, *wp++ = c;
			}
			break;

		  case SSQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else
				*wp++ = QCHAR, *wp++ = c;
			break;

		  case SDQUOTE:
			if (c == '"') {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		  case SCSPAREN: /* $( .. ) */
			/* todo: deal with $(...) quoting properly
			 * kludge to partly fake quoting inside $(..): doesn't
			 * really work because nested $(..) or ${..} inside
			 * double quotes aren't dealt with.
			 */
			switch (statep->ls_scsparen.csstate) {
			  case 0: /* normal */
				switch (c) {
				  case '(':
					statep->ls_scsparen.nparen++;
					break;
				  case ')':
					statep->ls_scsparen.nparen--;
					break;
				  case '\\':
					statep->ls_scsparen.csstate = 1;
					break;
				  case '"':
					statep->ls_scsparen.csstate = 2;
					break;
				  case '\'':
					statep->ls_scsparen.csstate = 4;
					ignore_backslash_newline++;
					break;
				}
				break;

			  case 1: /* backslash in normal mode */
			  case 3: /* backslash in double quotes */
				/* escaped character consumed: 1 -> 0, 3 -> 2 */
				--statep->ls_scsparen.csstate;
				break;

			  case 2: /* double quotes */
				if (c == '"')
					statep->ls_scsparen.csstate = 0;
				else if (c == '\\')
					statep->ls_scsparen.csstate = 3;
				break;

			  case 4: /* single quotes */
				if (c == '\'') {
					statep->ls_scsparen.csstate = 0;
					ignore_backslash_newline--;
				}
				break;
			}
			if (statep->ls_scsparen.nparen == 0) {
				POP_STATE();
				*wp++ = 0; /* end of COMSUB */
			} else
				*wp++ = c;
			break;

		  case SASPAREN: /* $(( .. )) */
			/* todo: deal with $((...); (...)) properly */
			/* XXX should nest using existing state machine
			 *     (embed "..", $(...), etc.) */
			if (c == '(')
				statep->ls_sasparen.nparen++;
			else if (c == ')') {
				statep->ls_sasparen.nparen--;
				if (statep->ls_sasparen.nparen == 1) {
					/*(*/
					if ((c2 = getsc()) == ')') {
						POP_STATE();
						*wp++ = 0; /* end of EXPRSUB */
						break;
					} else {
						char *s;

						ungetsc(c2);
						/* mismatched parenthesis -
						 * assume we were really
						 * parsing a $(..) expression
						 */
						/* rewrite the EXPRSUB marker
						 * as COMSUB followed by '(',
						 * shifting the collected
						 * text up by one byte
						 */
						s = Xrestpos(ws, wp,
						     statep->ls_sasparen.start);
						memmove(s + 1, s, wp - s);
						*s++ = COMSUB;
						*s = '('; /*)*/
						wp++;
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						state = statep->ls_state
							= SCSPAREN;

					}
				}
			}
			*wp++ = c;
			break;

		  case SBRACE:
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		  case STBRACE:
			/* Same as SBRACE, except (,|,) treated specially */
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;

		  case SBQUOTE:
			if (c == '`') {
				*wp++ = 0;
				POP_STATE();
			} else if (c == '\\') {
				switch (c = getsc()) {
				  case '\\':
				  case '$': case '`':
					*wp++ = c;
					break;
				  case '"':
					if (statep->ls_sbquote.indquotes) {
						*wp++ = c;
						break;
					}
					/* fall through.. */
				  default:
					if (c) { /* trailing \ is lost */
						*wp++ = '\\';
						*wp++ = c;
					}
					break;
				}
			} else
				*wp++ = c;
			break;

		  case SWORD:	/* ONEWORD */
			goto Subst;

#ifdef KSH
		  case SLETPAREN:	/* LETEXPR: (( ... )) */
			/*(*/
			if (c == ')') {
				if (statep->ls_sletparen.nparen > 0)
				    --statep->ls_sletparen.nparen;
				/*(*/
				else if ((c2 = getsc()) == ')') {
					/* closing "))": end of expression */
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else
					ungetsc(c2);
			} else if (c == '(')
				/* parenthesis inside quotes and backslashes
				 * are lost, but at&t ksh doesn't count them
				 * either
				 */
				++statep->ls_sletparen.nparen;
			goto Sbase2;
#endif /* KSH */

		  case SHEREDELIM:	/* <<,<<- delimiter */
			/* XXX chuck this state (and the next) - use
			 * the existing states ($ and \`..` should be
			 * stripped of their specialness after the
			 * fact).
			 */
			/* here delimiters need a special case since
			 * $ and `..` are not to be treated specially
			 */
			if (c == '\\') {
				c = getsc();
				if (c) { /* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
			} else if (c == '\'') {
				PUSH_STATE(SSQUOTE);
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
			} else if (c == '"') {
				/* switches state in place, no push */
				state = statep->ls_state = SHEREDQUOTE;
				*wp++ = OQUOTE;
			} else {
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
			if (c == '"') {
				*wp++ = CQUOTE;
				state = statep->ls_state = SHEREDELIM;
			} else {
				if (c == '\\') {
					switch (c = getsc()) {
					  case '\\': case '"':
					  case '$': case '`':
						break;
					  default:
						if (c) { /* trailing \ lost */
							*wp++ = CHAR;
							*wp++ = '\\';
						}
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
			if ( /*(*/ c == ')') {
				*wp++ = CPAT;
				POP_STATE();
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
Done:
	Xcheck(ws, wp);
	/* anything left on the state stack means an unterminated construct */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote\n");

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	dp = Xstring(ws, wp);
	/* Redirection: '<' or '>' preceded by nothing or by a single
	 * unquoted digit (the file descriptor).
	 */
	if ((c == '<' || c == '>') && state == SBASE
	    && ((c2 = Xlength(ws, wp)) == 0
	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
	{
		struct ioword *iop =
				(struct ioword *) alloc(sizeof(*iop), ATEMP);

		if (c2 == 2)
			iop->unit = dp[1] - '0';
		else
			iop->unit = c == '>'; /* 0 for <, 1 for > */

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || (c == '<' && c2 == '>')) {
			iop->flag = c == c2 ?
				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
			if (iop->flag == IOHERE) {
				/* <<- */
				if ((c2 = getsc()) == '-') {
					iop->flag |= IOSKIP;
				} else {
					ungetsc(c2);
				}
			}
		} else if (c2 == '&')
			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
		else {
			iop->flag = c == '>' ? IOWRITE : IOREAD;
			if (c == '>' && c2 == '|')
				iop->flag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->name = (char *) 0;
		iop->delim = (char *) 0;
		iop->heredoc = (char *) 0;
		Xfree(ws, wp);	/* free word */
		yylval.iop = iop;
		return REDIR;
	}

	if (wp == dp && state == SBASE) {
		Xfree(ws, wp);	/* free word */
		/* no word, process LEX1 character */
		switch (c) {
		  default:
			return c;

		  case '|':
		  case '&':
		  case ';':
			/* doubled character: ;; || && */
			if ((c2 = getsc()) == c)
				c = (c == ';') ? BREAK :
				    (c == '|') ? LOGOR :
				    (c == '&') ? LOGAND :
				    YYERRCODE;
#ifdef KSH
			else if (c == '|' && c2 == '&')
				c = COPROC;
#endif /* KSH */
			else
				ungetsc(c2);
			return c;

		  case '\n':
			/* read the bodies of any pending here documents */
			gethere();
			if (cf & CONTIN)
				goto Again;
			return c;

		  case '(':  /*)*/
#ifdef KSH
			if ((c2 = getsc()) == '(') /*)*/
				/* XXX need to handle ((...); (...)) */
				c = MDPAREN;
			else
				ungetsc(c2);
#endif /* KSH */
			return c;
		  /*(*/
		  case ')':
			return c;
		}
	}

	*wp++ = EOS;		/* terminate word */
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD
#ifdef KSH
		|| state == SLETPAREN
#endif /* KSH */
		)	/* ONEWORD? */
		return LWORD;
	ungetsc(c);		/* unget terminator */

	/* copy word to unprefixed string ident */
	/* (stops at the first item that is not CHAR, or after IDENT
	 * characters)
	 */
	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
		*dp++ = *sp++;
	/* Make sure the ident array stays '\0' padded */
	memset(dp, 0, (ident+IDENT) - dp + 1);
	if (c != EOS)
		*ident = '\0';	/* word is not unquoted */

	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
		struct tbl *p;
		int h = hash(ident);

		/* { */
		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
		{
			afree(yylval.cp, ATEMP);
			return p->val.i;
		}
		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
		    && (p->flag & ISSET))
		{
			register Source *s;

			/* do not expand an alias that is already being
			 * expanded
			 */
			for (s = source; s->type == SALIAS; s = s->next)
				if (s->u.tblp == p)
					return LWORD;
			/* push alias expansion */
			s = pushs(SALIAS, source->areap);
			s->start = s->str = p->val.s;
			s->u.tblp = p;
			s->next = source;
			source = s;
			afree(yylval.cp, ATEMP);
			goto Again;
		}
	}

	return LWORD;
}
    781 
    782 static void
    783 gethere()
    784 {
    785 	register struct ioword **p;
    786 
    787 	for (p = heres; p < herep; p++)
    788 		readhere(*p);
    789 	herep = heres;
    790 }
    791 
/*
 * read "<<word" here-document text into a string saved in iop->heredoc
 */
    795 
    796 static void
    797 readhere(iop)
    798 	struct ioword *iop;
    799 {
    800 	register int c;
    801 	char *volatile eof;
    802 	char *eofp;
    803 	int skiptabs;
    804 	XString xs;
    805 	char *xp;
    806 	int xpos;
    807 
    808 	eof = evalstr(iop->delim, 0);
    809 
    810 	if (!(iop->flag & IOEVAL))
    811 		ignore_backslash_newline++;
    812 
    813 	Xinit(xs, xp, 256, ATEMP);
    814 
    815 	for (;;) {
    816 		eofp = eof;
    817 		skiptabs = iop->flag & IOSKIP;
    818 		xpos = Xsavepos(xs, xp);
    819 		while ((c = getsc()) != 0) {
    820 			if (skiptabs) {
    821 				if (c == '\t')
    822 					continue;
    823 				skiptabs = 0;
    824 			}
    825 			if (c != *eofp)
    826 				break;
    827 			Xcheck(xs, xp);
    828 			Xput(xs, xp, c);
    829 			eofp++;
    830 		}
    831 		/* Allow EOF here so commands with out trailing newlines
    832 		 * will work (eg, ksh -c '...', $(...), etc).
    833 		 */
    834 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
    835 			xp = Xrestpos(xs, xp, xpos);
    836 			break;
    837 		}
    838 		ungetsc(c);
    839 		while ((c = getsc()) != '\n') {
    840 			if (c == 0)
    841 				yyerror("here document `%s' unclosed\n", eof);
    842 			Xcheck(xs, xp);
    843 			Xput(xs, xp, c);
    844 		}
    845 		Xcheck(xs, xp);
    846 		Xput(xs, xp, c);
    847 	}
    848 	Xput(xs, xp, '\0');
    849 	iop->heredoc = Xclose(xs, xp);
    850 
    851 	if (!(iop->flag & IOEVAL))
    852 		ignore_backslash_newline--;
    853 }
    854 
    855 void
    856 #ifdef HAVE_PROTOTYPES
    857 yyerror(const char *fmt, ...)
    858 #else
    859 yyerror(fmt, va_alist)
    860 	const char *fmt;
    861 	va_dcl
    862 #endif
    863 {
    864 	va_list va;
    865 
    866 	/* pop aliases and re-reads */
    867 	while (source->type == SALIAS || source->type == SREREAD)
    868 		source = source->next;
    869 	source->str = null;	/* zap pending input */
    870 
    871 	error_prefix(TRUE);
    872 	SH_VA_START(va, fmt);
    873 	shf_vfprintf(shl_out, fmt, va);
    874 	va_end(va);
    875 	errorf(null);
    876 }
    877 
    878 /*
    879  * input for yylex with alias expansion
    880  */
    881 
    882 Source *
    883 pushs(type, areap)
    884 	int type;
    885 	Area *areap;
    886 {
    887 	register Source *s;
    888 
    889 	s = (Source *) alloc(sizeof(Source), areap);
    890 	s->type = type;
    891 	s->str = null;
    892 	s->start = NULL;
    893 	s->line = 0;
    894 	s->errline = 0;
    895 	s->file = NULL;
    896 	s->flags = 0;
    897 	s->next = NULL;
    898 	s->areap = areap;
    899 	if (type == SFILE || type == SSTDIN) {
    900 		char *dummy;
    901 		Xinit(s->xs, dummy, 256, s->areap);
    902 	} else
    903 		memset(&s->xs, 0, sizeof(s->xs));
    904 	return s;
    905 }
    906 
/*
 * Return the next raw character from the current source.
 *
 * While the current source's buffer is exhausted, the source is refilled
 * or switched according to its type (a new line is read, the next word is
 * selected, or an alias / re-read source is popped).  Returns 0 when no
 * more input is available; the source is then marked SEOF.
 */
static int
getsc__()
{
	register Source *s = source;
	register int c;

	while ((c = *s->str++) == 0) {
		s->str = NULL;		/* return 0 for EOF by default */
		switch (s->type) {
		  case SEOF:
			s->str = null;
			return 0;

		  case SSTDIN:
		  case SFILE:
			/* read the next line; leaves s->str NULL at EOF */
			getsc_line(s);
			break;

		  /* String sources have nothing to refill from: s->str
		   * stays NULL and the source turns into SEOF below.
		   */
		  case SWSTR:
			break;

		  case SSTRING:
			break;

		  case SWORDS:
			/* hand out the next word, then a separator */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		  case SWORDSEP:
			/* a newline after the last word, a space otherwise */
			if (*s->u.strv == NULL) {
				s->start = s->str = newline;
				s->type = SEOF;
			} else {
				s->start = s->str = space;
				s->type = SWORDS;
			}
			break;

		  case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s
				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
			{
				/* alias text is non-empty and its last
				 * character is white space
				 */
				source = s = s->next;	/* pop source stack */
				/* Note that this alias ended with a space,
				 * enabling alias expansion on the following
				 * word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/* At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need
				 * to return the next character.  Do this
				 * by temporarily popping the alias to get
				 * the next character and then put it back
				 * in the source list with the SF_ALIASEND
				 * flag set.
				 */
				source = s->next;	/* pop source stack */
				source->flags |= s->flags & SF_ALIAS;
				c = getsc__();
				if (c) {
					/* re-push the alias with the
					 * look-ahead character as its
					 * only remaining input
					 */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading eof twice */
					s->str = NULL;
					break;
				}
			}
			continue;

		  case SREREAD:
			/* text pushed back for re-reading is used up:
			 * release it and return to the previous source
			 */
			if (s->start != s->ugbuf) /* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing more could be obtained: this source is at EOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return '\0';
		}
		/* echo the newly obtained input if requested */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return c;
}
   1007 
/*
 * Read the next line of input for source s into its buffer s->xs and
 * point s->start / s->str at it.  At end of file s->str is set to NULL
 * (and the file is closed for SFILE sources).
 *
 * When the shell is interactive (FTALKING set and s is SSTDIN) a prompt
 * is printed or the line editor is used, non-blank lines are saved in
 * the history (if HISTORY is defined) and the prompt is switched to PS2
 * afterwards.
 */
static void
getsc_line(s)
	Source *s;
{
	char *xp = Xstring(s->xs, xp);
	int interactive = Flag(FTALKING) && s->type == SSTDIN;
	int have_tty = interactive && (s->flags & SF_TTY);

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

#ifdef KSH
	/* arm the read timeout */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
#endif /* KSH */
#ifdef EDIT
	/* use the line editor when on a tty with an editing mode enabled */
	if (have_tty && (0
# ifdef VI
			 || Flag(FVI)
# endif /* VI */
# ifdef EMACS
			 || Flag(FEMACS) || Flag(FGMACS)
# endif /* EMACS */
		))
	{
		int nread;

		nread = x_read(xp, LINE);
		if (nread < 0)	/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	}
	else
#endif /* EDIT */
	{
		if (interactive) {
			pprompt(prompt, 0);
		} else
			s->line++;

		/* read until a newline or EOF, growing the buffer as needed */
		while (1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf)
			    && shf_errno(s->u.shf) == EINTR)
			{
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			xp++; /* move past null so doubling works... */
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			xp--; /* ...and move back again */
		}
		/* flush any unwanted input so other programs/builtins
		 * can read it.  Not very optimal, but less error prone
		 * than flushing elsewhere, dealing with redirections,
		 * etc..
		 * todo: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/* XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
#ifdef KSH
	/* disarm the read timeout */
	if (have_tty && ksh_tmout)
	{
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
#endif /* KSH */
	/* the buffer may have been moved while growing: reload pointers */
	s->start = s->str = Xstring(s->xs, xp);
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) { /* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive) {
#ifdef HISTORY
		char *p = Xstring(s->xs, xp);
		/* at the primary prompt, skip leading IFS white space so
		 * that blank lines are not saved
		 */
		if (cur_prompt == PS1)
			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
				p++;
		if (*p) {
# ifdef EASY_HISTORY
			if (cur_prompt == PS2)
				histappend(Xstring(s->xs, xp), 1);
			else
# endif /* EASY_HISTORY */
			{
				s->line++;
				histsave(s->line, s->str, 1);
			}
		}
#endif /* HISTORY */
	}
	if (interactive)
		set_prompt(PS2, (Source *) 0);
}
   1120 
   1121 void
   1122 set_prompt(to, s)
   1123 	int to;
   1124 	Source *s;
   1125 {
   1126 	cur_prompt = to;
   1127 
   1128 	switch (to) {
   1129 	case PS1: /* command */
   1130 #ifdef KSH
   1131 		/* Substitute ! and !! here, before substitutions are done
   1132 		 * so ! in expanded variables are not expanded.
   1133 		 * NOTE: this is not what at&t ksh does (it does it after
   1134 		 * substitutions, POSIX doesn't say which is to be done.
   1135 		 */
   1136 		{
   1137 			struct shf *shf;
   1138 			char * volatile ps1;
   1139 			Area *saved_atemp;
   1140 
   1141 			ps1 = str_val(global("PS1"));
   1142 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
   1143 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
   1144 			while (*ps1) {
   1145 				if (*ps1 != '!' || *++ps1 == '!')
   1146 					shf_putchar(*ps1++, shf);
   1147 				else
   1148 					shf_fprintf(shf, "%d",
   1149 						s ? s->line + 1 : 0);
   1150 			}
   1151 			ps1 = shf_sclose(shf);
   1152 			saved_atemp = ATEMP;
   1153 			newenv(E_ERRH);
   1154 			if (ksh_sigsetjmp(e->jbuf, 0)) {
   1155 				prompt = safe_prompt;
   1156 				/* Don't print an error - assume it has already
   1157 				 * been printed.  Reason is we may have forked
   1158 				 * to run a command and the child may be
   1159 				 * unwinding its stack through this code as it
   1160 				 * exits.
   1161 				 */
   1162 			} else
   1163 				prompt = str_save(substitute(ps1, 0),
   1164 						 saved_atemp);
   1165 			quitenv();
   1166 		}
   1167 #else /* KSH */
   1168 		prompt = str_val(global("PS1"));
   1169 #endif /* KSH */
   1170 		break;
   1171 
   1172 	case PS2: /* command continuation */
   1173 		prompt = str_val(global("PS2"));
   1174 		break;
   1175 	}
   1176 }
   1177 
   1178 /* See also related routine, promptlen() in edit.c */
   1179 void
   1180 pprompt(cp, ntruncate)
   1181 	const char *cp;
   1182 	int ntruncate;
   1183 {
   1184 #if 0
   1185 	char nbuf[32];
   1186 	int c;
   1187 
   1188 	while (*cp != 0) {
   1189 		if (*cp != '!')
   1190 			c = *cp++;
   1191 		else if (*++cp == '!')
   1192 			c = *cp++;
   1193 		else {
   1194 			int len;
   1195 			char *p;
   1196 
   1197 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
   1198 				source->line + 1);
   1199 			len = strlen(nbuf);
   1200 			if (ntruncate) {
   1201 				if (ntruncate >= len) {
   1202 					ntruncate -= len;
   1203 					continue;
   1204 				}
   1205 				p += ntruncate;
   1206 				len -= ntruncate;
   1207 				ntruncate = 0;
   1208 			}
   1209 			shf_write(p, len, shl_out);
   1210 			continue;
   1211 		}
   1212 		if (ntruncate)
   1213 			--ntruncate;
   1214 		else
   1215 			shf_putc(c, shl_out);
   1216 	}
   1217 #endif /* 0 */
   1218 	shf_puts(cp + ntruncate, shl_out);
   1219 	shf_flush(shl_out);
   1220 }
   1221 
   1222 /* Read the variable part of a ${...} expression (ie, up to but not including
   1223  * the :[-+?=#%] or close-brace.
   1224  */
   1225 static char *
   1226 get_brace_var(wsp, wp)
   1227 	XString *wsp;
   1228 	char *wp;
   1229 {
   1230 	enum parse_state {
   1231 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1232 			   PS_NUMBER, PS_VAR1, PS_END
   1233 			 }
   1234 		state;
   1235 	char c;
   1236 
   1237 	state = PS_INITIAL;
   1238 	while (1) {
   1239 		c = getsc();
   1240 		/* State machine to figure out where the variable part ends. */
   1241 		switch (state) {
   1242 		  case PS_INITIAL:
   1243 			if (c == '#') {
   1244 				state = PS_SAW_HASH;
   1245 				break;
   1246 			}
   1247 			/* fall through.. */
   1248 		  case PS_SAW_HASH:
   1249 			if (letter(c))
   1250 				state = PS_IDENT;
   1251 			else if (digit(c))
   1252 				state = PS_NUMBER;
   1253 			else if (ctype(c, C_VAR1))
   1254 				state = PS_VAR1;
   1255 			else
   1256 				state = PS_END;
   1257 			break;
   1258 		  case PS_IDENT:
   1259 			if (!letnum(c)) {
   1260 				state = PS_END;
   1261 				if (c == '[') {
   1262 					char *tmp, *p;
   1263 
   1264 					if (!arraysub(&tmp))
   1265 						yyerror("missing ]\n");
   1266 					*wp++ = c;
   1267 					for (p = tmp; *p; ) {
   1268 						Xcheck(*wsp, wp);
   1269 						*wp++ = *p++;
   1270 					}
   1271 					afree(tmp, ATEMP);
   1272 					c = getsc(); /* the ] */
   1273 				}
   1274 			}
   1275 			break;
   1276 		  case PS_NUMBER:
   1277 			if (!digit(c))
   1278 				state = PS_END;
   1279 			break;
   1280 		  case PS_VAR1:
   1281 			state = PS_END;
   1282 			break;
   1283 		  case PS_END: /* keep gcc happy */
   1284 			break;
   1285 		}
   1286 		if (state == PS_END) {
   1287 			*wp++ = '\0';	/* end of variable part */
   1288 			ungetsc(c);
   1289 			break;
   1290 		}
   1291 		Xcheck(*wsp, wp);
   1292 		*wp++ = c;
   1293 	}
   1294 	return wp;
   1295 }
   1296 
   1297 /*
   1298  * Save an array subscript - returns true if matching bracket found, false
   1299  * if eof or newline was found.
   1300  * (Returned string double null terminated)
   1301  */
   1302 static int
   1303 arraysub(strp)
   1304 	char **strp;
   1305 {
   1306 	XString ws;
   1307 	char	*wp;
   1308 	char	c;
   1309 	int 	depth = 1;	/* we are just past the initial [ */
   1310 
   1311 	Xinit(ws, wp, 32, ATEMP);
   1312 
   1313 	do {
   1314 		c = getsc();
   1315 		Xcheck(ws, wp);
   1316 		*wp++ = c;
   1317 		if (c == '[')
   1318 			depth++;
   1319 		else if (c == ']')
   1320 			depth--;
   1321 	} while (depth > 0 && c && c != '\n');
   1322 
   1323 	*wp++ = '\0';
   1324 	*strp = Xclose(ws, wp);
   1325 
   1326 	return depth == 0 ? 1 : 0;
   1327 }
   1328 
   1329 /* Unget a char: handles case when we are already at the start of the buffer */
   1330 static const char *
   1331 ungetsc(c)
   1332 	int c;
   1333 {
   1334 	if (backslash_skip)
   1335 		backslash_skip--;
   1336 	/* Don't unget eof... */
   1337 	if (source->str == null && c == '\0')
   1338 		return source->str;
   1339 	if (source->str > source->start)
   1340 		source->str--;
   1341 	else {
   1342 		Source *s;
   1343 
   1344 		s = pushs(SREREAD, source->areap);
   1345 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1346 		s->start = s->str = s->ugbuf;
   1347 		s->next = source;
   1348 		source = s;
   1349 	}
   1350 	return source->str;
   1351 }
   1352 
   1353 
   1354 /* Called to get a char that isn't a \newline sequence. */
   1355 static int
   1356 getsc_bn ARGS((void))
   1357 {
   1358 	int c, c2;
   1359 
   1360 	if (ignore_backslash_newline)
   1361 		return getsc_();
   1362 
   1363 	if (backslash_skip == 1) {
   1364 		backslash_skip = 2;
   1365 		return getsc_();
   1366 	}
   1367 
   1368 	backslash_skip = 0;
   1369 
   1370 	while (1) {
   1371 		c = getsc_();
   1372 		if (c == '\\') {
   1373 			if ((c2 = getsc_()) == '\n')
   1374 				/* ignore the \newline; get the next char... */
   1375 				continue;
   1376 			ungetsc(c2);
   1377 			backslash_skip = 1;
   1378 		}
   1379 		return c;
   1380 	}
   1381 }
   1382 
   1383 static Lex_state *
   1384 push_state_(si, old_end)
   1385 	State_info *si;
   1386 	Lex_state *old_end;
   1387 {
   1388 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
   1389 
   1390 	new[0].ls_info.base = old_end;
   1391 	si->base = &new[0];
   1392 	si->end = &new[STATE_BSIZE];
   1393 	return &new[1];
   1394 }
   1395 
   1396 static Lex_state *
   1397 pop_state_(si, old_end)
   1398 	State_info *si;
   1399 	Lex_state *old_end;
   1400 {
   1401 	Lex_state *old_base = si->base;
   1402 
   1403 	si->base = old_end->ls_info.base - STATE_BSIZE;
   1404 	si->end = old_end->ls_info.base;
   1405 
   1406 	afree(old_base, ATEMP);
   1407 
   1408 	return si->base + STATE_BSIZE - 1;
   1409 }
   1410