Home | History | Annotate | Line # | Download | only in ksh
lex.c revision 1.15
      1 /*	$NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $	*/
      2 
      3 /*
      4  * lexical analysis and source input
      5  */
      6 #include <sys/cdefs.h>
      7 
      8 #ifndef lint
      9 __RCSID("$NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $");
     10 #endif
     11 
     12 
     13 #include "sh.h"
     14 #include <ctype.h>
     15 
     16 
     17 /* Structure to keep track of the lexing state and the various pieces of info
     18  * needed for each particular state.
     19  */
     20 typedef struct lex_state Lex_state;
     21 struct lex_state {
        	/* lexer state held by this stack entry (SBASE, SDQUOTE, ...);
        	 * yylex() stores -1 here in the first entry of its initial block
        	 */
     22 	int ls_state;
        	/* extra data for the state; the member in use depends on ls_state.
        	 * The ls_* macros below are shorthands for the union members.
        	 */
     23 	union {
     24 	    /* $(...) */
     25 	    struct scsparen_info {
     26 		    int nparen;		/* count open parenthesis */
        		    /* quoting sub-state used by the SCSPAREN case of
        		     * yylex(): 0 normal, 1 backslash, 2 double quotes,
        		     * 3 backslash in double quotes, 4 single quotes
        		     */
     27 		    int csstate; /* XXX remove */
     28 #define ls_scsparen ls_info.u_scsparen
     29 	    } u_scsparen;
     30 
     31 	    /* $((...)) */
     32 	    struct sasparen_info {
     33 		    int nparen;		/* count open parenthesis */
     34 		    int start;		/* marks start of $(( in output str */
     35 #define ls_sasparen ls_info.u_sasparen
     36 	    } u_sasparen;
     37 
     38 	    /* ((...)) */
     39 	    struct sletparen_info {
     40 		    int nparen;		/* count open parenthesis */
     41 #define ls_sletparen ls_info.u_sletparen
     42 	    } u_sletparen;
     43 
     44 	    /* `...` */
     45 	    struct sbquote_info {
     46 		    int indquotes;	/* true if in double quotes: "`...`" */
     47 #define ls_sbquote ls_info.u_sbquote
     48 	    } u_sbquote;
     49 
        	    /* Link between blocks of states.  yylex() sets it to a null
        	     * pointer in states[0] and follows it from the first entry of
        	     * a block when scanning the stack for an enclosing SDQUOTE.
        	     */
     50 	    Lex_state *base;		/* used to point to next state block */
     51 	} ls_info;
     52 };
     53 
     54 typedef struct State_info State_info;
        /* Bounds of the block of Lex_state entries currently in use.  yylex()
         * initializes base to its local states[] array and end to
         * &states[STATE_BSIZE]; PUSH_STATE/POP_STATE compare the stack pointer
         * against these to decide when push_state_()/pop_state_() must be called.
         */
     55 struct State_info {
     56 	Lex_state	*base;
     57 	Lex_state	*end;
     58 };
     59 
     60 
     61 static void	readhere ARGS((struct ioword *iop));
     62 static int	getsc__ ARGS((void));
     63 static void	getsc_line ARGS((Source *s));
     64 static int	getsc_bn ARGS((void));
     65 static char	*get_brace_var ARGS((XString *wsp, char *wp));
     66 static int	arraysub ARGS((char **strp));
     67 static const char *ungetsc ARGS((int c));
     68 static void	gethere ARGS((void));
     69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
     70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
     71 
     72 static int backslash_skip;
     73 static int ignore_backslash_newline;
     74 
     75 /* optimized getsc_bn() */
     76 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
     77 			 && !backslash_skip ? *source->str++ : getsc_bn())
     78 /* optimized getsc__() */
     79 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
     80 
     /* Number of Lex_state entries in one block of the state stack (the size of
      * the states[] array in yylex()).
      */
     81 #define STATE_BSIZE	32
     82 
        /* Enter lexer state s.  Relies on the expanding function's locals statep,
         * state_info and state: statep is advanced, push_state_() is called when
         * it reaches state_info.end, and s is stored both in the new top entry
         * and in state.
         */
     83 #define PUSH_STATE(s)	do { \
     84 			    if (++statep == state_info.end) \
     85 				statep = push_state_(&state_info, statep); \
     86 			    state = statep->ls_state = (s); \
     87 			} while (0)
     88 
        /* Leave the current lexer state.  statep is stepped back, pop_state_() is
         * called when it reaches state_info.base, and state is reloaded from the
         * entry that is now on top.
         */
     89 #define POP_STATE()	do { \
     90 			    if (--statep == state_info.base) \
     91 				statep = pop_state_(&state_info, statep); \
     92 			    state = statep->ls_state; \
     93 			} while (0)
     94 
     95 
     96 
     97 /*
     98  * Lexical analyzer
     99  *
    100  * tokens are not regular expressions, they are LL(1).
    101  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
    102  * hence the state stack.
    103  */
    104 
    /*
     * yylex - read the next token from the current input source.
     *
     * cf is a bit mask of lexer flags.  The ones examined here are ONEWORD,
     * LETEXPR, HEREDELIM, CMDWORD, VARASN, ARRAYVAR, HEREDOC, CONTIN, KEYWORD,
     * ESACONLY and ALIAS (ALIAS is also turned on locally when the source
     * carries SF_ALIAS).
     *
     * Return value:
     *   REDIR	an I/O redirection operator; yylval.iop points to a newly
     *		allocated struct ioword describing it
     *   LWORD	a word; yylval.cp is the closed output string, a sequence of
     *		CHAR/QCHAR/OQUOTE/... codes terminated by EOS
     *   a keyword's token value (p->val.i) when KEYWORD is set and the
     *		unquoted word is found in the keywords table
     *   BREAK, LOGOR, LOGAND, COPROC, MDPAREN for ";;", "||", "&&", "|&", "(("
     *   otherwise the character that terminated the (empty) word, which is 0
     *		when getsc() reports end of input
     *
     * Unbalanced quoting/substitution at the end of the word is reported
     * through yyerror().
     */
    105 int
    106 yylex(cf)
    107 	int cf;
    108 {
    109 	Lex_state states[STATE_BSIZE], *statep;
    110 	State_info state_info;
    111 	register int c, state;
    112 	XString ws;		/* expandable output word */
    113 	register char *wp;	/* output word pointer */
    114 	char *sp, *dp;
    115 	int c2;
    116 
    117 
    118   Again:
        	/* (Re)initialize the state stack.  Entry 0 of the first block is a
        	 * sentinel (state -1, no previous block); the initial state lives
        	 * in states[1].
        	 */
    119 	states[0].ls_state = -1;
    120 	states[0].ls_info.base = (Lex_state *) 0;
    121 	statep = &states[1];
    122 	state_info.base = states;
    123 	state_info.end = &states[STATE_BSIZE];
    124 
    125 	Xinit(ws, wp, 64, ATEMP);
    126 
    127 	backslash_skip = 0;
    128 	ignore_backslash_newline = 0;
    129 
    130 	if (cf&ONEWORD)
    131 		state = SWORD;
    132 #ifdef KSH
    133 	else if (cf&LETEXPR) {
    134 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
    135 		state = SLETPAREN;
    136 		statep->ls_sletparen.nparen = 0;
    137 	}
    138 #endif /* KSH */
    139 	else {		/* normal lexing */
    140 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
        		/* skip leading blanks, then a comment up to (not including)
        		 * the newline
        		 */
    141 		while ((c = getsc()) == ' ' || c == '\t')
    142 			;
    143 		if (c == '#') {
    144 			ignore_backslash_newline++;
    145 			while ((c = getsc()) != '\0' && c != '\n')
    146 				;
    147 			ignore_backslash_newline--;
    148 		}
    149 		ungetsc(c);
    150 	}
    151 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
    152 		source->flags &= ~SF_ALIAS;
    153 		/* In POSIX mode, a trailing space only counts if we are
    154 		 * parsing a simple command
    155 		 */
    156 		if (!Flag(FPOSIX) || (cf & CMDWORD))
    157 			cf |= ALIAS;
    158 	}
    159 
    160 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
    161 	statep->ls_state = state;
    162 
    163 	/* collect non-special or quoted characters to form word */
    164 	while (!((c = getsc()) == 0
    165 		 || ((state == SBASE || state == SHEREDELIM)
    166 		     && ctype(c, C_LEX1))))
    167 	{
    168 		Xcheck(ws, wp);
    169 		switch (state) {
    170 		  case SBASE:
    171 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
    172 				*wp = EOS; /* temporary */
    173 				if (is_wdvarname(Xstring(ws, wp), FALSE))
    174 				{
    175 					char *p, *tmp;
    176 
    177 					if (arraysub(&tmp)) {
    178 						*wp++ = CHAR;
    179 						*wp++ = c;
    180 						for (p = tmp; *p; ) {
    181 							Xcheck(ws, wp);
    182 							*wp++ = CHAR;
    183 							*wp++ = *p++;
    184 						}
    185 						afree(tmp, ATEMP);
    186 						break;
    187 					} else {
        						/* not a subscript: push the text
        						 * arraysub() consumed back as a
        						 * SREREAD source so it is lexed
        						 * again
        						 */
    188 						Source *s;
    189 
    190 						s = pushs(SREREAD,
    191 							  source->areap);
    192 						s->start = s->str
    193 							= s->u.freeme = tmp;
    194 						s->next = source;
    195 						source = s;
    196 					}
    197 				}
    198 				*wp++ = CHAR;
    199 				*wp++ = c;
    200 				break;
    201 			}
    202 			/* fall through.. */
    203 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
    204 #ifdef KSH
    205 			if (c == '*' || c == '@' || c == '+' || c == '?'
    206 			    || c == '!')
    207 			{
    208 				c2 = getsc();
    209 				if (c2 == '(' /*)*/ ) {
    210 					*wp++ = OPAT;
    211 					*wp++ = c;
    212 					PUSH_STATE(SPATTERN);
    213 					break;
    214 				}
    215 				ungetsc(c2);
    216 			}
    217 #endif /* KSH */
    218 			/* fall through.. */
    219 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
    220 			switch (c) {
    221 			  case '\\':
    222 				c = getsc();
    223 #ifdef OS2
    224 				if (isalnum((unsigned char)c)) {
    225 					*wp++ = CHAR, *wp++ = '\\';
    226 					*wp++ = CHAR, *wp++ = c;
    227 				} else
    228 #endif
    229 				if (c) /* trailing \ is lost */
    230 					*wp++ = QCHAR, *wp++ = c;
    231 				break;
    232 			  case '\'':
    233 				*wp++ = OQUOTE;
    234 				ignore_backslash_newline++;
    235 				PUSH_STATE(SSQUOTE);
    236 				break;
    237 			  case '"':
    238 				*wp++ = OQUOTE;
    239 				PUSH_STATE(SDQUOTE);
    240 				break;
    241 			  default:
    242 				goto Subst;
    243 			}
    244 			break;
    245 
        		  /* Substitution handling shared by SBASE, SDQUOTE and SWORD:
        		   * backslash escapes, $..., and `...`.
        		   */
    246 		  Subst:
    247 			switch (c) {
    248 			  Lex_state *s;
    249 			  Lex_state *base;
    250 
    251 			  case '\\':
    252 				c = getsc();
    253 				switch (c) {
    254 				  case '\\':
    255 				  case '$': case '`':
    256 					*wp++ = QCHAR, *wp++ = c;
    257 					break;
    258 				  case '"':
    259 					if ((cf & HEREDOC) == 0) {
    260 						*wp++ = QCHAR, *wp++ = c;
    261 						break;
    262 					}
    263 					/* FALLTHROUGH */
    264 				  default:
    265 					Xcheck(ws, wp);
    266 					if (c) { /* trailing \ is lost */
    267 						*wp++ = CHAR, *wp++ = '\\';
    268 						*wp++ = CHAR, *wp++ = c;
    269 					}
    270 					break;
    271 				}
    272 				break;
    273 			  case '$':
    274 				c = getsc();
    275 				if (c == '(') /*)*/ {
    276 					c = getsc();
    277 					if (c == '(') /*)*/ {
    278 						PUSH_STATE(SASPAREN);
    279 						statep->ls_sasparen.nparen = 2;
    280 						statep->ls_sasparen.start =
    281 							Xsavepos(ws, wp);
    282 						*wp++ = EXPRSUB;
    283 					} else {
    284 						ungetsc(c);
    285 						PUSH_STATE(SCSPAREN);
    286 						statep->ls_scsparen.nparen = 1;
    287 						statep->ls_scsparen.csstate = 0;
    288 						*wp++ = COMSUB;
    289 					}
    290 				} else if (c == '{') /*}*/ {
    291 					*wp++ = OSUBST;
    292 					*wp++ = '{'; /*}*/
    293 					wp = get_brace_var(&ws, wp);
    294 					c = getsc();
    295 					/* allow :# and :% (ksh88 compat) */
    296 					if (c == ':') {
    297 						*wp++ = CHAR, *wp++ = c;
    298 						c = getsc();
    299 					}
    300 					/* If this is a trim operation,
    301 					 * treat (,|,) specially in STBRACE.
    302 					 */
    303 					if (c == '#' || c == '%') {
    304 						ungetsc(c);
    305 						PUSH_STATE(STBRACE);
    306 					} else {
    307 						ungetsc(c);
    308 						PUSH_STATE(SBRACE);
    309 					}
    310 				} else if (ctype(c, C_ALPHA)) {
        					/* $name: emitted as OSUBST 'X' name
        					 * '\0' CSUBST 'X'
        					 */
    311 					*wp++ = OSUBST;
    312 					*wp++ = 'X';
    313 					do {
    314 						Xcheck(ws, wp);
    315 						*wp++ = c;
    316 						c = getsc();
    317 					} while (ctype(c, C_ALPHA|C_DIGIT));
    318 					*wp++ = '\0';
    319 					*wp++ = CSUBST;
    320 					*wp++ = 'X';
    321 					ungetsc(c);
    322 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
        					/* single-character parameter name */
    323 					Xcheck(ws, wp);
    324 					*wp++ = OSUBST;
    325 					*wp++ = 'X';
    326 					*wp++ = c;
    327 					*wp++ = '\0';
    328 					*wp++ = CSUBST;
    329 					*wp++ = 'X';
    330 				} else {
    331 					*wp++ = CHAR, *wp++ = '$';
    332 					ungetsc(c);
    333 				}
    334 				break;
    335 			  case '`':
    336 				PUSH_STATE(SBQUOTE);
    337 				*wp++ = COMSUB;
    338 				/* Need to know if we are inside double quotes
    339 				 * since sh/at&t-ksh translate the \" to " in
    340 				 * "`..\"..`".  POSIX also requires this.
    341 				 * An earlier version of ksh misinterpreted
    342 				 * the POSIX specification and performed
    343 				 * removal of backslash escapes only if
    344 				 * posix mode was not in effect.
    345 				 */
    346 				statep->ls_sbquote.indquotes = 0;
        				/* Walk the state stack from the top down looking
        				 * for an enclosing SDQUOTE.  When the first entry
        				 * of a block is reached, follow its ls_info.base
        				 * link and continue in the block it leads to
        				 * (the link presumably points at the end of the
        				 * previous block - confirm in push_state_()).
        				 */
    347 				s = statep;
    348 				base = state_info.base;
    349 				while (1) {
    350 					for (; s != base; s--) {
    351 						if (s->ls_state == SDQUOTE) {
    352 							statep->ls_sbquote.indquotes = 1;
    353 							break;
    354 						}
    355 					}
    356 					if (s != base)
    357 						break;
    358 					if (!(s = s->ls_info.base))
    359 						break;
    360 					base = s-- - STATE_BSIZE;
    361 				}
    362 				break;
    363 			  default:
    364 				*wp++ = CHAR, *wp++ = c;
    365 			}
    366 			break;
    367 
    368 		  case SSQUOTE:
    369 			if (c == '\'') {
    370 				POP_STATE();
    371 				*wp++ = CQUOTE;
    372 				ignore_backslash_newline--;
    373 			} else
    374 				*wp++ = QCHAR, *wp++ = c;
    375 			break;
    376 
    377 		  case SDQUOTE:
    378 			if (c == '"') {
    379 				POP_STATE();
    380 				*wp++ = CQUOTE;
    381 			} else
    382 				goto Subst;
    383 			break;
    384 
    385 		  case SCSPAREN: /* $( .. ) */
    386 			/* todo: deal with $(...) quoting properly
    387 			 * kludge to partly fake quoting inside $(..): doesn't
    388 			 * really work because nested $(..) or ${..} inside
    389 			 * double quotes aren't dealt with.
    390 			 */
    391 			switch (statep->ls_scsparen.csstate) {
    392 			  case 0: /* normal */
    393 				switch (c) {
    394 				  case '(':
    395 					statep->ls_scsparen.nparen++;
    396 					break;
    397 				  case ')':
    398 					statep->ls_scsparen.nparen--;
    399 					break;
    400 				  case '\\':
    401 					statep->ls_scsparen.csstate = 1;
    402 					break;
    403 				  case '"':
    404 					statep->ls_scsparen.csstate = 2;
    405 					break;
    406 				  case '\'':
    407 					statep->ls_scsparen.csstate = 4;
    408 					ignore_backslash_newline++;
    409 					break;
    410 				}
    411 				break;
    412 
    413 			  case 1: /* backslash in normal mode */
    414 			  case 3: /* backslash in double quotes */
    415 				--statep->ls_scsparen.csstate;
    416 				break;
    417 
    418 			  case 2: /* double quotes */
    419 				if (c == '"')
    420 					statep->ls_scsparen.csstate = 0;
    421 				else if (c == '\\')
    422 					statep->ls_scsparen.csstate = 3;
    423 				break;
    424 
    425 			  case 4: /* single quotes */
    426 				if (c == '\'') {
    427 					statep->ls_scsparen.csstate = 0;
    428 					ignore_backslash_newline--;
    429 				}
    430 				break;
    431 			}
    432 			if (statep->ls_scsparen.nparen == 0) {
    433 				POP_STATE();
    434 				*wp++ = 0; /* end of COMSUB */
    435 			} else
    436 				*wp++ = c;
    437 			break;
    438 
    439 		  case SASPAREN: /* $(( .. )) */
    440 			/* todo: deal with $((...); (...)) properly */
    441 			/* XXX should nest using existing state machine
    442 			 *     (embed "..", $(...), etc.) */
    443 			if (c == '(')
    444 				statep->ls_sasparen.nparen++;
    445 			else if (c == ')') {
    446 				statep->ls_sasparen.nparen--;
    447 				if (statep->ls_sasparen.nparen == 1) {
    448 					/*(*/
    449 					if ((c2 = getsc()) == ')') {
    450 						POP_STATE();
    451 						*wp++ = 0; /* end of EXPRSUB */
    452 						break;
    453 					} else {
    454 						char *s;
    455 
    456 						ungetsc(c2);
    457 						/* mismatched parenthesis -
    458 						 * assume we were really
    459 						 * parsing a $(..) expression
    460 						 */
        						/* shift the collected text up by
        						 * one byte and rewrite the saved
        						 * EXPRSUB marker as COMSUB '('
        						 */
    461 						s = Xrestpos(ws, wp,
    462 						     statep->ls_sasparen.start);
    463 						memmove(s + 1, s, wp - s);
    464 						*s++ = COMSUB;
    465 						*s = '('; /*)*/
    466 						wp++;
    467 						statep->ls_scsparen.nparen = 1;
    468 						statep->ls_scsparen.csstate = 0;
    469 						state = statep->ls_state
    470 							= SCSPAREN;
    471 
    472 					}
    473 				}
    474 			}
    475 			*wp++ = c;
    476 			break;
    477 
    478 		  case SBRACE:
    479 			/*{*/
    480 			if (c == '}') {
    481 				POP_STATE();
    482 				*wp++ = CSUBST;
    483 				*wp++ = /*{*/ '}';
    484 			} else
    485 				goto Sbase1;
    486 			break;
    487 
    488 		  case STBRACE:
    489 			/* Same as SBRACE, except (,|,) treated specially */
    490 			/*{*/
    491 			if (c == '}') {
    492 				POP_STATE();
    493 				*wp++ = CSUBST;
    494 				*wp++ = /*{*/ '}';
    495 			} else if (c == '|') {
    496 				*wp++ = SPAT;
    497 			} else if (c == '(') {
    498 				*wp++ = OPAT;
    499 				*wp++ = ' ';	/* simile for @ */
    500 				PUSH_STATE(SPATTERN);
    501 			} else
    502 				goto Sbase1;
    503 			break;
    504 
    505 		  case SBQUOTE:
    506 			if (c == '`') {
    507 				*wp++ = 0;
    508 				POP_STATE();
    509 			} else if (c == '\\') {
    510 				switch (c = getsc()) {
    511 				  case '\\':
    512 				  case '$': case '`':
    513 					*wp++ = c;
    514 					break;
    515 				  case '"':
    516 					if (statep->ls_sbquote.indquotes) {
    517 						*wp++ = c;
    518 						break;
    519 					}
    520 					/* fall through.. */
    521 				  default:
    522 					if (c) { /* trailing \ is lost */
    523 						*wp++ = '\\';
    524 						*wp++ = c;
    525 					}
    526 					break;
    527 				}
    528 			} else
    529 				*wp++ = c;
    530 			break;
    531 
    532 		  case SWORD:	/* ONEWORD */
    533 			goto Subst;
    534 
    535 #ifdef KSH
    536 		  case SLETPAREN:	/* LETEXPR: (( ... )) */
    537 			/*(*/
    538 			if (c == ')') {
    539 				if (statep->ls_sletparen.nparen > 0)
    540 				    --statep->ls_sletparen.nparen;
    541 				/*(*/
    542 				else if ((c2 = getsc()) == ')') {
    543 					c = 0;
    544 					*wp++ = CQUOTE;
    545 					goto Done;
    546 				} else
    547 					ungetsc(c2);
    548 			} else if (c == '(')
    549 				/* parenthesis inside quotes and backslashes
    550 				 * are lost, but at&t ksh doesn't count them
    551 				 * either
    552 				 */
    553 				++statep->ls_sletparen.nparen;
    554 			goto Sbase2;
    555 #endif /* KSH */
    556 
    557 		  case SHEREDELIM:	/* <<,<<- delimiter */
    558 			/* XXX chuck this state (and the next) - use
    559 			 * the existing states ($ and \`..` should be
    560 			 * stripped of their specialness after the
    561 			 * fact).
    562 			 */
    563 			/* here delimiters need a special case since
    564 			 * $ and `..` are not to be treated specially
    565 			 */
    566 			if (c == '\\') {
    567 				c = getsc();
    568 				if (c) { /* trailing \ is lost */
    569 					*wp++ = QCHAR;
    570 					*wp++ = c;
    571 				}
    572 			} else if (c == '\'') {
    573 				PUSH_STATE(SSQUOTE);
    574 				*wp++ = OQUOTE;
    575 				ignore_backslash_newline++;
    576 			} else if (c == '"') {
    577 				state = statep->ls_state = SHEREDQUOTE;
    578 				*wp++ = OQUOTE;
    579 			} else {
    580 				*wp++ = CHAR;
    581 				*wp++ = c;
    582 			}
    583 			break;
    584 
    585 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
    586 			if (c == '"') {
    587 				*wp++ = CQUOTE;
    588 				state = statep->ls_state = SHEREDELIM;
    589 			} else {
    590 				if (c == '\\') {
    591 					switch (c = getsc()) {
    592 					  case '\\': case '"':
    593 					  case '$': case '`':
    594 						break;
    595 					  default:
    596 						if (c) { /* trailing \ lost */
    597 							*wp++ = CHAR;
    598 							*wp++ = '\\';
    599 						}
    600 						break;
    601 					}
    602 				}
    603 				*wp++ = CHAR;
    604 				*wp++ = c;
    605 			}
    606 			break;
    607 
    608 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
    609 			if ( /*(*/ c == ')') {
    610 				*wp++ = CPAT;
    611 				POP_STATE();
    612 			} else if (c == '|') {
    613 				*wp++ = SPAT;
    614 			} else if (c == '(') {
    615 				*wp++ = OPAT;
    616 				*wp++ = ' ';	/* simile for @ */
    617 				PUSH_STATE(SPATTERN);
    618 			} else
    619 				goto Sbase1;
    620 			break;
    621 		}
    622 	}
    623 Done:
    624 	Xcheck(ws, wp);
        	/* anything left on the state stack means a quote or substitution
        	 * was still open when the word ended
        	 */
    625 	if (statep != &states[1])
    626 		/* XXX figure out what is missing */
    627 		yyerror("no closing quote\n");
    628 
    629 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    630 	if (state == SHEREDELIM)
    631 		state = SBASE;
    632 
        	/* A '<' or '>' terminator preceded by no word at all, or by a single
        	 * unquoted digit, starts a redirection; the digit, when present, is
        	 * used as the unit number.
        	 */
    633 	dp = Xstring(ws, wp);
    634 	if ((c == '<' || c == '>') && state == SBASE
    635 	    && ((c2 = Xlength(ws, wp)) == 0
    636 	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
    637 	{
    638 		struct ioword *iop =
    639 				(struct ioword *) alloc(sizeof(*iop), ATEMP);
    640 
    641 		if (c2 == 2)
    642 			iop->unit = dp[1] - '0';
    643 		else
    644 			iop->unit = c == '>'; /* 0 for <, 1 for > */
    645 
    646 		c2 = getsc();
    647 		/* <<, >>, <> are ok, >< is not */
    648 		if (c == c2 || (c == '<' && c2 == '>')) {
    649 			iop->flag = c == c2 ?
    650 				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
    651 			if (iop->flag == IOHERE) {
    652 				if ((c2 = getsc()) == '-') {
    653 					iop->flag |= IOSKIP;
    654 				} else {
    655 					ungetsc(c2);
    656 				}
    657 			}
    658 		} else if (c2 == '&')
    659 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
    660 		else {
    661 			iop->flag = c == '>' ? IOWRITE : IOREAD;
    662 			if (c == '>' && c2 == '|')
    663 				iop->flag |= IOCLOB;
    664 			else
    665 				ungetsc(c2);
    666 		}
    667 
    668 		iop->name = (char *) 0;
    669 		iop->delim = (char *) 0;
    670 		iop->heredoc = (char *) 0;
    671 		Xfree(ws, wp);	/* free word */
    672 		yylval.iop = iop;
    673 		return REDIR;
    674 	}
    675 
    676 	if (wp == dp && state == SBASE) {
    677 		Xfree(ws, wp);	/* free word */
    678 		/* no word, process LEX1 character */
    679 		switch (c) {
    680 		  default:
    681 			return c;
    682 
    683 		  case '|':
    684 		  case '&':
    685 		  case ';':
    686 			if ((c2 = getsc()) == c)
    687 				c = (c == ';') ? BREAK :
    688 				    (c == '|') ? LOGOR :
    689 				    (c == '&') ? LOGAND :
    690 				    YYERRCODE;
    691 #ifdef KSH
    692 			else if (c == '|' && c2 == '&')
    693 				c = COPROC;
    694 #endif /* KSH */
    695 			else
    696 				ungetsc(c2);
    697 			return c;
    698 
    699 		  case '\n':
        			/* read the bodies of any here documents that were
        			 * queued on this line
        			 */
    700 			gethere();
    701 			if (cf & CONTIN)
    702 				goto Again;
    703 			return c;
    704 
    705 		  case '(':  /*)*/
    706 #ifdef KSH
    707 			if ((c2 = getsc()) == '(') /*)*/
    708 				/* XXX need to handle ((...); (...)) */
    709 				c = MDPAREN;
    710 			else
    711 				ungetsc(c2);
    712 #endif /* KSH */
    713 			return c;
    714 		  /*(*/
    715 		  case ')':
    716 			return c;
    717 		}
    718 	}
    719 
    720 	*wp++ = EOS;		/* terminate word */
    721 	yylval.cp = Xclose(ws, wp);
    722 	if (state == SWORD
    723 #ifdef KSH
    724 		|| state == SLETPAREN
    725 #endif /* KSH */
    726 		)	/* ONEWORD? */
    727 		return LWORD;
    728 	ungetsc(c);		/* unget terminator */
    729 
    730 	/* copy word to unprefixed string ident */
    731 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
    732 		*dp++ = *sp++;
    733 	/* Make sure the ident array stays '\0' padded */
    734 	memset(dp, 0, (ident+IDENT) - dp + 1);
        	/* the copy loop stopped on something other than EOS: the word has
        	 * quoted/substituted parts or did not fit, so it cannot be a
        	 * keyword or an alias
        	 */
    735 	if (c != EOS)
    736 		*ident = '\0';	/* word is not unquoted */
    737 
    738 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
    739 		struct tbl *p;
    740 		int h = hash(ident);
    741 
    742 		/* { */
    743 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
    744 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
    745 		{
    746 			afree(yylval.cp, ATEMP);
    747 			return p->val.i;
    748 		}
    749 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
    750 		    && (p->flag & ISSET))
    751 		{
    752 			register Source *s;
    753 
        			/* an alias that is already on the source stack is
        			 * not expanded again; the word is returned as is
        			 */
    754 			for (s = source; s->type == SALIAS; s = s->next)
    755 				if (s->u.tblp == p)
    756 					return LWORD;
    757 			/* push alias expansion */
    758 			s = pushs(SALIAS, source->areap);
    759 			s->start = s->str = p->val.s;
    760 			s->u.tblp = p;
    761 			s->next = source;
    762 			source = s;
    763 			afree(yylval.cp, ATEMP);
    764 			goto Again;
    765 		}
    766 	}
    767 
    768 	return LWORD;
    769 }
    770 
    771 static void
    772 gethere()
    773 {
    774 	register struct ioword **p;
    775 
    776 	for (p = heres; p < herep; p++)
    777 		readhere(*p);
    778 	herep = heres;
    779 }
    780 
    781 /*
    782  * read "<<word" text into temp file
    783  */
    784 
    785 static void
    786 readhere(iop)
    787 	struct ioword *iop;
    788 {
    789 	register int c;
    790 	char *volatile eof;
    791 	char *eofp;
    792 	int skiptabs;
    793 	XString xs;
    794 	char *xp;
    795 	int xpos;
    796 
    797 	eof = evalstr(iop->delim, 0);
    798 
    799 	if (!(iop->flag & IOEVAL))
    800 		ignore_backslash_newline++;
    801 
    802 	Xinit(xs, xp, 256, ATEMP);
    803 
    804 	for (;;) {
    805 		eofp = eof;
    806 		skiptabs = iop->flag & IOSKIP;
    807 		xpos = Xsavepos(xs, xp);
    808 		while ((c = getsc()) != 0) {
    809 			if (skiptabs) {
    810 				if (c == '\t')
    811 					continue;
    812 				skiptabs = 0;
    813 			}
    814 			if (c != *eofp)
    815 				break;
    816 			Xcheck(xs, xp);
    817 			Xput(xs, xp, c);
    818 			eofp++;
    819 		}
    820 		/* Allow EOF here so commands with out trailing newlines
    821 		 * will work (eg, ksh -c '...', $(...), etc).
    822 		 */
    823 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
    824 			xp = Xrestpos(xs, xp, xpos);
    825 			break;
    826 		}
    827 		ungetsc(c);
    828 		while ((c = getsc()) != '\n') {
    829 			if (c == 0)
    830 				yyerror("here document `%s' unclosed\n", eof);
    831 			Xcheck(xs, xp);
    832 			Xput(xs, xp, c);
    833 		}
    834 		Xcheck(xs, xp);
    835 		Xput(xs, xp, c);
    836 	}
    837 	Xput(xs, xp, '\0');
    838 	iop->heredoc = Xclose(xs, xp);
    839 
    840 	if (!(iop->flag & IOEVAL))
    841 		ignore_backslash_newline--;
    842 }
    843 
    844 void
    845 #ifdef HAVE_PROTOTYPES
    846 yyerror(const char *fmt, ...)
    847 #else
    848 yyerror(fmt, va_alist)
    849 	const char *fmt;
    850 	va_dcl
    851 #endif
    852 {
    853 	va_list va;
    854 
    855 	/* pop aliases and re-reads */
    856 	while (source->type == SALIAS || source->type == SREREAD)
    857 		source = source->next;
    858 	source->str = null;	/* zap pending input */
    859 
    860 	error_prefix(TRUE);
    861 	SH_VA_START(va, fmt);
    862 	shf_vfprintf(shl_out, fmt, va);
    863 	va_end(va);
    864 	errorf("%s", null);
    865 }
    866 
    867 /*
    868  * input for yylex with alias expansion
    869  */
    870 
    871 Source *
    872 pushs(type, areap)
    873 	int type;
    874 	Area *areap;
    875 {
    876 	register Source *s;
    877 
    878 	s = (Source *) alloc(sizeof(Source), areap);
    879 	s->type = type;
    880 	s->str = null;
    881 	s->start = NULL;
    882 	s->line = 0;
    883 	s->errline = 0;
    884 	s->file = NULL;
    885 	s->flags = 0;
    886 	s->next = NULL;
    887 	s->areap = areap;
    888 	if (type == SFILE || type == SSTDIN) {
    889 		char *dummy;
    890 		Xinit(s->xs, dummy, 256, s->areap);
    891 	} else
    892 		memset(&s->xs, 0, sizeof(s->xs));
    893 	return s;
    894 }
    895 
    /*
     * getsc__ - return the next raw character from the current source, or 0
     * when there is no more input.
     *
     * Called (via the getsc_() macro) when the current source string is
     * exhausted.  Depending on the source type it reads another line, moves on
     * to the next word of a word list, or pops an alias/re-read source off
     * the source stack, and then retries.  A source that cannot supply more
     * input is turned into SEOF and 0 is returned.  When SF_ECHO is set on the
     * source, newly obtained input is echoed to shl_out.
     */
    896 static int
    897 getsc__()
    898 {
    899 	register Source *s = source;
    900 	register int c;
    901 
    902 	while ((c = *s->str++) == 0) {
    903 		s->str = NULL;		/* return 0 for EOF by default */
    904 		switch (s->type) {
    905 		  case SEOF:
    906 			s->str = null;
    907 			return 0;
    908 
    909 		  case SSTDIN:
    910 		  case SFILE:
        			/* sets s->str to the new line, or to NULL at EOF */
    911 			getsc_line(s);
    912 			break;
    913 
        		  /* SWSTR and SSTRING have nothing to refill from: s->str stays
        		   * NULL and the EOF handling after the switch applies.
        		   */
    914 		  case SWSTR:
    915 			break;
    916 
    917 		  case SSTRING:
    918 			break;
    919 
        		  /* A word list alternates between SWORDS (deliver the next
        		   * word) and SWORDSEP (deliver a space between words, or a
        		   * newline after the last one and then become SEOF).
        		   */
    920 		  case SWORDS:
    921 			s->start = s->str = *s->u.strv++;
    922 			s->type = SWORDSEP;
    923 			break;
    924 
    925 		  case SWORDSEP:
    926 			if (*s->u.strv == NULL) {
    927 				s->start = s->str = newline;
    928 				s->type = SEOF;
    929 			} else {
    930 				s->start = s->str = space;
    931 				s->type = SWORDS;
    932 			}
    933 			break;
    934 
    935 		  case SALIAS:
    936 			if (s->flags & SF_ALIASEND) {
    937 				/* pass on an unused SF_ALIAS flag */
    938 				source = s->next;
    939 				source->flags |= s->flags & SF_ALIAS;
    940 				s = source;
    941 			} else if (*s->u.tblp->val.s
    942 				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
    943 			{
    944 				source = s = s->next;	/* pop source stack */
    945 				/* Note that this alias ended with a space,
    946 				 * enabling alias expansion on the following
    947 				 * word.
    948 				 */
    949 				s->flags |= SF_ALIAS;
    950 			} else {
    951 				/* At this point, we need to keep the current
    952 				 * alias in the source list so recursive
    953 				 * aliases can be detected and we also need
    954 				 * to return the next character.  Do this
    955 				 * by temporarily popping the alias to get
    956 				 * the next character and then put it back
    957 				 * in the source list with the SF_ALIASEND
    958 				 * flag set.
    959 				 */
    960 				source = s->next;	/* pop source stack */
    961 				source->flags |= s->flags & SF_ALIAS;
    962 				c = getsc__();
    963 				if (c) {
        					/* re-push the alias with the look-ahead
        					 * character as its only content
        					 */
    964 					s->flags |= SF_ALIASEND;
    965 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
    966 					s->start = s->str = s->ugbuf;
    967 					s->next = source;
    968 					source = s;
    969 				} else {
    970 					s = source;
    971 					/* avoid reading eof twice */
    972 					s->str = NULL;
    973 					break;
    974 				}
    975 			}
    976 			continue;
    977 
    978 		  case SREREAD:
        			/* release the re-read text, unless it is the source's own
        			 * ugbuf, then pop the source
        			 */
    979 			if (s->start != s->ugbuf) /* yuck */
    980 				afree(s->u.freeme, ATEMP);
    981 			source = s = s->next;
    982 			continue;
    983 		}
        		/* no new input was supplied: make the source a permanent EOF */
    984 		if (s->str == NULL) {
    985 			s->type = SEOF;
    986 			s->start = s->str = null;
    987 			return '\0';
    988 		}
    989 		if (s->flags & SF_ECHO) {
    990 			shf_puts(s->str, shl_out);
    991 			shf_flush(shl_out);
    992 		}
    993 	}
    994 	return c;
    995 }
    996 
    /*
     * getsc_line - read the next input line for source s into its buffer s->xs.
     *
     * On return s->start and s->str point at the line just read, or s->str is
     * NULL if nothing was read (end of file), in which case an SFILE source
     * also has its descriptor closed.  When the source is interactive (FTALKING
     * set and type SSTDIN) the prompt is printed or the line editor is used,
     * the line is saved in the history (if HISTORY is defined) and the prompt
     * is switched to PS2 afterwards.  For non-interactive sources s->line is
     * incremented before reading.
     */
    997 static void
    998 getsc_line(s)
    999 	Source *s;
   1000 {
   1001 	char *xp = Xstring(s->xs, xp);
   1002 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
   1003 	int have_tty = interactive && (s->flags & SF_TTY);
   1004 
   1005 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1006 	XcheckN(s->xs, xp, LINE);
   1007 	*xp = '\0';
   1008 	s->start = s->str = xp;
   1009 
   1010 #ifdef KSH
        	/* arm the input timeout while waiting on a terminal */
   1011 	if (have_tty && ksh_tmout) {
   1012 		ksh_tmout_state = TMOUT_READING;
   1013 		alarm(ksh_tmout);
   1014 	}
   1015 #endif /* KSH */
   1016 #ifdef EDIT
   1017 	if (have_tty && (0
   1018 # ifdef VI
   1019 			 || Flag(FVI)
   1020 # endif /* VI */
   1021 # ifdef EMACS
   1022 			 || Flag(FEMACS) || Flag(FGMACS)
   1023 # endif /* EMACS */
   1024 		))
   1025 	{
   1026 		int nread;
   1027 
   1028 		nread = x_read(xp, LINE);
   1029 		if (nread < 0)	/* read error */
   1030 			nread = 0;
   1031 		xp[nread] = '\0';
   1032 		xp += nread;
   1033 	}
   1034 	else
   1035 #endif /* EDIT */
   1036 	{
   1037 		if (interactive) {
   1038 			pprompt(prompt, 0);
   1039 		} else
   1040 			s->line++;
   1041 
        		/* read until a newline or EOF, growing the buffer as needed;
        		 * a read interrupted by a signal (EINTR) is retried after
        		 * running any pending traps
        		 */
   1042 		while (1) {
   1043 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1044 
   1045 			if (!p && shf_error(s->u.shf)
   1046 			    && shf_errno(s->u.shf) == EINTR)
   1047 			{
   1048 				shf_clearerr(s->u.shf);
   1049 				if (trap)
   1050 					runtraps(0);
   1051 				continue;
   1052 			}
   1053 			if (!p || (xp = p, xp[-1] == '\n'))
   1054 				break;
   1055 			/* double buffer size */
   1056 			xp++; /* move past null so doubling works... */
   1057 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1058 			xp--; /* ...and move back again */
   1059 		}
   1060 		/* flush any unwanted input so other programs/builtins
   1061 		 * can read it.  Not very optimal, but less error prone
   1062 		 * than flushing elsewhere, dealing with redirections,
   1063 		 * etc..
   1064 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
   1065 		 */
   1066 		if (s->type == SSTDIN)
   1067 			shf_flush(s->u.shf);
   1068 	}
   1069 	/* XXX: temporary kludge to restore source after a
   1070 	 * trap may have been executed.
   1071 	 */
   1072 	source = s;
   1073 #ifdef KSH
        	/* input arrived: cancel the timeout */
   1074 	if (have_tty && ksh_tmout)
   1075 	{
   1076 		ksh_tmout_state = TMOUT_EXECUTING;
   1077 		alarm(0);
   1078 	}
   1079 #endif /* KSH */
        	/* the buffer may have been moved while growing, so recompute the
        	 * start of the line from the XString
        	 */
   1080 	s->start = s->str = Xstring(s->xs, xp);
   1081 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1082 	/* Note: if input is all nulls, this is not eof */
   1083 	if (Xlength(s->xs, xp) == 0) { /* EOF */
   1084 		if (s->type == SFILE)
   1085 			shf_fdclose(s->u.shf);
   1086 		s->str = NULL;
   1087 	} else if (interactive) {
   1088 #ifdef HISTORY
   1089 		char *p = Xstring(s->xs, xp);
        		/* at the PS1 prompt, lines consisting only of IFS white space
        		 * are not recorded
        		 */
   1090 		if (cur_prompt == PS1)
   1091 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
   1092 				p++;
   1093 		if (*p) {
   1094 # ifdef EASY_HISTORY
   1095 			if (cur_prompt == PS2)
   1096 				histappend(Xstring(s->xs, xp), 1);
   1097 			else
   1098 # endif /* EASY_HISTORY */
   1099 			{
   1100 				s->line++;
   1101 				histsave(s->line, s->str, 1);
   1102 			}
   1103 		}
   1104 #endif /* HISTORY */
   1105 	}
   1106 	if (interactive)
   1107 		set_prompt(PS2, (Source *) 0);
   1108 }
   1109 
   1110 void
   1111 set_prompt(to, s)
   1112 	int to;
   1113 	Source *s;
   1114 {
   1115 	cur_prompt = to;
   1116 
   1117 	switch (to) {
   1118 	case PS1: /* command */
   1119 #ifdef KSH
   1120 		/* Substitute ! and !! here, before substitutions are done
   1121 		 * so ! in expanded variables are not expanded.
   1122 		 * NOTE: this is not what at&t ksh does (it does it after
   1123 		 * substitutions, POSIX doesn't say which is to be done.
   1124 		 */
   1125 		{
   1126 			struct shf *shf;
   1127 			char * volatile ps1;
   1128 			Area *saved_atemp;
   1129 
   1130 			ps1 = str_val(global("PS1"));
   1131 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
   1132 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
   1133 			while (*ps1) {
   1134 				if (*ps1 != '!' || *++ps1 == '!')
   1135 					shf_putchar(*ps1++, shf);
   1136 				else
   1137 					shf_fprintf(shf, "%d",
   1138 						s ? s->line + 1 : 0);
   1139 			}
   1140 			ps1 = shf_sclose(shf);
   1141 			saved_atemp = ATEMP;
   1142 			newenv(E_ERRH);
   1143 			if (ksh_sigsetjmp(e->jbuf, 0)) {
   1144 				prompt = safe_prompt;
   1145 				/* Don't print an error - assume it has already
   1146 				 * been printed.  Reason is we may have forked
   1147 				 * to run a command and the child may be
   1148 				 * unwinding its stack through this code as it
   1149 				 * exits.
   1150 				 */
   1151 			} else
   1152 				prompt = str_save(substitute(ps1, 0),
   1153 						 saved_atemp);
   1154 			quitenv();
   1155 		}
   1156 #else /* KSH */
   1157 		prompt = str_val(global("PS1"));
   1158 #endif /* KSH */
   1159 		break;
   1160 
   1161 	case PS2: /* command continuation */
   1162 		prompt = str_val(global("PS2"));
   1163 		break;
   1164 	}
   1165 }
   1166 
   1167 /* See also related routine, promptlen() in edit.c */
   1168 void
   1169 pprompt(cp, ntruncate)
   1170 	const char *cp;
   1171 	int ntruncate;
   1172 {
   1173 #if 0
   1174 	char nbuf[32];
   1175 	int c;
   1176 
   1177 	while (*cp != 0) {
   1178 		if (*cp != '!')
   1179 			c = *cp++;
   1180 		else if (*++cp == '!')
   1181 			c = *cp++;
   1182 		else {
   1183 			int len;
   1184 			char *p;
   1185 
   1186 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
   1187 				source->line + 1);
   1188 			len = strlen(nbuf);
   1189 			if (ntruncate) {
   1190 				if (ntruncate >= len) {
   1191 					ntruncate -= len;
   1192 					continue;
   1193 				}
   1194 				p += ntruncate;
   1195 				len -= ntruncate;
   1196 				ntruncate = 0;
   1197 			}
   1198 			shf_write(p, len, shl_out);
   1199 			continue;
   1200 		}
   1201 		if (ntruncate)
   1202 			--ntruncate;
   1203 		else
   1204 			shf_putc(c, shl_out);
   1205 	}
   1206 #endif /* 0 */
   1207 	shf_puts(cp + ntruncate, shl_out);
   1208 	shf_flush(shl_out);
   1209 }
   1210 
   1211 /* Read the variable part of a ${...} expression (ie, up to but not including
   1212  * the :[-+?=#%] or close-brace.
   1213  */
   1214 static char *
   1215 get_brace_var(wsp, wp)
   1216 	XString *wsp;
   1217 	char *wp;
   1218 {
   1219 	enum parse_state {
   1220 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1221 			   PS_NUMBER, PS_VAR1, PS_END
   1222 			 }
   1223 		state;
   1224 	char c;
   1225 
   1226 	state = PS_INITIAL;
   1227 	while (1) {
   1228 		c = getsc();
   1229 		/* State machine to figure out where the variable part ends. */
   1230 		switch (state) {
   1231 		  case PS_INITIAL:
   1232 			if (c == '#') {
   1233 				state = PS_SAW_HASH;
   1234 				break;
   1235 			}
   1236 			/* fall through.. */
   1237 		  case PS_SAW_HASH:
   1238 			if (letter(c))
   1239 				state = PS_IDENT;
   1240 			else if (digit(c))
   1241 				state = PS_NUMBER;
   1242 			else if (ctype(c, C_VAR1))
   1243 				state = PS_VAR1;
   1244 			else
   1245 				state = PS_END;
   1246 			break;
   1247 		  case PS_IDENT:
   1248 			if (!letnum(c)) {
   1249 				state = PS_END;
   1250 				if (c == '[') {
   1251 					char *tmp, *p;
   1252 
   1253 					if (!arraysub(&tmp))
   1254 						yyerror("missing ]\n");
   1255 					*wp++ = c;
   1256 					for (p = tmp; *p; ) {
   1257 						Xcheck(*wsp, wp);
   1258 						*wp++ = *p++;
   1259 					}
   1260 					afree(tmp, ATEMP);
   1261 					c = getsc(); /* the ] */
   1262 				}
   1263 			}
   1264 			break;
   1265 		  case PS_NUMBER:
   1266 			if (!digit(c))
   1267 				state = PS_END;
   1268 			break;
   1269 		  case PS_VAR1:
   1270 			state = PS_END;
   1271 			break;
   1272 		  case PS_END: /* keep gcc happy */
   1273 			break;
   1274 		}
   1275 		if (state == PS_END) {
   1276 			*wp++ = '\0';	/* end of variable part */
   1277 			ungetsc(c);
   1278 			break;
   1279 		}
   1280 		Xcheck(*wsp, wp);
   1281 		*wp++ = c;
   1282 	}
   1283 	return wp;
   1284 }
   1285 
   1286 /*
   1287  * Save an array subscript - returns true if matching bracket found, false
   1288  * if eof or newline was found.
   1289  * (Returned string double null terminated)
   1290  */
   1291 static int
   1292 arraysub(strp)
   1293 	char **strp;
   1294 {
   1295 	XString ws;
   1296 	char	*wp;
   1297 	char	c;
   1298 	int 	depth = 1;	/* we are just past the initial [ */
   1299 
   1300 	Xinit(ws, wp, 32, ATEMP);
   1301 
   1302 	do {
   1303 		c = getsc();
   1304 		Xcheck(ws, wp);
   1305 		*wp++ = c;
   1306 		if (c == '[')
   1307 			depth++;
   1308 		else if (c == ']')
   1309 			depth--;
   1310 	} while (depth > 0 && c && c != '\n');
   1311 
   1312 	*wp++ = '\0';
   1313 	*strp = Xclose(ws, wp);
   1314 
   1315 	return depth == 0 ? 1 : 0;
   1316 }
   1317 
   1318 /* Unget a char: handles case when we are already at the start of the buffer */
   1319 static const char *
   1320 ungetsc(c)
   1321 	int c;
   1322 {
   1323 	if (backslash_skip)
   1324 		backslash_skip--;
   1325 	/* Don't unget eof... */
   1326 	if (source->str == null && c == '\0')
   1327 		return source->str;
   1328 	if (source->str > source->start)
   1329 		source->str--;
   1330 	else {
   1331 		Source *s;
   1332 
   1333 		s = pushs(SREREAD, source->areap);
   1334 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1335 		s->start = s->str = s->ugbuf;
   1336 		s->next = source;
   1337 		source = s;
   1338 	}
   1339 	return source->str;
   1340 }
   1341 
   1342 
   1343 /* Called to get a char that isn't a \newline sequence. */
   1344 static int
   1345 getsc_bn ARGS((void))
   1346 {
   1347 	int c, c2;
   1348 
   1349 	if (ignore_backslash_newline)
   1350 		return getsc_();
   1351 
   1352 	if (backslash_skip == 1) {
   1353 		backslash_skip = 2;
   1354 		return getsc_();
   1355 	}
   1356 
   1357 	backslash_skip = 0;
   1358 
   1359 	while (1) {
   1360 		c = getsc_();
   1361 		if (c == '\\') {
   1362 			if ((c2 = getsc_()) == '\n')
   1363 				/* ignore the \newline; get the next char... */
   1364 				continue;
   1365 			ungetsc(c2);
   1366 			backslash_skip = 1;
   1367 		}
   1368 		return c;
   1369 	}
   1370 }
   1371 
   1372 static Lex_state *
   1373 push_state_(si, old_end)
   1374 	State_info *si;
   1375 	Lex_state *old_end;
   1376 {
   1377 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
   1378 
   1379 	new[0].ls_info.base = old_end;
   1380 	si->base = &new[0];
   1381 	si->end = &new[STATE_BSIZE];
   1382 	return &new[1];
   1383 }
   1384 
   1385 static Lex_state *
   1386 pop_state_(si, old_end)
   1387 	State_info *si;
   1388 	Lex_state *old_end;
   1389 {
   1390 	Lex_state *old_base = si->base;
   1391 
   1392 	si->base = old_end->ls_info.base - STATE_BSIZE;
   1393 	si->end = old_end->ls_info.base;
   1394 
   1395 	afree(old_base, ATEMP);
   1396 
   1397 	return si->base + STATE_BSIZE - 1;
   1398 }
   1399