Home | History | Annotate | Line # | Download | only in sh
parser.c revision 1.120
      1 /*	$NetBSD: parser.c,v 1.120 2016/06/01 02:47:05 kre Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Kenneth Almquist.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 #ifndef lint
     37 #if 0
     38 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
     39 #else
     40 __RCSID("$NetBSD: parser.c,v 1.120 2016/06/01 02:47:05 kre Exp $");
     41 #endif
     42 #endif /* not lint */
     43 
     44 #include <stdio.h>
     45 #include <stdlib.h>
     46 #include <limits.h>
     47 
     48 #include "shell.h"
     49 #include "parser.h"
     50 #include "nodes.h"
     51 #include "expand.h"	/* defines rmescapes() */
     52 #include "eval.h"	/* defines commandname */
     53 #include "syntax.h"
     54 #include "options.h"
     55 #include "input.h"
     56 #include "output.h"
     57 #include "var.h"
     58 #include "error.h"
     59 #include "memalloc.h"
     60 #include "mystring.h"
     61 #include "alias.h"
     62 #include "show.h"
     63 #ifndef SMALL
     64 #include "myhistedit.h"
     65 #endif
     66 
     67 /*
     68  * Shell command parser.
     69  */
     70 
     71 /* values returned by readtoken */
     72 #include "token.h"
     73 
     74 #define OPENBRACE '{'
     75 #define CLOSEBRACE '}'
     76 
     77 
     78 struct heredoc {
     79 	struct heredoc *next;	/* next here document in list */
     80 	union node *here;		/* redirection node */
     81 	char *eofmark;		/* string indicating end of input */
     82 	int striptabs;		/* if set, strip leading tabs */
     83 	int startline;		/* line number where << seen */
     84 };
     85 
     86 
     87 
     88 static int noalias = 0;		/* when set, don't handle aliases */
     89 struct heredoc *heredoclist;	/* list of here documents to read */
     90 int parsebackquote;		/* nonzero if we are inside backquotes */
     91 int doprompt;			/* if set, prompt the user */
     92 int needprompt;			/* true if interactive and at start of line */
     93 int lasttoken;			/* last token read */
     94 MKINIT int tokpushback;		/* last token pushed back */
     95 char *wordtext;			/* text of last word returned by readtoken */
     96 MKINIT int checkkwd;		/* 1 == check for kwds, 2 == also eat newlines */
     97 struct nodelist *backquotelist;
     98 union node *redirnode;
     99 struct heredoc *heredoc;
    100 int quoteflag;			/* set if (part of) last token was quoted */
    101 int startlinno;			/* line # where last token started */
    102 int funclinno;			/* line # where the current function started */
    103 
    104 
    105 STATIC union node *list(int, int);
    106 STATIC union node *andor(void);
    107 STATIC union node *pipeline(void);
    108 STATIC union node *command(void);
    109 STATIC union node *simplecmd(union node **, union node *);
    110 STATIC union node *makename(void);
    111 STATIC void parsefname(void);
    112 STATIC void slurp_heredoc(char *const, const int, const int);
    113 STATIC void readheredocs(void);
    114 STATIC int peektoken(void);
    115 STATIC int readtoken(void);
    116 STATIC int xxreadtoken(void);
    117 STATIC int readtoken1(int, char const *, int);
    118 STATIC int noexpand(char *);
    119 STATIC void synexpect(int, const char *) __dead;
    120 STATIC void synerror(const char *) __dead;
    121 STATIC void setprompt(int);
    122 
    123 
    124 static const char EOFhere[] = "EOF reading here (<<) document";
    125 
    126 
    127 /*
    128  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
    129  * valid parse tree indicating a blank line.)
    130  */
    131 
    132 union node *
    133 parsecmd(int interact)
    134 {
    135 	int t;
    136 	union node *n;
    137 
    138 	tokpushback = 0;
    139 	doprompt = interact;
    140 	if (doprompt)
    141 		setprompt(1);
    142 	else
    143 		setprompt(0);
    144 	needprompt = 0;
    145 	t = readtoken();
    146 	if (t == TEOF)
    147 		return NEOF;
    148 	if (t == TNL)
    149 		return NULL;
    150 	tokpushback++;
    151 	n = list(1, 0);
    152 	if (heredoclist)
    153 		error("%d: Here document (<<%s) expected but not present",
    154 			heredoclist->startline, heredoclist->eofmark);
    155 	return n;
    156 }
    157 
    158 
    159 STATIC union node *
    160 list(int nlflag, int erflag)
    161 {
    162 	union node *n1, *n2, *n3;
    163 	int tok;
    164 	TRACE(("list(%d,%d): entered\n", nlflag, erflag));
    165 
    166 	checkkwd = 2;
    167 	if (nlflag == 0 && tokendlist[peektoken()])
    168 		return NULL;
    169 	n1 = NULL;
    170 	for (;;) {
    171 		n2 = andor();
    172 		tok = readtoken();
    173 		if (tok == TBACKGND) {
    174 			if (n2->type == NCMD || n2->type == NPIPE) {
    175 				n2->ncmd.backgnd = 1;
    176 			} else if (n2->type == NREDIR) {
    177 				n2->type = NBACKGND;
    178 			} else {
    179 				n3 = stalloc(sizeof(struct nredir));
    180 				n3->type = NBACKGND;
    181 				n3->nredir.n = n2;
    182 				n3->nredir.redirect = NULL;
    183 				n2 = n3;
    184 			}
    185 		}
    186 		if (n1 == NULL) {
    187 			n1 = n2;
    188 		}
    189 		else {
    190 			n3 = stalloc(sizeof(struct nbinary));
    191 			n3->type = NSEMI;
    192 			n3->nbinary.ch1 = n1;
    193 			n3->nbinary.ch2 = n2;
    194 			n1 = n3;
    195 		}
    196 		switch (tok) {
    197 		case TBACKGND:
    198 		case TSEMI:
    199 			tok = readtoken();
    200 			/* FALLTHROUGH */
    201 		case TNL:
    202 			if (tok == TNL) {
    203 				readheredocs();
    204 				if (nlflag)
    205 					return n1;
    206 			} else {
    207 				tokpushback++;
    208 			}
    209 			checkkwd = 2;
    210 			if (tokendlist[peektoken()])
    211 				return n1;
    212 			break;
    213 		case TEOF:
    214 			pungetc();	/* push back EOF on input */
    215 			return n1;
    216 		default:
    217 			if (nlflag || erflag)
    218 				synexpect(-1, 0);
    219 			tokpushback++;
    220 			return n1;
    221 		}
    222 	}
    223 }
    224 
    225 STATIC union node *
    226 andor(void)
    227 {
    228 	union node *n1, *n2, *n3;
    229 	int t;
    230 
    231 	TRACE(("andor: entered\n"));
    232 	n1 = pipeline();
    233 	for (;;) {
    234 		if ((t = readtoken()) == TAND) {
    235 			t = NAND;
    236 		} else if (t == TOR) {
    237 			t = NOR;
    238 		} else {
    239 			tokpushback++;
    240 			return n1;
    241 		}
    242 		n2 = pipeline();
    243 		n3 = stalloc(sizeof(struct nbinary));
    244 		n3->type = t;
    245 		n3->nbinary.ch1 = n1;
    246 		n3->nbinary.ch2 = n2;
    247 		n1 = n3;
    248 	}
    249 }
    250 
    251 STATIC union node *
    252 pipeline(void)
    253 {
    254 	union node *n1, *n2, *pipenode;
    255 	struct nodelist *lp, *prev;
    256 	int negate;
    257 
    258 	TRACE(("pipeline: entered\n"));
    259 
    260 	negate = 0;
    261 	checkkwd = 2;
    262 	while (readtoken() == TNOT) {
    263 		TRACE(("pipeline: TNOT recognized\n"));
    264 		negate = !negate;
    265 	}
    266 	tokpushback++;
    267 	n1 = command();
    268 	if (readtoken() == TPIPE) {
    269 		pipenode = stalloc(sizeof(struct npipe));
    270 		pipenode->type = NPIPE;
    271 		pipenode->npipe.backgnd = 0;
    272 		lp = stalloc(sizeof(struct nodelist));
    273 		pipenode->npipe.cmdlist = lp;
    274 		lp->n = n1;
    275 		do {
    276 			prev = lp;
    277 			lp = stalloc(sizeof(struct nodelist));
    278 			lp->n = command();
    279 			prev->next = lp;
    280 		} while (readtoken() == TPIPE);
    281 		lp->next = NULL;
    282 		n1 = pipenode;
    283 	}
    284 	tokpushback++;
    285 	if (negate) {
    286 		TRACE(("negate pipeline\n"));
    287 		n2 = stalloc(sizeof(struct nnot));
    288 		n2->type = NNOT;
    289 		n2->nnot.com = n1;
    290 		return n2;
    291 	} else
    292 		return n1;
    293 }
    294 
    295 
    296 
    297 STATIC union node *
    298 command(void)
    299 {
    300 	union node *n1, *n2;
    301 	union node *ap, **app;
    302 	union node *cp, **cpp;
    303 	union node *redir, **rpp;
    304 	int t, negate = 0;
    305 
    306 	TRACE(("command: entered\n"));
    307 
    308 	checkkwd = 2;
    309 	redir = NULL;
    310 	n1 = NULL;
    311 	rpp = &redir;
    312 
    313 	/* Check for redirection which may precede command */
    314 	while (readtoken() == TREDIR) {
    315 		*rpp = n2 = redirnode;
    316 		rpp = &n2->nfile.next;
    317 		parsefname();
    318 	}
    319 	tokpushback++;
    320 
    321 	while (readtoken() == TNOT) {
    322 		TRACE(("command: TNOT recognized\n"));
    323 		negate = !negate;
    324 	}
    325 	tokpushback++;
    326 
    327 	switch (readtoken()) {
    328 	case TIF:
    329 		n1 = stalloc(sizeof(struct nif));
    330 		n1->type = NIF;
    331 		n1->nif.test = list(0, 0);
    332 		if (readtoken() != TTHEN)
    333 			synexpect(TTHEN, 0);
    334 		n1->nif.ifpart = list(0, 0);
    335 		n2 = n1;
    336 		while (readtoken() == TELIF) {
    337 			n2->nif.elsepart = stalloc(sizeof(struct nif));
    338 			n2 = n2->nif.elsepart;
    339 			n2->type = NIF;
    340 			n2->nif.test = list(0, 0);
    341 			if (readtoken() != TTHEN)
    342 				synexpect(TTHEN, 0);
    343 			n2->nif.ifpart = list(0, 0);
    344 		}
    345 		if (lasttoken == TELSE)
    346 			n2->nif.elsepart = list(0, 0);
    347 		else {
    348 			n2->nif.elsepart = NULL;
    349 			tokpushback++;
    350 		}
    351 		if (readtoken() != TFI)
    352 			synexpect(TFI, 0);
    353 		checkkwd = 1;
    354 		break;
    355 	case TWHILE:
    356 	case TUNTIL: {
    357 		int got;
    358 		n1 = stalloc(sizeof(struct nbinary));
    359 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
    360 		n1->nbinary.ch1 = list(0, 0);
    361 		if ((got=readtoken()) != TDO) {
    362 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
    363 			synexpect(TDO, 0);
    364 		}
    365 		n1->nbinary.ch2 = list(0, 0);
    366 		if (readtoken() != TDONE)
    367 			synexpect(TDONE, 0);
    368 		checkkwd = 1;
    369 		break;
    370 	}
    371 	case TFOR:
    372 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
    373 			synerror("Bad for loop variable");
    374 		n1 = stalloc(sizeof(struct nfor));
    375 		n1->type = NFOR;
    376 		n1->nfor.var = wordtext;
    377 		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
    378 			app = &ap;
    379 			while (readtoken() == TWORD) {
    380 				n2 = stalloc(sizeof(struct narg));
    381 				n2->type = NARG;
    382 				n2->narg.text = wordtext;
    383 				n2->narg.backquote = backquotelist;
    384 				*app = n2;
    385 				app = &n2->narg.next;
    386 			}
    387 			*app = NULL;
    388 			n1->nfor.args = ap;
    389 			if (lasttoken != TNL && lasttoken != TSEMI)
    390 				synexpect(-1, 0);
    391 		} else {
    392 			static char argvars[5] = {
    393 			    CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
    394 			};
    395 			n2 = stalloc(sizeof(struct narg));
    396 			n2->type = NARG;
    397 			n2->narg.text = argvars;
    398 			n2->narg.backquote = NULL;
    399 			n2->narg.next = NULL;
    400 			n1->nfor.args = n2;
    401 			/*
    402 			 * Newline or semicolon here is optional (but note
    403 			 * that the original Bourne shell only allowed NL).
    404 			 */
    405 			if (lasttoken != TNL && lasttoken != TSEMI)
    406 				tokpushback++;
    407 		}
    408 		checkkwd = 2;
    409 		if ((t = readtoken()) == TDO)
    410 			t = TDONE;
    411 		else if (t == TBEGIN)
    412 			t = TEND;
    413 		else
    414 			synexpect(-1, 0);
    415 		n1->nfor.body = list(0, 0);
    416 		if (readtoken() != t)
    417 			synexpect(t, 0);
    418 		checkkwd = 1;
    419 		break;
    420 	case TCASE:
    421 		n1 = stalloc(sizeof(struct ncase));
    422 		n1->type = NCASE;
    423 		if (readtoken() != TWORD)
    424 			synexpect(TWORD, 0);
    425 		n1->ncase.expr = n2 = stalloc(sizeof(struct narg));
    426 		n2->type = NARG;
    427 		n2->narg.text = wordtext;
    428 		n2->narg.backquote = backquotelist;
    429 		n2->narg.next = NULL;
    430 		while (readtoken() == TNL);
    431 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
    432 			synexpect(-1, "in");
    433 		cpp = &n1->ncase.cases;
    434 		noalias = 1;
    435 		checkkwd = 2, readtoken();
    436 		/*
    437 		 * Both ksh and bash accept 'case x in esac'
    438 		 * so configure scripts started taking advantage of this.
    439 		 * The page: http://pubs.opengroup.org/onlinepubs/\
    440 		 * 009695399/utilities/xcu_chap02.html contradicts itself,
    441 		 * as to if this is legal; the "Case Conditional Format"
    442 		 * paragraph shows one case is required, but the "Grammar"
    443 		 * section shows a grammar that explicitly allows the no
    444 		 * case option.
    445 		 */
    446 		while (lasttoken != TESAC) {
    447 			*cpp = cp = stalloc(sizeof(struct nclist));
    448 			if (lasttoken == TLP)
    449 				readtoken();
    450 			cp->type = NCLIST;
    451 			app = &cp->nclist.pattern;
    452 			for (;;) {
    453 				*app = ap = stalloc(sizeof(struct narg));
    454 				ap->type = NARG;
    455 				ap->narg.text = wordtext;
    456 				ap->narg.backquote = backquotelist;
    457 				if (checkkwd = 2, readtoken() != TPIPE)
    458 					break;
    459 				app = &ap->narg.next;
    460 				readtoken();
    461 			}
    462 			ap->narg.next = NULL;
    463 			noalias = 0;
    464 			if (lasttoken != TRP) {
    465 				synexpect(TRP, 0);
    466 			}
    467 			cp->nclist.body = list(0, 0);
    468 
    469 			checkkwd = 2;
    470 			if ((t = readtoken()) != TESAC) {
    471 				if (t != TENDCASE) {
    472 					noalias = 0;
    473 					synexpect(TENDCASE, 0);
    474 				} else {
    475 					noalias = 1;
    476 					checkkwd = 2;
    477 					readtoken();
    478 				}
    479 			}
    480 			cpp = &cp->nclist.next;
    481 		}
    482 		noalias = 0;
    483 		*cpp = NULL;
    484 		checkkwd = 1;
    485 		break;
    486 	case TLP:
    487 		n1 = stalloc(sizeof(struct nredir));
    488 		n1->type = NSUBSHELL;
    489 		n1->nredir.n = list(0, 0);
    490 		n1->nredir.redirect = NULL;
    491 		if (n1->nredir.n == NULL)
    492 			synexpect(-1, 0);
    493 		if (readtoken() != TRP)
    494 			synexpect(TRP, 0);
    495 		checkkwd = 1;
    496 		break;
    497 	case TBEGIN:
    498 		n1 = list(0, 0);
    499 		if (posix && n1 == NULL)
    500 			synexpect(-1, 0);
    501 		if (readtoken() != TEND)
    502 			synexpect(TEND, 0);
    503 		checkkwd = 1;
    504 		break;
    505 
    506 	case TSEMI:
    507 	case TAND:
    508 	case TOR:
    509 	case TPIPE:
    510 	case TNL:
    511 	case TEOF:
    512 	case TRP:
    513 		/*
    514 		 * simple commands must have something in them,
    515 		 * either a word (which at this point includes a=b)
    516 		 * or a redirection.  If we reached the end of the
    517 		 * command (which one of these tokens indicates)
    518 		 * when we are just starting, and have not had a
    519 		 * redirect, then ...
    520 		 *
    521 		 * nb: it is still possible to end up with empty
    522 		 * simple commands, if the "command" is a var
    523 		 * expansion that produces nothing
    524 		 *	X= ; $X && $X
    525 		 * -->          &&
    526 		 * I am not sure if this is intended to be legal or not.
    527 		 */
    528 		if (!redir)
    529 			synexpect(-1, 0);
    530 	case TWORD:
    531 		tokpushback++;
    532 		n1 = simplecmd(rpp, redir);
    533 		goto checkneg;
    534 	case TENDCASE:
    535 		if (redir) {
    536 			tokpushback++;
    537 			goto checkneg;
    538 		}
    539 		/* FALLTHROUGH */
    540 	default:
    541 		synexpect(-1, 0);
    542 		/* NOTREACHED */
    543 	}
    544 
    545 	/* Now check for redirection which may follow command */
    546 	while (readtoken() == TREDIR) {
    547 		*rpp = n2 = redirnode;
    548 		rpp = &n2->nfile.next;
    549 		parsefname();
    550 	}
    551 	tokpushback++;
    552 	*rpp = NULL;
    553 	if (redir) {
    554 		if (n1->type != NSUBSHELL) {
    555 			n2 = stalloc(sizeof(struct nredir));
    556 			n2->type = NREDIR;
    557 			n2->nredir.n = n1;
    558 			n1 = n2;
    559 		}
    560 		n1->nredir.redirect = redir;
    561 	}
    562 
    563 checkneg:
    564 	if (negate) {
    565 		TRACE(("negate command\n"));
    566 		n2 = stalloc(sizeof(struct nnot));
    567 		n2->type = NNOT;
    568 		n2->nnot.com = n1;
    569 		return n2;
    570 	}
    571 	else
    572 		return n1;
    573 }
    574 
    575 
    576 STATIC union node *
    577 simplecmd(union node **rpp, union node *redir)
    578 {
    579 	union node *args, **app;
    580 	union node *n = NULL, *n2;
    581 	int negate = 0;
    582 
    583 	/* If we don't have any redirections already, then we must reset */
    584 	/* rpp to be the address of the local redir variable.  */
    585 	if (redir == 0)
    586 		rpp = &redir;
    587 
    588 	args = NULL;
    589 	app = &args;
    590 
    591 	while (readtoken() == TNOT) {
    592 		TRACE(("simplcmd: TNOT recognized\n"));
    593 		negate = !negate;
    594 	}
    595 	tokpushback++;
    596 
    597 	for (;;) {
    598 		if (readtoken() == TWORD) {
    599 			n = stalloc(sizeof(struct narg));
    600 			n->type = NARG;
    601 			n->narg.text = wordtext;
    602 			n->narg.backquote = backquotelist;
    603 			*app = n;
    604 			app = &n->narg.next;
    605 		} else if (lasttoken == TREDIR) {
    606 			*rpp = n = redirnode;
    607 			rpp = &n->nfile.next;
    608 			parsefname();	/* read name of redirection file */
    609 		} else if (lasttoken == TLP && app == &args->narg.next
    610 					    && redir == 0) {
    611 			/* We have a function */
    612 			if (readtoken() != TRP)
    613 				synexpect(TRP, 0);
    614 			funclinno = plinno;
    615 			rmescapes(n->narg.text);
    616 			if (strchr(n->narg.text, '/'))
    617 				synerror("Bad function name");
    618 			n->type = NDEFUN;
    619 			n->narg.next = command();
    620 			funclinno = 0;
    621 			goto checkneg;
    622 		} else {
    623 			tokpushback++;
    624 			break;
    625 		}
    626 	}
    627 
    628 	if (args == NULL && redir == NULL)
    629 		synexpect(-1, 0);
    630 	*app = NULL;
    631 	*rpp = NULL;
    632 	n = stalloc(sizeof(struct ncmd));
    633 	n->type = NCMD;
    634 	n->ncmd.backgnd = 0;
    635 	n->ncmd.args = args;
    636 	n->ncmd.redirect = redir;
    637 
    638 checkneg:
    639 	if (negate) {
    640 		TRACE(("negate simplecmd\n"));
    641 		n2 = stalloc(sizeof(struct nnot));
    642 		n2->type = NNOT;
    643 		n2->nnot.com = n;
    644 		return n2;
    645 	}
    646 	else
    647 		return n;
    648 }
    649 
    650 STATIC union node *
    651 makename(void)
    652 {
    653 	union node *n;
    654 
    655 	n = stalloc(sizeof(struct narg));
    656 	n->type = NARG;
    657 	n->narg.next = NULL;
    658 	n->narg.text = wordtext;
    659 	n->narg.backquote = backquotelist;
    660 	return n;
    661 }
    662 
    663 void
    664 fixredir(union node *n, const char *text, int err)
    665 {
    666 	TRACE(("Fix redir %s %d\n", text, err));
    667 	if (!err)
    668 		n->ndup.vname = NULL;
    669 
    670 	if (is_number(text))
    671 		n->ndup.dupfd = number(text);
    672 	else if (text[0] == '-' && text[1] == '\0')
    673 		n->ndup.dupfd = -1;
    674 	else {
    675 
    676 		if (err)
    677 			synerror("Bad fd number");
    678 		else
    679 			n->ndup.vname = makename();
    680 	}
    681 }
    682 
    683 
    684 STATIC void
    685 parsefname(void)
    686 {
    687 	union node *n = redirnode;
    688 
    689 	if (readtoken() != TWORD)
    690 		synexpect(-1, 0);
    691 	if (n->type == NHERE) {
    692 		struct heredoc *here = heredoc;
    693 		struct heredoc *p;
    694 
    695 		if (quoteflag == 0)
    696 			n->type = NXHERE;
    697 		TRACE(("Here document %d\n", n->type));
    698 		if (here->striptabs) {
    699 			while (*wordtext == '\t')
    700 				wordtext++;
    701 		}
    702 
    703 		/*
    704 		 * this test is not really necessary, we are not
    705 		 * required to expand wordtext, but there's no reason
    706 		 * it cannot be $$ or something like that - that would
    707 		 * not mean the pid, but literally two '$' characters.
    708 		 * There is no need for limits on what the word can be.
    709 		 * However, it needs to stay literal as entered, not
    710 		 * have $ converted to CTLVAR or something, which as
    711 		 * the parser is, at the minute, is impossible to prevent.
    712 		 * So, leave it like this until the rest of the parser is fixed.
    713 		 */
    714 		if (! noexpand(wordtext))
    715 			synerror("Illegal eof marker for << redirection");
    716 
    717 		rmescapes(wordtext);
    718 		here->eofmark = wordtext;
    719 		here->next = NULL;
    720 		if (heredoclist == NULL)
    721 			heredoclist = here;
    722 		else {
    723 			for (p = heredoclist ; p->next ; p = p->next)
    724 				continue;
    725 			p->next = here;
    726 		}
    727 	} else if (n->type == NTOFD || n->type == NFROMFD) {
    728 		fixredir(n, wordtext, 0);
    729 	} else {
    730 		n->nfile.fname = makename();
    731 	}
    732 }
    733 
    734 /*
    735  * Check to see whether we are at the end of the here document.  When this
    736  * is called, c is set to the first character of the next input line.  If
    737  * we are at the end of the here document, this routine sets the c to PEOF.
    738  * The new value of c is returned.
    739  */
    740 
    741 static int
    742 checkend(int c, char * const eofmark, const int striptabs)
    743 {
    744 	if (striptabs) {
    745 		while (c == '\t')
    746 			c = pgetc();
    747 	}
    748 	if (c == PEOF) {
    749 		if (*eofmark == '\0')
    750 			return (c);
    751 		synerror(EOFhere);
    752 	}
    753 	if (c == *eofmark) {
    754 		int c2;
    755 		char *q;
    756 
    757 		for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
    758 			;
    759 		if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
    760 			c = PEOF;
    761 			if (c2 == '\n') {
    762 				plinno++;
    763 				needprompt = doprompt;
    764 			}
    765 		} else {
    766 			pungetc();
    767 			pushstring(eofmark + 1, q - (eofmark + 1), NULL);
    768 		}
    769 	} else if (c == '\n' && *eofmark == '\0') {
    770 		c = PEOF;
    771 		plinno++;
    772 		needprompt = doprompt;
    773 	}
    774 	return (c);
    775 }
    776 
    777 
    778 /*
    779  * Input any here documents.
    780  */
    781 
    782 STATIC void
    783 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
    784 {
    785 	int c;
    786 	char *out;
    787 
    788 	c = pgetc();
    789 
    790 	/*
    791 	 * If we hit EOF on the input, and the eofmark is a null string ('')
    792 	 * we consider this empty line to be the eofmark, and exit without err.
    793 	 */
    794 	if (c == PEOF && *eofmark != '\0')
    795 		synerror(EOFhere);
    796 
    797 	STARTSTACKSTR(out);
    798 
    799 	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
    800 		do {
    801 			if (sq) {
    802 				/*
    803 				 * in single quoted mode (eofmark quoted)
    804 				 * all we look for is \n so we can check
    805 				 * for the epfmark - everything saved literally.
    806 				 */
    807 				STPUTC(c, out);
    808 				if (c == '\n')
    809 					break;
    810 				continue;
    811 			}
    812 			/*
    813 			 * In double quoted (non-quoted eofmark)
    814 			 * we must handle \ followed by \n here
    815 			 * otherwise we can mismatch the end mark.
    816 			 * All other uses of \ will be handled later
    817 			 * when the here doc is expanded.
    818 			 *
    819 			 * This also makes sure \\ followed by \n does
    820 			 * not suppress the newline (the \ quotes itself)
    821 			 */
    822 			if (c == '\\') {		/* A backslash */
    823 				c = pgetc();		/* followed by */
    824 				if (c == '\n')		/* a newline?  */
    825 					continue;	/* y:drop both */
    826 				STPUTC('\\', out);	/* else keep \ */
    827 			}
    828 			STPUTC(c, out);			/* keep the char */
    829 			if (c == '\n')			/* at end of line */
    830 				break;			/* look for eofmark */
    831 
    832 		} while ((c = pgetc()) != PEOF);
    833 
    834 		/*
    835 		 * If we have read a line, and reached EOF, without
    836 		 * finding the eofmark, whether the EOF comes before
    837 		 * or immediately after the \n, that is an error.
    838 		 */
    839 		if (c == PEOF || (c = pgetc()) == PEOF)
    840 			synerror(EOFhere);
    841 	}
    842 	STPUTC('\0', out);
    843 
    844 	c = out - stackblock();
    845 	out = stackblock();
    846 	grabstackblock(c);
    847 	wordtext = out;
    848 
    849 	TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
    850 		eofmark, striptabs ? " tab stripped" : "", c, wordtext));
    851 }
    852 
    853 STATIC void
    854 readheredocs(void)
    855 {
    856 	struct heredoc *here;
    857 	union node *n;
    858 
    859 	while (heredoclist) {
    860 		here = heredoclist;
    861 		heredoclist = here->next;
    862 		if (needprompt) {
    863 			setprompt(2);
    864 			needprompt = 0;
    865 		}
    866 
    867 		slurp_heredoc(here->eofmark, here->striptabs,
    868 		    here->here->nhere.type == NHERE);
    869 
    870 		n = stalloc(sizeof(struct narg));
    871 		n->narg.type = NARG;
    872 		n->narg.next = NULL;
    873 		n->narg.text = wordtext;
    874 		n->narg.backquote = backquotelist;
    875 		here->here->nhere.doc = n;
    876 
    877 		if (here->here->nhere.type == NHERE)
    878 			continue;
    879 
    880 		/*
    881 		 * Now "parse" here docs that have unquoted eofmarkers.
    882 		 */
    883 		setinputstring(wordtext, 1);
    884 		readtoken1(pgetc(), DQSYNTAX, 1);
    885 		n->narg.text = wordtext;
    886 		n->narg.backquote = backquotelist;
    887 		popfile();
    888 	}
    889 }
    890 
    891 STATIC int
    892 peektoken(void)
    893 {
    894 	int t;
    895 
    896 	t = readtoken();
    897 	tokpushback++;
    898 	return (t);
    899 }
    900 
    901 STATIC int
    902 readtoken(void)
    903 {
    904 	int t;
    905 	int savecheckkwd = checkkwd;
    906 #ifdef DEBUG
    907 	int alreadyseen = tokpushback;
    908 #endif
    909 	struct alias *ap;
    910 
    911 	top:
    912 	t = xxreadtoken();
    913 
    914 	if (checkkwd) {
    915 		/*
    916 		 * eat newlines
    917 		 */
    918 		if (checkkwd == 2) {
    919 			checkkwd = 0;
    920 			while (t == TNL) {
    921 				readheredocs();
    922 				t = xxreadtoken();
    923 			}
    924 		} else
    925 			checkkwd = 0;
    926 		/*
    927 		 * check for keywords and aliases
    928 		 */
    929 		if (t == TWORD && !quoteflag) {
    930 			const char *const *pp;
    931 
    932 			for (pp = parsekwd; *pp; pp++) {
    933 				if (**pp == *wordtext && equal(*pp, wordtext)) {
    934 					lasttoken = t = pp -
    935 					    parsekwd + KWDOFFSET;
    936 					TRACE(("keyword %s recognized\n", tokname[t]));
    937 					goto out;
    938 				}
    939 			}
    940 			if (!noalias &&
    941 			    (ap = lookupalias(wordtext, 1)) != NULL) {
    942 				pushstring(ap->val, strlen(ap->val), ap);
    943 				checkkwd = savecheckkwd;
    944 				goto top;
    945 			}
    946 		}
    947 out:
    948 		checkkwd = (t == TNOT) ? savecheckkwd : 0;
    949 	}
    950 	TRACE(("%stoken %s %s\n", alreadyseen ? "reread " : "", tokname[t], t == TWORD ? wordtext : ""));
    951 	return (t);
    952 }
    953 
    954 
    955 /*
    956  * Read the next input token.
    957  * If the token is a word, we set backquotelist to the list of cmds in
    958  *	backquotes.  We set quoteflag to true if any part of the word was
    959  *	quoted.
    960  * If the token is TREDIR, then we set redirnode to a structure containing
    961  *	the redirection.
    962  * In all cases, the variable startlinno is set to the number of the line
    963  *	on which the token starts.
    964  *
    965  * [Change comment:  here documents and internal procedures]
    966  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
    967  *  word parsing code into a separate routine.  In this case, readtoken
    968  *  doesn't need to have any internal procedures, but parseword does.
    969  *  We could also make parseoperator in essence the main routine, and
    970  *  have parseword (readtoken1?) handle both words and redirection.]
    971  */
    972 
    973 #define RETURN(token)	return lasttoken = token
    974 
    975 STATIC int
    976 xxreadtoken(void)
    977 {
    978 	int c;
    979 
    980 	if (tokpushback) {
    981 		tokpushback = 0;
    982 		return lasttoken;
    983 	}
    984 	if (needprompt) {
    985 		setprompt(2);
    986 		needprompt = 0;
    987 	}
    988 	startlinno = plinno;
    989 	for (;;) {	/* until token or start of word found */
    990 		c = pgetc_macro();
    991 		switch (c) {
    992 		case ' ': case '\t':
    993 			continue;
    994 		case '#':
    995 			while ((c = pgetc()) != '\n' && c != PEOF)
    996 				continue;
    997 			pungetc();
    998 			continue;
    999 
   1000 		case '\n':
   1001 			plinno++;
   1002 			needprompt = doprompt;
   1003 			RETURN(TNL);
   1004 		case PEOF:
   1005 			RETURN(TEOF);
   1006 
   1007 		case '&':
   1008 			if (pgetc() == '&')
   1009 				RETURN(TAND);
   1010 			pungetc();
   1011 			RETURN(TBACKGND);
   1012 		case '|':
   1013 			if (pgetc() == '|')
   1014 				RETURN(TOR);
   1015 			pungetc();
   1016 			RETURN(TPIPE);
   1017 		case ';':
   1018 			if (pgetc() == ';')
   1019 				RETURN(TENDCASE);
   1020 			pungetc();
   1021 			RETURN(TSEMI);
   1022 		case '(':
   1023 			RETURN(TLP);
   1024 		case ')':
   1025 			RETURN(TRP);
   1026 
   1027 		case '\\':
   1028 			switch (pgetc()) {
   1029 			case '\n':
   1030 				startlinno = ++plinno;
   1031 				if (doprompt)
   1032 					setprompt(2);
   1033 				else
   1034 					setprompt(0);
   1035 				continue;
   1036 			case PEOF:
   1037 				RETURN(TEOF);
   1038 			default:
   1039 				pungetc();
   1040 				break;
   1041 			}
   1042 			/* FALLTHROUGH */
   1043 		default:
   1044 			return readtoken1(c, BASESYNTAX, 0);
   1045 		}
   1046 	}
   1047 #undef RETURN
   1048 }
   1049 
   1050 
   1051 
   1052 /*
   1053  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
   1054  * is not NULL, read a here document.  In the latter case, eofmark is the
   1055  * word which marks the end of the document and striptabs is true if
   1056  * leading tabs should be stripped from the document.  The argument firstc
   1057  * is the first character of the input token or document.
   1058  *
   1059  * Because C does not have internal subroutines, I have simulated them
   1060  * using goto's to implement the subroutine linkage.  The following macros
   1061  * will run code that appears at the end of readtoken1.
   1062  */
   1063 
   1064 /*
   1065  * We used to remember only the current syntax, variable nesting level,
   1066  * double quote state for each var nesting level, and arith nesting
   1067  * level (unrelated to var nesting) and one prev syntax when in arith
   1068  * syntax.  This worked for simple cases, but can't handle arith inside
   1069  * var expansion inside arith inside var with some quoted and some not.
   1070  *
   1071  * Inspired by FreeBSD's implementation (though it was the obvious way)
   1072  * though implemented differently, we now have a stack that keeps track
   1073  * of what we are doing now, and what we were doing previously.
   1074  * Every time something changes, which will eventually end and should
   1075  * revert to the previous state, we push this stack, and then pop it
   1076  * again later.  That is: every ${} with an operator (to parse the word
   1077  * or pattern that follows; ${x} and $x are too simple to need it),
   1078  * $(( )), $( ) and "...".   Always.   Really, always!
   1079  *
   1080  * The stack is implemented as one static (on the C stack) base block
   1081  * containing LEVELS_PER_BLOCK (8) stack entries, which should be
   1082  * enough for the vast majority of cases.  For torture tests, we
   1083  * malloc more blocks as needed.  All accesses through the inline
   1084  * functions below.
   1085  */
   1086 
   1087 /*
   1088  * varnest & arinest will typically be 0 or 1
   1089  * (varnest can increment in usages like ${x=${y}} but probably
   1090  *  does not really need to)
   1091  * parenlevel allows balancing parens inside a $(( )), it is reset
   1092  * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
   1093  * quoted is special - we need to know 2 things ... are we inside "..."
   1094  * (even if inherited from some previous nesting level) and was there
   1095  * an opening '"' at this level (so the next will be closing).
   1096  * "..." can span nesting levels, but cannot be opened in one and
   1097  * closed in a different one.
   1098  * To handle this, "quoted" has two fields, the bottom 4 (really 2)
   1099  * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
   1100  * is really so special that this setting is not very important)
   1101  * and 0x10 that indicates that an opening quote has been seen.
   1102  * The bottom 4 bits are inherited, the 0x10 bit is not.
   1103  */
   1104 struct tokenstate {			/* tokenizer state for one nesting level */
   1105 	const char *ts_syntax;		/* syntax table in effect at this level */
   1106 	unsigned short ts_parenlevel;	/* counters: unmatched '(' seen in arithmetic; 0 on each push */
   1107 	unsigned short ts_varnest;	/* 64000 levels should be enough! (${ } nesting depth) */
   1108 	unsigned short ts_arinest;	/* $(( )) nesting; copied from the level below on push */
   1109 	unsigned short ts_quoted;	/* low bits (QF): 0 -> none, 1 -> single, 2 -> double; QS if quote opened here */
   1110 };
   1111 
   1112 #define	NQ	0x00	/* Unquoted */
   1113 #define	SQ	0x01	/* Single Quotes */
   1114 #define	DQ	0x02	/* Double Quotes (or equivalent) */
   1115 #define	QF	0x0F		/* Mask to extract previous values (the NQ/SQ/DQ part, inherited on push) */
   1116 #define	QS	0x10	/* Quoting started at this level in stack (never inherited) */
   1117 
   1118 #define	LEVELS_PER_BLOCK	8	/* tokenstate entries held by one statestack block */
   1119 #define	VSS			struct statestack	/* shorthand for the state stack block type */
   1120 
   1121 struct statestack {	/* one block of the tokenizer state stack */
   1122 	VSS *prev;		/* previous block in list (NULL in the base block) */
   1123 	int cur;		/* which of our tokenstates is current */
   1124 	struct tokenstate tokenstate[LEVELS_PER_BLOCK];	/* the entries of this block */
   1125 };
   1126 
   1127 static inline struct tokenstate *
   1128 currentstate(VSS *stack)
   1129 {
   1130 	return &stack->tokenstate[stack->cur];
   1131 }
   1132 
   1133 static inline struct tokenstate *
   1134 prevstate(VSS *stack)
   1135 {
   1136 	if (stack->cur != 0)
   1137 		return &stack->tokenstate[stack->cur - 1];
   1138 	if (stack->prev == NULL)	/* cannot drop below base */
   1139 		return &stack->tokenstate[0];
   1140 	return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
   1141 }
   1142 
   1143 static inline VSS *
   1144 bump_state_level(VSS *stack)
   1145 {
   1146 	struct tokenstate *os, *ts;
   1147 
   1148 	os = currentstate(stack);
   1149 
   1150 	if (++stack->cur >= LEVELS_PER_BLOCK) {
   1151 		VSS *ss;
   1152 
   1153 		ss = (VSS *)ckmalloc(sizeof (struct statestack));
   1154 		ss->cur = 0;
   1155 		ss->prev = stack;
   1156 		stack = ss;
   1157 	}
   1158 
   1159 	ts = currentstate(stack);
   1160 
   1161 	ts->ts_parenlevel = 0;	/* parens inside never match outside */
   1162 
   1163 	ts->ts_quoted  = os->ts_quoted & QF;	/* these are default settings */
   1164 	ts->ts_varnest = os->ts_varnest;
   1165 	ts->ts_arinest = os->ts_arinest;	/* when appropriate	   */
   1166 	ts->ts_syntax  = os->ts_syntax;		/*    they will be altered */
   1167 
   1168 	return stack;
   1169 }
   1170 
   1171 static inline VSS *
   1172 drop_state_level(VSS *stack)
   1173 {
   1174 	if (stack->cur == 0) {
   1175 		VSS *ss;
   1176 
   1177 		ss = stack;
   1178 		stack = ss->prev;
   1179 		if (stack == NULL)
   1180 			return ss;
   1181 		ckfree(ss);
   1182 	}
   1183 	--stack->cur;
   1184 	return stack;
   1185 }
   1186 
   1187 static inline void
   1188 cleanup_state_stack(VSS *stack)
   1189 {
   1190 	while (stack->prev != NULL) {
   1191 		stack->cur = 0;
   1192 		stack = drop_state_level(stack);
   1193 	}
   1194 }
   1195 
   1196 #define	PARSESUB()	{goto parsesub; parsesub_return:;}	/* jump to the parsesub code, resume here */
   1197 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}	/* jump to the parsearith code, resume here */
   1198 
   1199 /*
   1200  * The following macros all assume the existence of a local var "stack"
   1201  * which contains a pointer to the current struct statestack
   1202  */
   1203 
   1204 /*
   1205  * These are macros rather than inline funcs to avoid code churn as much
   1206  * as possible - they replace macros of the same name used previously.
         *
         * ISDBLQUOTE() tests the QS bit, so it is true only when a double
         * quote was opened at the current level.  SETDBLQUOTE() marks the
         * current level as double quoted and opened here.  CLRDBLQUOTE()
         * resets the current level to the QF part of the level below, or
         * to 0 when there is no level below.
   1207  */
   1208 #define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
   1209 #define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = QS | DQ)
   1210 #define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
   1211 			    stack->cur != 0 || stack->prev ?		\
   1212 				prevstate(stack)->ts_quoted & QF : 0)
   1213 
   1214 /*
   1215  * This set are just to avoid excess typing and line lengths...
   1216  * The ones that "look like" var names must be implemented to be lvalues
         * (each one names a field of the current tokenstate entry).
         * TS_PUSH() and TS_POP() update "stack" itself, because pushing or
         * popping can move to a different block.
   1217  */
   1218 #define	syntax		(currentstate(stack)->ts_syntax)
   1219 #define	parenlevel	(currentstate(stack)->ts_parenlevel)
   1220 #define	varnest		(currentstate(stack)->ts_varnest)
   1221 #define	arinest		(currentstate(stack)->ts_arinest)
   1222 #define	quoted		(currentstate(stack)->ts_quoted)
   1223 #define	TS_PUSH()	(stack = bump_state_level(stack))
   1224 #define	TS_POP()	(stack = drop_state_level(stack))
   1225 
   1226 /*
   1227  * Called to parse command substitutions.  oldstyle is true if the command
   1228  * is enclosed inside `` (otherwise it was enclosed in "$( )")
   1229  *
   1230  * Internally nlpp is a pointer to the head of the linked
   1231  * list of commands (passed by reference), and savelen is the number of
   1232  * characters on the top of the stack which must be preserved.
   1233  */
   1234 static char *
   1235 parsebackq(VSS *const stack, char * const in,
   1236     struct nodelist **const pbqlist, const int oldstyle)
   1237 {
   1238 	struct nodelist **nlpp;
   1239 	const int savepbq = parsebackquote;
   1240 	union node *n;
   1241 	char *out;
   1242 	char *str = NULL;
   1243 	char *volatile sstr = str;
   1244 	struct jmploc jmploc;
   1245 	struct jmploc *const savehandler = handler;
   1246 	const int savelen = in - stackblock();
   1247 	int saveprompt;
   1248 
   1249 	if (setjmp(jmploc.loc)) {
   1250 		if (sstr)
   1251 			ckfree(__UNVOLATILE(sstr));
   1252 		cleanup_state_stack(stack);
   1253 		parsebackquote = 0;
   1254 		handler = savehandler;
   1255 		longjmp(handler->loc, 1);
   1256 	}
   1257 	INTOFF;
   1258 	sstr = str = NULL;
   1259 	if (savelen > 0) {
   1260 		sstr = str = ckmalloc(savelen);
   1261 		memcpy(str, stackblock(), savelen);
   1262 	}
   1263 	handler = &jmploc;
   1264 	INTON;
   1265         if (oldstyle) {
   1266                 /* We must read until the closing backquote, giving special
   1267                    treatment to some slashes, and then push the string and
   1268                    reread it as input, interpreting it normally.  */
   1269                 int pc;
   1270                 int psavelen;
   1271                 char *pstr;
   1272 
   1273 		/*
   1274 		 * Because the entire `...` is read here, we don't
   1275 		 * need to bother the state stack.  That will be used
   1276 		 * (as appropriate) when the processed string is re-read.
   1277 		 */
   1278                 STARTSTACKSTR(out);
   1279 		for (;;) {
   1280 			if (needprompt) {
   1281 				setprompt(2);
   1282 				needprompt = 0;
   1283 			}
   1284 			switch (pc = pgetc()) {
   1285 			case '`':
   1286 				goto done;
   1287 
   1288 			case '\\':
   1289                                 if ((pc = pgetc()) == '\n') {
   1290 					plinno++;
   1291 					if (doprompt)
   1292 						setprompt(2);
   1293 					else
   1294 						setprompt(0);
   1295 					/*
   1296 					 * If eating a newline, avoid putting
   1297 					 * the newline into the new character
   1298 					 * stream (via the STPUTC after the
   1299 					 * switch).
   1300 					 */
   1301 					continue;
   1302 				}
   1303                                 if (pc != '\\' && pc != '`' && pc != '$'
   1304                                     && (!ISDBLQUOTE() || pc != '"'))
   1305                                         STPUTC('\\', out);
   1306 				break;
   1307 
   1308 			case '\n':
   1309 				plinno++;
   1310 				needprompt = doprompt;
   1311 				break;
   1312 
   1313 			case PEOF:
   1314 			        startlinno = plinno;
   1315 				synerror("EOF in backquote substitution");
   1316  				break;
   1317 
   1318 			default:
   1319 				break;
   1320 			}
   1321 			STPUTC(pc, out);
   1322                 }
   1323 done:
   1324                 STPUTC('\0', out);
   1325                 psavelen = out - stackblock();
   1326                 if (psavelen > 0) {
   1327 			pstr = grabstackstr(out);
   1328 			setinputstring(pstr, 1);
   1329                 }
   1330         }
   1331 	nlpp = pbqlist;
   1332 	while (*nlpp)
   1333 		nlpp = &(*nlpp)->next;
   1334 	*nlpp = stalloc(sizeof(struct nodelist));
   1335 	(*nlpp)->next = NULL;
   1336 	parsebackquote = oldstyle;
   1337 
   1338 	if (oldstyle) {
   1339 		saveprompt = doprompt;
   1340 		doprompt = 0;
   1341 	} else
   1342 		saveprompt = 0;
   1343 
   1344 	n = list(0, oldstyle);
   1345 
   1346 	if (oldstyle)
   1347 		doprompt = saveprompt;
   1348 	else {
   1349 		if (readtoken() != TRP) {
   1350 			cleanup_state_stack(stack);
   1351 			synexpect(TRP, 0);
   1352 		}
   1353 	}
   1354 
   1355 	(*nlpp)->n = n;
   1356         if (oldstyle) {
   1357 		/*
   1358 		 * Start reading from old file again, ignoring any pushed back
   1359 		 * tokens left from the backquote parsing
   1360 		 */
   1361                 popfile();
   1362 		tokpushback = 0;
   1363 	}
   1364 
   1365 	while (stackblocksize() <= savelen)
   1366 		growstackblock();
   1367 	STARTSTACKSTR(out);
   1368 	if (str) {
   1369 		memcpy(out, str, savelen);
   1370 		STADJUST(savelen, out);
   1371 		INTOFF;
   1372 		ckfree(str);
   1373 		sstr = str = NULL;
   1374 		INTON;
   1375 	}
   1376 	parsebackquote = savepbq;
   1377 	handler = savehandler;
   1378 	if (arinest || ISDBLQUOTE())
   1379 		USTPUTC(CTLBACKQ | CTLQUOTE, out);
   1380 	else
   1381 		USTPUTC(CTLBACKQ, out);
   1382 
   1383 	return out;
   1384 }
   1385 
   1386 /*
   1387  * Parse a redirection operator.  The parameter "out" points to a string
   1388  * specifying the fd to be redirected.  It is guaranteed to be either ""
   1389  * or a numeric string (for now anyway).  The parameter "c" contains the
   1390  * first character of the redirection operator.
   1391  *
   1392  * Note the string "out" is on the stack, which we are about to clobber,
   1393  * so process it first...
   1394  */
   1395 
   1396 static void
   1397 parseredir(const char *out,  int c)
   1398 {
   1399 	union node *np;
   1400 	int fd;
   1401 
   1402 	fd = (*out == '\0') ? -1 : atoi(out);
   1403 
   1404 	np = stalloc(sizeof(struct nfile));
   1405 	if (c == '>') {
   1406 		if (fd < 0)
   1407 			fd = 1;
   1408 		c = pgetc();
   1409 		if (c == '>')
   1410 			np->type = NAPPEND;
   1411 		else if (c == '|')
   1412 			np->type = NCLOBBER;
   1413 		else if (c == '&')
   1414 			np->type = NTOFD;
   1415 		else {
   1416 			np->type = NTO;
   1417 			pungetc();
   1418 		}
   1419 	} else {	/* c == '<' */
   1420 		if (fd < 0)
   1421 			fd = 0;
   1422 		switch (c = pgetc()) {
   1423 		case '<':
   1424 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
   1425 				np = stalloc(sizeof(struct nhere));
   1426 				np->nfile.fd = 0;
   1427 			}
   1428 			np->type = NHERE;
   1429 			heredoc = stalloc(sizeof(struct heredoc));
   1430 			heredoc->here = np;
   1431 			heredoc->startline = plinno;
   1432 			if ((c = pgetc()) == '-') {
   1433 				heredoc->striptabs = 1;
   1434 			} else {
   1435 				heredoc->striptabs = 0;
   1436 				pungetc();
   1437 			}
   1438 			break;
   1439 
   1440 		case '&':
   1441 			np->type = NFROMFD;
   1442 			break;
   1443 
   1444 		case '>':
   1445 			np->type = NFROMTO;
   1446 			break;
   1447 
   1448 		default:
   1449 			np->type = NFROM;
   1450 			pungetc();
   1451 			break;
   1452 		}
   1453 	}
   1454 	np->nfile.fd = fd;
   1455 
   1456 	redirnode = np;		/* this is the "value" of TRENODE */
   1457 }
   1458 
   1459 
   1460 /*
   1461  * The lowest level basic tokenizer.
   1462  *
   1463  * The next input byte (character) is in firstc, syn says which
   1464  * syntax tables we are to use (basic, single or double quoted, or arith)
   1465  * and magicq (used with sqsyntax and dqsyntax only) indicates that the
   1466  * quote character itself is not special (used parsing here docs and similar)
   1467  *
   1468  * The result is the type of the next token (its value, when there is one,
   1469  * is saved in the relevant global var - must fix that someday!) which is
   1470  * also saved for re-reading ("lasttoken").
   1471  *
   1472  * Overall, this routine does far more parsing than it is supposed to.
   1473  * That will also need fixing, someday...
   1474  */
   1475 STATIC int
   1476 readtoken1(int firstc, char const *syn, int magicq)
   1477 {
   1478 	int c;
   1479 	char * out;
   1480 	int len;
   1481 	struct nodelist *bqlist;
   1482 	int quotef;
   1483 	VSS static_stack;
   1484 	VSS *stack = &static_stack;
   1485 
   1486 	stack->prev = NULL;
   1487 	stack->cur = 0;
   1488 
   1489 	syntax = syn;
   1490 
   1491 	startlinno = plinno;
   1492 	varnest = 0;
   1493 	quoted = 0;
   1494 	if (syntax == DQSYNTAX)
   1495 		SETDBLQUOTE();
   1496 	quotef = 0;
   1497 	bqlist = NULL;
   1498 	arinest = 0;
   1499 	parenlevel = 0;
   1500 
   1501 	STARTSTACKSTR(out);
   1502 
   1503 	for (c = firstc ;; c = pgetc_macro()) {	/* until of token */
   1504 		CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
   1505 		switch (syntax[c]) {
   1506 		case CNL:	/* '\n' */
   1507 			if (syntax == BASESYNTAX)
   1508 				break;	/* exit loop */
   1509 			USTPUTC(c, out);
   1510 			plinno++;
   1511 			if (doprompt)
   1512 				setprompt(2);
   1513 			else
   1514 				setprompt(0);
   1515 			continue;
   1516 
   1517 		case CWORD:
   1518 			USTPUTC(c, out);
   1519 			continue;
   1520 		case CCTL:
   1521 			if (!magicq || ISDBLQUOTE())
   1522 				USTPUTC(CTLESC, out);
   1523 			USTPUTC(c, out);
   1524 			continue;
   1525 		case CBACK:	/* backslash */
   1526 			c = pgetc();
   1527 			if (c == PEOF) {
   1528 				USTPUTC('\\', out);
   1529 				pungetc();
   1530 				continue;
   1531 			}
   1532 			if (c == '\n') {
   1533 				plinno++;
   1534 				if (doprompt)
   1535 					setprompt(2);
   1536 				else
   1537 					setprompt(0);
   1538 				continue;
   1539 			}
   1540 			quotef = 1;	/* current token is quoted */
   1541 			if (ISDBLQUOTE() && c != '\\' && c != '`' &&
   1542 			    c != '$' && (c != '"' || magicq))
   1543 				USTPUTC('\\', out);
   1544 			if (SQSYNTAX[c] == CCTL)
   1545 				USTPUTC(CTLESC, out);
   1546 			else if (!magicq) {
   1547 				USTPUTC(CTLQUOTEMARK, out);
   1548 				USTPUTC(c, out);
   1549 				if (varnest != 0)
   1550 					USTPUTC(CTLQUOTEEND, out);
   1551 				continue;
   1552 			}
   1553 			USTPUTC(c, out);
   1554 			continue;
   1555 		case CSQUOTE:
   1556 			if (syntax != SQSYNTAX) {
   1557 				if (!magicq)
   1558 					USTPUTC(CTLQUOTEMARK, out);
   1559 				quotef = 1;
   1560 				TS_PUSH();
   1561 				syntax = SQSYNTAX;
   1562 				quoted = SQ;
   1563 				continue;
   1564 			}
   1565 			if (magicq && arinest == 0 && varnest == 0) {
   1566 				/* Ignore inside quoted here document */
   1567 				USTPUTC(c, out);
   1568 				continue;
   1569 			}
   1570 			/* End of single quotes... */
   1571 			TS_POP();
   1572 			if (syntax == BASESYNTAX && varnest != 0)
   1573 				USTPUTC(CTLQUOTEEND, out);
   1574 			continue;
   1575 		case CDQUOTE:
   1576 			if (magicq && arinest == 0 && varnest == 0) {
   1577 				/* Ignore inside here document */
   1578 				USTPUTC(c, out);
   1579 				continue;
   1580 			}
   1581 			quotef = 1;
   1582 			if (arinest) {
   1583 				if (ISDBLQUOTE()) {
   1584 					TS_POP();
   1585 				} else {
   1586 					TS_PUSH();
   1587 					syntax = DQSYNTAX;
   1588 					SETDBLQUOTE();
   1589 					USTPUTC(CTLQUOTEMARK, out);
   1590 				}
   1591 				continue;
   1592 			}
   1593 			if (magicq)
   1594 				continue;
   1595 			if (ISDBLQUOTE()) {
   1596 				TS_POP();
   1597 				if (varnest != 0)
   1598 					USTPUTC(CTLQUOTEEND, out);
   1599 			} else {
   1600 				TS_PUSH();
   1601 				syntax = DQSYNTAX;
   1602 				SETDBLQUOTE();
   1603 				USTPUTC(CTLQUOTEMARK, out);
   1604 			}
   1605 			continue;
   1606 		case CVAR:	/* '$' */
   1607 			PARSESUB();		/* parse substitution */
   1608 			continue;
   1609 		case CENDVAR:	/* CLOSEBRACE */
   1610 			if (varnest > 0 && !ISDBLQUOTE()) {
   1611 				TS_POP();
   1612 				USTPUTC(CTLENDVAR, out);
   1613 			} else {
   1614 				USTPUTC(c, out);
   1615 			}
   1616 			continue;
   1617 		case CLP:	/* '(' in arithmetic */
   1618 			parenlevel++;
   1619 			USTPUTC(c, out);
   1620 			continue;;
   1621 		case CRP:	/* ')' in arithmetic */
   1622 			if (parenlevel > 0) {
   1623 				USTPUTC(c, out);
   1624 				--parenlevel;
   1625 			} else {
   1626 				if (pgetc() == ')') {
   1627 					if (--arinest == 0) {
   1628 						TS_POP();
   1629 						USTPUTC(CTLENDARI, out);
   1630 					} else
   1631 						USTPUTC(')', out);
   1632 				} else {
   1633 					/*
   1634 					 * unbalanced parens
   1635 					 *  (don't 2nd guess - no error)
   1636 					 */
   1637 					pungetc();
   1638 					USTPUTC(')', out);
   1639 				}
   1640 			}
   1641 			continue;
   1642 		case CBQUOTE:	/* '`' */
   1643 			out = parsebackq(stack, out, &bqlist, 1);
   1644 			continue;
   1645 		case CEOF:		/* --> c == PEOF */
   1646 			break;		/* will exit loop */
   1647 		default:
   1648 			if (varnest == 0 && !ISDBLQUOTE())
   1649 				break;	/* exit loop */
   1650 			USTPUTC(c, out);
   1651 			continue;
   1652 		}
   1653 		break;	/* break from switch -> break from for loop too */
   1654 	}
   1655 
   1656 	if (syntax == ARISYNTAX) {
   1657 		cleanup_state_stack(stack);
   1658 		synerror("Missing '))'");
   1659 	}
   1660 	if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
   1661 		cleanup_state_stack(stack);
   1662 		synerror("Unterminated quoted string");
   1663 	}
   1664 	if (varnest != 0) {
   1665 		cleanup_state_stack(stack);
   1666 		startlinno = plinno;
   1667 		/* { */
   1668 		synerror("Missing '}'");
   1669 	}
   1670 
   1671 	USTPUTC('\0', out);
   1672 	len = out - stackblock();
   1673 	out = stackblock();
   1674 
   1675 	if (!magicq) {
   1676 		if ((c == '<' || c == '>')
   1677 		 && quotef == 0 && (*out == '\0' || is_number(out))) {
   1678 			parseredir(out, c);
   1679 			cleanup_state_stack(stack);
   1680 			return lasttoken = TREDIR;
   1681 		} else {
   1682 			pungetc();
   1683 		}
   1684 	}
   1685 
   1686 	quoteflag = quotef;
   1687 	backquotelist = bqlist;
   1688 	grabstackblock(len);
   1689 	wordtext = out;
   1690 	cleanup_state_stack(stack);
   1691 	return lasttoken = TWORD;
   1692 /* end of readtoken routine */
   1693 
   1694 
   1695 /*
   1696  * Parse a substitution.  At this point, we have read the dollar sign
   1697  * and nothing else.
   1698  */
   1699 
   1700 parsesub: {
   1701 	char buf[10];
   1702 	int subtype;
   1703 	int typeloc;
   1704 	int flags;
   1705 	char *p;
   1706 	static const char types[] = "}-+?=";
   1707 	int i;
   1708 	int linno;
   1709 
   1710 	c = pgetc();
   1711 	if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
   1712 		USTPUTC('$', out);
   1713 		pungetc();
   1714 	} else if (c == '(') {	/* $(command) or $((arith)) */
   1715 		if (pgetc() == '(') {
   1716 			PARSEARITH();
   1717 		} else {
   1718 			pungetc();
   1719 			out = parsebackq(stack, out, &bqlist, 0);
   1720 		}
   1721 	} else {
   1722 		USTPUTC(CTLVAR, out);
   1723 		typeloc = out - stackblock();
   1724 		USTPUTC(VSNORMAL, out);
   1725 		subtype = VSNORMAL;
   1726 		flags = 0;
   1727 		if (c == OPENBRACE) {
   1728 			c = pgetc();
   1729 			if (c == '#') {
   1730 				if ((c = pgetc()) == CLOSEBRACE)
   1731 					c = '#';
   1732 				else
   1733 					subtype = VSLENGTH;
   1734 			}
   1735 			else
   1736 				subtype = 0;
   1737 		}
   1738 		if (is_name(c)) {
   1739 			p = out;
   1740 			do {
   1741 				STPUTC(c, out);
   1742 				c = pgetc();
   1743 			} while (is_in_name(c));
   1744 			if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
   1745 				/* Replace the variable name with the
   1746 				 * current line number. */
   1747 				linno = plinno;
   1748 				if (funclinno != 0)
   1749 					linno -= funclinno - 1;
   1750 				snprintf(buf, sizeof(buf), "%d", linno);
   1751 				STADJUST(-6, out);
   1752 				for (i = 0; buf[i] != '\0'; i++)
   1753 					STPUTC(buf[i], out);
   1754 				flags |= VSLINENO;
   1755 			}
   1756 		} else if (is_digit(c)) {
   1757 			do {
   1758 				USTPUTC(c, out);
   1759 				c = pgetc();
   1760 			} while (subtype != VSNORMAL && is_digit(c));
   1761 		}
   1762 		else if (is_special(c)) {
   1763 			USTPUTC(c, out);
   1764 			c = pgetc();
   1765 		}
   1766 		else {
   1767 badsub:
   1768 			cleanup_state_stack(stack);
   1769 			synerror("Bad substitution");
   1770 		}
   1771 
   1772 		STPUTC('=', out);
   1773 		if (subtype == 0) {
   1774 			switch (c) {
   1775 			case ':':
   1776 				flags |= VSNUL;
   1777 				c = pgetc();
   1778 				/*FALLTHROUGH*/
   1779 			default:
   1780 				p = strchr(types, c);
   1781 				if (p == NULL)
   1782 					goto badsub;
   1783 				subtype = p - types + VSNORMAL;
   1784 				break;
   1785 			case '%':
   1786 			case '#':
   1787 				{
   1788 					int cc = c;
   1789 					subtype = c == '#' ? VSTRIMLEFT :
   1790 							     VSTRIMRIGHT;
   1791 					c = pgetc();
   1792 					if (c == cc)
   1793 						subtype++;
   1794 					else
   1795 						pungetc();
   1796 					break;
   1797 				}
   1798 			}
   1799 		} else {
   1800 			pungetc();
   1801 		}
   1802 		if (ISDBLQUOTE() || arinest)
   1803 			flags |= VSQUOTE;
   1804 		if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
   1805 			flags |= VSPATQ;
   1806 		*(stackblock() + typeloc) = subtype | flags;
   1807 		if (subtype != VSNORMAL) {
   1808 			TS_PUSH();
   1809 			varnest++;
   1810 			arinest = 0;
   1811 			if (subtype > VSASSIGN) {	/* # ## % %% */
   1812 				syntax = BASESYNTAX;
   1813 				CLRDBLQUOTE();
   1814 			}
   1815 		}
   1816 	}
   1817 	goto parsesub_return;
   1818 }
   1819 
   1820 
   1821 /*
   1822  * Parse an arithmetic expansion (indicate start of one and set state)
   1823  */
   1824 parsearith: {
   1825 
   1826 	if (syntax == ARISYNTAX) {
   1827 		/*
   1828 		 * we collapse embedded arithmetic expansion to
   1829 		 * parentheses, which should be equivalent
   1830 		 */
   1831 		USTPUTC('(', out);
   1832 		USTPUTC('(', out);
   1833 		/*
   1834 		 * Need 2 of them because there will (should be)
   1835 		 * two closing ))'s to follow later.
   1836 		 */
   1837 		parenlevel += 2;
   1838 	} else {
   1839 		TS_PUSH();
   1840 		syntax = ARISYNTAX;
   1841 		++arinest;
   1842 		varnest = 0;
   1843 
   1844 		USTPUTC(CTLARI, out);
   1845 		if (ISDBLQUOTE())
   1846 			USTPUTC('"',out);
   1847 		else
   1848 			USTPUTC(' ',out);
   1849 	}
   1850 	goto parsearith_return;
   1851 }
   1852 
   1853 } /* end of readtoken */
   1854 
   1855 
   1856 
   1857 #ifdef mkinit	/* fragment is only seen when mkinit is defined; presumably picked up by the mkinit tool - confirm */
   1858 RESET {		/* parser state to clear on reset */
   1859 	tokpushback = 0;	/* forget any pushed back token */
   1860 	checkkwd = 0;		/* clear the keyword checking flags */
   1861 }
   1862 #endif
   1863 
   1864 /*
   1865  * Returns true if the text contains nothing to expand (no dollar signs
   1866  * or backquotes).
   1867  */
   1868 
   1869 STATIC int
   1870 noexpand(char *text)
   1871 {
   1872 	char *p;
   1873 	char c;
   1874 
   1875 	p = text;
   1876 	while ((c = *p++) != '\0') {
   1877 		if (c == CTLQUOTEMARK)
   1878 			continue;
   1879 		if (c == CTLESC)
   1880 			p++;
   1881 		else if (BASESYNTAX[(int)c] == CCTL)
   1882 			return 0;
   1883 	}
   1884 	return 1;
   1885 }
   1886 
   1887 
/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 * The first character is checked with is_name(), the rest with
 * is_in_name().
 */

int
goodname(char *name)
{
	char *p = name;

	if (!is_name(*p))
		return 0;
	for (p++; *p != '\0'; p++) {
		if (!is_in_name(*p))
			return 0;
	}
	return 1;
}
   1907 
   1908 
   1909 /*
   1910  * Called when an unexpected token is read during the parse.  The argument
   1911  * is the token that is expected, or -1 if more than one type of token can
   1912  * occur at this point.
   1913  */
   1914 
   1915 STATIC void
   1916 synexpect(int token, const char *text)
   1917 {
   1918 	char msg[64];
   1919 	char *p;
   1920 
   1921 	if (lasttoken == TWORD) {
   1922 		size_t len = strlen(wordtext);
   1923 
   1924 		if (len <= 13)
   1925 			fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
   1926 		else
   1927 			fmtstr(msg, 34,
   1928 			    "Word \"%.10s...\" unexpected", wordtext);
   1929 	} else
   1930 		fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
   1931 
   1932 	p = strchr(msg, '\0');
   1933 	if (text)
   1934 		fmtstr(p, 30, " (expecting \"%.10s\")", text);
   1935 	else if (token >= 0)
   1936 		fmtstr(p, 30, " (expecting %s)",  tokname[token]);
   1937 
   1938 	synerror(msg);
   1939 	/* NOTREACHED */
   1940 }
   1941 
   1942 
   1943 STATIC void
   1944 synerror(const char *msg)	/* msg: text describing what is wrong */
   1945 {	/* report a syntax error, prefixed with the line number in startlinno */
   1946 	error("%d: Syntax error: %s\n", startlinno, msg);
   1947 	/* NOTREACHED */
   1948 }
   1949 
   1950 STATIC void
   1951 setprompt(int which)	/* which: prompt selector, later read by getprompt() */
   1952 {
   1953 	whichprompt = which;
   1954 
   1955 #ifndef SMALL
   1956 	if (!el)	/* only without SMALL: skip printing when el is set (presumably the line editor prints it - confirm) */
   1957 #endif
   1958 		out2str(getprompt(NULL));	/* write the selected prompt ourselves */
   1959 }
   1960 
   1961 /*
   1962  * called by editline -- any expansions to the prompt
   1963  *    should be added here.
   1964  */
   1965 const char *
   1966 getprompt(void *unused)
   1967 {
   1968 	switch (whichprompt) {
   1969 	case 0:
   1970 		return "";
   1971 	case 1:
   1972 		return ps1val();
   1973 	case 2:
   1974 		return ps2val();
   1975 	default:
   1976 		return "<internal prompt error>";
   1977 	}
   1978 }
   1979