parser.c revision 1.130 1 /* $NetBSD: parser.c,v 1.130 2017/05/29 10:43:27 kre Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.130 2017/05/29 10:43:27 kre Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <limits.h>
47
48 #include "shell.h"
49 #include "parser.h"
50 #include "nodes.h"
51 #include "expand.h" /* defines rmescapes() */
52 #include "eval.h" /* defines commandname */
53 #include "syntax.h"
54 #include "options.h"
55 #include "input.h"
56 #include "output.h"
57 #include "var.h"
58 #include "error.h"
59 #include "memalloc.h"
60 #include "mystring.h"
61 #include "alias.h"
62 #include "show.h"
63 #ifndef SMALL
64 #include "myhistedit.h"
65 #endif
66
67 /*
68 * Shell command parser.
69 */
70
71 /* values returned by readtoken */
72 #include "token.h"
73
/* Brace characters, spelled as macros. */
#define	OPENBRACE	'{'
#define	CLOSEBRACE	'}'
76
77
/*
 * One pending here document (<<).  Entries are chained onto heredoclist
 * by parsefname() and consumed, in order, by readheredocs().
 */
struct heredoc {
	struct heredoc *next;	/* following entry in the pending list */
	union node *here;	/* redirection node the document belongs to */
	char *eofmark;		/* word that terminates the document body */
	int striptabs;		/* nonzero: leading tabs are stripped */
	int startline;		/* line number at which the << was seen */
};
85
86
87
88 static int noalias = 0; /* when set, don't handle aliases */
89 struct heredoc *heredoclist; /* list of here documents to read */
90 int parsebackquote; /* nonzero if we are inside backquotes */
91 int doprompt; /* if set, prompt the user */
92 int needprompt; /* true if interactive and at start of line */
93 int lasttoken; /* last token read */
94 MKINIT int tokpushback; /* last token pushed back */
95 char *wordtext; /* text of last word returned by readtoken */
96 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
97 struct nodelist *backquotelist;
98 union node *redirnode;
99 struct heredoc *heredoc;
100 int quoteflag; /* set if (part of) last token was quoted */
101 int startlinno; /* line # where last token started */
102 int funclinno; /* line # where the current function started */
103
104
105 STATIC union node *list(int, int);
106 STATIC union node *andor(void);
107 STATIC union node *pipeline(void);
108 STATIC union node *command(void);
109 STATIC union node *simplecmd(union node **, union node *);
110 STATIC union node *makename(void);
111 STATIC void parsefname(void);
112 STATIC void slurp_heredoc(char *const, const int, const int);
113 STATIC void readheredocs(void);
114 STATIC int peektoken(void);
115 STATIC int readtoken(void);
116 STATIC int xxreadtoken(void);
117 STATIC int readtoken1(int, char const *, int);
118 STATIC int noexpand(char *);
119 STATIC void synexpect(int, const char *) __dead;
120 STATIC void synerror(const char *) __dead;
121 STATIC void setprompt(int);
122 STATIC int pgetc_linecont(void);
123
124
/* Syntax error text used when input ends inside a here document. */
static const char EOFhere[] =
    "EOF reading here (<<) document";
126
127
128 /*
129 * Read and parse a command. Returns NEOF on end of file. (NULL is a
130 * valid parse tree indicating a blank line.)
131 */
132
133 union node *
134 parsecmd(int interact)
135 {
136 int t;
137 union node *n;
138
139 tokpushback = 0;
140 doprompt = interact;
141 if (doprompt)
142 setprompt(1);
143 else
144 setprompt(0);
145 needprompt = 0;
146 t = readtoken();
147 if (t == TEOF)
148 return NEOF;
149 if (t == TNL)
150 return NULL;
151 tokpushback++;
152 n = list(1, 0);
153 if (heredoclist)
154 error("%d: Here document (<<%s) expected but not present",
155 heredoclist->startline, heredoclist->eofmark);
156 return n;
157 }
158
159
160 STATIC union node *
161 list(int nlflag, int erflag)
162 {
163 union node *n1, *n2, *n3;
164 int tok;
165
166 CTRACE(DBG_PARSE, ("list(%d,%d): entered @%d\n",nlflag,erflag,plinno));
167
168 checkkwd = 2;
169 if (nlflag == 0 && tokendlist[peektoken()])
170 return NULL;
171 n1 = NULL;
172 for (;;) {
173 n2 = andor();
174 tok = readtoken();
175 if (tok == TBACKGND) {
176 if (n2->type == NCMD || n2->type == NPIPE) {
177 n2->ncmd.backgnd = 1;
178 } else if (n2->type == NREDIR) {
179 n2->type = NBACKGND;
180 } else {
181 n3 = stalloc(sizeof(struct nredir));
182 n3->type = NBACKGND;
183 n3->nredir.n = n2;
184 n3->nredir.redirect = NULL;
185 n2 = n3;
186 }
187 }
188 if (n1 == NULL) {
189 n1 = n2;
190 }
191 else {
192 n3 = stalloc(sizeof(struct nbinary));
193 n3->type = NSEMI;
194 n3->nbinary.ch1 = n1;
195 n3->nbinary.ch2 = n2;
196 n1 = n3;
197 }
198 switch (tok) {
199 case TBACKGND:
200 case TSEMI:
201 tok = readtoken();
202 /* FALLTHROUGH */
203 case TNL:
204 if (tok == TNL) {
205 readheredocs();
206 if (nlflag)
207 return n1;
208 } else {
209 tokpushback++;
210 }
211 checkkwd = 2;
212 if (tokendlist[peektoken()])
213 return n1;
214 break;
215 case TEOF:
216 pungetc(); /* push back EOF on input */
217 return n1;
218 default:
219 if (nlflag || erflag)
220 synexpect(-1, 0);
221 tokpushback++;
222 return n1;
223 }
224 }
225 }
226
227 STATIC union node *
228 andor(void)
229 {
230 union node *n1, *n2, *n3;
231 int t;
232
233 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
234
235 n1 = pipeline();
236 for (;;) {
237 if ((t = readtoken()) == TAND) {
238 t = NAND;
239 } else if (t == TOR) {
240 t = NOR;
241 } else {
242 tokpushback++;
243 return n1;
244 }
245 n2 = pipeline();
246 n3 = stalloc(sizeof(struct nbinary));
247 n3->type = t;
248 n3->nbinary.ch1 = n1;
249 n3->nbinary.ch2 = n2;
250 n1 = n3;
251 }
252 }
253
254 STATIC union node *
255 pipeline(void)
256 {
257 union node *n1, *n2, *pipenode;
258 struct nodelist *lp, *prev;
259 int negate;
260
261 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
262
263 negate = 0;
264 checkkwd = 2;
265 while (readtoken() == TNOT) {
266 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
267 #ifndef BOGUS_NOT_COMMAND
268 if (posix && negate)
269 synerror("2nd \"!\" unexpected");
270 #endif
271 negate++;
272 }
273 tokpushback++;
274 n1 = command();
275 if (readtoken() == TPIPE) {
276 pipenode = stalloc(sizeof(struct npipe));
277 pipenode->type = NPIPE;
278 pipenode->npipe.backgnd = 0;
279 lp = stalloc(sizeof(struct nodelist));
280 pipenode->npipe.cmdlist = lp;
281 lp->n = n1;
282 do {
283 prev = lp;
284 lp = stalloc(sizeof(struct nodelist));
285 lp->n = command();
286 prev->next = lp;
287 } while (readtoken() == TPIPE);
288 lp->next = NULL;
289 n1 = pipenode;
290 }
291 tokpushback++;
292 if (negate) {
293 CTRACE(DBG_PARSE, ("%snegate pipeline\n",
294 (negate&1) ? "" : "double "));
295 n2 = stalloc(sizeof(struct nnot));
296 n2->type = (negate & 1) ? NNOT : NDNOT;
297 n2->nnot.com = n1;
298 return n2;
299 } else
300 return n1;
301 }
302
303
304
305 STATIC union node *
306 command(void)
307 {
308 union node *n1, *n2;
309 union node *ap, **app;
310 union node *cp, **cpp;
311 union node *redir, **rpp;
312 int t;
313 #ifdef BOGUS_NOT_COMMAND
314 int negate = 0;
315 #endif
316
317 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
318
319 checkkwd = 2;
320 redir = NULL;
321 n1 = NULL;
322 rpp = &redir;
323
324 /* Check for redirection which may precede command */
325 while (readtoken() == TREDIR) {
326 *rpp = n2 = redirnode;
327 rpp = &n2->nfile.next;
328 parsefname();
329 }
330 tokpushback++;
331
332 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */
333 while (readtoken() == TNOT) {
334 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
335 negate++;
336 }
337 tokpushback++;
338 #endif
339
340 switch (readtoken()) {
341 case TIF:
342 n1 = stalloc(sizeof(struct nif));
343 n1->type = NIF;
344 n1->nif.test = list(0, 0);
345 if (readtoken() != TTHEN)
346 synexpect(TTHEN, 0);
347 n1->nif.ifpart = list(0, 0);
348 n2 = n1;
349 while (readtoken() == TELIF) {
350 n2->nif.elsepart = stalloc(sizeof(struct nif));
351 n2 = n2->nif.elsepart;
352 n2->type = NIF;
353 n2->nif.test = list(0, 0);
354 if (readtoken() != TTHEN)
355 synexpect(TTHEN, 0);
356 n2->nif.ifpart = list(0, 0);
357 }
358 if (lasttoken == TELSE)
359 n2->nif.elsepart = list(0, 0);
360 else {
361 n2->nif.elsepart = NULL;
362 tokpushback++;
363 }
364 if (readtoken() != TFI)
365 synexpect(TFI, 0);
366 checkkwd = 1;
367 break;
368 case TWHILE:
369 case TUNTIL: {
370 int got;
371
372 n1 = stalloc(sizeof(struct nbinary));
373 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
374 n1->nbinary.ch1 = list(0, 0);
375 if ((got=readtoken()) != TDO) {
376 VTRACE(DBG_PARSE, ("expecting DO got %s %s\n",
377 tokname[got], got == TWORD ? wordtext : ""));
378 synexpect(TDO, 0);
379 }
380 n1->nbinary.ch2 = list(0, 0);
381 if (readtoken() != TDONE)
382 synexpect(TDONE, 0);
383 checkkwd = 1;
384 break;
385 }
386 case TFOR:
387 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
388 synerror("Bad for loop variable");
389 n1 = stalloc(sizeof(struct nfor));
390 n1->type = NFOR;
391 n1->nfor.var = wordtext;
392 if (readtoken()==TWORD && !quoteflag && equal(wordtext,"in")) {
393 app = ≈
394 while (readtoken() == TWORD) {
395 n2 = stalloc(sizeof(struct narg));
396 n2->type = NARG;
397 n2->narg.text = wordtext;
398 n2->narg.backquote = backquotelist;
399 *app = n2;
400 app = &n2->narg.next;
401 }
402 *app = NULL;
403 n1->nfor.args = ap;
404 if (lasttoken != TNL && lasttoken != TSEMI)
405 synexpect(-1, 0);
406 } else {
407 static char argvars[5] = {
408 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
409 };
410
411 n2 = stalloc(sizeof(struct narg));
412 n2->type = NARG;
413 n2->narg.text = argvars;
414 n2->narg.backquote = NULL;
415 n2->narg.next = NULL;
416 n1->nfor.args = n2;
417 /*
418 * Newline or semicolon here is optional (but note
419 * that the original Bourne shell only allowed NL).
420 */
421 if (lasttoken != TNL && lasttoken != TSEMI)
422 tokpushback++;
423 }
424 checkkwd = 2;
425 if ((t = readtoken()) == TDO)
426 t = TDONE;
427 else if (t == TBEGIN)
428 t = TEND;
429 else
430 synexpect(-1, 0);
431 n1->nfor.body = list(0, 0);
432 if (readtoken() != t)
433 synexpect(t, 0);
434 checkkwd = 1;
435 break;
436 case TCASE:
437 n1 = stalloc(sizeof(struct ncase));
438 n1->type = NCASE;
439 if (readtoken() != TWORD)
440 synexpect(TWORD, 0);
441 n1->ncase.expr = n2 = stalloc(sizeof(struct narg));
442 n2->type = NARG;
443 n2->narg.text = wordtext;
444 n2->narg.backquote = backquotelist;
445 n2->narg.next = NULL;
446 while (readtoken() == TNL);
447 if (lasttoken != TWORD || ! equal(wordtext, "in"))
448 synexpect(-1, "in");
449 cpp = &n1->ncase.cases;
450 noalias = 1;
451 checkkwd = 2, readtoken();
452 /*
453 * Both ksh and bash accept 'case x in esac'
454 * so configure scripts started taking advantage of this.
455 * The page: http://pubs.opengroup.org/onlinepubs/\
456 * 009695399/utilities/xcu_chap02.html contradicts itself,
457 * as to if this is legal; the "Case Conditional Format"
458 * paragraph shows one case is required, but the "Grammar"
459 * section shows a grammar that explicitly allows the no
460 * case option.
461 */
462 while (lasttoken != TESAC) {
463 *cpp = cp = stalloc(sizeof(struct nclist));
464 if (lasttoken == TLP)
465 readtoken();
466 cp->type = NCLIST;
467 app = &cp->nclist.pattern;
468 for (;;) {
469 *app = ap = stalloc(sizeof(struct narg));
470 ap->type = NARG;
471 ap->narg.text = wordtext;
472 ap->narg.backquote = backquotelist;
473 if (checkkwd = 2, readtoken() != TPIPE)
474 break;
475 app = &ap->narg.next;
476 readtoken();
477 }
478 ap->narg.next = NULL;
479 noalias = 0;
480 if (lasttoken != TRP) {
481 synexpect(TRP, 0);
482 }
483 cp->nclist.body = list(0, 0);
484
485 checkkwd = 2;
486 if ((t = readtoken()) != TESAC) {
487 if (t != TENDCASE && t != TCASEFALL) {
488 noalias = 0;
489 synexpect(TENDCASE, 0);
490 } else {
491 if (t == TCASEFALL)
492 cp->type = NCLISTCONT;
493 noalias = 1;
494 checkkwd = 2;
495 readtoken();
496 }
497 }
498 cpp = &cp->nclist.next;
499 }
500 noalias = 0;
501 *cpp = NULL;
502 checkkwd = 1;
503 break;
504 case TLP:
505 n1 = stalloc(sizeof(struct nredir));
506 n1->type = NSUBSHELL;
507 n1->nredir.n = list(0, 0);
508 n1->nredir.redirect = NULL;
509 if (n1->nredir.n == NULL)
510 synexpect(-1, 0);
511 if (readtoken() != TRP)
512 synexpect(TRP, 0);
513 checkkwd = 1;
514 break;
515 case TBEGIN:
516 n1 = list(0, 0);
517 if (posix && n1 == NULL)
518 synexpect(-1, 0);
519 if (readtoken() != TEND)
520 synexpect(TEND, 0);
521 checkkwd = 1;
522 break;
523
524 case TSEMI:
525 case TAND:
526 case TOR:
527 case TPIPE:
528 case TNL:
529 case TEOF:
530 case TRP:
531 /*
532 * simple commands must have something in them,
533 * either a word (which at this point includes a=b)
534 * or a redirection. If we reached the end of the
535 * command (which one of these tokens indicates)
536 * when we are just starting, and have not had a
537 * redirect, then ...
538 *
539 * nb: it is still possible to end up with empty
540 * simple commands, if the "command" is a var
541 * expansion that produces nothing
542 * X= ; $X && $X
543 * --> &&
544 * I am not sure if this is intended to be legal or not.
545 */
546 if (!redir)
547 synexpect(-1, 0);
548 case TWORD:
549 tokpushback++;
550 n1 = simplecmd(rpp, redir);
551 goto checkneg;
552 case TENDCASE:
553 if (redir) {
554 tokpushback++;
555 goto checkneg;
556 }
557 /* FALLTHROUGH */
558 default:
559 synexpect(-1, 0);
560 /* NOTREACHED */
561 }
562
563 /* Now check for redirection which may follow command */
564 while (readtoken() == TREDIR) {
565 *rpp = n2 = redirnode;
566 rpp = &n2->nfile.next;
567 parsefname();
568 }
569 tokpushback++;
570 *rpp = NULL;
571 if (redir) {
572 if (n1->type != NSUBSHELL) {
573 n2 = stalloc(sizeof(struct nredir));
574 n2->type = NREDIR;
575 n2->nredir.n = n1;
576 n1 = n2;
577 }
578 n1->nredir.redirect = redir;
579 }
580
581 checkneg:
582 #ifdef BOGUS_NOT_COMMAND
583 if (negate) {
584 VTRACE(DBG_PARSE, ("bogus %snegate command\n",
585 (negate&1) ? "" : "double "));
586 n2 = stalloc(sizeof(struct nnot));
587 n2->type = (negate & 1) ? NNOT : NDNOT;
588 n2->nnot.com = n1;
589 return n2;
590 }
591 else
592 #endif
593 return n1;
594 }
595
596
597 STATIC union node *
598 simplecmd(union node **rpp, union node *redir)
599 {
600 union node *args, **app;
601 union node *n = NULL;
602 #ifdef BOGUS_NOT_COMMAND
603 union node *n2;
604 int negate = 0;
605 #endif
606
607 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
608 redir ? "" : "out", plinno));
609
610 /* If we don't have any redirections already, then we must reset */
611 /* rpp to be the address of the local redir variable. */
612 if (redir == 0)
613 rpp = &redir;
614
615 args = NULL;
616 app = &args;
617
618 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */
619 while (readtoken() == TNOT) {
620 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
621 negate++;
622 }
623 tokpushback++;
624 #endif
625
626 for (;;) {
627 if (readtoken() == TWORD) {
628 n = stalloc(sizeof(struct narg));
629 n->type = NARG;
630 n->narg.text = wordtext;
631 n->narg.backquote = backquotelist;
632 *app = n;
633 app = &n->narg.next;
634 } else if (lasttoken == TREDIR) {
635 *rpp = n = redirnode;
636 rpp = &n->nfile.next;
637 parsefname(); /* read name of redirection file */
638 } else if (lasttoken == TLP && app == &args->narg.next
639 && redir == 0) {
640 /* We have a function */
641 if (readtoken() != TRP)
642 synexpect(TRP, 0);
643 funclinno = plinno;
644 rmescapes(n->narg.text);
645 if (strchr(n->narg.text, '/'))
646 synerror("Bad function name");
647 n->type = NDEFUN;
648 n->narg.next = command();
649 funclinno = 0;
650 goto checkneg;
651 } else {
652 tokpushback++;
653 break;
654 }
655 }
656
657 if (args == NULL && redir == NULL)
658 synexpect(-1, 0);
659 *app = NULL;
660 *rpp = NULL;
661 n = stalloc(sizeof(struct ncmd));
662 n->type = NCMD;
663 n->ncmd.backgnd = 0;
664 n->ncmd.args = args;
665 n->ncmd.redirect = redir;
666
667 checkneg:
668 #ifdef BOGUS_NOT_COMMAND
669 if (negate) {
670 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
671 (negate&1) ? "" : "double "));
672 n2 = stalloc(sizeof(struct nnot));
673 n2->type = (negate & 1) ? NNOT : NDNOT;
674 n2->nnot.com = n;
675 return n2;
676 }
677 else
678 #endif
679 return n;
680 }
681
682 STATIC union node *
683 makename(void)
684 {
685 union node *n;
686
687 n = stalloc(sizeof(struct narg));
688 n->type = NARG;
689 n->narg.next = NULL;
690 n->narg.text = wordtext;
691 n->narg.backquote = backquotelist;
692 return n;
693 }
694
695 void
696 fixredir(union node *n, const char *text, int err)
697 {
698
699 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
700 if (!err)
701 n->ndup.vname = NULL;
702
703 if (is_number(text))
704 n->ndup.dupfd = number(text);
705 else if (text[0] == '-' && text[1] == '\0')
706 n->ndup.dupfd = -1;
707 else {
708
709 if (err)
710 synerror("Bad fd number");
711 else
712 n->ndup.vname = makename();
713 }
714 }
715
716
717 STATIC void
718 parsefname(void)
719 {
720 union node *n = redirnode;
721
722 if (readtoken() != TWORD)
723 synexpect(-1, 0);
724 if (n->type == NHERE) {
725 struct heredoc *here = heredoc;
726 struct heredoc *p;
727
728 if (quoteflag == 0)
729 n->type = NXHERE;
730 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
731 if (here->striptabs) {
732 while (*wordtext == '\t')
733 wordtext++;
734 }
735
736 /*
737 * this test is not really necessary, we are not
738 * required to expand wordtext, but there's no reason
739 * it cannot be $$ or something like that - that would
740 * not mean the pid, but literally two '$' characters.
741 * There is no need for limits on what the word can be.
742 * However, it needs to stay literal as entered, not
743 * have $ converted to CTLVAR or something, which as
744 * the parser is, at the minute, is impossible to prevent.
745 * So, leave it like this until the rest of the parser is fixed.
746 */
747 if (!noexpand(wordtext))
748 synerror("Illegal eof marker for << redirection");
749
750 rmescapes(wordtext);
751 here->eofmark = wordtext;
752 here->next = NULL;
753 if (heredoclist == NULL)
754 heredoclist = here;
755 else {
756 for (p = heredoclist ; p->next ; p = p->next)
757 continue;
758 p->next = here;
759 }
760 } else if (n->type == NTOFD || n->type == NFROMFD) {
761 fixredir(n, wordtext, 0);
762 } else {
763 n->nfile.fname = makename();
764 }
765 }
766
767 /*
768 * Check to see whether we are at the end of the here document. When this
769 * is called, c is set to the first character of the next input line. If
770 * we are at the end of the here document, this routine sets the c to PEOF.
771 * The new value of c is returned.
772 */
773
774 static int
775 checkend(int c, char * const eofmark, const int striptabs)
776 {
777
778 if (striptabs) {
779 while (c == '\t')
780 c = pgetc();
781 }
782 if (c == PEOF) {
783 if (*eofmark == '\0')
784 return (c);
785 synerror(EOFhere);
786 }
787 if (c == *eofmark) {
788 int c2;
789 char *q;
790
791 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
792 ;
793 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
794 c = PEOF;
795 if (c2 == '\n') {
796 plinno++;
797 needprompt = doprompt;
798 }
799 } else {
800 pungetc();
801 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
802 }
803 } else if (c == '\n' && *eofmark == '\0') {
804 c = PEOF;
805 plinno++;
806 needprompt = doprompt;
807 }
808 return (c);
809 }
810
811
812 /*
813 * Input any here documents.
814 */
815
816 STATIC void
817 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
818 {
819 int c;
820 char *out;
821
822 c = pgetc();
823
824 /*
825 * If we hit EOF on the input, and the eofmark is a null string ('')
826 * we consider this empty line to be the eofmark, and exit without err.
827 */
828 if (c == PEOF && *eofmark != '\0')
829 synerror(EOFhere);
830
831 STARTSTACKSTR(out);
832
833 while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
834 do {
835 if (sq) {
836 /*
837 * in single quoted mode (eofmark quoted)
838 * all we look for is \n so we can check
839 * for the epfmark - everything saved literally.
840 */
841 STPUTC(c, out);
842 if (c == '\n') {
843 plinno++;
844 break;
845 }
846 continue;
847 }
848 /*
849 * In double quoted (non-quoted eofmark)
850 * we must handle \ followed by \n here
851 * otherwise we can mismatch the end mark.
852 * All other uses of \ will be handled later
853 * when the here doc is expanded.
854 *
855 * This also makes sure \\ followed by \n does
856 * not suppress the newline (the \ quotes itself)
857 */
858 if (c == '\\') { /* A backslash */
859 c = pgetc(); /* followed by */
860 if (c == '\n') { /* a newline? */
861 plinno++;
862 continue; /* :drop both */
863 }
864 STPUTC('\\', out); /* else keep \ */
865 }
866 STPUTC(c, out); /* keep the char */
867 if (c == '\n') { /* at end of line */
868 plinno++;
869 break; /* look for eofmark */
870 }
871 } while ((c = pgetc()) != PEOF);
872
873 /*
874 * If we have read a line, and reached EOF, without
875 * finding the eofmark, whether the EOF comes before
876 * or immediately after the \n, that is an error.
877 */
878 if (c == PEOF || (c = pgetc()) == PEOF)
879 synerror(EOFhere);
880 }
881 STPUTC('\0', out);
882
883 c = out - stackblock();
884 out = stackblock();
885 grabstackblock(c);
886 wordtext = out;
887
888 VTRACE(DBG_PARSE,
889 ("Slurped a heredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
890 eofmark, striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
891 wordtext, (c > 16 ? "..." : ""), plinno));
892 }
893
894 STATIC void
895 readheredocs(void)
896 {
897 struct heredoc *here;
898 union node *n;
899
900 while (heredoclist) {
901 here = heredoclist;
902 heredoclist = here->next;
903 if (needprompt) {
904 setprompt(2);
905 needprompt = 0;
906 }
907
908 slurp_heredoc(here->eofmark, here->striptabs,
909 here->here->nhere.type == NHERE);
910
911 n = stalloc(sizeof(struct narg));
912 n->narg.type = NARG;
913 n->narg.next = NULL;
914 n->narg.text = wordtext;
915 n->narg.backquote = backquotelist;
916 here->here->nhere.doc = n;
917
918 if (here->here->nhere.type == NHERE)
919 continue;
920
921 /*
922 * Now "parse" here docs that have unquoted eofmarkers.
923 */
924 setinputstring(wordtext, 1);
925 readtoken1(pgetc(), DQSYNTAX, 1);
926 n->narg.text = wordtext;
927 n->narg.backquote = backquotelist;
928 popfile();
929 }
930 }
931
932 STATIC int
933 peektoken(void)
934 {
935 int t;
936
937 t = readtoken();
938 tokpushback++;
939 return (t);
940 }
941
942 STATIC int
943 readtoken(void)
944 {
945 int t;
946 int savecheckkwd = checkkwd;
947 #ifdef DEBUG
948 int alreadyseen = tokpushback;
949 #endif
950 struct alias *ap;
951
952 top:
953 t = xxreadtoken();
954
955 if (checkkwd) {
956 /*
957 * eat newlines
958 */
959 if (checkkwd == 2) {
960 checkkwd = 0;
961 while (t == TNL) {
962 readheredocs();
963 t = xxreadtoken();
964 }
965 } else
966 checkkwd = 0;
967 /*
968 * check for keywords and aliases
969 */
970 if (t == TWORD && !quoteflag) {
971 const char *const *pp;
972
973 for (pp = parsekwd; *pp; pp++) {
974 if (**pp == *wordtext && equal(*pp, wordtext)) {
975 lasttoken = t = pp -
976 parsekwd + KWDOFFSET;
977 VTRACE(DBG_PARSE,
978 ("keyword %s recognized\n",
979 tokname[t]));
980 goto out;
981 }
982 }
983 if (!noalias &&
984 (ap = lookupalias(wordtext, 1)) != NULL) {
985 pushstring(ap->val, strlen(ap->val), ap);
986 checkkwd = savecheckkwd;
987 goto top;
988 }
989 }
990 out:
991 checkkwd = (t == TNOT) ? savecheckkwd : 0;
992 }
993 VTRACE(DBG_PARSE, ("%stoken %s %s\n", alreadyseen ? "reread " : "",
994 tokname[t], t == TWORD ? wordtext : ""));
995 return (t);
996 }
997
998
999 /*
1000 * Read the next input token.
1001 * If the token is a word, we set backquotelist to the list of cmds in
1002 * backquotes. We set quoteflag to true if any part of the word was
1003 * quoted.
1004 * If the token is TREDIR, then we set redirnode to a structure containing
1005 * the redirection.
1006 * In all cases, the variable startlinno is set to the number of the line
1007 * on which the token starts.
1008 *
1009 * [Change comment: here documents and internal procedures]
1010 * [Readtoken shouldn't have any arguments. Perhaps we should make the
1011 * word parsing code into a separate routine. In this case, readtoken
1012 * doesn't need to have any internal procedures, but parseword does.
1013 * We could also make parseoperator in essence the main routine, and
1014 * have parseword (readtoken1?) handle both words and redirection.]
1015 */
1016
1017 #define RETURN(token) return lasttoken = token
1018
1019 STATIC int
1020 xxreadtoken(void)
1021 {
1022 int c;
1023
1024 if (tokpushback) {
1025 tokpushback = 0;
1026 return lasttoken;
1027 }
1028 if (needprompt) {
1029 setprompt(2);
1030 needprompt = 0;
1031 }
1032 startlinno = plinno;
1033 for (;;) { /* until token or start of word found */
1034 c = pgetc_macro();
1035 switch (c) {
1036 case ' ': case '\t':
1037 continue;
1038 case '#':
1039 while ((c = pgetc()) != '\n' && c != PEOF)
1040 continue;
1041 pungetc();
1042 continue;
1043
1044 case '\n':
1045 plinno++;
1046 needprompt = doprompt;
1047 RETURN(TNL);
1048 case PEOF:
1049 RETURN(TEOF);
1050
1051 case '&':
1052 if (pgetc_linecont() == '&')
1053 RETURN(TAND);
1054 pungetc();
1055 RETURN(TBACKGND);
1056 case '|':
1057 if (pgetc_linecont() == '|')
1058 RETURN(TOR);
1059 pungetc();
1060 RETURN(TPIPE);
1061 case ';':
1062 switch (pgetc_linecont()) {
1063 case ';':
1064 RETURN(TENDCASE);
1065 case '&':
1066 RETURN(TCASEFALL);
1067 default:
1068 pungetc();
1069 RETURN(TSEMI);
1070 }
1071 case '(':
1072 RETURN(TLP);
1073 case ')':
1074 RETURN(TRP);
1075
1076 case '\\':
1077 switch (pgetc()) {
1078 case '\n':
1079 startlinno = ++plinno;
1080 if (doprompt)
1081 setprompt(2);
1082 else
1083 setprompt(0);
1084 continue;
1085 case PEOF:
1086 RETURN(TEOF);
1087 default:
1088 pungetc();
1089 break;
1090 }
1091 /* FALLTHROUGH */
1092 default:
1093 return readtoken1(c, BASESYNTAX, 0);
1094 }
1095 }
1096 #undef RETURN
1097 }
1098
1099
1100
1101 /*
1102 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1103 * is not NULL, read a here document. In the latter case, eofmark is the
1104 * word which marks the end of the document and striptabs is true if
1105 * leading tabs should be stripped from the document. The argument firstc
1106 * is the first character of the input token or document.
1107 *
1108 * Because C does not have internal subroutines, I have simulated them
1109 * using goto's to implement the subroutine linkage. The following macros
1110 * will run code that appears at the end of readtoken1.
1111 */
1112
/*
 * We used to remember only the current syntax, the variable nesting
 * level, the double quote state for each var nesting level, the arith
 * nesting level (unrelated to var nesting), and one previous syntax
 * when in arith syntax.  This worked for simple cases, but can't handle
 * arith inside var expansion inside arith inside var with some quoted
 * and some not.
 *
 * Inspired by FreeBSD's implementation (it was the obvious way), but
 * implemented differently, we now have a stack that keeps track of
 * what we are doing now, and what we were doing previously.
 * Every time something changes that will eventually end, and should
 * then revert to the previous state, we push this stack, and pop it
 * again later.  That means: every ${} with an operator (to parse the
 * word or pattern that follows), $(( )), $( ) and "...".
 * (${x} and $x are too simple to need it.)   Always.   Really, always!
 *
 * The stack is implemented as one fixed base block (on the C stack)
 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
 * enough for the vast majority of cases.  For torture tests, we
 * malloc more blocks as needed.  All accesses are made through the
 * inline functions below.
 */
1135
/*
 * One level of tokenizer state.
 *
 * varnest and arinest will typically be 0 or 1 (varnest can increment
 * in usages like ${x=${y}} but probably does not really need to).
 *
 * parenlevel allows balancing parens inside a $(( )); it is reset at
 * each new nesting level, so
 *	$(( ( x + 3 ${unset-)} ))
 * does not work.
 *
 * quoted has to answer two questions: are we inside "..." (even if that
 * was inherited from some previous nesting level), and was there an
 * opening '"' at this level (so that the next one will be closing)?
 * "..." can span nesting levels, but cannot be opened in one and closed
 * in a different one.  So quoted has two fields: the bottom 4 (really
 * 2) bits are 0, 1, or 2 for un, single, and double quoted (single
 * quoted is really so special that this setting is not very important),
 * and 0x10 indicates that an opening quote has been seen.  The bottom
 * 4 bits are inherited, the 0x10 bit is not.
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax table in use */
	unsigned short ts_parenlevel;	/* counters */
	unsigned short ts_varnest;	/* 64000 levels should be enough! */
	unsigned short ts_arinest;
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
};
1160
/* Values and masks for the ts_quoted field of struct tokenstate. */
#define	NQ	0x00	/* not quoted */
#define	SQ	0x01	/* inside single quotes */
#define	DQ	0x02	/* inside double quotes (or equivalent) */
#define	QF	0x0F	/* mask selecting the inherited quote value */
#define	QS	0x10	/* quoting was opened at this stack level */
1166
1167 #define LEVELS_PER_BLOCK 8
1168 #define VSS struct statestack
1169
1170 struct statestack {
1171 VSS *prev; /* previous block in list */
1172 int cur; /* which of our tokenstates is current */
1173 struct tokenstate tokenstate[LEVELS_PER_BLOCK];
1174 };
1175
1176 static inline struct tokenstate *
1177 currentstate(VSS *stack)
1178 {
1179 return &stack->tokenstate[stack->cur];
1180 }
1181
1182 static inline struct tokenstate *
1183 prevstate(VSS *stack)
1184 {
1185 if (stack->cur != 0)
1186 return &stack->tokenstate[stack->cur - 1];
1187 if (stack->prev == NULL) /* cannot drop below base */
1188 return &stack->tokenstate[0];
1189 return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
1190 }
1191
1192 static inline VSS *
1193 bump_state_level(VSS *stack)
1194 {
1195 struct tokenstate *os, *ts;
1196
1197 os = currentstate(stack);
1198
1199 if (++stack->cur >= LEVELS_PER_BLOCK) {
1200 VSS *ss;
1201
1202 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1203 ss->cur = 0;
1204 ss->prev = stack;
1205 stack = ss;
1206 }
1207
1208 ts = currentstate(stack);
1209
1210 ts->ts_parenlevel = 0; /* parens inside never match outside */
1211
1212 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1213 ts->ts_varnest = os->ts_varnest;
1214 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1215 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1216
1217 return stack;
1218 }
1219
1220 static inline VSS *
1221 drop_state_level(VSS *stack)
1222 {
1223 if (stack->cur == 0) {
1224 VSS *ss;
1225
1226 ss = stack;
1227 stack = ss->prev;
1228 if (stack == NULL)
1229 return ss;
1230 ckfree(ss);
1231 }
1232 --stack->cur;
1233 return stack;
1234 }
1235
1236 static inline void
1237 cleanup_state_stack(VSS *stack)
1238 {
1239 while (stack->prev != NULL) {
1240 stack->cur = 0;
1241 stack = drop_state_level(stack);
1242 }
1243 }
1244
/*
 * Jump to the parsesub / parsearith sections at the end of readtoken1()
 * and supply the label those sections jump back to.  Because each macro
 * defines a label, each may be expanded only once per function.
 */
#define PARSESUB() {goto parsesub; parsesub_return:;}
#define PARSEARITH() {goto parsearith; parsearith_return:;}

/*
 * The following macros all assume the existence of a local var "stack"
 * which contains a pointer to the current struct statestack
 */

/*
 * These are macros rather than inline funcs to avoid code churn as much
 * as possible - they replace macros of the same name used previously.
 *
 * ISDBLQUOTE() tests only the QS bit of the current level.
 * SETDBLQUOTE() marks the current level as double quoted, started here.
 * CLRDBLQUOTE() replaces the current level's quoting with the low (QF)
 * bits of the level below, or with 0 when there is no level below.
 */
#define ISDBLQUOTE() (currentstate(stack)->ts_quoted & QS)
#define SETDBLQUOTE() (currentstate(stack)->ts_quoted = QS | DQ)
#define CLRDBLQUOTE() (currentstate(stack)->ts_quoted = \
	stack->cur != 0 || stack->prev ? \
	prevstate(stack)->ts_quoted & QF : 0)

/*
 * This set are just to avoid excess typing and line lengths...
 * The ones that "look like" var names must be implemented to be lvalues
 */
#define syntax (currentstate(stack)->ts_syntax)
#define parenlevel (currentstate(stack)->ts_parenlevel)
#define varnest (currentstate(stack)->ts_varnest)
#define arinest (currentstate(stack)->ts_arinest)
#define quoted (currentstate(stack)->ts_quoted)
/* Push / pop one level, updating the local "stack" to the new top block */
#define TS_PUSH() (stack = bump_state_level(stack))
#define TS_POP() (stack = drop_state_level(stack))
1274
1275 /*
1276 * Called to parse command substitutions. oldstyle is true if the command
1277 * is enclosed inside `` (otherwise it was enclosed in "$( )")
1278 *
1279 * Internally nlpp is a pointer to the head of the linked
1280 * list of commands (passed by reference), and savelen is the number of
1281 * characters on the top of the stack which must be preserved.
1282 */
1283 static char *
1284 parsebackq(VSS *const stack, char * const in,
1285 struct nodelist **const pbqlist, const int oldstyle)
1286 {
1287 struct nodelist **nlpp;
1288 const int savepbq = parsebackquote;
1289 union node *n;
1290 char *out;
1291 char *str = NULL;
1292 char *volatile sstr = str;
1293 struct jmploc jmploc;
1294 struct jmploc *const savehandler = handler;
1295 const int savelen = in - stackblock();
1296 int saveprompt;
1297
1298 if (setjmp(jmploc.loc)) {
1299 if (sstr)
1300 ckfree(__UNVOLATILE(sstr));
1301 cleanup_state_stack(stack);
1302 parsebackquote = 0;
1303 handler = savehandler;
1304 longjmp(handler->loc, 1);
1305 }
1306 INTOFF;
1307 sstr = str = NULL;
1308 if (savelen > 0) {
1309 sstr = str = ckmalloc(savelen);
1310 memcpy(str, stackblock(), savelen);
1311 }
1312 handler = &jmploc;
1313 INTON;
1314 if (oldstyle) {
1315 /* We must read until the closing backquote, giving special
1316 treatment to some slashes, and then push the string and
1317 reread it as input, interpreting it normally. */
1318 int pc;
1319 int psavelen;
1320 char *pstr;
1321
1322 /*
1323 * Because the entire `...` is read here, we don't
1324 * need to bother the state stack. That will be used
1325 * (as appropriate) when the processed string is re-read.
1326 */
1327 STARTSTACKSTR(out);
1328 for (;;) {
1329 if (needprompt) {
1330 setprompt(2);
1331 needprompt = 0;
1332 }
1333 switch (pc = pgetc_linecont()) {
1334 case '`':
1335 goto done;
1336
1337 case '\\':
1338 if ((pc = pgetc()) == '\n') {
1339 plinno++;
1340 if (doprompt)
1341 setprompt(2);
1342 else
1343 setprompt(0);
1344 /*
1345 * If eating a newline, avoid putting
1346 * the newline into the new character
1347 * stream (via the STPUTC after the
1348 * switch).
1349 */
1350 continue;
1351 }
1352 if (pc != '\\' && pc != '`' && pc != '$'
1353 && (!ISDBLQUOTE() || pc != '"'))
1354 STPUTC('\\', out);
1355 break;
1356
1357 case '\n':
1358 plinno++;
1359 needprompt = doprompt;
1360 break;
1361
1362 case PEOF:
1363 startlinno = plinno;
1364 synerror("EOF in backquote substitution");
1365 break;
1366
1367 default:
1368 break;
1369 }
1370 STPUTC(pc, out);
1371 }
1372 done:
1373 STPUTC('\0', out);
1374 psavelen = out - stackblock();
1375 if (psavelen > 0) {
1376 pstr = grabstackstr(out);
1377 setinputstring(pstr, 1);
1378 }
1379 }
1380 nlpp = pbqlist;
1381 while (*nlpp)
1382 nlpp = &(*nlpp)->next;
1383 *nlpp = stalloc(sizeof(struct nodelist));
1384 (*nlpp)->next = NULL;
1385 parsebackquote = oldstyle;
1386
1387 if (oldstyle) {
1388 saveprompt = doprompt;
1389 doprompt = 0;
1390 } else
1391 saveprompt = 0;
1392
1393 n = list(0, oldstyle);
1394
1395 if (oldstyle)
1396 doprompt = saveprompt;
1397 else {
1398 if (readtoken() != TRP) {
1399 cleanup_state_stack(stack);
1400 synexpect(TRP, 0);
1401 }
1402 }
1403
1404 (*nlpp)->n = n;
1405 if (oldstyle) {
1406 /*
1407 * Start reading from old file again, ignoring any pushed back
1408 * tokens left from the backquote parsing
1409 */
1410 popfile();
1411 tokpushback = 0;
1412 }
1413
1414 while (stackblocksize() <= savelen)
1415 growstackblock();
1416 STARTSTACKSTR(out);
1417 if (str) {
1418 memcpy(out, str, savelen);
1419 STADJUST(savelen, out);
1420 INTOFF;
1421 ckfree(str);
1422 sstr = str = NULL;
1423 INTON;
1424 }
1425 parsebackquote = savepbq;
1426 handler = savehandler;
1427 if (arinest || ISDBLQUOTE())
1428 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1429 else
1430 USTPUTC(CTLBACKQ, out);
1431
1432 return out;
1433 }
1434
1435 /*
1436 * Parse a redirection operator. The parameter "out" points to a string
1437 * specifying the fd to be redirected. It is guaranteed to be either ""
1438 * or a numeric string (for now anyway). The parameter "c" contains the
1439 * first character of the redirection operator.
1440 *
1441 * Note the string "out" is on the stack, which we are about to clobber,
1442 * so process it first...
1443 */
1444
1445 static void
1446 parseredir(const char *out, int c)
1447 {
1448 union node *np;
1449 int fd;
1450
1451 fd = (*out == '\0') ? -1 : atoi(out);
1452
1453 np = stalloc(sizeof(struct nfile));
1454 if (c == '>') {
1455 if (fd < 0)
1456 fd = 1;
1457 c = pgetc_linecont();
1458 if (c == '>')
1459 np->type = NAPPEND;
1460 else if (c == '|')
1461 np->type = NCLOBBER;
1462 else if (c == '&')
1463 np->type = NTOFD;
1464 else {
1465 np->type = NTO;
1466 pungetc();
1467 }
1468 } else { /* c == '<' */
1469 if (fd < 0)
1470 fd = 0;
1471 switch (c = pgetc_linecont()) {
1472 case '<':
1473 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1474 np = stalloc(sizeof(struct nhere));
1475 np->nfile.fd = 0;
1476 }
1477 np->type = NHERE;
1478 heredoc = stalloc(sizeof(struct heredoc));
1479 heredoc->here = np;
1480 heredoc->startline = plinno;
1481 if ((c = pgetc_linecont()) == '-') {
1482 heredoc->striptabs = 1;
1483 } else {
1484 heredoc->striptabs = 0;
1485 pungetc();
1486 }
1487 break;
1488
1489 case '&':
1490 np->type = NFROMFD;
1491 break;
1492
1493 case '>':
1494 np->type = NFROMTO;
1495 break;
1496
1497 default:
1498 np->type = NFROM;
1499 pungetc();
1500 break;
1501 }
1502 }
1503 np->nfile.fd = fd;
1504
1505 redirnode = np; /* this is the "value" of TRENODE */
1506 }
1507
1508
1509 /*
1510 * The lowest level basic tokenizer.
1511 *
1512 * The next input byte (character) is in firstc, syn says which
1513 * syntax tables we are to use (basic, single or double quoted, or arith)
1514 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1515 * quote character itself is not special (used parsing here docs and similar)
1516 *
1517 * The result is the type of the next token (its value, when there is one,
1518 * is saved in the relevant global var - must fix that someday!) which is
1519 * also saved for re-reading ("lasttoken").
1520 *
1521 * Overall, this routine does far more parsing than it is supposed to.
1522 * That will also need fixing, someday...
1523 */
1524 STATIC int
1525 readtoken1(int firstc, char const *syn, int magicq)
1526 {
1527 int c;
1528 char * out;
1529 int len;
1530 struct nodelist *bqlist;
1531 int quotef;
1532 VSS static_stack;
1533 VSS *stack = &static_stack;
1534
1535 stack->prev = NULL;
1536 stack->cur = 0;
1537
1538 syntax = syn;
1539
1540 startlinno = plinno;
1541 varnest = 0;
1542 quoted = 0;
1543 if (syntax == DQSYNTAX)
1544 SETDBLQUOTE();
1545 quotef = 0;
1546 bqlist = NULL;
1547 arinest = 0;
1548 parenlevel = 0;
1549
1550 STARTSTACKSTR(out);
1551
1552 for (c = firstc ;; c = pgetc_macro()) { /* until of token */
1553 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
1554 switch (syntax[c]) {
1555 case CNL: /* '\n' */
1556 if (syntax == BASESYNTAX)
1557 break; /* exit loop */
1558 USTPUTC(c, out);
1559 plinno++;
1560 if (doprompt)
1561 setprompt(2);
1562 else
1563 setprompt(0);
1564 continue;
1565
1566 case CWORD:
1567 USTPUTC(c, out);
1568 continue;
1569 case CCTL:
1570 if (!magicq || ISDBLQUOTE())
1571 USTPUTC(CTLESC, out);
1572 USTPUTC(c, out);
1573 continue;
1574 case CBACK: /* backslash */
1575 c = pgetc();
1576 if (c == PEOF) {
1577 USTPUTC('\\', out);
1578 pungetc();
1579 continue;
1580 }
1581 if (c == '\n') {
1582 plinno++;
1583 if (doprompt)
1584 setprompt(2);
1585 else
1586 setprompt(0);
1587 continue;
1588 }
1589 quotef = 1; /* current token is quoted */
1590 if (ISDBLQUOTE() && c != '\\' && c != '`' &&
1591 c != '$' && (c != '"' || magicq))
1592 USTPUTC('\\', out);
1593 if (SQSYNTAX[c] == CCTL)
1594 USTPUTC(CTLESC, out);
1595 else if (!magicq) {
1596 USTPUTC(CTLQUOTEMARK, out);
1597 USTPUTC(c, out);
1598 if (varnest != 0)
1599 USTPUTC(CTLQUOTEEND, out);
1600 continue;
1601 }
1602 USTPUTC(c, out);
1603 continue;
1604 case CSQUOTE:
1605 if (syntax != SQSYNTAX) {
1606 if (!magicq)
1607 USTPUTC(CTLQUOTEMARK, out);
1608 quotef = 1;
1609 TS_PUSH();
1610 syntax = SQSYNTAX;
1611 quoted = SQ;
1612 continue;
1613 }
1614 if (magicq && arinest == 0 && varnest == 0) {
1615 /* Ignore inside quoted here document */
1616 USTPUTC(c, out);
1617 continue;
1618 }
1619 /* End of single quotes... */
1620 TS_POP();
1621 if (syntax == BASESYNTAX && varnest != 0)
1622 USTPUTC(CTLQUOTEEND, out);
1623 continue;
1624 case CDQUOTE:
1625 if (magicq && arinest == 0 && varnest == 0) {
1626 /* Ignore inside here document */
1627 USTPUTC(c, out);
1628 continue;
1629 }
1630 quotef = 1;
1631 if (arinest) {
1632 if (ISDBLQUOTE()) {
1633 TS_POP();
1634 } else {
1635 TS_PUSH();
1636 syntax = DQSYNTAX;
1637 SETDBLQUOTE();
1638 USTPUTC(CTLQUOTEMARK, out);
1639 }
1640 continue;
1641 }
1642 if (magicq)
1643 continue;
1644 if (ISDBLQUOTE()) {
1645 TS_POP();
1646 if (varnest != 0)
1647 USTPUTC(CTLQUOTEEND, out);
1648 } else {
1649 TS_PUSH();
1650 syntax = DQSYNTAX;
1651 SETDBLQUOTE();
1652 USTPUTC(CTLQUOTEMARK, out);
1653 }
1654 continue;
1655 case CVAR: /* '$' */
1656 PARSESUB(); /* parse substitution */
1657 continue;
1658 case CENDVAR: /* CLOSEBRACE */
1659 if (varnest > 0 && !ISDBLQUOTE()) {
1660 TS_POP();
1661 USTPUTC(CTLENDVAR, out);
1662 } else {
1663 USTPUTC(c, out);
1664 }
1665 continue;
1666 case CLP: /* '(' in arithmetic */
1667 parenlevel++;
1668 USTPUTC(c, out);
1669 continue;;
1670 case CRP: /* ')' in arithmetic */
1671 if (parenlevel > 0) {
1672 USTPUTC(c, out);
1673 --parenlevel;
1674 } else {
1675 if (pgetc_linecont() == ')') {
1676 if (--arinest == 0) {
1677 TS_POP();
1678 USTPUTC(CTLENDARI, out);
1679 } else
1680 USTPUTC(')', out);
1681 } else {
1682 /*
1683 * unbalanced parens
1684 * (don't 2nd guess - no error)
1685 */
1686 pungetc();
1687 USTPUTC(')', out);
1688 }
1689 }
1690 continue;
1691 case CBQUOTE: /* '`' */
1692 out = parsebackq(stack, out, &bqlist, 1);
1693 continue;
1694 case CEOF: /* --> c == PEOF */
1695 break; /* will exit loop */
1696 default:
1697 if (varnest == 0 && !ISDBLQUOTE())
1698 break; /* exit loop */
1699 USTPUTC(c, out);
1700 continue;
1701 }
1702 break; /* break from switch -> break from for loop too */
1703 }
1704
1705 if (syntax == ARISYNTAX) {
1706 cleanup_state_stack(stack);
1707 synerror("Missing '))'");
1708 }
1709 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
1710 cleanup_state_stack(stack);
1711 synerror("Unterminated quoted string");
1712 }
1713 if (varnest != 0) {
1714 cleanup_state_stack(stack);
1715 startlinno = plinno;
1716 /* { */
1717 synerror("Missing '}'");
1718 }
1719
1720 USTPUTC('\0', out);
1721 len = out - stackblock();
1722 out = stackblock();
1723
1724 if (!magicq) {
1725 if ((c == '<' || c == '>')
1726 && quotef == 0 && (*out == '\0' || is_number(out))) {
1727 parseredir(out, c);
1728 cleanup_state_stack(stack);
1729 return lasttoken = TREDIR;
1730 } else {
1731 pungetc();
1732 }
1733 }
1734
1735 quoteflag = quotef;
1736 backquotelist = bqlist;
1737 grabstackblock(len);
1738 wordtext = out;
1739 cleanup_state_stack(stack);
1740 return lasttoken = TWORD;
1741 /* end of readtoken routine */
1742
1743
1744 /*
1745 * Parse a substitution. At this point, we have read the dollar sign
1746 * and nothing else.
1747 */
1748
1749 parsesub: {
1750 char buf[10];
1751 int subtype;
1752 int typeloc;
1753 int flags;
1754 char *p;
1755 static const char types[] = "}-+?=";
1756 int i;
1757 int linno;
1758
1759 c = pgetc_linecont();
1760 if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1761 USTPUTC('$', out);
1762 pungetc();
1763 } else if (c == '(') { /* $(command) or $((arith)) */
1764 if (pgetc_linecont() == '(') {
1765 PARSEARITH();
1766 } else {
1767 pungetc();
1768 out = parsebackq(stack, out, &bqlist, 0);
1769 }
1770 } else {
1771 USTPUTC(CTLVAR, out);
1772 typeloc = out - stackblock();
1773 USTPUTC(VSNORMAL, out);
1774 subtype = VSNORMAL;
1775 flags = 0;
1776 if (c == OPENBRACE) {
1777 c = pgetc_linecont();
1778 if (c == '#') {
1779 if ((c = pgetc_linecont()) == CLOSEBRACE)
1780 c = '#';
1781 else if (is_name(c) || isdigit(c))
1782 subtype = VSLENGTH;
1783 else if (is_special(c)) {
1784 /*
1785 * ${#} is $# - the number of sh params
1786 * ${##} is the length of ${#}
1787 * ${###} is ${#} with as much nothing
1788 * as possible removed from start
1789 * ${##1} is ${#} with leading 1 gone
1790 * ${##\#} is ${#} with leading # gone
1791 *
1792 * this stuff is UGLY!
1793 */
1794 if (pgetc_linecont() == CLOSEBRACE) {
1795 pungetc();
1796 subtype = VSLENGTH;
1797 } else {
1798 static char cbuf[2];
1799
1800 pungetc(); /* would like 2 */
1801 cbuf[0] = c; /* so ... */
1802 cbuf[1] = '\0';
1803 pushstring(cbuf, 1, NULL);
1804 c = '#'; /* ${#:...} */
1805 subtype = 0; /* .. or similar */
1806 }
1807 } else {
1808 pungetc();
1809 c = '#';
1810 subtype = 0;
1811 }
1812 }
1813 else
1814 subtype = 0;
1815 }
1816 if (is_name(c)) {
1817 p = out;
1818 do {
1819 STPUTC(c, out);
1820 c = pgetc_linecont();
1821 } while (is_in_name(c));
1822 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
1823 /* Replace the variable name with the
1824 * current line number. */
1825 linno = plinno;
1826 if (funclinno != 0)
1827 linno -= funclinno - 1;
1828 snprintf(buf, sizeof(buf), "%d", linno);
1829 STADJUST(-6, out);
1830 for (i = 0; buf[i] != '\0'; i++)
1831 STPUTC(buf[i], out);
1832 flags |= VSLINENO;
1833 }
1834 } else if (is_digit(c)) {
1835 do {
1836 USTPUTC(c, out);
1837 c = pgetc_linecont();
1838 } while (subtype != VSNORMAL && is_digit(c));
1839 }
1840 else if (is_special(c)) {
1841 USTPUTC(c, out);
1842 c = pgetc_linecont();
1843 }
1844 else {
1845 badsub:
1846 cleanup_state_stack(stack);
1847 synerror("Bad substitution");
1848 }
1849
1850 STPUTC('=', out);
1851 if (subtype == 0) {
1852 switch (c) {
1853 case ':':
1854 flags |= VSNUL;
1855 c = pgetc_linecont();
1856 /*FALLTHROUGH*/
1857 default:
1858 p = strchr(types, c);
1859 if (p == NULL)
1860 goto badsub;
1861 subtype = p - types + VSNORMAL;
1862 break;
1863 case '%':
1864 case '#':
1865 {
1866 int cc = c;
1867 subtype = c == '#' ? VSTRIMLEFT :
1868 VSTRIMRIGHT;
1869 c = pgetc_linecont();
1870 if (c == cc)
1871 subtype++;
1872 else
1873 pungetc();
1874 break;
1875 }
1876 }
1877 } else {
1878 if (subtype == VSLENGTH && c != /*{*/ '}')
1879 synerror("no modifiers allowed with ${#var}");
1880 pungetc();
1881 }
1882 if (ISDBLQUOTE() || arinest)
1883 flags |= VSQUOTE;
1884 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
1885 flags |= VSPATQ;
1886 *(stackblock() + typeloc) = subtype | flags;
1887 if (subtype != VSNORMAL) {
1888 TS_PUSH();
1889 varnest++;
1890 arinest = 0;
1891 if (subtype > VSASSIGN) { /* # ## % %% */
1892 syntax = BASESYNTAX;
1893 CLRDBLQUOTE();
1894 }
1895 }
1896 }
1897 goto parsesub_return;
1898 }
1899
1900
1901 /*
1902 * Parse an arithmetic expansion (indicate start of one and set state)
1903 */
1904 parsearith: {
1905
1906 if (syntax == ARISYNTAX) {
1907 /*
1908 * we collapse embedded arithmetic expansion to
1909 * parentheses, which should be equivalent
1910 */
1911 USTPUTC('(', out);
1912 USTPUTC('(', out);
1913 /*
1914 * Need 2 of them because there will (should be)
1915 * two closing ))'s to follow later.
1916 */
1917 parenlevel += 2;
1918 } else {
1919 TS_PUSH();
1920 syntax = ARISYNTAX;
1921 ++arinest;
1922 varnest = 0;
1923
1924 USTPUTC(CTLARI, out);
1925 if (ISDBLQUOTE())
1926 USTPUTC('"',out);
1927 else
1928 USTPUTC(' ',out);
1929 }
1930 goto parsearith_return;
1931 }
1932
1933 } /* end of readtoken */
1934
1935
1936
/*
 * Only visible when "mkinit" is defined, so an ordinary compile of this
 * file skips it.
 * NOTE(review): presumably collected by the mkinit tool into the shell's
 * reset routine - confirm against the build.
 * Discards any pushed back token and sets checkkwd back to 0.
 */
#ifdef mkinit
RESET {
	tokpushback = 0;
	checkkwd = 0;
}
#endif
1943
1944 /*
1945 * Returns true if the text contains nothing to expand (no dollar signs
1946 * or backquotes).
1947 */
1948
1949 STATIC int
1950 noexpand(char *text)
1951 {
1952 char *p;
1953 char c;
1954
1955 p = text;
1956 while ((c = *p++) != '\0') {
1957 if (c == CTLQUOTEMARK)
1958 continue;
1959 if (c == CTLESC)
1960 p++;
1961 else if (BASESYNTAX[(int)c] == CCTL)
1962 return 0;
1963 }
1964 return 1;
1965 }
1966
1967
/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 */

int
goodname(char *name)
{
	const char *p;

	/* the first character has the stricter rule */
	if (!is_name(*name))
		return 0;

	for (p = name + 1; *p != '\0'; p++) {
		if (!is_in_name(*p))
			return 0;
	}
	return 1;
}
1987
1988
1989 /*
1990 * Called when an unexpected token is read during the parse. The argument
1991 * is the token that is expected, or -1 if more than one type of token can
1992 * occur at this point.
1993 */
1994
1995 STATIC void
1996 synexpect(int token, const char *text)
1997 {
1998 char msg[64];
1999 char *p;
2000
2001 if (lasttoken == TWORD) {
2002 size_t len = strlen(wordtext);
2003
2004 if (len <= 13)
2005 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2006 else
2007 fmtstr(msg, 34,
2008 "Word \"%.10s...\" unexpected", wordtext);
2009 } else
2010 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2011
2012 p = strchr(msg, '\0');
2013 if (text)
2014 fmtstr(p, 30, " (expecting \"%.10s\")", text);
2015 else if (token >= 0)
2016 fmtstr(p, 30, " (expecting %s)", tokname[token]);
2017
2018 synerror(msg);
2019 /* NOTREACHED */
2020 }
2021
2022
2023 STATIC void
2024 synerror(const char *msg)
2025 {
2026 error("%d: Syntax error: %s", startlinno, msg);
2027 /* NOTREACHED */
2028 }
2029
2030 STATIC void
2031 setprompt(int which)
2032 {
2033 whichprompt = which;
2034
2035 #ifndef SMALL
2036 if (!el)
2037 #endif
2038 out2str(getprompt(NULL));
2039 }
2040
2041 /*
2042 * handle getting the next character, while ignoring \ \n
2043 * (which is a little tricky as we only have one char of pushback
2044 * and we need that one elsewhere).
2045 */
2046 STATIC int
2047 pgetc_linecont(void)
2048 {
2049 int c;
2050
2051 while ((c = pgetc_macro()) == '\\') {
2052 c = pgetc();
2053 if (c == '\n') {
2054 plinno++;
2055 if (doprompt)
2056 setprompt(2);
2057 else
2058 setprompt(0);
2059 } else {
2060 pungetc();
2061 /* Allow the backslash to be pushed back. */
2062 pushstring("\\", 1, NULL);
2063 return (pgetc());
2064 }
2065 }
2066 return (c);
2067 }
2068
2069 /*
2070 * called by editline -- any expansions to the prompt
2071 * should be added here.
2072 */
2073 const char *
2074 getprompt(void *unused)
2075 {
2076 switch (whichprompt) {
2077 case 0:
2078 return "";
2079 case 1:
2080 return ps1val();
2081 case 2:
2082 return ps2val();
2083 default:
2084 return "<internal prompt error>";
2085 }
2086 }
2087