parser.c revision 1.116 1 /* $NetBSD: parser.c,v 1.116 2016/04/04 12:39:08 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.116 2016/04/04 12:39:08 christos Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <limits.h>
47
48 #include "shell.h"
49 #include "parser.h"
50 #include "nodes.h"
51 #include "expand.h" /* defines rmescapes() */
52 #include "eval.h" /* defines commandname */
53 #include "redir.h" /* defines copyfd() */
54 #include "syntax.h"
55 #include "options.h"
56 #include "input.h"
57 #include "output.h"
58 #include "var.h"
59 #include "error.h"
60 #include "memalloc.h"
61 #include "mystring.h"
62 #include "alias.h"
63 #include "show.h"
64 #ifndef SMALL
65 #include "myhistedit.h"
66 #endif
67
68 /*
69 * Shell command parser.
70 */
71
72 /* values returned by readtoken */
73 #include "token.h"
74
/* Named character constants for the opening and closing brace. */
75 #define OPENBRACE '{'
76 #define CLOSEBRACE '}'
77
78
/*
 * Bookkeeping for one here document (<<) whose body has not been read
 * yet.  Records are linked through `next' to form the pending list.
 */
struct heredoc {
	struct heredoc *next;	/* following here document, or NULL */
	union node *here;	/* redirection node the text belongs to */
	char *eofmark;		/* word that terminates the document */
	int striptabs;		/* non-zero: strip leading tabs */
	int startline;		/* line number on which the << was seen */
};
86
87
88
/* Parser and tokenizer state shared by the routines in this file. */
89 static int noalias = 0; /* when set, readtoken() does not expand aliases */
90 struct heredoc *heredoclist; /* here documents seen but whose text is not yet read */
91 int parsebackquote; /* nonzero if we are inside backquotes */
92 int doprompt; /* if set, prompt the user */
93 int needprompt; /* true if interactive and at start of line */
94 int lasttoken; /* last token read */
95 MKINIT int tokpushback; /* nonzero: the next read returns lasttoken again */
96 char *wordtext; /* text of last word returned by readtoken */
97 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
98 struct nodelist *backquotelist; /* commands in backquotes found in the last word */
99 union node *redirnode; /* redirection node belonging to the last TREDIR token */
/* NOTE(review): heredoc is assigned outside the visible part of this file; parsefname() reads it when the redirection is a here document. */
100 struct heredoc *heredoc; /* record for the << currently being parsed */
101 int quoteflag; /* set if (part of) last token was quoted */
102 int startlinno; /* line # where last token started */
103 int funclinno; /* line # where the current function started, 0 when not in one */
104
105
/*
 * Forward declarations of the file-local parsing and tokenizing routines.
 * synexpect() and synerror() are marked __dead: they do not return.
 */
106 STATIC union node *list(int, int);
107 STATIC union node *andor(void);
108 STATIC union node *pipeline(void);
109 STATIC union node *command(void);
110 STATIC union node *simplecmd(union node **, union node *);
111 STATIC union node *makename(void);
112 STATIC void parsefname(void);
113 STATIC void slurp_heredoc(char *const, const int, const int);
114 STATIC void readheredocs(void);
115 STATIC int peektoken(void);
116 STATIC int readtoken(void);
117 STATIC int xxreadtoken(void);
118 STATIC int readtoken1(int, char const *, int);
119 STATIC int noexpand(char *);
120 STATIC void synexpect(int, const char *) __dead;
121 STATIC void synerror(const char *) __dead;
122 STATIC void setprompt(int);
123
124
/* Syntax error text used when input ends before a here document's end marker. */
125 static const char EOFhere[] = "EOF reading here (<<) document";
126
127
128 /*
129 * Read and parse a command. Returns NEOF on end of file. (NULL is a
130 * valid parse tree indicating a blank line.)
131 */
132
133 union node *
134 parsecmd(int interact)
135 {
136 int t;
137 union node *n;
138
139 tokpushback = 0;
140 doprompt = interact;
141 if (doprompt)
142 setprompt(1);
143 else
144 setprompt(0);
145 needprompt = 0;
146 t = readtoken();
147 if (t == TEOF)
148 return NEOF;
149 if (t == TNL)
150 return NULL;
151 tokpushback++;
152 n = list(1, 0);
153 if (heredoclist)
154 error("%d: Here document (<<%s) expected but not present",
155 heredoclist->startline, heredoclist->eofmark);
156 return n;
157 }
158
159
160 STATIC union node *
161 list(int nlflag, int erflag)
162 {
163 union node *n1, *n2, *n3;
164 int tok;
165 TRACE(("list(%d,%d): entered\n", nlflag, erflag));
166
167 checkkwd = 2;
168 if (nlflag == 0 && tokendlist[peektoken()])
169 return NULL;
170 n1 = NULL;
171 for (;;) {
172 n2 = andor();
173 tok = readtoken();
174 if (tok == TBACKGND) {
175 if (n2->type == NCMD || n2->type == NPIPE) {
176 n2->ncmd.backgnd = 1;
177 } else if (n2->type == NREDIR) {
178 n2->type = NBACKGND;
179 } else {
180 n3 = stalloc(sizeof(struct nredir));
181 n3->type = NBACKGND;
182 n3->nredir.n = n2;
183 n3->nredir.redirect = NULL;
184 n2 = n3;
185 }
186 }
187 if (n1 == NULL) {
188 n1 = n2;
189 }
190 else {
191 n3 = stalloc(sizeof(struct nbinary));
192 n3->type = NSEMI;
193 n3->nbinary.ch1 = n1;
194 n3->nbinary.ch2 = n2;
195 n1 = n3;
196 }
197 switch (tok) {
198 case TBACKGND:
199 case TSEMI:
200 tok = readtoken();
201 /* FALLTHROUGH */
202 case TNL:
203 if (tok == TNL) {
204 readheredocs();
205 if (nlflag)
206 return n1;
207 } else {
208 tokpushback++;
209 }
210 checkkwd = 2;
211 if (tokendlist[peektoken()])
212 return n1;
213 break;
214 case TEOF:
215 pungetc(); /* push back EOF on input */
216 return n1;
217 default:
218 if (nlflag || erflag)
219 synexpect(-1, 0);
220 tokpushback++;
221 return n1;
222 }
223 }
224 }
225
226 STATIC union node *
227 andor(void)
228 {
229 union node *n1, *n2, *n3;
230 int t;
231
232 TRACE(("andor: entered\n"));
233 n1 = pipeline();
234 for (;;) {
235 if ((t = readtoken()) == TAND) {
236 t = NAND;
237 } else if (t == TOR) {
238 t = NOR;
239 } else {
240 tokpushback++;
241 return n1;
242 }
243 n2 = pipeline();
244 n3 = stalloc(sizeof(struct nbinary));
245 n3->type = t;
246 n3->nbinary.ch1 = n1;
247 n3->nbinary.ch2 = n2;
248 n1 = n3;
249 }
250 }
251
252 STATIC union node *
253 pipeline(void)
254 {
255 union node *n1, *n2, *pipenode;
256 struct nodelist *lp, *prev;
257 int negate;
258
259 TRACE(("pipeline: entered\n"));
260
261 negate = 0;
262 checkkwd = 2;
263 while (readtoken() == TNOT) {
264 TRACE(("pipeline: TNOT recognized\n"));
265 negate = !negate;
266 }
267 tokpushback++;
268 n1 = command();
269 if (readtoken() == TPIPE) {
270 pipenode = stalloc(sizeof(struct npipe));
271 pipenode->type = NPIPE;
272 pipenode->npipe.backgnd = 0;
273 lp = stalloc(sizeof(struct nodelist));
274 pipenode->npipe.cmdlist = lp;
275 lp->n = n1;
276 do {
277 prev = lp;
278 lp = stalloc(sizeof(struct nodelist));
279 lp->n = command();
280 prev->next = lp;
281 } while (readtoken() == TPIPE);
282 lp->next = NULL;
283 n1 = pipenode;
284 }
285 tokpushback++;
286 if (negate) {
287 TRACE(("negate pipeline\n"));
288 n2 = stalloc(sizeof(struct nnot));
289 n2->type = NNOT;
290 n2->nnot.com = n1;
291 return n2;
292 } else
293 return n1;
294 }
295
296
297
298 STATIC union node *
299 command(void)
300 {
301 union node *n1, *n2;
302 union node *ap, **app;
303 union node *cp, **cpp;
304 union node *redir, **rpp;
305 int t, negate = 0;
306
307 TRACE(("command: entered\n"));
308
309 checkkwd = 2;
310 redir = NULL;
311 n1 = NULL;
312 rpp = &redir;
313
314 /* Check for redirection which may precede command */
315 while (readtoken() == TREDIR) {
316 *rpp = n2 = redirnode;
317 rpp = &n2->nfile.next;
318 parsefname();
319 }
320 tokpushback++;
321
322 while (readtoken() == TNOT) {
323 TRACE(("command: TNOT recognized\n"));
324 negate = !negate;
325 }
326 tokpushback++;
327
328 switch (readtoken()) {
329 case TIF:
330 n1 = stalloc(sizeof(struct nif));
331 n1->type = NIF;
332 n1->nif.test = list(0, 0);
333 if (readtoken() != TTHEN)
334 synexpect(TTHEN, 0);
335 n1->nif.ifpart = list(0, 0);
336 n2 = n1;
337 while (readtoken() == TELIF) {
338 n2->nif.elsepart = stalloc(sizeof(struct nif));
339 n2 = n2->nif.elsepart;
340 n2->type = NIF;
341 n2->nif.test = list(0, 0);
342 if (readtoken() != TTHEN)
343 synexpect(TTHEN, 0);
344 n2->nif.ifpart = list(0, 0);
345 }
346 if (lasttoken == TELSE)
347 n2->nif.elsepart = list(0, 0);
348 else {
349 n2->nif.elsepart = NULL;
350 tokpushback++;
351 }
352 if (readtoken() != TFI)
353 synexpect(TFI, 0);
354 checkkwd = 1;
355 break;
356 case TWHILE:
357 case TUNTIL: {
358 int got;
359 n1 = stalloc(sizeof(struct nbinary));
360 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
361 n1->nbinary.ch1 = list(0, 0);
362 if ((got=readtoken()) != TDO) {
363 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
364 synexpect(TDO, 0);
365 }
366 n1->nbinary.ch2 = list(0, 0);
367 if (readtoken() != TDONE)
368 synexpect(TDONE, 0);
369 checkkwd = 1;
370 break;
371 }
372 case TFOR:
373 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
374 synerror("Bad for loop variable");
375 n1 = stalloc(sizeof(struct nfor));
376 n1->type = NFOR;
377 n1->nfor.var = wordtext;
378 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
379 app = ≈
380 while (readtoken() == TWORD) {
381 n2 = stalloc(sizeof(struct narg));
382 n2->type = NARG;
383 n2->narg.text = wordtext;
384 n2->narg.backquote = backquotelist;
385 *app = n2;
386 app = &n2->narg.next;
387 }
388 *app = NULL;
389 n1->nfor.args = ap;
390 if (lasttoken != TNL && lasttoken != TSEMI)
391 synexpect(-1, 0);
392 } else {
393 static char argvars[5] = {
394 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
395 };
396 n2 = stalloc(sizeof(struct narg));
397 n2->type = NARG;
398 n2->narg.text = argvars;
399 n2->narg.backquote = NULL;
400 n2->narg.next = NULL;
401 n1->nfor.args = n2;
402 /*
403 * Newline or semicolon here is optional (but note
404 * that the original Bourne shell only allowed NL).
405 */
406 if (lasttoken != TNL && lasttoken != TSEMI)
407 tokpushback++;
408 }
409 checkkwd = 2;
410 if ((t = readtoken()) == TDO)
411 t = TDONE;
412 else if (t == TBEGIN)
413 t = TEND;
414 else
415 synexpect(-1, 0);
416 n1->nfor.body = list(0, 0);
417 if (readtoken() != t)
418 synexpect(t, 0);
419 checkkwd = 1;
420 break;
421 case TCASE:
422 n1 = stalloc(sizeof(struct ncase));
423 n1->type = NCASE;
424 if (readtoken() != TWORD)
425 synexpect(TWORD, 0);
426 n1->ncase.expr = n2 = stalloc(sizeof(struct narg));
427 n2->type = NARG;
428 n2->narg.text = wordtext;
429 n2->narg.backquote = backquotelist;
430 n2->narg.next = NULL;
431 while (readtoken() == TNL);
432 if (lasttoken != TWORD || ! equal(wordtext, "in"))
433 synexpect(-1, "in");
434 cpp = &n1->ncase.cases;
435 noalias = 1;
436 checkkwd = 2, readtoken();
437 /*
438 * Both ksh and bash accept 'case x in esac'
439 * so configure scripts started taking advantage of this.
440 * The page: http://pubs.opengroup.org/onlinepubs/\
441 * 009695399/utilities/xcu_chap02.html contradicts itself,
442 * as to if this is legal; the "Case Conditional Format"
443 * paragraph shows one case is required, but the "Grammar"
444 * section shows a grammar that explicitly allows the no
445 * case option.
446 */
447 while (lasttoken != TESAC) {
448 *cpp = cp = stalloc(sizeof(struct nclist));
449 if (lasttoken == TLP)
450 readtoken();
451 cp->type = NCLIST;
452 app = &cp->nclist.pattern;
453 for (;;) {
454 *app = ap = stalloc(sizeof(struct narg));
455 ap->type = NARG;
456 ap->narg.text = wordtext;
457 ap->narg.backquote = backquotelist;
458 if (checkkwd = 2, readtoken() != TPIPE)
459 break;
460 app = &ap->narg.next;
461 readtoken();
462 }
463 ap->narg.next = NULL;
464 noalias = 0;
465 if (lasttoken != TRP) {
466 synexpect(TRP, 0);
467 }
468 cp->nclist.body = list(0, 0);
469
470 checkkwd = 2;
471 if ((t = readtoken()) != TESAC) {
472 if (t != TENDCASE) {
473 noalias = 0;
474 synexpect(TENDCASE, 0);
475 } else {
476 noalias = 1;
477 checkkwd = 2;
478 readtoken();
479 }
480 }
481 cpp = &cp->nclist.next;
482 }
483 noalias = 0;
484 *cpp = NULL;
485 checkkwd = 1;
486 break;
487 case TLP:
488 n1 = stalloc(sizeof(struct nredir));
489 n1->type = NSUBSHELL;
490 n1->nredir.n = list(0, 0);
491 n1->nredir.redirect = NULL;
492 if (readtoken() != TRP)
493 synexpect(TRP, 0);
494 checkkwd = 1;
495 break;
496 case TBEGIN:
497 n1 = list(0, 0);
498 if (readtoken() != TEND)
499 synexpect(TEND, 0);
500 checkkwd = 1;
501 break;
502 /* Handle an empty command like other simple commands. */
503 case TSEMI:
504 /*
505 * An empty command before a ; doesn't make much sense, and
506 * should certainly be disallowed in the case of `if ;'.
507 */
508 if (!redir)
509 synexpect(-1, 0);
510 case TAND:
511 case TOR:
512 case TNL:
513 case TEOF:
514 case TWORD:
515 case TRP:
516 tokpushback++;
517 n1 = simplecmd(rpp, redir);
518 goto checkneg;
519 case TENDCASE:
520 if (redir) {
521 tokpushback++;
522 goto checkneg;
523 }
524 /* FALLTHROUGH */
525 default:
526 synexpect(-1, 0);
527 /* NOTREACHED */
528 }
529
530 /* Now check for redirection which may follow command */
531 while (readtoken() == TREDIR) {
532 *rpp = n2 = redirnode;
533 rpp = &n2->nfile.next;
534 parsefname();
535 }
536 tokpushback++;
537 *rpp = NULL;
538 if (redir) {
539 if (n1->type != NSUBSHELL) {
540 n2 = stalloc(sizeof(struct nredir));
541 n2->type = NREDIR;
542 n2->nredir.n = n1;
543 n1 = n2;
544 }
545 n1->nredir.redirect = redir;
546 }
547
548 checkneg:
549 if (negate) {
550 TRACE(("negate command\n"));
551 n2 = stalloc(sizeof(struct nnot));
552 n2->type = NNOT;
553 n2->nnot.com = n1;
554 return n2;
555 }
556 else
557 return n1;
558 }
559
560
561 STATIC union node *
562 simplecmd(union node **rpp, union node *redir)
563 {
564 union node *args, **app;
565 union node *n = NULL, *n2;
566 int negate = 0;
567
568 /* If we don't have any redirections already, then we must reset */
569 /* rpp to be the address of the local redir variable. */
570 if (redir == 0)
571 rpp = &redir;
572
573 args = NULL;
574 app = &args;
575
576 while (readtoken() == TNOT) {
577 TRACE(("simplcmd: TNOT recognized\n"));
578 negate = !negate;
579 }
580 tokpushback++;
581
582 for (;;) {
583 if (readtoken() == TWORD) {
584 n = stalloc(sizeof(struct narg));
585 n->type = NARG;
586 n->narg.text = wordtext;
587 n->narg.backquote = backquotelist;
588 *app = n;
589 app = &n->narg.next;
590 } else if (lasttoken == TREDIR) {
591 *rpp = n = redirnode;
592 rpp = &n->nfile.next;
593 parsefname(); /* read name of redirection file */
594 } else if (lasttoken == TLP && app == &args->narg.next
595 && redir == 0) {
596 /* We have a function */
597 if (readtoken() != TRP)
598 synexpect(TRP, 0);
599 funclinno = plinno;
600 rmescapes(n->narg.text);
601 if (!goodname(n->narg.text))
602 synerror("Bad function name");
603 n->type = NDEFUN;
604 n->narg.next = command();
605 funclinno = 0;
606 goto checkneg;
607 } else {
608 tokpushback++;
609 break;
610 }
611 }
612 *app = NULL;
613 *rpp = NULL;
614 n = stalloc(sizeof(struct ncmd));
615 n->type = NCMD;
616 n->ncmd.backgnd = 0;
617 n->ncmd.args = args;
618 n->ncmd.redirect = redir;
619
620 checkneg:
621 if (negate) {
622 TRACE(("negate simplecmd\n"));
623 n2 = stalloc(sizeof(struct nnot));
624 n2->type = NNOT;
625 n2->nnot.com = n;
626 return n2;
627 }
628 else
629 return n;
630 }
631
632 STATIC union node *
633 makename(void)
634 {
635 union node *n;
636
637 n = stalloc(sizeof(struct narg));
638 n->type = NARG;
639 n->narg.next = NULL;
640 n->narg.text = wordtext;
641 n->narg.backquote = backquotelist;
642 return n;
643 }
644
645 void
646 fixredir(union node *n, const char *text, int err)
647 {
648 TRACE(("Fix redir %s %d\n", text, err));
649 if (!err)
650 n->ndup.vname = NULL;
651
652 if (is_number(text))
653 n->ndup.dupfd = number(text);
654 else if (text[0] == '-' && text[1] == '\0')
655 n->ndup.dupfd = -1;
656 else {
657
658 if (err)
659 synerror("Bad fd number");
660 else
661 n->ndup.vname = makename();
662 }
663 }
664
665
666 STATIC void
667 parsefname(void)
668 {
669 union node *n = redirnode;
670
671 if (readtoken() != TWORD)
672 synexpect(-1, 0);
673 if (n->type == NHERE) {
674 struct heredoc *here = heredoc;
675 struct heredoc *p;
676
677 if (quoteflag == 0)
678 n->type = NXHERE;
679 TRACE(("Here document %d\n", n->type));
680 if (here->striptabs) {
681 while (*wordtext == '\t')
682 wordtext++;
683 }
684
685 /*
686 * this test is not really necessary, we are not
687 * required to expand wordtext, but there's no reason
688 * it cannot be $$ or something like that - that would
689 * not mean the pid, but literally two '$' characters.
690 * There is no need for limits on what the word can be.
691 * However, it needs to stay literal as entered, not
692 * have $ converted to CTLVAR or something, which as
693 * the parser is, at the minute, is impossible to prevent.
694 * So, leave it like this until the rest of the parser is fixed.
695 */
696 if (! noexpand(wordtext))
697 synerror("Illegal eof marker for << redirection");
698
699 rmescapes(wordtext);
700 here->eofmark = wordtext;
701 here->next = NULL;
702 if (heredoclist == NULL)
703 heredoclist = here;
704 else {
705 for (p = heredoclist ; p->next ; p = p->next)
706 continue;
707 p->next = here;
708 }
709 } else if (n->type == NTOFD || n->type == NFROMFD) {
710 fixredir(n, wordtext, 0);
711 } else {
712 n->nfile.fname = makename();
713 }
714 }
715
716 /*
717 * Check to see whether we are at the end of the here document. When this
718 * is called, c is set to the first character of the next input line. If
719 * we are at the end of the here document, this routine sets the c to PEOF.
720 * The new value of c is returned.
721 */
722
723 static int
724 checkend(int c, char * const eofmark, const int striptabs)
725 {
726 if (striptabs) {
727 while (c == '\t')
728 c = pgetc();
729 }
730 if (c == PEOF) {
731 if (*eofmark == '\0')
732 return (c);
733 synerror(EOFhere);
734 }
735 if (c == *eofmark) {
736 int c2;
737 char *q;
738
739 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
740 ;
741 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
742 c = PEOF;
743 if (c2 == '\n') {
744 plinno++;
745 needprompt = doprompt;
746 }
747 } else {
748 pungetc();
749 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
750 }
751 } else if (c == '\n' && *eofmark == '\0') {
752 c = PEOF;
753 plinno++;
754 needprompt = doprompt;
755 }
756 return (c);
757 }
758
759
760 /*
761 * Read the text of one here document from the input and leave it,
 * NUL terminated, in wordtext.
 *
 * eofmark:   word that ends the document (handed to checkend()).
 * striptabs: passed on to checkend(), which is called at the start of
 *            every line.
 * sq:        non-zero to store every character literally; zero to also
 *            drop each backslash that is directly followed by a newline,
 *            together with that newline.
 *
 * Reaching end of input before the end marker is a syntax error, except
 * that an immediate EOF is accepted when the marker is the empty string.
762 */
763
764 STATIC void
765 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
766 {
767 int c;
768 char *out;
769
770 c = pgetc();
771
772 /*
773 * If we hit EOF on the input, and the eofmark is a null string ('')
774 * we consider this empty line to be the eofmark, and exit without err.
775 */
776 if (c == PEOF && *eofmark != '\0')
777 synerror(EOFhere);
778
779 STARTSTACKSTR(out);
780
/* Outer loop: one iteration per input line, until checkend() reports the marker. */
781 while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
782 do {
783 if (sq) {
784 /*
785 * in single quoted mode (eofmark quoted)
786 * all we look for is \n so we can check
787 * for the eofmark - everything saved literally.
788 */
789 STPUTC(c, out);
790 if (c == '\n')
791 break;
792 continue; /* goes to the do/while test, which reads the next char */
793 }
794 /*
795 * In double quoted (non-quoted eofmark)
796 * we must handle \ followed by \n here
797 * otherwise we can mismatch the end mark.
798 * All other uses of \ will be handled later
799 * when the here doc is expanded.
800 *
801 * This also makes sure \\ followed by \n does
802 * not suppress the newline (the \ quotes itself)
803 */
804 if (c == '\\') { /* A backslash */
805 c = pgetc(); /* followed by */
806 if (c == '\n') /* a newline? */
807 continue; /* y:drop both */
808 STPUTC('\\', out); /* else keep \ */
809 }
810 STPUTC(c, out); /* keep the char */
811 if (c == '\n') /* at end of line */
812 break; /* look for eofmark */
813
814 } while ((c = pgetc()) != PEOF);
815
816 /*
817 * If we have read a line, and reached EOF, without
818 * finding the eofmark, whether the EOF comes before
819 * or immediately after the \n, that is an error.
820 */
821 if (c == PEOF || (c = pgetc()) == PEOF)
822 synerror(EOFhere);
823 }
824 STPUTC('\0', out);
825
/* c is reused here as the length of the collected text, including the NUL. */
826 c = out - stackblock();
827 out = stackblock();
828 grabstackblock(c);
829 wordtext = out;
830
831 TRACE(("Slurped a heredoc (to '%s')%s: len %d, \"%.16s\"...\n",
832 eofmark, striptabs ? " tab stripped" : "", c, wordtext));
833 }
834
835 STATIC void
836 readheredocs(void)
837 {
838 struct heredoc *here;
839 union node *n;
840
841 while (heredoclist) {
842 here = heredoclist;
843 heredoclist = here->next;
844 if (needprompt) {
845 setprompt(2);
846 needprompt = 0;
847 }
848
849 slurp_heredoc(here->eofmark, here->striptabs,
850 here->here->nhere.type == NHERE);
851
852 n = stalloc(sizeof(struct narg));
853 n->narg.type = NARG;
854 n->narg.next = NULL;
855 n->narg.text = wordtext;
856 n->narg.backquote = backquotelist;
857 here->here->nhere.doc = n;
858
859 if (here->here->nhere.type == NHERE)
860 continue;
861
862 /*
863 * Now "parse" here docs that have unquoted eofmarkers.
864 */
865 setinputstring(wordtext, 1);
866 readtoken1(pgetc(), DQSYNTAX, 1);
867 n->narg.text = wordtext;
868 n->narg.backquote = backquotelist;
869 popfile();
870 }
871 }
872
873 STATIC int
874 peektoken(void)
875 {
876 int t;
877
878 t = readtoken();
879 tokpushback++;
880 return (t);
881 }
882
/*
 * Return the next token, applying the processing selected by checkkwd
 * on top of xxreadtoken():
 *   checkkwd == 2  newlines are skipped first (pending here documents
 *                  are read after each one);
 *   checkkwd != 0  an unquoted word is compared with the reserved words
 *                  in parsekwd and, unless noalias is set, looked up as
 *                  an alias.
 * checkkwd is reset to 0 before returning, except after TNOT where the
 * value it had on entry is restored.
 */
883 STATIC int
884 readtoken(void)
885 {
886 int t;
887 int savecheckkwd = checkkwd; /* value on entry, restored in two places below */
888 #ifdef DEBUG
889 int alreadyseen = tokpushback; /* only used by the TRACE call at the end */
890 #endif
891 struct alias *ap;
892
893 top:
894 t = xxreadtoken();
895
896 if (checkkwd) {
897 /*
898 * eat newlines
899 */
900 if (checkkwd == 2) {
901 checkkwd = 0;
902 while (t == TNL) {
903 readheredocs();
904 t = xxreadtoken();
905 }
906 } else
907 checkkwd = 0;
908 /*
909 * check for keywords and aliases
910 */
911 if (t == TWORD && !quoteflag) {
912 const char *const *pp;
913
914 for (pp = parsekwd; *pp; pp++) {
/* compare first characters before paying for the full equal() */
915 if (**pp == *wordtext && equal(*pp, wordtext)) {
/* the token value is the keyword's index in parsekwd plus KWDOFFSET */
916 lasttoken = t = pp -
917 parsekwd + KWDOFFSET;
918 TRACE(("keyword %s recognized\n", tokname[t]));
919 goto out;
920 }
921 }
922 if (!noalias &&
923 (ap = lookupalias(wordtext, 1)) != NULL) {
/* push the alias text as input, restore checkkwd and read again */
924 pushstring(ap->val, strlen(ap->val), ap);
925 checkkwd = savecheckkwd;
926 goto top;
927 }
928 }
929 out:
930 checkkwd = (t == TNOT) ? savecheckkwd : 0;
931 }
/* NOTE(review): alreadyseen exists only under DEBUG; presumably TRACE expands to nothing otherwise - confirm. */
932 TRACE(("%stoken %s %s\n", alreadyseen ? "reread " : "", tokname[t], t == TWORD ? wordtext : ""));
933 return (t);
934 }
935
936
937 /*
938 * Read the next input token.
939 * If the token is a word, we set backquotelist to the list of cmds in
940 * backquotes. We set quoteflag to true if any part of the word was
941 * quoted.
942 * If the token is TREDIR, then we set redirnode to a structure containing
943 * the redirection.
944 * In all cases, the variable startlinno is set to the number of the line
945 * on which the token starts.
946 *
947 * [Change comment: here documents and internal procedures]
948 * [Readtoken shouldn't have any arguments. Perhaps we should make the
949 * word parsing code into a separate routine. In this case, readtoken
950 * doesn't need to have any internal procedures, but parseword does.
951 * We could also make parseoperator in essence the main routine, and
952 * have parseword (readtoken1?) handle both words and redirection.]
953 */
954
/* Record the token in lasttoken and return it; undefined again at the end of xxreadtoken(). */
955 #define RETURN(token) return lasttoken = token
956
/*
 * Low-level tokenizer.  Returns lasttoken again if a token was pushed
 * back; otherwise skips blanks and comments and recognizes newline,
 * end of input and the operators & && | || ; ;; ( ).  Any other
 * character starts a word, which is handed to readtoken1() with
 * BASESYNTAX.
 */
957 STATIC int
958 xxreadtoken(void)
959 {
960 int c;
961
962 if (tokpushback) {
963 tokpushback = 0;
964 return lasttoken;
965 }
966 if (needprompt) {
967 setprompt(2);
968 needprompt = 0;
969 }
970 startlinno = plinno;
971 for (;;) { /* until token or start of word found */
972 c = pgetc_macro();
973 switch (c) {
974 case ' ': case '\t':
975 continue;
976 case '#':
/* skip the comment but leave its terminating newline (or EOF) to be read as a token */
977 while ((c = pgetc()) != '\n' && c != PEOF)
978 continue;
979 pungetc();
980 continue;
981
982 case '\n':
983 plinno++;
984 needprompt = doprompt;
985 RETURN(TNL);
986 case PEOF:
987 RETURN(TEOF);
988
/* for & | ; a doubled character is a different token; otherwise un-read the lookahead */
989 case '&':
990 if (pgetc() == '&')
991 RETURN(TAND);
992 pungetc();
993 RETURN(TBACKGND);
994 case '|':
995 if (pgetc() == '|')
996 RETURN(TOR);
997 pungetc();
998 RETURN(TPIPE);
999 case ';':
1000 if (pgetc() == ';')
1001 RETURN(TENDCASE);
1002 pungetc();
1003 RETURN(TSEMI);
1004 case '(':
1005 RETURN(TLP);
1006 case ')':
1007 RETURN(TRP);
1008
1009 case '\\':
1010 switch (pgetc()) {
1011 case '\n':
/* backslash-newline between tokens: count the line and keep scanning */
1012 startlinno = ++plinno;
1013 if (doprompt)
1014 setprompt(2);
1015 else
1016 setprompt(0);
1017 continue;
1018 case PEOF:
1019 RETURN(TEOF);
1020 default:
/* any other character: un-read it and let the backslash start a word */
1021 pungetc();
1022 break;
1023 }
1024 /* FALLTHROUGH */
1025 default:
1026 return readtoken1(c, BASESYNTAX, 0);
1027 }
1028 }
1029 #undef RETURN
1030 }
1031
1032
1033
1034 /*
1035 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1036 * is not NULL, read a here document. In the latter case, eofmark is the
1037 * word which marks the end of the document and striptabs is true if
1038 * leading tabs should be stripped from the document. The argument firstc
1039 * is the first character of the input token or document.
1040 *
1041 * Because C does not have internal subroutines, I have simulated them
1042 * using goto's to implement the subroutine linkage. The following macros
1043 * will run code that appears at the end of readtoken1.
1044 */
1045
1046 /*
1047 * We used to remember only the current syntax, variable nesting level,
1048 * double quote state for each var nesting level, and arith nesting
1049 * level (unrelated to var nesting) and one prev syntax when in arith
1050 * syntax. This worked for simple cases, but can't handle arith inside
1051 * var expansion inside arith inside var with some quoted and some not.
1052 *
1053 * Inspired by FreeBSD's implementation (though it was the obvious way)
1054 * though implemented differently, we now have a stack that keeps track
1055 * of what we are doing now, and what we were doing previously.
1056 * Every time something changes, which will eventually end and should
1057 * revert to the previous state, we push this stack, and then pop it
1058 * again later (that is every ${} with an operator (to parse the word
1059 * or pattern that follows) ${x} and $x are too simple to need it)
1060 * $(( )) $( ) and "...". Always. Really, always!
1061 *
1062 * The stack is implemented as one static (on the C stack) base block
1063 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1064 * enough for the vast majority of cases. For torture tests, we
1065 * malloc more blocks as needed. All accesses through the inline
1066 * functions below.
1067 */
1068
1069 /*
1070 * varnest & arinest will typically be 0 or 1
1071 * (varnest can increment in usages like ${x=${y}} but probably
1072 * does not really need to)
1073 * parenlevel allows balancing parens inside a $(( )), it is reset
1074 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
1075 * quoted is special - we need to know 2 things ... are we inside "..."
1076 * (even if inherited from some previous nesting level) and was there
1077 * an opening '"' at this level (so the next will be closing).
1078 * "..." can span nesting levels, but cannot be opened in one and
1079 * closed in a different one.
1080 * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1081 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1082 * is really so special that this setting is not very important)
1083 * and 0x10 that indicates that an opening quote has been seen.
1084 * The bottom 4 bits are inherited, the 0x10 bit is not.
1085 */
/*
 * Tokenizer state kept for one nesting level.
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax in effect at this level */
	unsigned short ts_parenlevel;	/* paren counter, reset per level */
	unsigned short ts_varnest;	/* variable nesting counter */
	unsigned short ts_arinest;	/* arithmetic nesting counter */
	unsigned short ts_quoted;	/* NQ, SQ or DQ, optionally | QS */
};

/* Values stored in ts_quoted. */
#define	NQ	0x00	/* not quoted */
#define	SQ	0x01	/* inside single quotes */
#define	DQ	0x02	/* inside double quotes (or equivalent) */
#define	QF	0x0F	/* mask selecting the NQ/SQ/DQ value */
#define	QS	0x10	/* the quote was opened at this stack level */

#define	LEVELS_PER_BLOCK	8
#define	VSS	struct statestack

/*
 * One block of the state stack.  Blocks are chained through `prev';
 * `cur' selects the entry of this block that is currently in use.
 */
struct statestack {
	struct statestack *prev;	/* block below this one, or NULL */
	int cur;			/* index of the entry in use */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1108
1109 static inline struct tokenstate *
1110 currentstate(VSS *stack)
1111 {
1112 return &stack->tokenstate[stack->cur];
1113 }
1114
1115 static inline struct tokenstate *
1116 prevstate(VSS *stack)
1117 {
1118 if (stack->cur != 0)
1119 return &stack->tokenstate[stack->cur - 1];
1120 if (stack->prev == NULL) /* cannot drop below base */
1121 return &stack->tokenstate[0];
1122 return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
1123 }
1124
1125 static inline VSS *
1126 bump_state_level(VSS *stack)
1127 {
1128 struct tokenstate *os, *ts;
1129
1130 os = currentstate(stack);
1131
1132 if (++stack->cur >= LEVELS_PER_BLOCK) {
1133 VSS *ss;
1134
1135 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1136 ss->cur = 0;
1137 ss->prev = stack;
1138 stack = ss;
1139 }
1140
1141 ts = currentstate(stack);
1142
1143 ts->ts_parenlevel = 0; /* parens inside never match outside */
1144
1145 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1146 ts->ts_varnest = os->ts_varnest;
1147 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1148 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1149
1150 return stack;
1151 }
1152
1153 static inline VSS *
1154 drop_state_level(VSS *stack)
1155 {
1156 if (stack->cur == 0) {
1157 VSS *ss;
1158
1159 ss = stack;
1160 stack = ss->prev;
1161 if (stack == NULL)
1162 return ss;
1163 ckfree(ss);
1164 }
1165 --stack->cur;
1166 return stack;
1167 }
1168
1169 static inline void
1170 cleanup_state_stack(VSS *stack)
1171 {
1172 while (stack->prev != NULL) {
1173 stack->cur = 0;
1174 stack = drop_state_level(stack);
1175 }
1176 }
1177
/*
 * Simulated subroutine calls: jump to the named label and declare the
 * label at which execution resumes.  Each macro can therefore be used
 * only once per function.
 * NOTE(review): the parsesub/parsearith labels are not in the visible
 * part of the file - presumably in readtoken1(); confirm.
 */
1178 #define PARSESUB() {goto parsesub; parsesub_return:;}
1179 #define PARSEARITH() {goto parsearith; parsearith_return:;}
1180
1181 /*
1182 * The following macros all assume the existence of a local var "stack"
1183 * which contains a pointer to the current struct statestack
1184 */
1185
1186 /*
1187 * These are macros rather than inline funcs to avoid code churn as much
1188 * as possible - they replace macros of the same name used previously.
 *
 * ISDBLQUOTE() is non-zero when the QS bit is set at the current level.
 * SETDBLQUOTE() sets the current level's quoting state to QS | DQ.
 * CLRDBLQUOTE() sets it to the QF part of the level below, or to 0
 * when the current level is the base of the stack.
1189 */
1190 #define ISDBLQUOTE() (currentstate(stack)->ts_quoted & QS)
1191 #define SETDBLQUOTE() (currentstate(stack)->ts_quoted = QS | DQ)
1192 #define CLRDBLQUOTE() (currentstate(stack)->ts_quoted = \
1193 stack->cur != 0 || stack->prev ? \
1194 prevstate(stack)->ts_quoted & QF : 0)
1195
1196 /*
1197 * This set are just to avoid excess typing and line lengths...
1198 * The ones that "look like" var names must be implemented to be lvalues
 *
 * TS_PUSH() and TS_POP() assign to "stack", because pushing or popping
 * a level may move to a different block.
1199 */
1200 #define syntax (currentstate(stack)->ts_syntax)
1201 #define parenlevel (currentstate(stack)->ts_parenlevel)
1202 #define varnest (currentstate(stack)->ts_varnest)
1203 #define arinest (currentstate(stack)->ts_arinest)
1204 #define quoted (currentstate(stack)->ts_quoted)
1205 #define TS_PUSH() (stack = bump_state_level(stack))
1206 #define TS_POP() (stack = drop_state_level(stack))
1207
1208 /*
1209 * Called to parse command substitutions. oldstyle is true if the command
1210 * is enclosed inside `` (otherwise it was enclosed in "$( )")
1211 *
1212 * Internally nlpp is a pointer to the head of the linked
1213 * list of commands (passed by reference), and savelen is the number of
1214 * characters on the top of the stack which must be preserved.
1215 */
1216 static char *
1217 parsebackq(VSS *const stack, char * const in,
1218 struct nodelist **const pbqlist, const int oldstyle)
1219 {
1220 struct nodelist **nlpp;
1221 const int savepbq = parsebackquote;
1222 union node *n;
1223 char *out;
1224 char *str = NULL;
1225 char *volatile sstr = str;
1226 struct jmploc jmploc;
1227 struct jmploc *const savehandler = handler;
1228 const int savelen = in - stackblock();
1229 int saveprompt;
1230
1231 if (setjmp(jmploc.loc)) {
1232 if (sstr)
1233 ckfree(__UNVOLATILE(sstr));
1234 cleanup_state_stack(stack);
1235 parsebackquote = 0;
1236 handler = savehandler;
1237 longjmp(handler->loc, 1);
1238 }
1239 INTOFF;
1240 sstr = str = NULL;
1241 if (savelen > 0) {
1242 sstr = str = ckmalloc(savelen);
1243 memcpy(str, stackblock(), savelen);
1244 }
1245 handler = &jmploc;
1246 INTON;
1247 if (oldstyle) {
1248 /* We must read until the closing backquote, giving special
1249 treatment to some slashes, and then push the string and
1250 reread it as input, interpreting it normally. */
1251 int pc;
1252 int psavelen;
1253 char *pstr;
1254
1255 /*
1256 * Because the entire `...` is read here, we don't
1257 * need to bother the state stack. That will be used
1258 * (as appropriate) when the processed string is re-read.
1259 */
1260 STARTSTACKSTR(out);
1261 for (;;) {
1262 if (needprompt) {
1263 setprompt(2);
1264 needprompt = 0;
1265 }
1266 switch (pc = pgetc()) {
1267 case '`':
1268 goto done;
1269
1270 case '\\':
1271 if ((pc = pgetc()) == '\n') {
1272 plinno++;
1273 if (doprompt)
1274 setprompt(2);
1275 else
1276 setprompt(0);
1277 /*
1278 * If eating a newline, avoid putting
1279 * the newline into the new character
1280 * stream (via the STPUTC after the
1281 * switch).
1282 */
1283 continue;
1284 }
1285 if (pc != '\\' && pc != '`' && pc != '$'
1286 && (!ISDBLQUOTE() || pc != '"'))
1287 STPUTC('\\', out);
1288 break;
1289
1290 case '\n':
1291 plinno++;
1292 needprompt = doprompt;
1293 break;
1294
1295 case PEOF:
1296 startlinno = plinno;
1297 synerror("EOF in backquote substitution");
1298 break;
1299
1300 default:
1301 break;
1302 }
1303 STPUTC(pc, out);
1304 }
1305 done:
1306 STPUTC('\0', out);
1307 psavelen = out - stackblock();
1308 if (psavelen > 0) {
1309 pstr = grabstackstr(out);
1310 setinputstring(pstr, 1);
1311 }
1312 }
1313 nlpp = pbqlist;
1314 while (*nlpp)
1315 nlpp = &(*nlpp)->next;
1316 *nlpp = stalloc(sizeof(struct nodelist));
1317 (*nlpp)->next = NULL;
1318 parsebackquote = oldstyle;
1319
1320 if (oldstyle) {
1321 saveprompt = doprompt;
1322 doprompt = 0;
1323 } else
1324 saveprompt = 0;
1325
1326 n = list(0, oldstyle);
1327
1328 if (oldstyle)
1329 doprompt = saveprompt;
1330 else {
1331 if (readtoken() != TRP) {
1332 cleanup_state_stack(stack);
1333 synexpect(TRP, 0);
1334 }
1335 }
1336
1337 (*nlpp)->n = n;
1338 if (oldstyle) {
1339 /*
1340 * Start reading from old file again, ignoring any pushed back
1341 * tokens left from the backquote parsing
1342 */
1343 popfile();
1344 tokpushback = 0;
1345 }
1346
1347 while (stackblocksize() <= savelen)
1348 growstackblock();
1349 STARTSTACKSTR(out);
1350 if (str) {
1351 memcpy(out, str, savelen);
1352 STADJUST(savelen, out);
1353 INTOFF;
1354 ckfree(str);
1355 sstr = str = NULL;
1356 INTON;
1357 }
1358 parsebackquote = savepbq;
1359 handler = savehandler;
1360 if (arinest || ISDBLQUOTE())
1361 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1362 else
1363 USTPUTC(CTLBACKQ, out);
1364
1365 return out;
1366 }
1367
1368 /*
1369 * Parse a redirection operator. The parameter "out" points to a string
1370 * specifying the fd to be redirected. It is guaranteed to be either ""
1371 * or a numeric string (for now anyway). The parameter "c" contains the
1372 * first character of the redirection operator.
1373 *
1374 * Note the string "out" is on the stack, which we are about to clobber,
1375 * so process it first...
1376 */
1377
1378 static void
1379 parseredir(const char *out, int c)
1380 {
1381 union node *np;
1382 int fd;
1383
1384 fd = (*out == '\0') ? -1 : atoi(out);
1385
1386 np = stalloc(sizeof(struct nfile));
1387 if (c == '>') {
1388 if (fd < 0)
1389 fd = 1;
1390 c = pgetc();
1391 if (c == '>')
1392 np->type = NAPPEND;
1393 else if (c == '|')
1394 np->type = NCLOBBER;
1395 else if (c == '&')
1396 np->type = NTOFD;
1397 else {
1398 np->type = NTO;
1399 pungetc();
1400 }
1401 } else { /* c == '<' */
1402 if (fd < 0)
1403 fd = 0;
1404 switch (c = pgetc()) {
1405 case '<':
1406 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1407 np = stalloc(sizeof(struct nhere));
1408 np->nfile.fd = 0;
1409 }
1410 np->type = NHERE;
1411 heredoc = stalloc(sizeof(struct heredoc));
1412 heredoc->here = np;
1413 heredoc->startline = plinno;
1414 if ((c = pgetc()) == '-') {
1415 heredoc->striptabs = 1;
1416 } else {
1417 heredoc->striptabs = 0;
1418 pungetc();
1419 }
1420 break;
1421
1422 case '&':
1423 np->type = NFROMFD;
1424 break;
1425
1426 case '>':
1427 np->type = NFROMTO;
1428 break;
1429
1430 default:
1431 np->type = NFROM;
1432 pungetc();
1433 break;
1434 }
1435 }
1436 np->nfile.fd = fd;
1437
1438 redirnode = np; /* this is the "value" of TRENODE */
1439 }
1440
1441
1442 /*
1443 * The lowest level basic tokenizer.
1444 *
1445 * The next input byte (character) is in firstc, syn says which
1446 * syntax tables we are to use (basic, single or double quoted, or arith)
1447 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1448 * quote character itself is not special (used parsing here docs and similar)
1449 *
1450 * The result is the type of the next token (its value, when there is one,
1451 * is saved in the relevant global var - must fix that someday!) which is
1452 * also saved for re-reading ("lasttoken").
1453 *
1454 * Overall, this routine does far more parsing than it is supposed to.
1455 * That will also need fixing, someday...
1456 */
1457 STATIC int
1458 readtoken1(int firstc, char const *syn, int magicq)
1459 {
1460 int c;
1461 char * out;
1462 int len;
1463 struct nodelist *bqlist;
1464 int quotef;
1465 VSS static_stack;
1466 VSS *stack = &static_stack;
1467
1468 stack->prev = NULL;
1469 stack->cur = 0;
1470
1471 syntax = syn;
1472
1473 startlinno = plinno;
1474 varnest = 0;
1475 quoted = 0;
1476 if (syntax == DQSYNTAX)
1477 SETDBLQUOTE();
1478 quotef = 0;
1479 bqlist = NULL;
1480 arinest = 0;
1481 parenlevel = 0;
1482
1483 STARTSTACKSTR(out);
1484
1485 for (c = firstc ;; c = pgetc_macro()) { /* until of token */
1486 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
1487 switch (syntax[c]) {
1488 case CNL: /* '\n' */
1489 if (syntax == BASESYNTAX)
1490 break; /* exit loop */
1491 USTPUTC(c, out);
1492 plinno++;
1493 if (doprompt)
1494 setprompt(2);
1495 else
1496 setprompt(0);
1497 continue;
1498
1499 case CWORD:
1500 USTPUTC(c, out);
1501 continue;
1502 case CCTL:
1503 if (!magicq || ISDBLQUOTE())
1504 USTPUTC(CTLESC, out);
1505 USTPUTC(c, out);
1506 continue;
1507 case CBACK: /* backslash */
1508 c = pgetc();
1509 if (c == PEOF) {
1510 USTPUTC('\\', out);
1511 pungetc();
1512 continue;
1513 }
1514 if (c == '\n') {
1515 plinno++;
1516 if (doprompt)
1517 setprompt(2);
1518 else
1519 setprompt(0);
1520 continue;
1521 }
1522 quotef = 1; /* current token is quoted */
1523 if (ISDBLQUOTE() && c != '\\' && c != '`' &&
1524 c != '$' && (c != '"' || magicq))
1525 USTPUTC('\\', out);
1526 if (SQSYNTAX[c] == CCTL)
1527 USTPUTC(CTLESC, out);
1528 else if (!magicq) {
1529 USTPUTC(CTLQUOTEMARK, out);
1530 USTPUTC(c, out);
1531 if (varnest != 0)
1532 USTPUTC(CTLQUOTEEND, out);
1533 continue;
1534 }
1535 USTPUTC(c, out);
1536 continue;
1537 case CSQUOTE:
1538 if (syntax != SQSYNTAX) {
1539 if (!magicq)
1540 USTPUTC(CTLQUOTEMARK, out);
1541 quotef = 1;
1542 TS_PUSH();
1543 syntax = SQSYNTAX;
1544 quoted = SQ;
1545 continue;
1546 }
1547 if (magicq && arinest == 0 && varnest == 0) {
1548 /* Ignore inside quoted here document */
1549 USTPUTC(c, out);
1550 continue;
1551 }
1552 /* End of single quotes... */
1553 TS_POP();
1554 if (syntax == BASESYNTAX && varnest != 0)
1555 USTPUTC(CTLQUOTEEND, out);
1556 continue;
1557 case CDQUOTE:
1558 if (magicq && arinest == 0 && varnest == 0) {
1559 /* Ignore inside here document */
1560 USTPUTC(c, out);
1561 continue;
1562 }
1563 quotef = 1;
1564 if (arinest) {
1565 if (ISDBLQUOTE()) {
1566 TS_POP();
1567 } else {
1568 TS_PUSH();
1569 syntax = DQSYNTAX;
1570 SETDBLQUOTE();
1571 USTPUTC(CTLQUOTEMARK, out);
1572 }
1573 continue;
1574 }
1575 if (magicq)
1576 continue;
1577 if (ISDBLQUOTE()) {
1578 TS_POP();
1579 if (varnest != 0)
1580 USTPUTC(CTLQUOTEEND, out);
1581 } else {
1582 TS_PUSH();
1583 syntax = DQSYNTAX;
1584 SETDBLQUOTE();
1585 USTPUTC(CTLQUOTEMARK, out);
1586 }
1587 continue;
1588 case CVAR: /* '$' */
1589 PARSESUB(); /* parse substitution */
1590 continue;
1591 case CENDVAR: /* CLOSEBRACE */
1592 if (varnest > 0 && !ISDBLQUOTE()) {
1593 TS_POP();
1594 USTPUTC(CTLENDVAR, out);
1595 } else {
1596 USTPUTC(c, out);
1597 }
1598 continue;
1599 case CLP: /* '(' in arithmetic */
1600 parenlevel++;
1601 USTPUTC(c, out);
1602 continue;;
1603 case CRP: /* ')' in arithmetic */
1604 if (parenlevel > 0) {
1605 USTPUTC(c, out);
1606 --parenlevel;
1607 } else {
1608 if (pgetc() == ')') {
1609 if (--arinest == 0) {
1610 TS_POP();
1611 USTPUTC(CTLENDARI, out);
1612 } else
1613 USTPUTC(')', out);
1614 } else {
1615 /*
1616 * unbalanced parens
1617 * (don't 2nd guess - no error)
1618 */
1619 pungetc();
1620 USTPUTC(')', out);
1621 }
1622 }
1623 continue;
1624 case CBQUOTE: /* '`' */
1625 out = parsebackq(stack, out, &bqlist, 1);
1626 continue;
1627 case CEOF: /* --> c == PEOF */
1628 break; /* will exit loop */
1629 default:
1630 if (varnest == 0 && !ISDBLQUOTE())
1631 break; /* exit loop */
1632 USTPUTC(c, out);
1633 continue;
1634 }
1635 break; /* break from switch -> break from for loop too */
1636 }
1637
1638 if (syntax == ARISYNTAX) {
1639 cleanup_state_stack(stack);
1640 synerror("Missing '))'");
1641 }
1642 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
1643 cleanup_state_stack(stack);
1644 synerror("Unterminated quoted string");
1645 }
1646 if (varnest != 0) {
1647 cleanup_state_stack(stack);
1648 startlinno = plinno;
1649 /* { */
1650 synerror("Missing '}'");
1651 }
1652
1653 USTPUTC('\0', out);
1654 len = out - stackblock();
1655 out = stackblock();
1656
1657 if (!magicq) {
1658 if ((c == '<' || c == '>')
1659 && quotef == 0 && (*out == '\0' || is_number(out))) {
1660 parseredir(out, c);
1661 cleanup_state_stack(stack);
1662 return lasttoken = TREDIR;
1663 } else {
1664 pungetc();
1665 }
1666 }
1667
1668 quoteflag = quotef;
1669 backquotelist = bqlist;
1670 grabstackblock(len);
1671 wordtext = out;
1672 cleanup_state_stack(stack);
1673 return lasttoken = TWORD;
1674 /* end of readtoken routine */
1675
1676
1677 /*
1678 * Parse a substitution. At this point, we have read the dollar sign
1679 * and nothing else.
1680 */
1681
1682 parsesub: {
1683 char buf[10];
1684 int subtype;
1685 int typeloc;
1686 int flags;
1687 char *p;
1688 static const char types[] = "}-+?=";
1689 int i;
1690 int linno;
1691
1692 c = pgetc();
1693 if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1694 USTPUTC('$', out);
1695 pungetc();
1696 } else if (c == '(') { /* $(command) or $((arith)) */
1697 if (pgetc() == '(') {
1698 PARSEARITH();
1699 } else {
1700 pungetc();
1701 out = parsebackq(stack, out, &bqlist, 0);
1702 }
1703 } else {
1704 USTPUTC(CTLVAR, out);
1705 typeloc = out - stackblock();
1706 USTPUTC(VSNORMAL, out);
1707 subtype = VSNORMAL;
1708 flags = 0;
1709 if (c == OPENBRACE) {
1710 c = pgetc();
1711 if (c == '#') {
1712 if ((c = pgetc()) == CLOSEBRACE)
1713 c = '#';
1714 else
1715 subtype = VSLENGTH;
1716 }
1717 else
1718 subtype = 0;
1719 }
1720 if (is_name(c)) {
1721 p = out;
1722 do {
1723 STPUTC(c, out);
1724 c = pgetc();
1725 } while (is_in_name(c));
1726 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
1727 /* Replace the variable name with the
1728 * current line number. */
1729 linno = plinno;
1730 if (funclinno != 0)
1731 linno -= funclinno - 1;
1732 snprintf(buf, sizeof(buf), "%d", linno);
1733 STADJUST(-6, out);
1734 for (i = 0; buf[i] != '\0'; i++)
1735 STPUTC(buf[i], out);
1736 flags |= VSLINENO;
1737 }
1738 } else if (is_digit(c)) {
1739 do {
1740 USTPUTC(c, out);
1741 c = pgetc();
1742 } while (subtype != VSNORMAL && is_digit(c));
1743 }
1744 else if (is_special(c)) {
1745 USTPUTC(c, out);
1746 c = pgetc();
1747 }
1748 else {
1749 badsub:
1750 cleanup_state_stack(stack);
1751 synerror("Bad substitution");
1752 }
1753
1754 STPUTC('=', out);
1755 if (subtype == 0) {
1756 switch (c) {
1757 case ':':
1758 flags |= VSNUL;
1759 c = pgetc();
1760 /*FALLTHROUGH*/
1761 default:
1762 p = strchr(types, c);
1763 if (p == NULL)
1764 goto badsub;
1765 subtype = p - types + VSNORMAL;
1766 break;
1767 case '%':
1768 case '#':
1769 {
1770 int cc = c;
1771 subtype = c == '#' ? VSTRIMLEFT :
1772 VSTRIMRIGHT;
1773 c = pgetc();
1774 if (c == cc)
1775 subtype++;
1776 else
1777 pungetc();
1778 break;
1779 }
1780 }
1781 } else {
1782 pungetc();
1783 }
1784 if (ISDBLQUOTE() || arinest)
1785 flags |= VSQUOTE;
1786 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
1787 flags |= VSPATQ;
1788 *(stackblock() + typeloc) = subtype | flags;
1789 if (subtype != VSNORMAL) {
1790 TS_PUSH();
1791 varnest++;
1792 arinest = 0;
1793 if (subtype > VSASSIGN) { /* # ## % %% */
1794 syntax = BASESYNTAX;
1795 CLRDBLQUOTE();
1796 }
1797 }
1798 }
1799 goto parsesub_return;
1800 }
1801
1802
1803 /*
1804 * Parse an arithmetic expansion (indicate start of one and set state)
1805 */
1806 parsearith: {
1807
1808 if (syntax == ARISYNTAX) {
1809 /*
1810 * we collapse embedded arithmetic expansion to
1811 * parentheses, which should be equivalent
1812 */
1813 USTPUTC('(', out);
1814 USTPUTC('(', out);
1815 /*
1816 * Need 2 of them because there will (should be)
1817 * two closing ))'s to follow later.
1818 */
1819 parenlevel += 2;
1820 } else {
1821 TS_PUSH();
1822 syntax = ARISYNTAX;
1823 ++arinest;
1824 varnest = 0;
1825
1826 USTPUTC(CTLARI, out);
1827 if (ISDBLQUOTE())
1828 USTPUTC('"',out);
1829 else
1830 USTPUTC(' ',out);
1831 }
1832 goto parsearith_return;
1833 }
1834
1835 } /* end of readtoken */
1836
1837
1838
/*
 * Parser state to be cleared on a reset.  This is compiled only when
 * "mkinit" is defined - presumably the text is extracted by the mkinit
 * build tool rather than being compiled as part of this file (confirm
 * against the build).
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1845
1846 /*
1847 * Returns true if the text contains nothing to expand (no dollar signs
1848 * or backquotes).
1849 */
1850
1851 STATIC int
1852 noexpand(char *text)
1853 {
1854 char *p;
1855 char c;
1856
1857 p = text;
1858 while ((c = *p++) != '\0') {
1859 if (c == CTLQUOTEMARK)
1860 continue;
1861 if (c == CTLESC)
1862 p++;
1863 else if (BASESYNTAX[(int)c] == CCTL)
1864 return 0;
1865 }
1866 return 1;
1867 }
1868
1869
/*
 * Return true (1) if the argument is a legal variable name: a first
 * character accepted by is_name() followed by zero or more characters
 * accepted by is_in_name() (a letter or underscore, then letters,
 * underscores and digits).  Return false (0) otherwise, which includes
 * the empty string.
 */

int
goodname(char *name)
{
	const char *cp = name;

	if (!is_name(*cp))
		return 0;
	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
1889
1890
1891 /*
1892 * Called when an unexpected token is read during the parse. The argument
1893 * is the token that is expected, or -1 if more than one type of token can
1894 * occur at this point.
1895 */
1896
1897 STATIC void
1898 synexpect(int token, const char *text)
1899 {
1900 char msg[64];
1901 char *p;
1902
1903 if (lasttoken == TWORD) {
1904 size_t len = strlen(wordtext);
1905
1906 if (len <= 13)
1907 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
1908 else
1909 fmtstr(msg, 34,
1910 "Word \"%.10s...\" unexpected", wordtext);
1911 } else
1912 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
1913
1914 p = strchr(msg, '\0');
1915 if (text)
1916 fmtstr(p, 30, " (expecting \"%.10s\")", text);
1917 else if (token >= 0)
1918 fmtstr(p, 30, " (expecting %s)", tokname[token]);
1919
1920 synerror(msg);
1921 /* NOTREACHED */
1922 }
1923
1924
1925 STATIC void
1926 synerror(const char *msg)
1927 {
1928 error("%d: Syntax error: %s\n", startlinno, msg);
1929 /* NOTREACHED */
1930 }
1931
1932 STATIC void
1933 setprompt(int which)
1934 {
1935 whichprompt = which;
1936
1937 #ifndef SMALL
1938 if (!el)
1939 #endif
1940 out2str(getprompt(NULL));
1941 }
1942
1943 /*
1944 * called by editline -- any expansions to the prompt
1945 * should be added here.
1946 */
1947 const char *
1948 getprompt(void *unused)
1949 {
1950 switch (whichprompt) {
1951 case 0:
1952 return "";
1953 case 1:
1954 return ps1val();
1955 case 2:
1956 return ps2val();
1957 default:
1958 return "<internal prompt error>";
1959 }
1960 }
1961