parser.c revision 1.54 1 /* $NetBSD: parser.c,v 1.54 2002/11/24 22:35:42 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
43 #else
44 __RCSID("$NetBSD: parser.c,v 1.54 2002/11/24 22:35:42 christos Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <stdlib.h>
49
50 #include "shell.h"
51 #include "parser.h"
52 #include "nodes.h"
53 #include "expand.h" /* defines rmescapes() */
54 #include "eval.h" /* defines commandname */
55 #include "redir.h" /* defines copyfd() */
56 #include "syntax.h"
57 #include "options.h"
58 #include "input.h"
59 #include "output.h"
60 #include "var.h"
61 #include "error.h"
62 #include "memalloc.h"
63 #include "mystring.h"
64 #include "alias.h"
65 #include "show.h"
66 #ifndef SMALL
67 #include "myhistedit.h"
68 #endif
69
70 /*
71 * Shell command parser.
72 */
73
74 #define EOFMARKLEN 79
75
76 /* values returned by readtoken */
77 #include "token.h"
78
79 #define OPENBRACE '{'
80 #define CLOSEBRACE '}'
81
82
/*
 * Bookkeeping for one here document whose body has not been read yet.
 * Records are chained through `next' on heredoclist until parseheredoc()
 * consumes them.
 */
struct heredoc {
	struct heredoc *next;	/* following pending here document, or NULL */
	union node *here;	/* the redirection node this document feeds */
	char *eofmark;		/* terminator word that ends the document */
	int striptabs;		/* nonzero: drop leading tabs (the <<- form) */
};
89
90
91
92 static int noalias = 0; /* when set, don't handle aliases */
93 struct heredoc *heredoclist; /* list of here documents to read */
94 int parsebackquote; /* nonzero if we are inside backquotes */
95 int doprompt; /* if set, prompt the user */
96 int needprompt; /* true if interactive and at start of line */
97 int lasttoken; /* last token read */
98 MKINIT int tokpushback; /* last token pushed back */
99 char *wordtext; /* text of last word returned by readtoken */
100 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
101 struct nodelist *backquotelist;
102 union node *redirnode;
103 struct heredoc *heredoc;
104 int quoteflag; /* set if (part of) last token was quoted */
105 int startlinno; /* line # where last token started */
106
107
108 STATIC union node *list(int);
109 STATIC union node *andor(void);
110 STATIC union node *pipeline(void);
111 STATIC union node *command(void);
112 STATIC union node *simplecmd(union node **, union node *);
113 STATIC union node *makename(void);
114 STATIC void parsefname(void);
115 STATIC void parseheredoc(void);
116 STATIC int peektoken(void);
117 STATIC int readtoken(void);
118 STATIC int xxreadtoken(void);
119 STATIC int readtoken1(int, char const *, char *, int);
120 STATIC int noexpand(char *);
121 STATIC void synexpect(int) __attribute__((__noreturn__));
122 STATIC void synerror(const char *) __attribute__((__noreturn__));
123 STATIC void setprompt(int);
124
125
126 /*
127 * Read and parse a command. Returns NEOF on end of file. (NULL is a
128 * valid parse tree indicating a blank line.)
129 */
130
131 union node *
132 parsecmd(int interact)
133 {
134 int t;
135
136 tokpushback = 0;
137 doprompt = interact;
138 if (doprompt)
139 setprompt(1);
140 else
141 setprompt(0);
142 needprompt = 0;
143 t = readtoken();
144 if (t == TEOF)
145 return NEOF;
146 if (t == TNL)
147 return NULL;
148 tokpushback++;
149 return list(1);
150 }
151
152
153 STATIC union node *
154 list(int nlflag)
155 {
156 union node *n1, *n2, *n3;
157 int tok;
158
159 checkkwd = 2;
160 if (nlflag == 0 && tokendlist[peektoken()])
161 return NULL;
162 n1 = NULL;
163 for (;;) {
164 n2 = andor();
165 tok = readtoken();
166 if (tok == TBACKGND) {
167 if (n2->type == NCMD || n2->type == NPIPE) {
168 n2->ncmd.backgnd = 1;
169 } else if (n2->type == NREDIR) {
170 n2->type = NBACKGND;
171 } else {
172 n3 = (union node *)stalloc(sizeof (struct nredir));
173 n3->type = NBACKGND;
174 n3->nredir.n = n2;
175 n3->nredir.redirect = NULL;
176 n2 = n3;
177 }
178 }
179 if (n1 == NULL) {
180 n1 = n2;
181 }
182 else {
183 n3 = (union node *)stalloc(sizeof (struct nbinary));
184 n3->type = NSEMI;
185 n3->nbinary.ch1 = n1;
186 n3->nbinary.ch2 = n2;
187 n1 = n3;
188 }
189 switch (tok) {
190 case TBACKGND:
191 case TSEMI:
192 tok = readtoken();
193 /* fall through */
194 case TNL:
195 if (tok == TNL) {
196 parseheredoc();
197 if (nlflag)
198 return n1;
199 } else {
200 tokpushback++;
201 }
202 checkkwd = 2;
203 if (tokendlist[peektoken()])
204 return n1;
205 break;
206 case TEOF:
207 if (heredoclist)
208 parseheredoc();
209 else
210 pungetc(); /* push back EOF on input */
211 return n1;
212 default:
213 if (nlflag)
214 synexpect(-1);
215 tokpushback++;
216 return n1;
217 }
218 }
219 }
220
221
222
223 STATIC union node *
224 andor(void)
225 {
226 union node *n1, *n2, *n3;
227 int t;
228
229 n1 = pipeline();
230 for (;;) {
231 if ((t = readtoken()) == TAND) {
232 t = NAND;
233 } else if (t == TOR) {
234 t = NOR;
235 } else {
236 tokpushback++;
237 return n1;
238 }
239 n2 = pipeline();
240 n3 = (union node *)stalloc(sizeof (struct nbinary));
241 n3->type = t;
242 n3->nbinary.ch1 = n1;
243 n3->nbinary.ch2 = n2;
244 n1 = n3;
245 }
246 }
247
248
249
250 STATIC union node *
251 pipeline(void)
252 {
253 union node *n1, *n2, *pipenode;
254 struct nodelist *lp, *prev;
255 int negate;
256
257 negate = 0;
258 TRACE(("pipeline: entered\n"));
259 while (readtoken() == TNOT)
260 negate = !negate;
261 tokpushback++;
262 n1 = command();
263 if (readtoken() == TPIPE) {
264 pipenode = (union node *)stalloc(sizeof (struct npipe));
265 pipenode->type = NPIPE;
266 pipenode->npipe.backgnd = 0;
267 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
268 pipenode->npipe.cmdlist = lp;
269 lp->n = n1;
270 do {
271 prev = lp;
272 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
273 lp->n = command();
274 prev->next = lp;
275 } while (readtoken() == TPIPE);
276 lp->next = NULL;
277 n1 = pipenode;
278 }
279 tokpushback++;
280 if (negate) {
281 n2 = (union node *)stalloc(sizeof (struct nnot));
282 n2->type = NNOT;
283 n2->nnot.com = n1;
284 return n2;
285 } else
286 return n1;
287 }
288
289
290
291 STATIC union node *
292 command(void)
293 {
294 union node *n1, *n2;
295 union node *ap, **app;
296 union node *cp, **cpp;
297 union node *redir, **rpp;
298 int t, negate = 0;
299
300 checkkwd = 2;
301 redir = NULL;
302 n1 = NULL;
303 rpp = &redir;
304
305 /* Check for redirection which may precede command */
306 while (readtoken() == TREDIR) {
307 *rpp = n2 = redirnode;
308 rpp = &n2->nfile.next;
309 parsefname();
310 }
311 tokpushback++;
312
313 while (readtoken() == TNOT) {
314 TRACE(("command: TNOT recognized\n"));
315 negate = !negate;
316 }
317 tokpushback++;
318
319 switch (readtoken()) {
320 case TIF:
321 n1 = (union node *)stalloc(sizeof (struct nif));
322 n1->type = NIF;
323 n1->nif.test = list(0);
324 if (readtoken() != TTHEN)
325 synexpect(TTHEN);
326 n1->nif.ifpart = list(0);
327 n2 = n1;
328 while (readtoken() == TELIF) {
329 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
330 n2 = n2->nif.elsepart;
331 n2->type = NIF;
332 n2->nif.test = list(0);
333 if (readtoken() != TTHEN)
334 synexpect(TTHEN);
335 n2->nif.ifpart = list(0);
336 }
337 if (lasttoken == TELSE)
338 n2->nif.elsepart = list(0);
339 else {
340 n2->nif.elsepart = NULL;
341 tokpushback++;
342 }
343 if (readtoken() != TFI)
344 synexpect(TFI);
345 checkkwd = 1;
346 break;
347 case TWHILE:
348 case TUNTIL: {
349 int got;
350 n1 = (union node *)stalloc(sizeof (struct nbinary));
351 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
352 n1->nbinary.ch1 = list(0);
353 if ((got=readtoken()) != TDO) {
354 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
355 synexpect(TDO);
356 }
357 n1->nbinary.ch2 = list(0);
358 if (readtoken() != TDONE)
359 synexpect(TDONE);
360 checkkwd = 1;
361 break;
362 }
363 case TFOR:
364 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
365 synerror("Bad for loop variable");
366 n1 = (union node *)stalloc(sizeof (struct nfor));
367 n1->type = NFOR;
368 n1->nfor.var = wordtext;
369 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
370 app = ≈
371 while (readtoken() == TWORD) {
372 n2 = (union node *)stalloc(sizeof (struct narg));
373 n2->type = NARG;
374 n2->narg.text = wordtext;
375 n2->narg.backquote = backquotelist;
376 *app = n2;
377 app = &n2->narg.next;
378 }
379 *app = NULL;
380 n1->nfor.args = ap;
381 if (lasttoken != TNL && lasttoken != TSEMI)
382 synexpect(-1);
383 } else {
384 static char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
385 '@', '=', '\0'};
386 n2 = (union node *)stalloc(sizeof (struct narg));
387 n2->type = NARG;
388 n2->narg.text = argvars;
389 n2->narg.backquote = NULL;
390 n2->narg.next = NULL;
391 n1->nfor.args = n2;
392 /*
393 * Newline or semicolon here is optional (but note
394 * that the original Bourne shell only allowed NL).
395 */
396 if (lasttoken != TNL && lasttoken != TSEMI)
397 tokpushback++;
398 }
399 checkkwd = 2;
400 if ((t = readtoken()) == TDO)
401 t = TDONE;
402 else if (t == TBEGIN)
403 t = TEND;
404 else
405 synexpect(-1);
406 n1->nfor.body = list(0);
407 if (readtoken() != t)
408 synexpect(t);
409 checkkwd = 1;
410 break;
411 case TCASE:
412 n1 = (union node *)stalloc(sizeof (struct ncase));
413 n1->type = NCASE;
414 if (readtoken() != TWORD)
415 synexpect(TWORD);
416 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
417 n2->type = NARG;
418 n2->narg.text = wordtext;
419 n2->narg.backquote = backquotelist;
420 n2->narg.next = NULL;
421 while (readtoken() == TNL);
422 if (lasttoken != TWORD || ! equal(wordtext, "in"))
423 synerror("expecting \"in\"");
424 cpp = &n1->ncase.cases;
425 noalias = 1;
426 checkkwd = 2, readtoken();
427 do {
428 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
429 cp->type = NCLIST;
430 app = &cp->nclist.pattern;
431 for (;;) {
432 *app = ap = (union node *)stalloc(sizeof (struct narg));
433 ap->type = NARG;
434 ap->narg.text = wordtext;
435 ap->narg.backquote = backquotelist;
436 if (checkkwd = 2, readtoken() != TPIPE)
437 break;
438 app = &ap->narg.next;
439 readtoken();
440 }
441 ap->narg.next = NULL;
442 noalias = 0;
443 if (lasttoken != TRP) {
444 synexpect(TRP);
445 }
446 cp->nclist.body = list(0);
447
448 checkkwd = 2;
449 if ((t = readtoken()) != TESAC) {
450 if (t != TENDCASE) {
451 noalias = 0;
452 synexpect(TENDCASE);
453 } else {
454 noalias = 1;
455 checkkwd = 2;
456 readtoken();
457 }
458 }
459 cpp = &cp->nclist.next;
460 } while(lasttoken != TESAC);
461 noalias = 0;
462 *cpp = NULL;
463 checkkwd = 1;
464 break;
465 case TLP:
466 n1 = (union node *)stalloc(sizeof (struct nredir));
467 n1->type = NSUBSHELL;
468 n1->nredir.n = list(0);
469 n1->nredir.redirect = NULL;
470 if (readtoken() != TRP)
471 synexpect(TRP);
472 checkkwd = 1;
473 break;
474 case TBEGIN:
475 n1 = list(0);
476 if (readtoken() != TEND)
477 synexpect(TEND);
478 checkkwd = 1;
479 break;
480 /* Handle an empty command like other simple commands. */
481 case TSEMI:
482 /*
483 * An empty command before a ; doesn't make much sense, and
484 * should certainly be disallowed in the case of `if ;'.
485 */
486 if (!redir)
487 synexpect(-1);
488 case TAND:
489 case TOR:
490 case TNL:
491 case TEOF:
492 case TWORD:
493 case TRP:
494 tokpushback++;
495 n1 = simplecmd(rpp, redir);
496 goto checkneg;
497 default:
498 synexpect(-1);
499 /* NOTREACHED */
500 }
501
502 /* Now check for redirection which may follow command */
503 while (readtoken() == TREDIR) {
504 *rpp = n2 = redirnode;
505 rpp = &n2->nfile.next;
506 parsefname();
507 }
508 tokpushback++;
509 *rpp = NULL;
510 if (redir) {
511 if (n1->type != NSUBSHELL) {
512 n2 = (union node *)stalloc(sizeof (struct nredir));
513 n2->type = NREDIR;
514 n2->nredir.n = n1;
515 n1 = n2;
516 }
517 n1->nredir.redirect = redir;
518 }
519
520 checkneg:
521 if (negate) {
522 n2 = (union node *)stalloc(sizeof (struct nnot));
523 n2->type = NNOT;
524 n2->nnot.com = n1;
525 return n2;
526 }
527 else
528 return n1;
529 }
530
531
532 STATIC union node *
533 simplecmd(union node **rpp, union node *redir)
534 {
535 union node *args, **app;
536 union node **orig_rpp = rpp;
537 union node *n = NULL, *n2;
538 int negate = 0;
539
540 /* If we don't have any redirections already, then we must reset */
541 /* rpp to be the address of the local redir variable. */
542 if (redir == 0)
543 rpp = &redir;
544
545 args = NULL;
546 app = &args;
547 /*
548 * We save the incoming value, because we need this for shell
549 * functions. There can not be a redirect or an argument between
550 * the function name and the open parenthesis.
551 */
552 orig_rpp = rpp;
553
554 while (readtoken() == TNOT) {
555 TRACE(("command: TNOT recognized\n"));
556 negate = !negate;
557 }
558 tokpushback++;
559
560 for (;;) {
561 if (readtoken() == TWORD) {
562 n = (union node *)stalloc(sizeof (struct narg));
563 n->type = NARG;
564 n->narg.text = wordtext;
565 n->narg.backquote = backquotelist;
566 *app = n;
567 app = &n->narg.next;
568 } else if (lasttoken == TREDIR) {
569 *rpp = n = redirnode;
570 rpp = &n->nfile.next;
571 parsefname(); /* read name of redirection file */
572 } else if (lasttoken == TLP && app == &args->narg.next
573 && rpp == orig_rpp) {
574 /* We have a function */
575 if (readtoken() != TRP)
576 synexpect(TRP);
577 #ifdef notdef
578 if (! goodname(n->narg.text))
579 synerror("Bad function name");
580 #endif
581 n->type = NDEFUN;
582 n->narg.next = command();
583 goto checkneg;
584 } else {
585 tokpushback++;
586 break;
587 }
588 }
589 *app = NULL;
590 *rpp = NULL;
591 n = (union node *)stalloc(sizeof (struct ncmd));
592 n->type = NCMD;
593 n->ncmd.backgnd = 0;
594 n->ncmd.args = args;
595 n->ncmd.redirect = redir;
596
597 checkneg:
598 if (negate) {
599 n2 = (union node *)stalloc(sizeof (struct nnot));
600 n2->type = NNOT;
601 n2->nnot.com = n;
602 return n2;
603 }
604 else
605 return n;
606 }
607
608 STATIC union node *
609 makename(void)
610 {
611 union node *n;
612
613 n = (union node *)stalloc(sizeof (struct narg));
614 n->type = NARG;
615 n->narg.next = NULL;
616 n->narg.text = wordtext;
617 n->narg.backquote = backquotelist;
618 return n;
619 }
620
621 void fixredir(union node *n, const char *text, int err)
622 {
623 TRACE(("Fix redir %s %d\n", text, err));
624 if (!err)
625 n->ndup.vname = NULL;
626
627 if (is_digit(text[0]) && text[1] == '\0')
628 n->ndup.dupfd = digit_val(text[0]);
629 else if (text[0] == '-' && text[1] == '\0')
630 n->ndup.dupfd = -1;
631 else {
632
633 if (err)
634 synerror("Bad fd number");
635 else
636 n->ndup.vname = makename();
637 }
638 }
639
640
641 STATIC void
642 parsefname(void)
643 {
644 union node *n = redirnode;
645
646 if (readtoken() != TWORD)
647 synexpect(-1);
648 if (n->type == NHERE) {
649 struct heredoc *here = heredoc;
650 struct heredoc *p;
651 int i;
652
653 if (quoteflag == 0)
654 n->type = NXHERE;
655 TRACE(("Here document %d\n", n->type));
656 if (here->striptabs) {
657 while (*wordtext == '\t')
658 wordtext++;
659 }
660 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
661 synerror("Illegal eof marker for << redirection");
662 rmescapes(wordtext);
663 here->eofmark = wordtext;
664 here->next = NULL;
665 if (heredoclist == NULL)
666 heredoclist = here;
667 else {
668 for (p = heredoclist ; p->next ; p = p->next);
669 p->next = here;
670 }
671 } else if (n->type == NTOFD || n->type == NFROMFD) {
672 fixredir(n, wordtext, 0);
673 } else {
674 n->nfile.fname = makename();
675 }
676 }
677
678
679 /*
680 * Input any here documents.
681 */
682
683 STATIC void
684 parseheredoc(void)
685 {
686 struct heredoc *here;
687 union node *n;
688
689 while (heredoclist) {
690 here = heredoclist;
691 heredoclist = here->next;
692 if (needprompt) {
693 setprompt(2);
694 needprompt = 0;
695 }
696 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
697 here->eofmark, here->striptabs);
698 n = (union node *)stalloc(sizeof (struct narg));
699 n->narg.type = NARG;
700 n->narg.next = NULL;
701 n->narg.text = wordtext;
702 n->narg.backquote = backquotelist;
703 here->here->nhere.doc = n;
704 }
705 }
706
707 STATIC int
708 peektoken(void)
709 {
710 int t;
711
712 t = readtoken();
713 tokpushback++;
714 return (t);
715 }
716
717 STATIC int
718 readtoken(void)
719 {
720 int t;
721 int savecheckkwd = checkkwd;
722 #ifdef DEBUG
723 int alreadyseen = tokpushback;
724 #endif
725 struct alias *ap;
726
727 top:
728 t = xxreadtoken();
729
730 if (checkkwd) {
731 /*
732 * eat newlines
733 */
734 if (checkkwd == 2) {
735 checkkwd = 0;
736 while (t == TNL) {
737 parseheredoc();
738 t = xxreadtoken();
739 }
740 } else
741 checkkwd = 0;
742 /*
743 * check for keywords and aliases
744 */
745 if (t == TWORD && !quoteflag)
746 {
747 const char *const *pp;
748
749 for (pp = parsekwd; *pp; pp++) {
750 if (**pp == *wordtext && equal(*pp, wordtext))
751 {
752 lasttoken = t = pp -
753 parsekwd + KWDOFFSET;
754 TRACE(("keyword %s recognized\n", tokname[t]));
755 goto out;
756 }
757 }
758 if(!noalias &&
759 (ap = lookupalias(wordtext, 1)) != NULL) {
760 pushstring(ap->val, strlen(ap->val), ap);
761 checkkwd = savecheckkwd;
762 goto top;
763 }
764 }
765 out:
766 checkkwd = (t == TNOT) ? savecheckkwd : 0;
767 }
768 #ifdef DEBUG
769 if (!alreadyseen)
770 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
771 else
772 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
773 #endif
774 return (t);
775 }
776
777
778 /*
779 * Read the next input token.
780 * If the token is a word, we set backquotelist to the list of cmds in
781 * backquotes. We set quoteflag to true if any part of the word was
782 * quoted.
783 * If the token is TREDIR, then we set redirnode to a structure containing
784 * the redirection.
785 * In all cases, the variable startlinno is set to the number of the line
786 * on which the token starts.
787 *
788 * [Change comment: here documents and internal procedures]
789 * [Readtoken shouldn't have any arguments. Perhaps we should make the
790 * word parsing code into a separate routine. In this case, readtoken
791 * doesn't need to have any internal procedures, but parseword does.
792 * We could also make parseoperator in essence the main routine, and
793 * have parseword (readtoken1?) handle both words and redirection.]
794 */
795
796 #define RETURN(token) return lasttoken = token
797
798 STATIC int
799 xxreadtoken(void)
800 {
801 int c;
802
803 if (tokpushback) {
804 tokpushback = 0;
805 return lasttoken;
806 }
807 if (needprompt) {
808 setprompt(2);
809 needprompt = 0;
810 }
811 startlinno = plinno;
812 for (;;) { /* until token or start of word found */
813 c = pgetc_macro();
814 if (c == ' ' || c == '\t')
815 continue; /* quick check for white space first */
816 switch (c) {
817 case ' ': case '\t':
818 continue;
819 case '#':
820 while ((c = pgetc()) != '\n' && c != PEOF);
821 pungetc();
822 continue;
823 case '\\':
824 if (pgetc() == '\n') {
825 startlinno = ++plinno;
826 if (doprompt)
827 setprompt(2);
828 else
829 setprompt(0);
830 continue;
831 }
832 pungetc();
833 goto breakloop;
834 case '\n':
835 plinno++;
836 needprompt = doprompt;
837 RETURN(TNL);
838 case PEOF:
839 RETURN(TEOF);
840 case '&':
841 if (pgetc() == '&')
842 RETURN(TAND);
843 pungetc();
844 RETURN(TBACKGND);
845 case '|':
846 if (pgetc() == '|')
847 RETURN(TOR);
848 pungetc();
849 RETURN(TPIPE);
850 case ';':
851 if (pgetc() == ';')
852 RETURN(TENDCASE);
853 pungetc();
854 RETURN(TSEMI);
855 case '(':
856 RETURN(TLP);
857 case ')':
858 RETURN(TRP);
859 default:
860 goto breakloop;
861 }
862 }
863 breakloop:
864 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
865 #undef RETURN
866 }
867
868
869
870 /*
871 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
872 * is not NULL, read a here document. In the latter case, eofmark is the
873 * word which marks the end of the document and striptabs is true if
874 * leading tabs should be stripped from the document. The argument firstc
875 * is the first character of the input token or document.
876 *
877 * Because C does not have internal subroutines, I have simulated them
878 * using goto's to implement the subroutine linkage. The following macros
879 * will run code that appears at the end of readtoken1.
880 */
881
882 #define CHECKEND() {goto checkend; checkend_return:;}
883 #define PARSEREDIR() {goto parseredir; parseredir_return:;}
884 #define PARSESUB() {goto parsesub; parsesub_return:;}
885 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
886 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
887 #define PARSEARITH() {goto parsearith; parsearith_return:;}
888
/*
 * Keep track of nested doublequotes in dblquote and dblquotep.
 * We use dblquote for the first 32 levels, and we expand to a malloc'ed
 * region for levels above that.  Usually we never need to malloc.
 * This code assumes that an int is 32 bits.  We don't use uint32_t,
 * because the rest of the code does not.
 *
 * All three macros operate on variables named varnest, dblquote and
 * dblquotep that must be in scope where they are used.  The masks are
 * built from 1U so that level 31 does not shift a signed 1 into the
 * sign bit, and the two statement macros are wrapped in
 * do { } while (0) so that each expands to exactly one statement and
 * stays correct inside an unbraced if/else.
 */
#define ISDBLQUOTE() ((varnest < 32) ? (dblquote & (1U << varnest)) : \
	(dblquotep[(varnest / 32) - 1] & (1U << (varnest % 32))))

#define SETDBLQUOTE() \
	do { \
		if (varnest < 32) \
			dblquote |= (1U << varnest); \
		else \
			dblquotep[(varnest / 32) - 1] |= (1U << (varnest % 32)); \
	} while (0)

#define CLRDBLQUOTE() \
	do { \
		if (varnest < 32) \
			dblquote &= ~(1U << varnest); \
		else \
			dblquotep[(varnest / 32) - 1] &= ~(1U << (varnest % 32)); \
	} while (0)
910
911 STATIC int
912 readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
913 {
914 int c = firstc;
915 char *out;
916 int len;
917 char line[EOFMARKLEN + 1];
918 struct nodelist *bqlist;
919 int quotef;
920 int *dblquotep = NULL;
921 size_t maxnest = 32;
922 int dblquote;
923 int varnest; /* levels of variables expansion */
924 int arinest; /* levels of arithmetic expansion */
925 int parenlevel; /* levels of parens in arithmetic */
926 int oldstyle;
927 char const *prevsyntax; /* syntax before arithmetic */
928 #if __GNUC__
929 /* Avoid longjmp clobbering */
930 (void) &maxnest;
931 (void) &dblquotep;
932 (void) &out;
933 (void) "ef;
934 (void) &dblquote;
935 (void) &varnest;
936 (void) &arinest;
937 (void) &parenlevel;
938 (void) &oldstyle;
939 (void) &prevsyntax;
940 (void) &syntax;
941 #endif
942
943 startlinno = plinno;
944 dblquote = 0;
945 varnest = 0;
946 if (syntax == DQSYNTAX) {
947 SETDBLQUOTE();
948 }
949 quotef = 0;
950 bqlist = NULL;
951 arinest = 0;
952 parenlevel = 0;
953
954 STARTSTACKSTR(out);
955 loop: { /* for each line, until end of word */
956 #if ATTY
957 if (c == '\034' && doprompt
958 && attyset() && ! equal(termval(), "emacs")) {
959 attyline();
960 if (syntax == BASESYNTAX)
961 return readtoken();
962 c = pgetc();
963 goto loop;
964 }
965 #endif
966 CHECKEND(); /* set c to PEOF if at end of here document */
967 for (;;) { /* until end of line or end of word */
968 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */
969 switch(syntax[c]) {
970 case CNL: /* '\n' */
971 if (syntax == BASESYNTAX)
972 goto endword; /* exit outer loop */
973 USTPUTC(c, out);
974 plinno++;
975 if (doprompt)
976 setprompt(2);
977 else
978 setprompt(0);
979 c = pgetc();
980 goto loop; /* continue outer loop */
981 case CWORD:
982 USTPUTC(c, out);
983 break;
984 case CCTL:
985 if (eofmark == NULL || ISDBLQUOTE())
986 USTPUTC(CTLESC, out);
987 USTPUTC(c, out);
988 break;
989 case CBACK: /* backslash */
990 c = pgetc();
991 if (c == PEOF) {
992 USTPUTC('\\', out);
993 pungetc();
994 } else if (c == '\n') {
995 if (doprompt)
996 setprompt(2);
997 else
998 setprompt(0);
999 } else {
1000 if (ISDBLQUOTE() && c != '\\' &&
1001 c != '`' && c != '$' &&
1002 (c != '"' || eofmark != NULL))
1003 USTPUTC('\\', out);
1004 if (SQSYNTAX[c] == CCTL)
1005 USTPUTC(CTLESC, out);
1006 else if (eofmark == NULL)
1007 USTPUTC(CTLQUOTEMARK, out);
1008 USTPUTC(c, out);
1009 quotef++;
1010 }
1011 break;
1012 case CSQUOTE:
1013 if (syntax != SQSYNTAX) {
1014 if (eofmark == NULL)
1015 USTPUTC(CTLQUOTEMARK, out);
1016 syntax = SQSYNTAX;
1017 break;
1018 }
1019 /* FALLTHROUGH */
1020 case CDQUOTE:
1021 if (eofmark != NULL && arinest == 0 &&
1022 varnest == 0) {
1023 USTPUTC(c, out);
1024 } else {
1025 if (arinest) {
1026 if (c != '"' || ISDBLQUOTE()) {
1027 syntax = ARISYNTAX;
1028 CLRDBLQUOTE();
1029 } else {
1030 syntax = DQSYNTAX;
1031 SETDBLQUOTE();
1032 USTPUTC(CTLQUOTEMARK, out);
1033 }
1034 } else if (eofmark == NULL) {
1035 if (c != '"' || ISDBLQUOTE()) {
1036 syntax = BASESYNTAX;
1037 CLRDBLQUOTE();
1038 } else {
1039 syntax = DQSYNTAX;
1040 SETDBLQUOTE();
1041 USTPUTC(CTLQUOTEMARK, out);
1042 }
1043 }
1044 quotef++;
1045 }
1046 break;
1047 case CVAR: /* '$' */
1048 PARSESUB(); /* parse substitution */
1049 break;
1050 case CENDVAR: /* CLOSEBRACE */
1051 if (varnest > 0 && !ISDBLQUOTE()) {
1052 varnest--;
1053 USTPUTC(CTLENDVAR, out);
1054 } else {
1055 USTPUTC(c, out);
1056 }
1057 break;
1058 case CLP: /* '(' in arithmetic */
1059 parenlevel++;
1060 USTPUTC(c, out);
1061 break;
1062 case CRP: /* ')' in arithmetic */
1063 if (parenlevel > 0) {
1064 USTPUTC(c, out);
1065 --parenlevel;
1066 } else {
1067 if (pgetc() == ')') {
1068 if (--arinest == 0) {
1069 USTPUTC(CTLENDARI, out);
1070 syntax = prevsyntax;
1071 if (syntax == DQSYNTAX)
1072 SETDBLQUOTE();
1073 else
1074 CLRDBLQUOTE();
1075 } else
1076 USTPUTC(')', out);
1077 } else {
1078 /*
1079 * unbalanced parens
1080 * (don't 2nd guess - no error)
1081 */
1082 pungetc();
1083 USTPUTC(')', out);
1084 }
1085 }
1086 break;
1087 case CBQUOTE: /* '`' */
1088 PARSEBACKQOLD();
1089 break;
1090 case CEOF:
1091 goto endword; /* exit outer loop */
1092 default:
1093 if (varnest == 0)
1094 goto endword; /* exit outer loop */
1095 USTPUTC(c, out);
1096 }
1097 c = pgetc_macro();
1098 }
1099 }
1100 endword:
1101 if (syntax == ARISYNTAX)
1102 synerror("Missing '))'");
1103 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1104 synerror("Unterminated quoted string");
1105 if (varnest != 0) {
1106 startlinno = plinno;
1107 /* { */
1108 synerror("Missing '}'");
1109 }
1110 USTPUTC('\0', out);
1111 len = out - stackblock();
1112 out = stackblock();
1113 if (eofmark == NULL) {
1114 if ((c == '>' || c == '<')
1115 && quotef == 0
1116 && len <= 2
1117 && (*out == '\0' || is_digit(*out))) {
1118 PARSEREDIR();
1119 return lasttoken = TREDIR;
1120 } else {
1121 pungetc();
1122 }
1123 }
1124 quoteflag = quotef;
1125 backquotelist = bqlist;
1126 grabstackblock(len);
1127 wordtext = out;
1128 if (dblquotep != NULL)
1129 ckfree(dblquotep);
1130 return lasttoken = TWORD;
1131 /* end of readtoken routine */
1132
1133
1134
1135 /*
1136 * Check to see whether we are at the end of the here document. When this
1137 * is called, c is set to the first character of the next input line. If
1138 * we are at the end of the here document, this routine sets the c to PEOF.
1139 */
1140
1141 checkend: {
1142 if (eofmark) {
1143 if (striptabs) {
1144 while (c == '\t')
1145 c = pgetc();
1146 }
1147 if (c == *eofmark) {
1148 if (pfgets(line, sizeof line) != NULL) {
1149 char *p, *q;
1150
1151 p = line;
1152 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1153 if (*p == '\n' && *q == '\0') {
1154 c = PEOF;
1155 plinno++;
1156 needprompt = doprompt;
1157 } else {
1158 pushstring(line, strlen(line), NULL);
1159 }
1160 }
1161 }
1162 }
1163 goto checkend_return;
1164 }
1165
1166
1167 /*
1168 * Parse a redirection operator. The variable "out" points to a string
1169 * specifying the fd to be redirected. The variable "c" contains the
1170 * first character of the redirection operator.
1171 */
1172
1173 parseredir: {
1174 char fd = *out;
1175 union node *np;
1176
1177 np = (union node *)stalloc(sizeof (struct nfile));
1178 if (c == '>') {
1179 np->nfile.fd = 1;
1180 c = pgetc();
1181 if (c == '>')
1182 np->type = NAPPEND;
1183 else if (c == '|')
1184 np->type = NCLOBBER;
1185 else if (c == '&')
1186 np->type = NTOFD;
1187 else {
1188 np->type = NTO;
1189 pungetc();
1190 }
1191 } else { /* c == '<' */
1192 np->nfile.fd = 0;
1193 switch (c = pgetc()) {
1194 case '<':
1195 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1196 np = (union node *)stalloc(sizeof (struct nhere));
1197 np->nfile.fd = 0;
1198 }
1199 np->type = NHERE;
1200 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1201 heredoc->here = np;
1202 if ((c = pgetc()) == '-') {
1203 heredoc->striptabs = 1;
1204 } else {
1205 heredoc->striptabs = 0;
1206 pungetc();
1207 }
1208 break;
1209
1210 case '&':
1211 np->type = NFROMFD;
1212 break;
1213
1214 case '>':
1215 np->type = NFROMTO;
1216 break;
1217
1218 default:
1219 np->type = NFROM;
1220 pungetc();
1221 break;
1222 }
1223 }
1224 if (fd != '\0')
1225 np->nfile.fd = digit_val(fd);
1226 redirnode = np;
1227 goto parseredir_return;
1228 }
1229
1230
1231 /*
1232 * Parse a substitution. At this point, we have read the dollar sign
1233 * and nothing else.
1234 */
1235
1236 parsesub: {
1237 int subtype;
1238 int typeloc;
1239 int flags;
1240 char *p;
1241 static const char types[] = "}-+?=";
1242
1243 c = pgetc();
1244 if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1245 USTPUTC('$', out);
1246 pungetc();
1247 } else if (c == '(') { /* $(command) or $((arith)) */
1248 if (pgetc() == '(') {
1249 PARSEARITH();
1250 } else {
1251 pungetc();
1252 PARSEBACKQNEW();
1253 }
1254 } else {
1255 USTPUTC(CTLVAR, out);
1256 typeloc = out - stackblock();
1257 USTPUTC(VSNORMAL, out);
1258 subtype = VSNORMAL;
1259 if (c == OPENBRACE) {
1260 c = pgetc();
1261 if (c == '#') {
1262 if ((c = pgetc()) == CLOSEBRACE)
1263 c = '#';
1264 else
1265 subtype = VSLENGTH;
1266 }
1267 else
1268 subtype = 0;
1269 }
1270 if (is_name(c)) {
1271 do {
1272 STPUTC(c, out);
1273 c = pgetc();
1274 } while (is_in_name(c));
1275 } else if (is_digit(c)) {
1276 do {
1277 USTPUTC(c, out);
1278 c = pgetc();
1279 } while (is_digit(c));
1280 }
1281 else if (is_special(c)) {
1282 USTPUTC(c, out);
1283 c = pgetc();
1284 }
1285 else
1286 badsub: synerror("Bad substitution");
1287
1288 STPUTC('=', out);
1289 flags = 0;
1290 if (subtype == 0) {
1291 switch (c) {
1292 case ':':
1293 flags = VSNUL;
1294 c = pgetc();
1295 /*FALLTHROUGH*/
1296 default:
1297 p = strchr(types, c);
1298 if (p == NULL)
1299 goto badsub;
1300 subtype = p - types + VSNORMAL;
1301 break;
1302 case '%':
1303 case '#':
1304 {
1305 int cc = c;
1306 subtype = c == '#' ? VSTRIMLEFT :
1307 VSTRIMRIGHT;
1308 c = pgetc();
1309 if (c == cc)
1310 subtype++;
1311 else
1312 pungetc();
1313 break;
1314 }
1315 }
1316 } else {
1317 pungetc();
1318 }
1319 if (ISDBLQUOTE() || arinest)
1320 flags |= VSQUOTE;
1321 *(stackblock() + typeloc) = subtype | flags;
1322 if (subtype != VSNORMAL) {
1323 varnest++;
1324 if (varnest >= maxnest) {
1325 dblquotep = ckrealloc(dblquotep, maxnest / 8);
1326 dblquotep[(maxnest / 32) - 1] = 0;
1327 maxnest += 32;
1328 }
1329 }
1330 }
1331 goto parsesub_return;
1332 }
1333
1334
1335 /*
1336 * Called to parse command substitutions. Newstyle is set if the command
1337 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1338 * list of commands (passed by reference), and savelen is the number of
1339 * characters on the top of the stack which must be preserved.
1340 */
1341
1342 parsebackq: {
1343 struct nodelist **nlpp;
1344 int savepbq;
1345 union node *n;
1346 char *volatile str;
1347 struct jmploc jmploc;
1348 struct jmploc *volatile savehandler;
1349 int savelen;
1350 int saveprompt;
1351 #ifdef __GNUC__
1352 (void) &saveprompt;
1353 #endif
1354
1355 savepbq = parsebackquote;
1356 if (setjmp(jmploc.loc)) {
1357 if (str)
1358 ckfree(str);
1359 parsebackquote = 0;
1360 handler = savehandler;
1361 longjmp(handler->loc, 1);
1362 }
1363 INTOFF;
1364 str = NULL;
1365 savelen = out - stackblock();
1366 if (savelen > 0) {
1367 str = ckmalloc(savelen);
1368 memcpy(str, stackblock(), savelen);
1369 }
1370 savehandler = handler;
1371 handler = &jmploc;
1372 INTON;
1373 if (oldstyle) {
1374 /* We must read until the closing backquote, giving special
1375 treatment to some slashes, and then push the string and
1376 reread it as input, interpreting it normally. */
1377 char *pout;
1378 int pc;
1379 int psavelen;
1380 char *pstr;
1381
1382
1383 STARTSTACKSTR(pout);
1384 for (;;) {
1385 if (needprompt) {
1386 setprompt(2);
1387 needprompt = 0;
1388 }
1389 switch (pc = pgetc()) {
1390 case '`':
1391 goto done;
1392
1393 case '\\':
1394 if ((pc = pgetc()) == '\n') {
1395 plinno++;
1396 if (doprompt)
1397 setprompt(2);
1398 else
1399 setprompt(0);
1400 /*
1401 * If eating a newline, avoid putting
1402 * the newline into the new character
1403 * stream (via the STPUTC after the
1404 * switch).
1405 */
1406 continue;
1407 }
1408 if (pc != '\\' && pc != '`' && pc != '$'
1409 && (!ISDBLQUOTE() || pc != '"'))
1410 STPUTC('\\', pout);
1411 break;
1412
1413 case '\n':
1414 plinno++;
1415 needprompt = doprompt;
1416 break;
1417
1418 case PEOF:
1419 startlinno = plinno;
1420 synerror("EOF in backquote substitution");
1421 break;
1422
1423 default:
1424 break;
1425 }
1426 STPUTC(pc, pout);
1427 }
1428 done:
1429 STPUTC('\0', pout);
1430 psavelen = pout - stackblock();
1431 if (psavelen > 0) {
1432 pstr = grabstackstr(pout);
1433 setinputstring(pstr, 1);
1434 }
1435 }
1436 nlpp = &bqlist;
1437 while (*nlpp)
1438 nlpp = &(*nlpp)->next;
1439 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1440 (*nlpp)->next = NULL;
1441 parsebackquote = oldstyle;
1442
1443 if (oldstyle) {
1444 saveprompt = doprompt;
1445 doprompt = 0;
1446 }
1447
1448 n = list(0);
1449
1450 if (oldstyle)
1451 doprompt = saveprompt;
1452 else {
1453 if (readtoken() != TRP)
1454 synexpect(TRP);
1455 }
1456
1457 (*nlpp)->n = n;
1458 if (oldstyle) {
1459 /*
1460 * Start reading from old file again, ignoring any pushed back
1461 * tokens left from the backquote parsing
1462 */
1463 popfile();
1464 tokpushback = 0;
1465 }
1466 while (stackblocksize() <= savelen)
1467 growstackblock();
1468 STARTSTACKSTR(out);
1469 if (str) {
1470 memcpy(out, str, savelen);
1471 STADJUST(savelen, out);
1472 INTOFF;
1473 ckfree(str);
1474 str = NULL;
1475 INTON;
1476 }
1477 parsebackquote = savepbq;
1478 handler = savehandler;
1479 if (arinest || ISDBLQUOTE())
1480 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1481 else
1482 USTPUTC(CTLBACKQ, out);
1483 if (oldstyle)
1484 goto parsebackq_oldreturn;
1485 else
1486 goto parsebackq_newreturn;
1487 }
1488
1489 /*
1490 * Parse an arithmetic expansion (indicate start of one and set state)
1491 */
1492 parsearith: {
1493
1494 if (++arinest == 1) {
1495 prevsyntax = syntax;
1496 syntax = ARISYNTAX;
1497 USTPUTC(CTLARI, out);
1498 if (ISDBLQUOTE())
1499 USTPUTC('"',out);
1500 else
1501 USTPUTC(' ',out);
1502 } else {
1503 /*
1504 * we collapse embedded arithmetic expansion to
1505 * parenthesis, which should be equivalent
1506 */
1507 USTPUTC('(', out);
1508 }
1509 goto parsearith_return;
1510 }
1511
1512 } /* end of readtoken */
1513
1514
1515
/*
 * Reset hook, only visible when the mkinit symbol is defined.  It zeroes
 * checkkwd and clears tokpushback so that no pushed-back token is picked
 * up again afterwards.
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1522
1523 /*
1524 * Returns true if the text contains nothing to expand (no dollar signs
1525 * or backquotes).
1526 */
1527
1528 STATIC int
1529 noexpand(char *text)
1530 {
1531 char *p;
1532 char c;
1533
1534 p = text;
1535 while ((c = *p++) != '\0') {
1536 if (c == CTLQUOTEMARK)
1537 continue;
1538 if (c == CTLESC)
1539 p++;
1540 else if (BASESYNTAX[(int)c] == CCTL)
1541 return 0;
1542 }
1543 return 1;
1544 }
1545
1546
/*
 * Check whether the argument is a legal variable name: a letter or
 * underscore followed by zero or more letters, underscores, and digits.
 * The first character is tested with is_name(), every later one with
 * is_in_name().  Returns 1 for a legal name and 0 otherwise.
 */

int
goodname(char *name)
{
	char *cp = name;

	if (!is_name(*cp))
		return 0;
	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
1566
1567
1568 /*
1569 * Called when an unexpected token is read during the parse. The argument
1570 * is the token that is expected, or -1 if more than one type of token can
1571 * occur at this point.
1572 */
1573
1574 STATIC void
1575 synexpect(int token)
1576 {
1577 char msg[64];
1578
1579 if (token >= 0) {
1580 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1581 tokname[lasttoken], tokname[token]);
1582 } else {
1583 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1584 }
1585 synerror(msg);
1586 /* NOTREACHED */
1587 }
1588
1589
1590 STATIC void
1591 synerror(const char *msg)
1592 {
1593 if (commandname)
1594 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1595 outfmt(&errout, "Syntax error: %s\n", msg);
1596 error((char *)NULL);
1597 /* NOTREACHED */
1598 }
1599
1600 STATIC void
1601 setprompt(int which)
1602 {
1603 whichprompt = which;
1604
1605 #ifndef SMALL
1606 if (!el)
1607 #endif
1608 out2str(getprompt(NULL));
1609 }
1610
1611 /*
1612 * called by editline -- any expansions to the prompt
1613 * should be added here.
1614 */
1615 const char *
1616 getprompt(void *unused)
1617 {
1618 switch (whichprompt) {
1619 case 0:
1620 return "";
1621 case 1:
1622 return ps1val();
1623 case 2:
1624 return ps2val();
1625 default:
1626 return "<internal prompt error>";
1627 }
1628 }
1629