/*	$NetBSD: parser.c,v 1.53 2002/05/15 16:33:35 christos Exp $	*/
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
43 #else
44 __RCSID("$NetBSD: parser.c,v 1.53 2002/05/15 16:33:35 christos Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <stdlib.h>
49
50 #include "shell.h"
51 #include "parser.h"
52 #include "nodes.h"
53 #include "expand.h" /* defines rmescapes() */
54 #include "eval.h" /* defines commandname */
55 #include "redir.h" /* defines copyfd() */
56 #include "syntax.h"
57 #include "options.h"
58 #include "input.h"
59 #include "output.h"
60 #include "var.h"
61 #include "error.h"
62 #include "memalloc.h"
63 #include "mystring.h"
64 #include "alias.h"
65 #include "show.h"
66 #ifndef SMALL
67 #include "myhistedit.h"
68 #endif
69
70 /*
71 * Shell command parser.
72 */
73
74 #define EOFMARKLEN 79
75
76 /* values returned by readtoken */
77 #include "token.h"
78
79 #define OPENBRACE '{'
80 #define CLOSEBRACE '}'
81
82
83 struct heredoc {
84 struct heredoc *next; /* next here document in list */
85 union node *here; /* redirection node */
86 char *eofmark; /* string indicating end of input */
87 int striptabs; /* if set, strip leading tabs */
88 };
89
90
91
92 static int noalias = 0; /* when set, don't handle aliases */
93 struct heredoc *heredoclist; /* list of here documents to read */
94 int parsebackquote; /* nonzero if we are inside backquotes */
95 int doprompt; /* if set, prompt the user */
96 int needprompt; /* true if interactive and at start of line */
97 int lasttoken; /* last token read */
98 MKINIT int tokpushback; /* last token pushed back */
99 char *wordtext; /* text of last word returned by readtoken */
100 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
101 struct nodelist *backquotelist;
102 union node *redirnode;
103 struct heredoc *heredoc;
104 int quoteflag; /* set if (part of) last token was quoted */
105 int startlinno; /* line # where last token started */
106
107
108 STATIC union node *list __P((int));
109 STATIC union node *andor __P((void));
110 STATIC union node *pipeline __P((void));
111 STATIC union node *command __P((void));
112 STATIC union node *simplecmd __P((union node **, union node *));
113 STATIC union node *makename __P((void));
114 STATIC void parsefname __P((void));
115 STATIC void parseheredoc __P((void));
116 STATIC int peektoken __P((void));
117 STATIC int readtoken __P((void));
118 STATIC int xxreadtoken __P((void));
119 STATIC int readtoken1 __P((int, char const *, char *, int));
120 STATIC int noexpand __P((char *));
121 STATIC void synexpect __P((int)) __attribute__((noreturn));
122 STATIC void synerror __P((const char *)) __attribute__((noreturn));
123 STATIC void setprompt __P((int));
124
125
126 /*
127 * Read and parse a command. Returns NEOF on end of file. (NULL is a
128 * valid parse tree indicating a blank line.)
129 */
130
131 union node *
132 parsecmd(int interact)
133 {
134 int t;
135
136 tokpushback = 0;
137 doprompt = interact;
138 if (doprompt)
139 setprompt(1);
140 else
141 setprompt(0);
142 needprompt = 0;
143 t = readtoken();
144 if (t == TEOF)
145 return NEOF;
146 if (t == TNL)
147 return NULL;
148 tokpushback++;
149 return list(1);
150 }
151
152
153 STATIC union node *
154 list(nlflag)
155 int nlflag;
156 {
157 union node *n1, *n2, *n3;
158 int tok;
159
160 checkkwd = 2;
161 if (nlflag == 0 && tokendlist[peektoken()])
162 return NULL;
163 n1 = NULL;
164 for (;;) {
165 n2 = andor();
166 tok = readtoken();
167 if (tok == TBACKGND) {
168 if (n2->type == NCMD || n2->type == NPIPE) {
169 n2->ncmd.backgnd = 1;
170 } else if (n2->type == NREDIR) {
171 n2->type = NBACKGND;
172 } else {
173 n3 = (union node *)stalloc(sizeof (struct nredir));
174 n3->type = NBACKGND;
175 n3->nredir.n = n2;
176 n3->nredir.redirect = NULL;
177 n2 = n3;
178 }
179 }
180 if (n1 == NULL) {
181 n1 = n2;
182 }
183 else {
184 n3 = (union node *)stalloc(sizeof (struct nbinary));
185 n3->type = NSEMI;
186 n3->nbinary.ch1 = n1;
187 n3->nbinary.ch2 = n2;
188 n1 = n3;
189 }
190 switch (tok) {
191 case TBACKGND:
192 case TSEMI:
193 tok = readtoken();
194 /* fall through */
195 case TNL:
196 if (tok == TNL) {
197 parseheredoc();
198 if (nlflag)
199 return n1;
200 } else {
201 tokpushback++;
202 }
203 checkkwd = 2;
204 if (tokendlist[peektoken()])
205 return n1;
206 break;
207 case TEOF:
208 if (heredoclist)
209 parseheredoc();
210 else
211 pungetc(); /* push back EOF on input */
212 return n1;
213 default:
214 if (nlflag)
215 synexpect(-1);
216 tokpushback++;
217 return n1;
218 }
219 }
220 }
221
222
223
224 STATIC union node *
225 andor() {
226 union node *n1, *n2, *n3;
227 int t;
228
229 n1 = pipeline();
230 for (;;) {
231 if ((t = readtoken()) == TAND) {
232 t = NAND;
233 } else if (t == TOR) {
234 t = NOR;
235 } else {
236 tokpushback++;
237 return n1;
238 }
239 n2 = pipeline();
240 n3 = (union node *)stalloc(sizeof (struct nbinary));
241 n3->type = t;
242 n3->nbinary.ch1 = n1;
243 n3->nbinary.ch2 = n2;
244 n1 = n3;
245 }
246 }
247
248
249
250 STATIC union node *
251 pipeline() {
252 union node *n1, *n2, *pipenode;
253 struct nodelist *lp, *prev;
254 int negate;
255
256 negate = 0;
257 TRACE(("pipeline: entered\n"));
258 while (readtoken() == TNOT)
259 negate = !negate;
260 tokpushback++;
261 n1 = command();
262 if (readtoken() == TPIPE) {
263 pipenode = (union node *)stalloc(sizeof (struct npipe));
264 pipenode->type = NPIPE;
265 pipenode->npipe.backgnd = 0;
266 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
267 pipenode->npipe.cmdlist = lp;
268 lp->n = n1;
269 do {
270 prev = lp;
271 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
272 lp->n = command();
273 prev->next = lp;
274 } while (readtoken() == TPIPE);
275 lp->next = NULL;
276 n1 = pipenode;
277 }
278 tokpushback++;
279 if (negate) {
280 n2 = (union node *)stalloc(sizeof (struct nnot));
281 n2->type = NNOT;
282 n2->nnot.com = n1;
283 return n2;
284 } else
285 return n1;
286 }
287
288
289
290 STATIC union node *
291 command() {
292 union node *n1, *n2;
293 union node *ap, **app;
294 union node *cp, **cpp;
295 union node *redir, **rpp;
296 int t, negate = 0;
297
298 checkkwd = 2;
299 redir = NULL;
300 n1 = NULL;
301 rpp = &redir;
302
303 /* Check for redirection which may precede command */
304 while (readtoken() == TREDIR) {
305 *rpp = n2 = redirnode;
306 rpp = &n2->nfile.next;
307 parsefname();
308 }
309 tokpushback++;
310
311 while (readtoken() == TNOT) {
312 TRACE(("command: TNOT recognized\n"));
313 negate = !negate;
314 }
315 tokpushback++;
316
317 switch (readtoken()) {
318 case TIF:
319 n1 = (union node *)stalloc(sizeof (struct nif));
320 n1->type = NIF;
321 n1->nif.test = list(0);
322 if (readtoken() != TTHEN)
323 synexpect(TTHEN);
324 n1->nif.ifpart = list(0);
325 n2 = n1;
326 while (readtoken() == TELIF) {
327 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
328 n2 = n2->nif.elsepart;
329 n2->type = NIF;
330 n2->nif.test = list(0);
331 if (readtoken() != TTHEN)
332 synexpect(TTHEN);
333 n2->nif.ifpart = list(0);
334 }
335 if (lasttoken == TELSE)
336 n2->nif.elsepart = list(0);
337 else {
338 n2->nif.elsepart = NULL;
339 tokpushback++;
340 }
341 if (readtoken() != TFI)
342 synexpect(TFI);
343 checkkwd = 1;
344 break;
345 case TWHILE:
346 case TUNTIL: {
347 int got;
348 n1 = (union node *)stalloc(sizeof (struct nbinary));
349 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
350 n1->nbinary.ch1 = list(0);
351 if ((got=readtoken()) != TDO) {
352 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
353 synexpect(TDO);
354 }
355 n1->nbinary.ch2 = list(0);
356 if (readtoken() != TDONE)
357 synexpect(TDONE);
358 checkkwd = 1;
359 break;
360 }
361 case TFOR:
362 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
363 synerror("Bad for loop variable");
364 n1 = (union node *)stalloc(sizeof (struct nfor));
365 n1->type = NFOR;
366 n1->nfor.var = wordtext;
367 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
368 app = ≈
369 while (readtoken() == TWORD) {
370 n2 = (union node *)stalloc(sizeof (struct narg));
371 n2->type = NARG;
372 n2->narg.text = wordtext;
373 n2->narg.backquote = backquotelist;
374 *app = n2;
375 app = &n2->narg.next;
376 }
377 *app = NULL;
378 n1->nfor.args = ap;
379 if (lasttoken != TNL && lasttoken != TSEMI)
380 synexpect(-1);
381 } else {
382 static char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
383 '@', '=', '\0'};
384 n2 = (union node *)stalloc(sizeof (struct narg));
385 n2->type = NARG;
386 n2->narg.text = argvars;
387 n2->narg.backquote = NULL;
388 n2->narg.next = NULL;
389 n1->nfor.args = n2;
390 /*
391 * Newline or semicolon here is optional (but note
392 * that the original Bourne shell only allowed NL).
393 */
394 if (lasttoken != TNL && lasttoken != TSEMI)
395 tokpushback++;
396 }
397 checkkwd = 2;
398 if ((t = readtoken()) == TDO)
399 t = TDONE;
400 else if (t == TBEGIN)
401 t = TEND;
402 else
403 synexpect(-1);
404 n1->nfor.body = list(0);
405 if (readtoken() != t)
406 synexpect(t);
407 checkkwd = 1;
408 break;
409 case TCASE:
410 n1 = (union node *)stalloc(sizeof (struct ncase));
411 n1->type = NCASE;
412 if (readtoken() != TWORD)
413 synexpect(TWORD);
414 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
415 n2->type = NARG;
416 n2->narg.text = wordtext;
417 n2->narg.backquote = backquotelist;
418 n2->narg.next = NULL;
419 while (readtoken() == TNL);
420 if (lasttoken != TWORD || ! equal(wordtext, "in"))
421 synerror("expecting \"in\"");
422 cpp = &n1->ncase.cases;
423 noalias = 1;
424 checkkwd = 2, readtoken();
425 do {
426 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
427 cp->type = NCLIST;
428 app = &cp->nclist.pattern;
429 for (;;) {
430 *app = ap = (union node *)stalloc(sizeof (struct narg));
431 ap->type = NARG;
432 ap->narg.text = wordtext;
433 ap->narg.backquote = backquotelist;
434 if (checkkwd = 2, readtoken() != TPIPE)
435 break;
436 app = &ap->narg.next;
437 readtoken();
438 }
439 ap->narg.next = NULL;
440 noalias = 0;
441 if (lasttoken != TRP) {
442 synexpect(TRP);
443 }
444 cp->nclist.body = list(0);
445
446 checkkwd = 2;
447 if ((t = readtoken()) != TESAC) {
448 if (t != TENDCASE) {
449 noalias = 0;
450 synexpect(TENDCASE);
451 } else {
452 noalias = 1;
453 checkkwd = 2;
454 readtoken();
455 }
456 }
457 cpp = &cp->nclist.next;
458 } while(lasttoken != TESAC);
459 noalias = 0;
460 *cpp = NULL;
461 checkkwd = 1;
462 break;
463 case TLP:
464 n1 = (union node *)stalloc(sizeof (struct nredir));
465 n1->type = NSUBSHELL;
466 n1->nredir.n = list(0);
467 n1->nredir.redirect = NULL;
468 if (readtoken() != TRP)
469 synexpect(TRP);
470 checkkwd = 1;
471 break;
472 case TBEGIN:
473 n1 = list(0);
474 if (readtoken() != TEND)
475 synexpect(TEND);
476 checkkwd = 1;
477 break;
478 /* Handle an empty command like other simple commands. */
479 case TSEMI:
480 /*
481 * An empty command before a ; doesn't make much sense, and
482 * should certainly be disallowed in the case of `if ;'.
483 */
484 if (!redir)
485 synexpect(-1);
486 case TAND:
487 case TOR:
488 case TNL:
489 case TEOF:
490 case TWORD:
491 case TRP:
492 tokpushback++;
493 n1 = simplecmd(rpp, redir);
494 goto checkneg;
495 default:
496 synexpect(-1);
497 /* NOTREACHED */
498 }
499
500 /* Now check for redirection which may follow command */
501 while (readtoken() == TREDIR) {
502 *rpp = n2 = redirnode;
503 rpp = &n2->nfile.next;
504 parsefname();
505 }
506 tokpushback++;
507 *rpp = NULL;
508 if (redir) {
509 if (n1->type != NSUBSHELL) {
510 n2 = (union node *)stalloc(sizeof (struct nredir));
511 n2->type = NREDIR;
512 n2->nredir.n = n1;
513 n1 = n2;
514 }
515 n1->nredir.redirect = redir;
516 }
517
518 checkneg:
519 if (negate) {
520 n2 = (union node *)stalloc(sizeof (struct nnot));
521 n2->type = NNOT;
522 n2->nnot.com = n1;
523 return n2;
524 }
525 else
526 return n1;
527 }
528
529
530 STATIC union node *
531 simplecmd(rpp, redir)
532 union node **rpp, *redir;
533 {
534 union node *args, **app;
535 union node **orig_rpp = rpp;
536 union node *n = NULL, *n2;
537 int negate = 0;
538
539 /* If we don't have any redirections already, then we must reset */
540 /* rpp to be the address of the local redir variable. */
541 if (redir == 0)
542 rpp = &redir;
543
544 args = NULL;
545 app = &args;
546 /*
547 * We save the incoming value, because we need this for shell
548 * functions. There can not be a redirect or an argument between
549 * the function name and the open parenthesis.
550 */
551 orig_rpp = rpp;
552
553 while (readtoken() == TNOT) {
554 TRACE(("command: TNOT recognized\n"));
555 negate = !negate;
556 }
557 tokpushback++;
558
559 for (;;) {
560 if (readtoken() == TWORD) {
561 n = (union node *)stalloc(sizeof (struct narg));
562 n->type = NARG;
563 n->narg.text = wordtext;
564 n->narg.backquote = backquotelist;
565 *app = n;
566 app = &n->narg.next;
567 } else if (lasttoken == TREDIR) {
568 *rpp = n = redirnode;
569 rpp = &n->nfile.next;
570 parsefname(); /* read name of redirection file */
571 } else if (lasttoken == TLP && app == &args->narg.next
572 && rpp == orig_rpp) {
573 /* We have a function */
574 if (readtoken() != TRP)
575 synexpect(TRP);
576 #ifdef notdef
577 if (! goodname(n->narg.text))
578 synerror("Bad function name");
579 #endif
580 n->type = NDEFUN;
581 n->narg.next = command();
582 goto checkneg;
583 } else {
584 tokpushback++;
585 break;
586 }
587 }
588 *app = NULL;
589 *rpp = NULL;
590 n = (union node *)stalloc(sizeof (struct ncmd));
591 n->type = NCMD;
592 n->ncmd.backgnd = 0;
593 n->ncmd.args = args;
594 n->ncmd.redirect = redir;
595
596 checkneg:
597 if (negate) {
598 n2 = (union node *)stalloc(sizeof (struct nnot));
599 n2->type = NNOT;
600 n2->nnot.com = n;
601 return n2;
602 }
603 else
604 return n;
605 }
606
607 STATIC union node *
608 makename() {
609 union node *n;
610
611 n = (union node *)stalloc(sizeof (struct narg));
612 n->type = NARG;
613 n->narg.next = NULL;
614 n->narg.text = wordtext;
615 n->narg.backquote = backquotelist;
616 return n;
617 }
618
619 void fixredir(union node *n, const char *text, int err)
620 {
621 TRACE(("Fix redir %s %d\n", text, err));
622 if (!err)
623 n->ndup.vname = NULL;
624
625 if (is_digit(text[0]) && text[1] == '\0')
626 n->ndup.dupfd = digit_val(text[0]);
627 else if (text[0] == '-' && text[1] == '\0')
628 n->ndup.dupfd = -1;
629 else {
630
631 if (err)
632 synerror("Bad fd number");
633 else
634 n->ndup.vname = makename();
635 }
636 }
637
638
639 STATIC void
640 parsefname() {
641 union node *n = redirnode;
642
643 if (readtoken() != TWORD)
644 synexpect(-1);
645 if (n->type == NHERE) {
646 struct heredoc *here = heredoc;
647 struct heredoc *p;
648 int i;
649
650 if (quoteflag == 0)
651 n->type = NXHERE;
652 TRACE(("Here document %d\n", n->type));
653 if (here->striptabs) {
654 while (*wordtext == '\t')
655 wordtext++;
656 }
657 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
658 synerror("Illegal eof marker for << redirection");
659 rmescapes(wordtext);
660 here->eofmark = wordtext;
661 here->next = NULL;
662 if (heredoclist == NULL)
663 heredoclist = here;
664 else {
665 for (p = heredoclist ; p->next ; p = p->next);
666 p->next = here;
667 }
668 } else if (n->type == NTOFD || n->type == NFROMFD) {
669 fixredir(n, wordtext, 0);
670 } else {
671 n->nfile.fname = makename();
672 }
673 }
674
675
676 /*
677 * Input any here documents.
678 */
679
680 STATIC void
681 parseheredoc() {
682 struct heredoc *here;
683 union node *n;
684
685 while (heredoclist) {
686 here = heredoclist;
687 heredoclist = here->next;
688 if (needprompt) {
689 setprompt(2);
690 needprompt = 0;
691 }
692 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
693 here->eofmark, here->striptabs);
694 n = (union node *)stalloc(sizeof (struct narg));
695 n->narg.type = NARG;
696 n->narg.next = NULL;
697 n->narg.text = wordtext;
698 n->narg.backquote = backquotelist;
699 here->here->nhere.doc = n;
700 }
701 }
702
703 STATIC int
704 peektoken() {
705 int t;
706
707 t = readtoken();
708 tokpushback++;
709 return (t);
710 }
711
712 STATIC int
713 readtoken() {
714 int t;
715 int savecheckkwd = checkkwd;
716 #ifdef DEBUG
717 int alreadyseen = tokpushback;
718 #endif
719 struct alias *ap;
720
721 top:
722 t = xxreadtoken();
723
724 if (checkkwd) {
725 /*
726 * eat newlines
727 */
728 if (checkkwd == 2) {
729 checkkwd = 0;
730 while (t == TNL) {
731 parseheredoc();
732 t = xxreadtoken();
733 }
734 } else
735 checkkwd = 0;
736 /*
737 * check for keywords and aliases
738 */
739 if (t == TWORD && !quoteflag)
740 {
741 const char *const *pp;
742
743 for (pp = parsekwd; *pp; pp++) {
744 if (**pp == *wordtext && equal(*pp, wordtext))
745 {
746 lasttoken = t = pp -
747 parsekwd + KWDOFFSET;
748 TRACE(("keyword %s recognized\n", tokname[t]));
749 goto out;
750 }
751 }
752 if(!noalias &&
753 (ap = lookupalias(wordtext, 1)) != NULL) {
754 pushstring(ap->val, strlen(ap->val), ap);
755 checkkwd = savecheckkwd;
756 goto top;
757 }
758 }
759 out:
760 checkkwd = (t == TNOT) ? savecheckkwd : 0;
761 }
762 #ifdef DEBUG
763 if (!alreadyseen)
764 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
765 else
766 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
767 #endif
768 return (t);
769 }
770
771
772 /*
773 * Read the next input token.
774 * If the token is a word, we set backquotelist to the list of cmds in
775 * backquotes. We set quoteflag to true if any part of the word was
776 * quoted.
777 * If the token is TREDIR, then we set redirnode to a structure containing
778 * the redirection.
779 * In all cases, the variable startlinno is set to the number of the line
780 * on which the token starts.
781 *
782 * [Change comment: here documents and internal procedures]
783 * [Readtoken shouldn't have any arguments. Perhaps we should make the
784 * word parsing code into a separate routine. In this case, readtoken
785 * doesn't need to have any internal procedures, but parseword does.
786 * We could also make parseoperator in essence the main routine, and
787 * have parseword (readtoken1?) handle both words and redirection.]
788 */
789
790 #define RETURN(token) return lasttoken = token
791
792 STATIC int
793 xxreadtoken() {
794 int c;
795
796 if (tokpushback) {
797 tokpushback = 0;
798 return lasttoken;
799 }
800 if (needprompt) {
801 setprompt(2);
802 needprompt = 0;
803 }
804 startlinno = plinno;
805 for (;;) { /* until token or start of word found */
806 c = pgetc_macro();
807 if (c == ' ' || c == '\t')
808 continue; /* quick check for white space first */
809 switch (c) {
810 case ' ': case '\t':
811 continue;
812 case '#':
813 while ((c = pgetc()) != '\n' && c != PEOF);
814 pungetc();
815 continue;
816 case '\\':
817 if (pgetc() == '\n') {
818 startlinno = ++plinno;
819 if (doprompt)
820 setprompt(2);
821 else
822 setprompt(0);
823 continue;
824 }
825 pungetc();
826 goto breakloop;
827 case '\n':
828 plinno++;
829 needprompt = doprompt;
830 RETURN(TNL);
831 case PEOF:
832 RETURN(TEOF);
833 case '&':
834 if (pgetc() == '&')
835 RETURN(TAND);
836 pungetc();
837 RETURN(TBACKGND);
838 case '|':
839 if (pgetc() == '|')
840 RETURN(TOR);
841 pungetc();
842 RETURN(TPIPE);
843 case ';':
844 if (pgetc() == ';')
845 RETURN(TENDCASE);
846 pungetc();
847 RETURN(TSEMI);
848 case '(':
849 RETURN(TLP);
850 case ')':
851 RETURN(TRP);
852 default:
853 goto breakloop;
854 }
855 }
856 breakloop:
857 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
858 #undef RETURN
859 }
860
861
862
863 /*
864 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
865 * is not NULL, read a here document. In the latter case, eofmark is the
866 * word which marks the end of the document and striptabs is true if
867 * leading tabs should be stripped from the document. The argument firstc
868 * is the first character of the input token or document.
869 *
870 * Because C does not have internal subroutines, I have simulated them
871 * using goto's to implement the subroutine linkage. The following macros
872 * will run code that appears at the end of readtoken1.
873 */
874
/*
 * Subroutine linkage for readtoken1(): each macro jumps to a labelled
 * section of that function and plants the label the section jumps back
 * to.
 */
#define CHECKEND()	{goto checkend; checkend_return:;}
#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
#define PARSESUB()	{goto parsesub; parsesub_return:;}
#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}

/*
 * Keep track of nested doublequotes in dblquote and dblquotep.
 * We use dblquote for the first 32 levels, and we expand to a malloc'ed
 * region for levels above that. Usually we never need to malloc.
 * This code assumes that an int is 32 bits. We don't use uint32_t,
 * because the rest of the code does not.
 *
 * The macros operate on the variables `varnest', `dblquote' and
 * `dblquotep' that are in scope where they are expanded.  The bit masks
 * are built from 1U so that level 31 does not shift into the sign bit of
 * a signed int, and the two statement macros are wrapped in
 * do { } while (0) so that they behave as a single statement inside an
 * unbraced if/else.
 */
#define ISDBLQUOTE() ((varnest < 32) ? (dblquote & (1U << varnest)) : \
    (dblquotep[(varnest / 32) - 1] & (1U << (varnest % 32))))

#define SETDBLQUOTE() \
    do { \
	if (varnest < 32) \
	    dblquote |= (1U << varnest); \
	else \
	    dblquotep[(varnest / 32) - 1] |= (1U << (varnest % 32)); \
    } while (/*CONSTCOND*/0)

#define CLRDBLQUOTE() \
    do { \
	if (varnest < 32) \
	    dblquote &= ~(1U << varnest); \
	else \
	    dblquotep[(varnest / 32) - 1] &= ~(1U << (varnest % 32)); \
    } while (/*CONSTCOND*/0)
903
904 STATIC int
905 readtoken1(firstc, syntax, eofmark, striptabs)
906 int firstc;
907 char const *syntax;
908 char *eofmark;
909 int striptabs;
910 {
911 int c = firstc;
912 char *out;
913 int len;
914 char line[EOFMARKLEN + 1];
915 struct nodelist *bqlist;
916 int quotef;
917 int *dblquotep = NULL;
918 size_t maxnest = 32;
919 int dblquote;
920 int varnest; /* levels of variables expansion */
921 int arinest; /* levels of arithmetic expansion */
922 int parenlevel; /* levels of parens in arithmetic */
923 int oldstyle;
924 char const *prevsyntax; /* syntax before arithmetic */
925 #if __GNUC__
926 /* Avoid longjmp clobbering */
927 (void) &maxnest;
928 (void) &dblquotep;
929 (void) &out;
930 (void) "ef;
931 (void) &dblquote;
932 (void) &varnest;
933 (void) &arinest;
934 (void) &parenlevel;
935 (void) &oldstyle;
936 (void) &prevsyntax;
937 (void) &syntax;
938 #endif
939
940 startlinno = plinno;
941 dblquote = 0;
942 varnest = 0;
943 if (syntax == DQSYNTAX) {
944 SETDBLQUOTE();
945 }
946 quotef = 0;
947 bqlist = NULL;
948 arinest = 0;
949 parenlevel = 0;
950
951 STARTSTACKSTR(out);
952 loop: { /* for each line, until end of word */
953 #if ATTY
954 if (c == '\034' && doprompt
955 && attyset() && ! equal(termval(), "emacs")) {
956 attyline();
957 if (syntax == BASESYNTAX)
958 return readtoken();
959 c = pgetc();
960 goto loop;
961 }
962 #endif
963 CHECKEND(); /* set c to PEOF if at end of here document */
964 for (;;) { /* until end of line or end of word */
965 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */
966 switch(syntax[c]) {
967 case CNL: /* '\n' */
968 if (syntax == BASESYNTAX)
969 goto endword; /* exit outer loop */
970 USTPUTC(c, out);
971 plinno++;
972 if (doprompt)
973 setprompt(2);
974 else
975 setprompt(0);
976 c = pgetc();
977 goto loop; /* continue outer loop */
978 case CWORD:
979 USTPUTC(c, out);
980 break;
981 case CCTL:
982 if (eofmark == NULL || ISDBLQUOTE())
983 USTPUTC(CTLESC, out);
984 USTPUTC(c, out);
985 break;
986 case CBACK: /* backslash */
987 c = pgetc();
988 if (c == PEOF) {
989 USTPUTC('\\', out);
990 pungetc();
991 } else if (c == '\n') {
992 if (doprompt)
993 setprompt(2);
994 else
995 setprompt(0);
996 } else {
997 if (ISDBLQUOTE() && c != '\\' &&
998 c != '`' && c != '$' &&
999 (c != '"' || eofmark != NULL))
1000 USTPUTC('\\', out);
1001 if (SQSYNTAX[c] == CCTL)
1002 USTPUTC(CTLESC, out);
1003 else if (eofmark == NULL)
1004 USTPUTC(CTLQUOTEMARK, out);
1005 USTPUTC(c, out);
1006 quotef++;
1007 }
1008 break;
1009 case CSQUOTE:
1010 if (syntax != SQSYNTAX) {
1011 if (eofmark == NULL)
1012 USTPUTC(CTLQUOTEMARK, out);
1013 syntax = SQSYNTAX;
1014 break;
1015 }
1016 /* FALLTHROUGH */
1017 case CDQUOTE:
1018 if (eofmark != NULL && arinest == 0 &&
1019 varnest == 0) {
1020 USTPUTC(c, out);
1021 } else {
1022 if (arinest) {
1023 if (c != '"' || ISDBLQUOTE()) {
1024 syntax = ARISYNTAX;
1025 CLRDBLQUOTE();
1026 } else {
1027 syntax = DQSYNTAX;
1028 SETDBLQUOTE();
1029 USTPUTC(CTLQUOTEMARK, out);
1030 }
1031 } else if (eofmark == NULL) {
1032 if (c != '"' || ISDBLQUOTE()) {
1033 syntax = BASESYNTAX;
1034 CLRDBLQUOTE();
1035 } else {
1036 syntax = DQSYNTAX;
1037 SETDBLQUOTE();
1038 USTPUTC(CTLQUOTEMARK, out);
1039 }
1040 }
1041 quotef++;
1042 }
1043 break;
1044 case CVAR: /* '$' */
1045 PARSESUB(); /* parse substitution */
1046 break;
1047 case CENDVAR: /* CLOSEBRACE */
1048 if (varnest > 0 && !ISDBLQUOTE()) {
1049 varnest--;
1050 USTPUTC(CTLENDVAR, out);
1051 } else {
1052 USTPUTC(c, out);
1053 }
1054 break;
1055 case CLP: /* '(' in arithmetic */
1056 parenlevel++;
1057 USTPUTC(c, out);
1058 break;
1059 case CRP: /* ')' in arithmetic */
1060 if (parenlevel > 0) {
1061 USTPUTC(c, out);
1062 --parenlevel;
1063 } else {
1064 if (pgetc() == ')') {
1065 if (--arinest == 0) {
1066 USTPUTC(CTLENDARI, out);
1067 syntax = prevsyntax;
1068 if (syntax == DQSYNTAX)
1069 SETDBLQUOTE();
1070 else
1071 CLRDBLQUOTE();
1072 } else
1073 USTPUTC(')', out);
1074 } else {
1075 /*
1076 * unbalanced parens
1077 * (don't 2nd guess - no error)
1078 */
1079 pungetc();
1080 USTPUTC(')', out);
1081 }
1082 }
1083 break;
1084 case CBQUOTE: /* '`' */
1085 PARSEBACKQOLD();
1086 break;
1087 case CEOF:
1088 goto endword; /* exit outer loop */
1089 default:
1090 if (varnest == 0)
1091 goto endword; /* exit outer loop */
1092 USTPUTC(c, out);
1093 }
1094 c = pgetc_macro();
1095 }
1096 }
1097 endword:
1098 if (syntax == ARISYNTAX)
1099 synerror("Missing '))'");
1100 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1101 synerror("Unterminated quoted string");
1102 if (varnest != 0) {
1103 startlinno = plinno;
1104 /* { */
1105 synerror("Missing '}'");
1106 }
1107 USTPUTC('\0', out);
1108 len = out - stackblock();
1109 out = stackblock();
1110 if (eofmark == NULL) {
1111 if ((c == '>' || c == '<')
1112 && quotef == 0
1113 && len <= 2
1114 && (*out == '\0' || is_digit(*out))) {
1115 PARSEREDIR();
1116 return lasttoken = TREDIR;
1117 } else {
1118 pungetc();
1119 }
1120 }
1121 quoteflag = quotef;
1122 backquotelist = bqlist;
1123 grabstackblock(len);
1124 wordtext = out;
1125 if (dblquotep != NULL)
1126 ckfree(dblquotep);
1127 return lasttoken = TWORD;
1128 /* end of readtoken routine */
1129
1130
1131
1132 /*
1133 * Check to see whether we are at the end of the here document. When this
1134 * is called, c is set to the first character of the next input line. If
1135 * we are at the end of the here document, this routine sets the c to PEOF.
1136 */
1137
1138 checkend: {
1139 if (eofmark) {
1140 if (striptabs) {
1141 while (c == '\t')
1142 c = pgetc();
1143 }
1144 if (c == *eofmark) {
1145 if (pfgets(line, sizeof line) != NULL) {
1146 char *p, *q;
1147
1148 p = line;
1149 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1150 if (*p == '\n' && *q == '\0') {
1151 c = PEOF;
1152 plinno++;
1153 needprompt = doprompt;
1154 } else {
1155 pushstring(line, strlen(line), NULL);
1156 }
1157 }
1158 }
1159 }
1160 goto checkend_return;
1161 }
1162
1163
1164 /*
1165 * Parse a redirection operator. The variable "out" points to a string
1166 * specifying the fd to be redirected. The variable "c" contains the
1167 * first character of the redirection operator.
1168 */
1169
1170 parseredir: {
1171 char fd = *out;
1172 union node *np;
1173
1174 np = (union node *)stalloc(sizeof (struct nfile));
1175 if (c == '>') {
1176 np->nfile.fd = 1;
1177 c = pgetc();
1178 if (c == '>')
1179 np->type = NAPPEND;
1180 else if (c == '|')
1181 np->type = NCLOBBER;
1182 else if (c == '&')
1183 np->type = NTOFD;
1184 else {
1185 np->type = NTO;
1186 pungetc();
1187 }
1188 } else { /* c == '<' */
1189 np->nfile.fd = 0;
1190 switch (c = pgetc()) {
1191 case '<':
1192 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1193 np = (union node *)stalloc(sizeof (struct nhere));
1194 np->nfile.fd = 0;
1195 }
1196 np->type = NHERE;
1197 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1198 heredoc->here = np;
1199 if ((c = pgetc()) == '-') {
1200 heredoc->striptabs = 1;
1201 } else {
1202 heredoc->striptabs = 0;
1203 pungetc();
1204 }
1205 break;
1206
1207 case '&':
1208 np->type = NFROMFD;
1209 break;
1210
1211 case '>':
1212 np->type = NFROMTO;
1213 break;
1214
1215 default:
1216 np->type = NFROM;
1217 pungetc();
1218 break;
1219 }
1220 }
1221 if (fd != '\0')
1222 np->nfile.fd = digit_val(fd);
1223 redirnode = np;
1224 goto parseredir_return;
1225 }
1226
1227
1228 /*
1229 * Parse a substitution. At this point, we have read the dollar sign
1230 * and nothing else.
1231 */
1232
1233 parsesub: {
1234 int subtype;
1235 int typeloc;
1236 int flags;
1237 char *p;
1238 static const char types[] = "}-+?=";
1239
1240 c = pgetc();
1241 if (c != '(' && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1242 USTPUTC('$', out);
1243 pungetc();
1244 } else if (c == '(') { /* $(command) or $((arith)) */
1245 if (pgetc() == '(') {
1246 PARSEARITH();
1247 } else {
1248 pungetc();
1249 PARSEBACKQNEW();
1250 }
1251 } else {
1252 USTPUTC(CTLVAR, out);
1253 typeloc = out - stackblock();
1254 USTPUTC(VSNORMAL, out);
1255 subtype = VSNORMAL;
1256 if (c == OPENBRACE) {
1257 c = pgetc();
1258 if (c == '#') {
1259 if ((c = pgetc()) == CLOSEBRACE)
1260 c = '#';
1261 else
1262 subtype = VSLENGTH;
1263 }
1264 else
1265 subtype = 0;
1266 }
1267 if (is_name(c)) {
1268 do {
1269 STPUTC(c, out);
1270 c = pgetc();
1271 } while (is_in_name(c));
1272 } else if (is_digit(c)) {
1273 do {
1274 USTPUTC(c, out);
1275 c = pgetc();
1276 } while (is_digit(c));
1277 }
1278 else if (is_special(c)) {
1279 USTPUTC(c, out);
1280 c = pgetc();
1281 }
1282 else
1283 badsub: synerror("Bad substitution");
1284
1285 STPUTC('=', out);
1286 flags = 0;
1287 if (subtype == 0) {
1288 switch (c) {
1289 case ':':
1290 flags = VSNUL;
1291 c = pgetc();
1292 /*FALLTHROUGH*/
1293 default:
1294 p = strchr(types, c);
1295 if (p == NULL)
1296 goto badsub;
1297 subtype = p - types + VSNORMAL;
1298 break;
1299 case '%':
1300 case '#':
1301 {
1302 int cc = c;
1303 subtype = c == '#' ? VSTRIMLEFT :
1304 VSTRIMRIGHT;
1305 c = pgetc();
1306 if (c == cc)
1307 subtype++;
1308 else
1309 pungetc();
1310 break;
1311 }
1312 }
1313 } else {
1314 pungetc();
1315 }
1316 if (ISDBLQUOTE() || arinest)
1317 flags |= VSQUOTE;
1318 *(stackblock() + typeloc) = subtype | flags;
1319 if (subtype != VSNORMAL) {
1320 varnest++;
1321 if (varnest >= maxnest) {
1322 dblquotep = ckrealloc(dblquotep, maxnest / 8);
1323 dblquotep[(maxnest / 32) - 1] = 0;
1324 maxnest += 32;
1325 }
1326 }
1327 }
1328 goto parsesub_return;
1329 }
1330
1331
1332 /*
 * Called to parse command substitutions.  Oldstyle is set if the command
 * is enclosed inside backquotes rather than $(...); nlpp is advanced to
 * the tail of the bqlist linked list of commands, where the new entry is
 * appended, and savelen is the number of characters on the top of the
 * stack which must be preserved.
1337 */
1338
1339 parsebackq: {
1340 struct nodelist **nlpp;
1341 int savepbq;
1342 union node *n;
1343 char *volatile str;
1344 struct jmploc jmploc;
1345 struct jmploc *volatile savehandler;
1346 int savelen;
1347 int saveprompt;
1348 #ifdef __GNUC__
1349 (void) &saveprompt;
1350 #endif
1351
1352 savepbq = parsebackquote;
1353 if (setjmp(jmploc.loc)) {
1354 if (str)
1355 ckfree(str);
1356 parsebackquote = 0;
1357 handler = savehandler;
1358 longjmp(handler->loc, 1);
1359 }
1360 INTOFF;
1361 str = NULL;
1362 savelen = out - stackblock();
1363 if (savelen > 0) {
1364 str = ckmalloc(savelen);
1365 memcpy(str, stackblock(), savelen);
1366 }
1367 savehandler = handler;
1368 handler = &jmploc;
1369 INTON;
1370 if (oldstyle) {
1371 /* We must read until the closing backquote, giving special
1372 treatment to some slashes, and then push the string and
1373 reread it as input, interpreting it normally. */
1374 char *pout;
1375 int pc;
1376 int psavelen;
1377 char *pstr;
1378
1379
1380 STARTSTACKSTR(pout);
1381 for (;;) {
1382 if (needprompt) {
1383 setprompt(2);
1384 needprompt = 0;
1385 }
1386 switch (pc = pgetc()) {
1387 case '`':
1388 goto done;
1389
1390 case '\\':
1391 if ((pc = pgetc()) == '\n') {
1392 plinno++;
1393 if (doprompt)
1394 setprompt(2);
1395 else
1396 setprompt(0);
1397 /*
1398 * If eating a newline, avoid putting
1399 * the newline into the new character
1400 * stream (via the STPUTC after the
1401 * switch).
1402 */
1403 continue;
1404 }
1405 if (pc != '\\' && pc != '`' && pc != '$'
1406 && (!ISDBLQUOTE() || pc != '"'))
1407 STPUTC('\\', pout);
1408 break;
1409
1410 case '\n':
1411 plinno++;
1412 needprompt = doprompt;
1413 break;
1414
1415 case PEOF:
1416 startlinno = plinno;
1417 synerror("EOF in backquote substitution");
1418 break;
1419
1420 default:
1421 break;
1422 }
1423 STPUTC(pc, pout);
1424 }
1425 done:
1426 STPUTC('\0', pout);
1427 psavelen = pout - stackblock();
1428 if (psavelen > 0) {
1429 pstr = grabstackstr(pout);
1430 setinputstring(pstr, 1);
1431 }
1432 }
1433 nlpp = &bqlist;
1434 while (*nlpp)
1435 nlpp = &(*nlpp)->next;
1436 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1437 (*nlpp)->next = NULL;
1438 parsebackquote = oldstyle;
1439
1440 if (oldstyle) {
1441 saveprompt = doprompt;
1442 doprompt = 0;
1443 }
1444
1445 n = list(0);
1446
1447 if (oldstyle)
1448 doprompt = saveprompt;
1449 else {
1450 if (readtoken() != TRP)
1451 synexpect(TRP);
1452 }
1453
1454 (*nlpp)->n = n;
1455 if (oldstyle) {
1456 /*
1457 * Start reading from old file again, ignoring any pushed back
1458 * tokens left from the backquote parsing
1459 */
1460 popfile();
1461 tokpushback = 0;
1462 }
1463 while (stackblocksize() <= savelen)
1464 growstackblock();
1465 STARTSTACKSTR(out);
1466 if (str) {
1467 memcpy(out, str, savelen);
1468 STADJUST(savelen, out);
1469 INTOFF;
1470 ckfree(str);
1471 str = NULL;
1472 INTON;
1473 }
1474 parsebackquote = savepbq;
1475 handler = savehandler;
1476 if (arinest || ISDBLQUOTE())
1477 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1478 else
1479 USTPUTC(CTLBACKQ, out);
1480 if (oldstyle)
1481 goto parsebackq_oldreturn;
1482 else
1483 goto parsebackq_newreturn;
1484 }
1485
1486 /*
1487 * Parse an arithmetic expansion (indicate start of one and set state)
1488 */
1489 parsearith: {
1490
1491 if (++arinest == 1) {
1492 prevsyntax = syntax;
1493 syntax = ARISYNTAX;
1494 USTPUTC(CTLARI, out);
1495 if (ISDBLQUOTE())
1496 USTPUTC('"',out);
1497 else
1498 USTPUTC(' ',out);
1499 } else {
1500 /*
1501 * we collapse embedded arithmetic expansion to
1502 * parenthesis, which should be equivalent
1503 */
1504 USTPUTC('(', out);
1505 }
1506 goto parsearith_return;
1507 }
1508
1509 } /* end of readtoken */
1510
1511
1512
/*
 * Reset hook, visible only when mkinit is defined: clears the checkkwd
 * flag and discards any pushed-back token by zeroing tokpushback.
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1519
1520 /*
1521 * Returns true if the text contains nothing to expand (no dollar signs
1522 * or backquotes).
1523 */
1524
1525 STATIC int
1526 noexpand(text)
1527 char *text;
1528 {
1529 char *p;
1530 char c;
1531
1532 p = text;
1533 while ((c = *p++) != '\0') {
1534 if (c == CTLQUOTEMARK)
1535 continue;
1536 if (c == CTLESC)
1537 p++;
1538 else if (BASESYNTAX[(int)c] == CCTL)
1539 return 0;
1540 }
1541 return 1;
1542 }
1543
1544
/*
 * Check whether the argument is a legal variable name: the first
 * character must satisfy is_name() and every following character must
 * satisfy is_in_name() (a letter or underscore followed by zero or more
 * letters, underscores, and digits).
 *
 * Returns 1 for a legal name, 0 otherwise (including the empty string).
 */

int
goodname(char *name)
{
	char *cp = name;

	/* The leading character has the stricter rule. */
	if (!is_name(*cp))
		return 0;

	/* Every remaining character up to the terminator must be valid. */
	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
1564
1565
1566 /*
1567 * Called when an unexpected token is read during the parse. The argument
1568 * is the token that is expected, or -1 if more than one type of token can
1569 * occur at this point.
1570 */
1571
1572 STATIC void
1573 synexpect(token)
1574 int token;
1575 {
1576 char msg[64];
1577
1578 if (token >= 0) {
1579 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1580 tokname[lasttoken], tokname[token]);
1581 } else {
1582 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1583 }
1584 synerror(msg);
1585 /* NOTREACHED */
1586 }
1587
1588
1589 STATIC void
1590 synerror(msg)
1591 const char *msg;
1592 {
1593 if (commandname)
1594 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1595 outfmt(&errout, "Syntax error: %s\n", msg);
1596 error((char *)NULL);
1597 /* NOTREACHED */
1598 }
1599
1600 STATIC void
1601 setprompt(which)
1602 int which;
1603 {
1604 whichprompt = which;
1605
1606 #ifndef SMALL
1607 if (!el)
1608 #endif
1609 out2str(getprompt(NULL));
1610 }
1611
1612 /*
1613 * called by editline -- any expansions to the prompt
1614 * should be added here.
1615 */
1616 const char *
1617 getprompt(void *unused)
1618 {
1619 switch (whichprompt) {
1620 case 0:
1621 return "";
1622 case 1:
1623 return ps1val();
1624 case 2:
1625 return ps2val();
1626 default:
1627 return "<internal prompt error>";
1628 }
1629 }
1630