parser.c revision 1.45 1 /* $NetBSD: parser.c,v 1.45 2000/07/27 04:09:27 cgd Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
43 #else
44 __RCSID("$NetBSD: parser.c,v 1.45 2000/07/27 04:09:27 cgd Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <stdlib.h>
49
50 #include "shell.h"
51 #include "parser.h"
52 #include "nodes.h"
53 #include "expand.h" /* defines rmescapes() */
54 #include "redir.h" /* defines copyfd() */
55 #include "syntax.h"
56 #include "options.h"
57 #include "input.h"
58 #include "output.h"
59 #include "var.h"
60 #include "error.h"
61 #include "memalloc.h"
62 #include "mystring.h"
63 #include "alias.h"
64 #include "show.h"
65 #ifndef SMALL
66 #include "myhistedit.h"
67 #endif
68
69 /*
70 * Shell command parser.
71 */
72
73 #define EOFMARKLEN 79
74
75 /* values returned by readtoken */
76 #include "token.h"
77
78
79
/*
 * Record for one here document whose body has not been read yet.
 * The record is allocated when a `<<' operator is scanned, parsefname()
 * fills in the end marker and appends it to heredoclist, and
 * parseheredoc() later reads the body and stores it in the redirection
 * node.
 */
80 struct heredoc {
81 struct heredoc *next; /* next here document in list */
82 union node *here; /* redirection node that receives the document text */
83 char *eofmark; /* string indicating end of input */
84 int striptabs; /* if set (`<<-'), strip leading tabs */
85 };
86
87
88
/*
 * Parser state shared between the tokenizer (readtoken, xxreadtoken,
 * readtoken1) and the grammar routines (list, andor, pipeline, command,
 * simplecmd).
 */
89 struct heredoc *heredoclist; /* list of here documents to read */
90 int parsebackquote; /* nonzero if we are inside (old-style) backquotes */
91 int doprompt; /* if set, prompt the user */
92 int needprompt; /* true if interactive and at start of line */
93 int lasttoken; /* last token read */
94 MKINIT int tokpushback; /* nonzero if lasttoken is to be returned again by the next read */
95 char *wordtext; /* text of last word returned by readtoken */
96 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
97 struct nodelist *backquotelist; /* commands in backquotes found in the last word */
98 union node *redirnode; /* redirection node built for the last TREDIR token */
99 struct heredoc *heredoc; /* record for the most recently scanned `<<' operator */
100 int quoteflag; /* set if (part of) last token was quoted */
101 int startlinno; /* line # where last token started */
102
103
/*
 * Forward declarations for the file-local parser routines: the grammar
 * routines (list, andor, pipeline, command, simplecmd), the redirection
 * and here-document helpers, the tokenizer layers, and the syntax-error
 * reporters (which do not return).
 */
104 STATIC union node *list __P((int));
105 STATIC union node *andor __P((void));
106 STATIC union node *pipeline __P((void));
107 STATIC union node *command __P((void));
108 STATIC union node *simplecmd __P((union node **, union node *));
109 STATIC union node *makename __P((void));
110 STATIC void parsefname __P((void));
111 STATIC void parseheredoc __P((void));
112 STATIC int peektoken __P((void));
113 STATIC int readtoken __P((void));
114 STATIC int xxreadtoken __P((void));
115 STATIC int readtoken1 __P((int, char const *, char *, int));
116 STATIC int noexpand __P((char *));
117 STATIC void synexpect __P((int)) __attribute__((noreturn));
118 STATIC void synerror __P((const char *)) __attribute__((noreturn));
119 STATIC void setprompt __P((int));
120
121
122 /*
123 * Read and parse a command. Returns NEOF on end of file. (NULL is a
124 * valid parse tree indicating a blank line.)
125 */
126
127 union node *
128 parsecmd(int interact)
129 {
130 int t;
131
132 doprompt = interact;
133 if (doprompt)
134 setprompt(1);
135 else
136 setprompt(0);
137 needprompt = 0;
138 t = readtoken();
139 if (t == TEOF)
140 return NEOF;
141 if (t == TNL)
142 return NULL;
143 tokpushback++;
144 return list(1);
145 }
146
147
148 STATIC union node *
149 list(nlflag)
150 int nlflag;
151 {
152 union node *n1, *n2, *n3;
153 int tok;
154
155 checkkwd = 2;
156 if (nlflag == 0 && tokendlist[peektoken()])
157 return NULL;
158 n1 = NULL;
159 for (;;) {
160 n2 = andor();
161 tok = readtoken();
162 if (tok == TBACKGND) {
163 if (n2->type == NCMD || n2->type == NPIPE) {
164 n2->ncmd.backgnd = 1;
165 } else if (n2->type == NREDIR) {
166 n2->type = NBACKGND;
167 } else {
168 n3 = (union node *)stalloc(sizeof (struct nredir));
169 n3->type = NBACKGND;
170 n3->nredir.n = n2;
171 n3->nredir.redirect = NULL;
172 n2 = n3;
173 }
174 }
175 if (n1 == NULL) {
176 n1 = n2;
177 }
178 else {
179 n3 = (union node *)stalloc(sizeof (struct nbinary));
180 n3->type = NSEMI;
181 n3->nbinary.ch1 = n1;
182 n3->nbinary.ch2 = n2;
183 n1 = n3;
184 }
185 switch (tok) {
186 case TBACKGND:
187 case TSEMI:
188 tok = readtoken();
189 /* fall through */
190 case TNL:
191 if (tok == TNL) {
192 parseheredoc();
193 if (nlflag)
194 return n1;
195 } else {
196 tokpushback++;
197 }
198 checkkwd = 2;
199 if (tokendlist[peektoken()])
200 return n1;
201 break;
202 case TEOF:
203 if (heredoclist)
204 parseheredoc();
205 else
206 pungetc(); /* push back EOF on input */
207 return n1;
208 default:
209 if (nlflag)
210 synexpect(-1);
211 tokpushback++;
212 return n1;
213 }
214 }
215 }
216
217
218
219 STATIC union node *
220 andor() {
221 union node *n1, *n2, *n3;
222 int t;
223
224 n1 = pipeline();
225 for (;;) {
226 if ((t = readtoken()) == TAND) {
227 t = NAND;
228 } else if (t == TOR) {
229 t = NOR;
230 } else {
231 tokpushback++;
232 return n1;
233 }
234 n2 = pipeline();
235 n3 = (union node *)stalloc(sizeof (struct nbinary));
236 n3->type = t;
237 n3->nbinary.ch1 = n1;
238 n3->nbinary.ch2 = n2;
239 n1 = n3;
240 }
241 }
242
243
244
245 STATIC union node *
246 pipeline() {
247 union node *n1, *n2, *pipenode;
248 struct nodelist *lp, *prev;
249 int negate;
250
251 negate = 0;
252 TRACE(("pipeline: entered\n"));
253 while (readtoken() == TNOT)
254 negate = !negate;
255 tokpushback++;
256 n1 = command();
257 if (readtoken() == TPIPE) {
258 pipenode = (union node *)stalloc(sizeof (struct npipe));
259 pipenode->type = NPIPE;
260 pipenode->npipe.backgnd = 0;
261 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
262 pipenode->npipe.cmdlist = lp;
263 lp->n = n1;
264 do {
265 prev = lp;
266 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
267 lp->n = command();
268 prev->next = lp;
269 } while (readtoken() == TPIPE);
270 lp->next = NULL;
271 n1 = pipenode;
272 }
273 tokpushback++;
274 if (negate) {
275 n2 = (union node *)stalloc(sizeof (struct nnot));
276 n2->type = NNOT;
277 n2->nnot.com = n1;
278 return n2;
279 } else
280 return n1;
281 }
282
283
284
285 STATIC union node *
286 command() {
287 union node *n1, *n2;
288 union node *ap, **app;
289 union node *cp, **cpp;
290 union node *redir, **rpp;
291 int t, negate = 0;
292
293 checkkwd = 2;
294 redir = NULL;
295 n1 = NULL;
296 rpp = &redir;
297
298 /* Check for redirection which may precede command */
299 while (readtoken() == TREDIR) {
300 *rpp = n2 = redirnode;
301 rpp = &n2->nfile.next;
302 parsefname();
303 }
304 tokpushback++;
305
306 while (readtoken() == TNOT) {
307 TRACE(("command: TNOT recognized\n"));
308 negate = !negate;
309 }
310 tokpushback++;
311
312 switch (readtoken()) {
313 case TIF:
314 n1 = (union node *)stalloc(sizeof (struct nif));
315 n1->type = NIF;
316 n1->nif.test = list(0);
317 if (readtoken() != TTHEN)
318 synexpect(TTHEN);
319 n1->nif.ifpart = list(0);
320 n2 = n1;
321 while (readtoken() == TELIF) {
322 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
323 n2 = n2->nif.elsepart;
324 n2->type = NIF;
325 n2->nif.test = list(0);
326 if (readtoken() != TTHEN)
327 synexpect(TTHEN);
328 n2->nif.ifpart = list(0);
329 }
330 if (lasttoken == TELSE)
331 n2->nif.elsepart = list(0);
332 else {
333 n2->nif.elsepart = NULL;
334 tokpushback++;
335 }
336 if (readtoken() != TFI)
337 synexpect(TFI);
338 checkkwd = 1;
339 break;
340 case TWHILE:
341 case TUNTIL: {
342 int got;
343 n1 = (union node *)stalloc(sizeof (struct nbinary));
344 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
345 n1->nbinary.ch1 = list(0);
346 if ((got=readtoken()) != TDO) {
347 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
348 synexpect(TDO);
349 }
350 n1->nbinary.ch2 = list(0);
351 if (readtoken() != TDONE)
352 synexpect(TDONE);
353 checkkwd = 1;
354 break;
355 }
356 case TFOR:
357 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
358 synerror("Bad for loop variable");
359 n1 = (union node *)stalloc(sizeof (struct nfor));
360 n1->type = NFOR;
361 n1->nfor.var = wordtext;
362 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
363 app = ≈
364 while (readtoken() == TWORD) {
365 n2 = (union node *)stalloc(sizeof (struct narg));
366 n2->type = NARG;
367 n2->narg.text = wordtext;
368 n2->narg.backquote = backquotelist;
369 *app = n2;
370 app = &n2->narg.next;
371 }
372 *app = NULL;
373 n1->nfor.args = ap;
374 if (lasttoken != TNL && lasttoken != TSEMI)
375 synexpect(-1);
376 } else {
377 static char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
378 '@', '=', '\0'};
379 n2 = (union node *)stalloc(sizeof (struct narg));
380 n2->type = NARG;
381 n2->narg.text = argvars;
382 n2->narg.backquote = NULL;
383 n2->narg.next = NULL;
384 n1->nfor.args = n2;
385 /*
386 * Newline or semicolon here is optional (but note
387 * that the original Bourne shell only allowed NL).
388 */
389 if (lasttoken != TNL && lasttoken != TSEMI)
390 tokpushback++;
391 }
392 checkkwd = 2;
393 if ((t = readtoken()) == TDO)
394 t = TDONE;
395 else if (t == TBEGIN)
396 t = TEND;
397 else
398 synexpect(-1);
399 n1->nfor.body = list(0);
400 if (readtoken() != t)
401 synexpect(t);
402 checkkwd = 1;
403 break;
404 case TCASE:
405 n1 = (union node *)stalloc(sizeof (struct ncase));
406 n1->type = NCASE;
407 if (readtoken() != TWORD)
408 synexpect(TWORD);
409 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
410 n2->type = NARG;
411 n2->narg.text = wordtext;
412 n2->narg.backquote = backquotelist;
413 n2->narg.next = NULL;
414 while (readtoken() == TNL);
415 if (lasttoken != TWORD || ! equal(wordtext, "in"))
416 synerror("expecting \"in\"");
417 cpp = &n1->ncase.cases;
418 checkkwd = 2, readtoken();
419 do {
420 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
421 cp->type = NCLIST;
422 app = &cp->nclist.pattern;
423 for (;;) {
424 *app = ap = (union node *)stalloc(sizeof (struct narg));
425 ap->type = NARG;
426 ap->narg.text = wordtext;
427 ap->narg.backquote = backquotelist;
428 if (checkkwd = 2, readtoken() != TPIPE)
429 break;
430 app = &ap->narg.next;
431 readtoken();
432 }
433 ap->narg.next = NULL;
434 if (lasttoken != TRP)
435 synexpect(TRP);
436 cp->nclist.body = list(0);
437
438 checkkwd = 2;
439 if ((t = readtoken()) != TESAC) {
440 if (t != TENDCASE)
441 synexpect(TENDCASE);
442 else
443 checkkwd = 2, readtoken();
444 }
445 cpp = &cp->nclist.next;
446 } while(lasttoken != TESAC);
447 *cpp = NULL;
448 checkkwd = 1;
449 break;
450 case TLP:
451 n1 = (union node *)stalloc(sizeof (struct nredir));
452 n1->type = NSUBSHELL;
453 n1->nredir.n = list(0);
454 n1->nredir.redirect = NULL;
455 if (readtoken() != TRP)
456 synexpect(TRP);
457 checkkwd = 1;
458 break;
459 case TBEGIN:
460 n1 = list(0);
461 if (readtoken() != TEND)
462 synexpect(TEND);
463 checkkwd = 1;
464 break;
465 /* Handle an empty command like other simple commands. */
466 case TSEMI:
467 /*
468 * An empty command before a ; doesn't make much sense, and
469 * should certainly be disallowed in the case of `if ;'.
470 */
471 if (!redir)
472 synexpect(-1);
473 case TAND:
474 case TOR:
475 case TNL:
476 case TEOF:
477 case TWORD:
478 case TRP:
479 tokpushback++;
480 n1 = simplecmd(rpp, redir);
481 goto checkneg;
482 default:
483 synexpect(-1);
484 /* NOTREACHED */
485 }
486
487 /* Now check for redirection which may follow command */
488 while (readtoken() == TREDIR) {
489 *rpp = n2 = redirnode;
490 rpp = &n2->nfile.next;
491 parsefname();
492 }
493 tokpushback++;
494 *rpp = NULL;
495 if (redir) {
496 if (n1->type != NSUBSHELL) {
497 n2 = (union node *)stalloc(sizeof (struct nredir));
498 n2->type = NREDIR;
499 n2->nredir.n = n1;
500 n1 = n2;
501 }
502 n1->nredir.redirect = redir;
503 }
504
505 checkneg:
506 if (negate) {
507 n2 = (union node *)stalloc(sizeof (struct nnot));
508 n2->type = NNOT;
509 n2->nnot.com = n1;
510 return n2;
511 }
512 else
513 return n1;
514 }
515
516
517 STATIC union node *
518 simplecmd(rpp, redir)
519 union node **rpp, *redir;
520 {
521 union node *args, **app;
522 union node **orig_rpp = rpp;
523 union node *n = NULL, *n2;
524 int negate = 0;
525
526 /* If we don't have any redirections already, then we must reset */
527 /* rpp to be the address of the local redir variable. */
528 if (redir == 0)
529 rpp = &redir;
530
531 args = NULL;
532 app = &args;
533 /*
534 * We save the incoming value, because we need this for shell
535 * functions. There can not be a redirect or an argument between
536 * the function name and the open parenthesis.
537 */
538 orig_rpp = rpp;
539
540 while (readtoken() == TNOT) {
541 TRACE(("command: TNOT recognized\n"));
542 negate = !negate;
543 }
544 tokpushback++;
545
546 for (;;) {
547 if (readtoken() == TWORD) {
548 n = (union node *)stalloc(sizeof (struct narg));
549 n->type = NARG;
550 n->narg.text = wordtext;
551 n->narg.backquote = backquotelist;
552 *app = n;
553 app = &n->narg.next;
554 } else if (lasttoken == TREDIR) {
555 *rpp = n = redirnode;
556 rpp = &n->nfile.next;
557 parsefname(); /* read name of redirection file */
558 } else if (lasttoken == TLP && app == &args->narg.next
559 && rpp == orig_rpp) {
560 /* We have a function */
561 if (readtoken() != TRP)
562 synexpect(TRP);
563 #ifdef notdef
564 if (! goodname(n->narg.text))
565 synerror("Bad function name");
566 #endif
567 n->type = NDEFUN;
568 n->narg.next = command();
569 goto checkneg;
570 } else {
571 tokpushback++;
572 break;
573 }
574 }
575 *app = NULL;
576 *rpp = NULL;
577 n = (union node *)stalloc(sizeof (struct ncmd));
578 n->type = NCMD;
579 n->ncmd.backgnd = 0;
580 n->ncmd.args = args;
581 n->ncmd.redirect = redir;
582
583 checkneg:
584 if (negate) {
585 n2 = (union node *)stalloc(sizeof (struct nnot));
586 n2->type = NNOT;
587 n2->nnot.com = n;
588 return n2;
589 }
590 else
591 return n;
592 }
593
594 STATIC union node *
595 makename() {
596 union node *n;
597
598 n = (union node *)stalloc(sizeof (struct narg));
599 n->type = NARG;
600 n->narg.next = NULL;
601 n->narg.text = wordtext;
602 n->narg.backquote = backquotelist;
603 return n;
604 }
605
606 void fixredir(union node *n, const char *text, int err)
607 {
608 TRACE(("Fix redir %s %d\n", text, err));
609 if (!err)
610 n->ndup.vname = NULL;
611
612 if (is_digit(text[0]) && text[1] == '\0')
613 n->ndup.dupfd = digit_val(text[0]);
614 else if (text[0] == '-' && text[1] == '\0')
615 n->ndup.dupfd = -1;
616 else {
617
618 if (err)
619 synerror("Bad fd number");
620 else
621 n->ndup.vname = makename();
622 }
623 }
624
625
626 STATIC void
627 parsefname() {
628 union node *n = redirnode;
629
630 if (readtoken() != TWORD)
631 synexpect(-1);
632 if (n->type == NHERE) {
633 struct heredoc *here = heredoc;
634 struct heredoc *p;
635 int i;
636
637 if (quoteflag == 0)
638 n->type = NXHERE;
639 TRACE(("Here document %d\n", n->type));
640 if (here->striptabs) {
641 while (*wordtext == '\t')
642 wordtext++;
643 }
644 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
645 synerror("Illegal eof marker for << redirection");
646 rmescapes(wordtext);
647 here->eofmark = wordtext;
648 here->next = NULL;
649 if (heredoclist == NULL)
650 heredoclist = here;
651 else {
652 for (p = heredoclist ; p->next ; p = p->next);
653 p->next = here;
654 }
655 } else if (n->type == NTOFD || n->type == NFROMFD) {
656 fixredir(n, wordtext, 0);
657 } else {
658 n->nfile.fname = makename();
659 }
660 }
661
662
663 /*
664 * Input any here documents.
665 */
666
667 STATIC void
668 parseheredoc() {
669 struct heredoc *here;
670 union node *n;
671
672 while (heredoclist) {
673 here = heredoclist;
674 heredoclist = here->next;
675 if (needprompt) {
676 setprompt(2);
677 needprompt = 0;
678 }
679 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
680 here->eofmark, here->striptabs);
681 n = (union node *)stalloc(sizeof (struct narg));
682 n->narg.type = NARG;
683 n->narg.next = NULL;
684 n->narg.text = wordtext;
685 n->narg.backquote = backquotelist;
686 here->here->nhere.doc = n;
687 }
688 }
689
690 STATIC int
691 peektoken() {
692 int t;
693
694 t = readtoken();
695 tokpushback++;
696 return (t);
697 }
698
699 STATIC int
700 readtoken() {
701 int t;
702 int savecheckkwd = checkkwd;
703 struct alias *ap;
704 #ifdef DEBUG
705 int alreadyseen = tokpushback;
706 #endif
707
708 top:
709 t = xxreadtoken();
710
711 if (checkkwd) {
712 /*
713 * eat newlines
714 */
715 if (checkkwd == 2) {
716 checkkwd = 0;
717 while (t == TNL) {
718 parseheredoc();
719 t = xxreadtoken();
720 }
721 } else
722 checkkwd = 0;
723 /*
724 * check for keywords and aliases
725 */
726 if (t == TWORD && !quoteflag)
727 {
728 const char *const *pp;
729
730 for (pp = parsekwd; *pp; pp++) {
731 if (**pp == *wordtext && equal(*pp, wordtext))
732 {
733 lasttoken = t = pp -
734 parsekwd + KWDOFFSET;
735 TRACE(("keyword %s recognized\n", tokname[t]));
736 goto out;
737 }
738 }
739 if ((ap = lookupalias(wordtext, 1)) != NULL) {
740 pushstring(ap->val, strlen(ap->val), ap);
741 checkkwd = savecheckkwd;
742 goto top;
743 }
744 }
745 out:
746 checkkwd = (t == TNOT) ? savecheckkwd : 0;
747 }
748 #ifdef DEBUG
749 if (!alreadyseen)
750 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
751 else
752 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
753 #endif
754 return (t);
755 }
756
757
758 /*
759 * Read the next input token.
760 * If the token is a word, we set backquotelist to the list of cmds in
761 * backquotes. We set quoteflag to true if any part of the word was
762 * quoted.
763 * If the token is TREDIR, then we set redirnode to a structure containing
764 * the redirection.
765 * In all cases, the variable startlinno is set to the number of the line
766 * on which the token starts.
767 *
768 * [Change comment: here documents and internal procedures]
769 * [Readtoken shouldn't have any arguments. Perhaps we should make the
770 * word parsing code into a separate routine. In this case, readtoken
771 * doesn't need to have any internal procedures, but parseword does.
772 * We could also make parseoperator in essence the main routine, and
773 * have parseword (readtoken1?) handle both words and redirection.]
774 */
775
776 #define RETURN(token) return lasttoken = token
777
778 STATIC int
779 xxreadtoken() {
780 int c;
781
782 if (tokpushback) {
783 tokpushback = 0;
784 return lasttoken;
785 }
786 if (needprompt) {
787 setprompt(2);
788 needprompt = 0;
789 }
790 startlinno = plinno;
791 for (;;) { /* until token or start of word found */
792 c = pgetc_macro();
793 if (c == ' ' || c == '\t')
794 continue; /* quick check for white space first */
795 switch (c) {
796 case ' ': case '\t':
797 continue;
798 case '#':
799 while ((c = pgetc()) != '\n' && c != PEOF);
800 pungetc();
801 continue;
802 case '\\':
803 if (pgetc() == '\n') {
804 startlinno = ++plinno;
805 if (doprompt)
806 setprompt(2);
807 else
808 setprompt(0);
809 continue;
810 }
811 pungetc();
812 goto breakloop;
813 case '\n':
814 plinno++;
815 needprompt = doprompt;
816 RETURN(TNL);
817 case PEOF:
818 RETURN(TEOF);
819 case '&':
820 if (pgetc() == '&')
821 RETURN(TAND);
822 pungetc();
823 RETURN(TBACKGND);
824 case '|':
825 if (pgetc() == '|')
826 RETURN(TOR);
827 pungetc();
828 RETURN(TPIPE);
829 case ';':
830 if (pgetc() == ';')
831 RETURN(TENDCASE);
832 pungetc();
833 RETURN(TSEMI);
834 case '(':
835 RETURN(TLP);
836 case ')':
837 RETURN(TRP);
838 default:
839 goto breakloop;
840 }
841 }
842 breakloop:
843 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
844 #undef RETURN
845 }
846
847
848
849 /*
850 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
851 * is not NULL, read a here document. In the latter case, eofmark is the
852 * word which marks the end of the document and striptabs is true if
853 * leading tabs should be stripped from the document. The argument firstc
854 * is the first character of the input token or document.
855 *
856 * Because C does not have internal subroutines, I have simulated them
857 * using goto's to implement the subroutine linkage. The following macros
858 * will run code that appears at the end of readtoken1.
859 */
860
861 #define CHECKEND() {goto checkend; checkend_return:;}
862 #define PARSEREDIR() {goto parseredir; parseredir_return:;}
863 #define PARSESUB() {goto parsesub; parsesub_return:;}
864 #define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
865 #define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
866 #define PARSEARITH() {goto parsearith; parsearith_return:;}
867
868 STATIC int
869 readtoken1(firstc, syntax, eofmark, striptabs)
870 int firstc;
871 char const *syntax;
872 char *eofmark;
873 int striptabs;
874 {
875 int c = firstc;
876 char *out;
877 int len;
878 char line[EOFMARKLEN + 1];
879 struct nodelist *bqlist;
880 int quotef;
881 int dblquote;
882 int varnest; /* levels of variables expansion */
883 int arinest; /* levels of arithmetic expansion */
884 int parenlevel; /* levels of parens in arithmetic */
885 int oldstyle;
886 char const *prevsyntax; /* syntax before arithmetic */
887 #if __GNUC__
888 /* Avoid longjmp clobbering */
889 (void) &out;
890 (void) "ef;
891 (void) &dblquote;
892 (void) &varnest;
893 (void) &arinest;
894 (void) &parenlevel;
895 (void) &oldstyle;
896 (void) &prevsyntax;
897 (void) &syntax;
898 #endif
899
900 startlinno = plinno;
901 dblquote = 0;
902 if (syntax == DQSYNTAX)
903 dblquote = 1;
904 quotef = 0;
905 bqlist = NULL;
906 varnest = 0;
907 arinest = 0;
908 parenlevel = 0;
909
910 STARTSTACKSTR(out);
911 loop: { /* for each line, until end of word */
912 #if ATTY
913 if (c == '\034' && doprompt
914 && attyset() && ! equal(termval(), "emacs")) {
915 attyline();
916 if (syntax == BASESYNTAX)
917 return readtoken();
918 c = pgetc();
919 goto loop;
920 }
921 #endif
922 CHECKEND(); /* set c to PEOF if at end of here document */
923 for (;;) { /* until end of line or end of word */
924 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */
925 switch(syntax[c]) {
926 case CNL: /* '\n' */
927 if (syntax == BASESYNTAX)
928 goto endword; /* exit outer loop */
929 USTPUTC(c, out);
930 plinno++;
931 if (doprompt)
932 setprompt(2);
933 else
934 setprompt(0);
935 c = pgetc();
936 goto loop; /* continue outer loop */
937 case CWORD:
938 USTPUTC(c, out);
939 break;
940 case CCTL:
941 if (eofmark == NULL || dblquote)
942 USTPUTC(CTLESC, out);
943 USTPUTC(c, out);
944 break;
945 case CBACK: /* backslash */
946 c = pgetc();
947 if (c == PEOF) {
948 USTPUTC('\\', out);
949 pungetc();
950 } else if (c == '\n') {
951 if (doprompt)
952 setprompt(2);
953 else
954 setprompt(0);
955 } else {
956 if (dblquote && c != '\\' && c != '`' && c != '$'
957 && (c != '"' || eofmark != NULL))
958 USTPUTC('\\', out);
959 if (SQSYNTAX[c] == CCTL)
960 USTPUTC(CTLESC, out);
961 else if (eofmark == NULL)
962 USTPUTC(CTLQUOTEMARK, out);
963 USTPUTC(c, out);
964 quotef++;
965 }
966 break;
967 case CSQUOTE:
968 if (eofmark == NULL)
969 USTPUTC(CTLQUOTEMARK, out);
970 syntax = SQSYNTAX;
971 break;
972 case CDQUOTE:
973 if (eofmark == NULL)
974 USTPUTC(CTLQUOTEMARK, out);
975 syntax = DQSYNTAX;
976 dblquote = 1;
977 break;
978 case CENDQUOTE:
979 if (eofmark != NULL && arinest == 0 &&
980 varnest == 0) {
981 USTPUTC(c, out);
982 } else {
983 if (arinest) {
984 syntax = ARISYNTAX;
985 dblquote = 0;
986 } else if (eofmark == NULL) {
987 syntax = BASESYNTAX;
988 dblquote = 0;
989 }
990 quotef++;
991 }
992 break;
993 case CVAR: /* '$' */
994 PARSESUB(); /* parse substitution */
995 break;
996 case CENDVAR: /* '}' */
997 if (varnest > 0) {
998 varnest--;
999 USTPUTC(CTLENDVAR, out);
1000 } else {
1001 USTPUTC(c, out);
1002 }
1003 break;
1004 case CLP: /* '(' in arithmetic */
1005 parenlevel++;
1006 USTPUTC(c, out);
1007 break;
1008 case CRP: /* ')' in arithmetic */
1009 if (parenlevel > 0) {
1010 USTPUTC(c, out);
1011 --parenlevel;
1012 } else {
1013 if (pgetc() == ')') {
1014 if (--arinest == 0) {
1015 USTPUTC(CTLENDARI, out);
1016 syntax = prevsyntax;
1017 if (syntax == DQSYNTAX)
1018 dblquote = 1;
1019 else
1020 dblquote = 0;
1021 } else
1022 USTPUTC(')', out);
1023 } else {
1024 /*
1025 * unbalanced parens
1026 * (don't 2nd guess - no error)
1027 */
1028 pungetc();
1029 USTPUTC(')', out);
1030 }
1031 }
1032 break;
1033 case CBQUOTE: /* '`' */
1034 PARSEBACKQOLD();
1035 break;
1036 case CEOF:
1037 goto endword; /* exit outer loop */
1038 default:
1039 if (varnest == 0)
1040 goto endword; /* exit outer loop */
1041 USTPUTC(c, out);
1042 }
1043 c = pgetc_macro();
1044 }
1045 }
1046 endword:
1047 if (syntax == ARISYNTAX)
1048 synerror("Missing '))'");
1049 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1050 synerror("Unterminated quoted string");
1051 if (varnest != 0) {
1052 startlinno = plinno;
1053 synerror("Missing '}'");
1054 }
1055 USTPUTC('\0', out);
1056 len = out - stackblock();
1057 out = stackblock();
1058 if (eofmark == NULL) {
1059 if ((c == '>' || c == '<')
1060 && quotef == 0
1061 && len <= 2
1062 && (*out == '\0' || is_digit(*out))) {
1063 PARSEREDIR();
1064 return lasttoken = TREDIR;
1065 } else {
1066 pungetc();
1067 }
1068 }
1069 quoteflag = quotef;
1070 backquotelist = bqlist;
1071 grabstackblock(len);
1072 wordtext = out;
1073 return lasttoken = TWORD;
1074 /* end of readtoken routine */
1075
1076
1077
1078 /*
1079 * Check to see whether we are at the end of the here document. When this
1080 * is called, c is set to the first character of the next input line. If
1081 * we are at the end of the here document, this routine sets the c to PEOF.
1082 */
1083
1084 checkend: {
1085 if (eofmark) {
1086 if (striptabs) {
1087 while (c == '\t')
1088 c = pgetc();
1089 }
1090 if (c == *eofmark) {
1091 if (pfgets(line, sizeof line) != NULL) {
1092 char *p, *q;
1093
1094 p = line;
1095 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1096 if (*p == '\n' && *q == '\0') {
1097 c = PEOF;
1098 plinno++;
1099 needprompt = doprompt;
1100 } else {
1101 pushstring(line, strlen(line), NULL);
1102 }
1103 }
1104 }
1105 }
1106 goto checkend_return;
1107 }
1108
1109
1110 /*
1111 * Parse a redirection operator. The variable "out" points to a string
1112 * specifying the fd to be redirected. The variable "c" contains the
1113 * first character of the redirection operator.
1114 */
1115
1116 parseredir: {
1117 char fd = *out;
1118 union node *np;
1119
1120 np = (union node *)stalloc(sizeof (struct nfile));
1121 if (c == '>') {
1122 np->nfile.fd = 1;
1123 c = pgetc();
1124 if (c == '>')
1125 np->type = NAPPEND;
1126 else if (c == '&')
1127 np->type = NTOFD;
1128 else {
1129 np->type = NTO;
1130 pungetc();
1131 }
1132 } else { /* c == '<' */
1133 np->nfile.fd = 0;
1134 switch (c = pgetc()) {
1135 case '<':
1136 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1137 np = (union node *)stalloc(sizeof (struct nhere));
1138 np->nfile.fd = 0;
1139 }
1140 np->type = NHERE;
1141 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1142 heredoc->here = np;
1143 if ((c = pgetc()) == '-') {
1144 heredoc->striptabs = 1;
1145 } else {
1146 heredoc->striptabs = 0;
1147 pungetc();
1148 }
1149 break;
1150
1151 case '&':
1152 np->type = NFROMFD;
1153 break;
1154
1155 case '>':
1156 np->type = NFROMTO;
1157 break;
1158
1159 default:
1160 np->type = NFROM;
1161 pungetc();
1162 break;
1163 }
1164 }
1165 if (fd != '\0')
1166 np->nfile.fd = digit_val(fd);
1167 redirnode = np;
1168 goto parseredir_return;
1169 }
1170
1171
1172 /*
1173 * Parse a substitution. At this point, we have read the dollar sign
1174 * and nothing else.
1175 */
1176
1177 parsesub: {
1178 int subtype;
1179 int typeloc;
1180 int flags;
1181 char *p;
1182 static const char types[] = "}-+?=";
1183
1184 c = pgetc();
1185 if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
1186 USTPUTC('$', out);
1187 pungetc();
1188 } else if (c == '(') { /* $(command) or $((arith)) */
1189 if (pgetc() == '(') {
1190 PARSEARITH();
1191 } else {
1192 pungetc();
1193 PARSEBACKQNEW();
1194 }
1195 } else {
1196 USTPUTC(CTLVAR, out);
1197 typeloc = out - stackblock();
1198 USTPUTC(VSNORMAL, out);
1199 subtype = VSNORMAL;
1200 if (c == '{') {
1201 c = pgetc();
1202 if (c == '#') {
1203 if ((c = pgetc()) == '}')
1204 c = '#';
1205 else
1206 subtype = VSLENGTH;
1207 }
1208 else
1209 subtype = 0;
1210 }
1211 if (is_name(c)) {
1212 do {
1213 STPUTC(c, out);
1214 c = pgetc();
1215 } while (is_in_name(c));
1216 } else if (is_digit(c)) {
1217 do {
1218 USTPUTC(c, out);
1219 c = pgetc();
1220 } while (is_digit(c));
1221 }
1222 else if (is_special(c)) {
1223 USTPUTC(c, out);
1224 c = pgetc();
1225 }
1226 else
1227 badsub: synerror("Bad substitution");
1228
1229 STPUTC('=', out);
1230 flags = 0;
1231 if (subtype == 0) {
1232 switch (c) {
1233 case ':':
1234 flags = VSNUL;
1235 c = pgetc();
1236 /*FALLTHROUGH*/
1237 default:
1238 p = strchr(types, c);
1239 if (p == NULL)
1240 goto badsub;
1241 subtype = p - types + VSNORMAL;
1242 break;
1243 case '%':
1244 case '#':
1245 {
1246 int cc = c;
1247 subtype = c == '#' ? VSTRIMLEFT :
1248 VSTRIMRIGHT;
1249 c = pgetc();
1250 if (c == cc)
1251 subtype++;
1252 else
1253 pungetc();
1254 break;
1255 }
1256 }
1257 } else {
1258 pungetc();
1259 }
1260 if (dblquote || arinest)
1261 flags |= VSQUOTE;
1262 *(stackblock() + typeloc) = subtype | flags;
1263 if (subtype != VSNORMAL)
1264 varnest++;
1265 }
1266 goto parsesub_return;
1267 }
1268
1269
1270 /*
1271 * Called to parse command substitutions. Newstyle is set if the command
1272 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1273 * list of commands (passed by reference), and savelen is the number of
1274 * characters on the top of the stack which must be preserved.
1275 */
1276
1277 parsebackq: {
1278 struct nodelist **nlpp;
1279 int savepbq;
1280 union node *n;
1281 char *volatile str;
1282 struct jmploc jmploc;
1283 struct jmploc *volatile savehandler;
1284 int savelen;
1285 int saveprompt;
1286 #ifdef __GNUC__
1287 (void) &saveprompt;
1288 #endif
1289
1290 savepbq = parsebackquote;
1291 if (setjmp(jmploc.loc)) {
1292 if (str)
1293 ckfree(str);
1294 parsebackquote = 0;
1295 handler = savehandler;
1296 longjmp(handler->loc, 1);
1297 }
1298 INTOFF;
1299 str = NULL;
1300 savelen = out - stackblock();
1301 if (savelen > 0) {
1302 str = ckmalloc(savelen);
1303 memcpy(str, stackblock(), savelen);
1304 }
1305 savehandler = handler;
1306 handler = &jmploc;
1307 INTON;
1308 if (oldstyle) {
1309 /* We must read until the closing backquote, giving special
1310 treatment to some slashes, and then push the string and
1311 reread it as input, interpreting it normally. */
1312 char *pout;
1313 int pc;
1314 int psavelen;
1315 char *pstr;
1316
1317
1318 STARTSTACKSTR(pout);
1319 for (;;) {
1320 if (needprompt) {
1321 setprompt(2);
1322 needprompt = 0;
1323 }
1324 switch (pc = pgetc()) {
1325 case '`':
1326 goto done;
1327
1328 case '\\':
1329 if ((pc = pgetc()) == '\n') {
1330 plinno++;
1331 if (doprompt)
1332 setprompt(2);
1333 else
1334 setprompt(0);
1335 /*
1336 * If eating a newline, avoid putting
1337 * the newline into the new character
1338 * stream (via the STPUTC after the
1339 * switch).
1340 */
1341 continue;
1342 }
1343 if (pc != '\\' && pc != '`' && pc != '$'
1344 && (!dblquote || pc != '"'))
1345 STPUTC('\\', pout);
1346 break;
1347
1348 case '\n':
1349 plinno++;
1350 needprompt = doprompt;
1351 break;
1352
1353 case PEOF:
1354 startlinno = plinno;
1355 synerror("EOF in backquote substitution");
1356 break;
1357
1358 default:
1359 break;
1360 }
1361 STPUTC(pc, pout);
1362 }
1363 done:
1364 STPUTC('\0', pout);
1365 psavelen = pout - stackblock();
1366 if (psavelen > 0) {
1367 pstr = grabstackstr(pout);
1368 setinputstring(pstr, 1);
1369 }
1370 }
1371 nlpp = &bqlist;
1372 while (*nlpp)
1373 nlpp = &(*nlpp)->next;
1374 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1375 (*nlpp)->next = NULL;
1376 parsebackquote = oldstyle;
1377
1378 if (oldstyle) {
1379 saveprompt = doprompt;
1380 doprompt = 0;
1381 }
1382
1383 n = list(0);
1384
1385 if (oldstyle)
1386 doprompt = saveprompt;
1387 else {
1388 if (readtoken() != TRP)
1389 synexpect(TRP);
1390 }
1391
1392 (*nlpp)->n = n;
1393 if (oldstyle) {
1394 /*
1395 * Start reading from old file again, ignoring any pushed back
1396 * tokens left from the backquote parsing
1397 */
1398 popfile();
1399 tokpushback = 0;
1400 }
1401 while (stackblocksize() <= savelen)
1402 growstackblock();
1403 STARTSTACKSTR(out);
1404 if (str) {
1405 memcpy(out, str, savelen);
1406 STADJUST(savelen, out);
1407 INTOFF;
1408 ckfree(str);
1409 str = NULL;
1410 INTON;
1411 }
1412 parsebackquote = savepbq;
1413 handler = savehandler;
1414 if (arinest || dblquote)
1415 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1416 else
1417 USTPUTC(CTLBACKQ, out);
1418 if (oldstyle)
1419 goto parsebackq_oldreturn;
1420 else
1421 goto parsebackq_newreturn;
1422 }
1423
1424 /*
1425 * Parse an arithmetic expansion (indicate start of one and set state)
1426 */
1427 parsearith: {
1428
1429 if (++arinest == 1) {
1430 prevsyntax = syntax;
1431 syntax = ARISYNTAX;
1432 USTPUTC(CTLARI, out);
1433 if (dblquote)
1434 USTPUTC('"',out);
1435 else
1436 USTPUTC(' ',out);
1437 } else {
1438 /*
1439 * we collapse embedded arithmetic expansion to
1440 * parenthesis, which should be equivalent
1441 */
1442 USTPUTC('(', out);
1443 }
1444 goto parsearith_return;
1445 }
1446
1447 } /* end of readtoken */
1448
1449
1450
/*
 * Parser reset hook: zeroes tokpushback and checkkwd.
 * NOTE(review): the fragment is only visible when "mkinit" is defined, so
 * it is presumably harvested by the mkinit tool rather than compiled as
 * part of this file -- confirm against the build.
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1457
1458 /*
1459 * Returns true if the text contains nothing to expand (no dollar signs
1460 * or backquotes).
1461 */
1462
1463 STATIC int
1464 noexpand(text)
1465 char *text;
1466 {
1467 char *p;
1468 char c;
1469
1470 p = text;
1471 while ((c = *p++) != '\0') {
1472 if (c == CTLQUOTEMARK)
1473 continue;
1474 if (c == CTLESC)
1475 p++;
1476 else if (BASESYNTAX[(int)c] == CCTL)
1477 return 0;
1478 }
1479 return 1;
1480 }
1481
1482
/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 *
 * The first character is checked with is_name(), every later character
 * with is_in_name(); the first failing character makes the result 0.
 */

int
goodname(char *name)
{
	const char *cursor = name;

	/* The leading character has its own, stricter test. */
	if (!is_name(*cursor))
		return 0;

	/* Every remaining character up to the NUL must be a name character. */
	for (cursor++; *cursor != '\0'; cursor++) {
		if (!is_in_name(*cursor))
			return 0;
	}
	return 1;
}
1502
1503
1504 /*
1505 * Called when an unexpected token is read during the parse. The argument
1506 * is the token that is expected, or -1 if more than one type of token can
1507 * occur at this point.
1508 */
1509
1510 STATIC void
1511 synexpect(token)
1512 int token;
1513 {
1514 char msg[64];
1515
1516 if (token >= 0) {
1517 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1518 tokname[lasttoken], tokname[token]);
1519 } else {
1520 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1521 }
1522 synerror(msg);
1523 /* NOTREACHED */
1524 }
1525
1526
1527 STATIC void
1528 synerror(msg)
1529 const char *msg;
1530 {
1531 if (commandname)
1532 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1533 outfmt(&errout, "Syntax error: %s\n", msg);
1534 error((char *)NULL);
1535 /* NOTREACHED */
1536 }
1537
1538 STATIC void
1539 setprompt(which)
1540 int which;
1541 {
1542 whichprompt = which;
1543
1544 #ifndef SMALL
1545 if (!el)
1546 #endif
1547 out2str(getprompt(NULL));
1548 }
1549
1550 /*
1551 * called by editline -- any expansions to the prompt
1552 * should be added here.
1553 */
1554 const char *
1555 getprompt(void *unused)
1556 {
1557 switch (whichprompt) {
1558 case 0:
1559 return "";
1560 case 1:
1561 return ps1val();
1562 case 2:
1563 return ps2val();
1564 default:
1565 return "<internal prompt error>";
1566 }
1567 }
1568