/*	$NetBSD: parser.c,v 1.43 1999/07/09 03:05:50 christos Exp $	*/
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
43 #else
44 __RCSID("$NetBSD: parser.c,v 1.43 1999/07/09 03:05:50 christos Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <stdlib.h>
49
50 #include "shell.h"
51 #include "parser.h"
52 #include "nodes.h"
53 #include "expand.h" /* defines rmescapes() */
54 #include "redir.h" /* defines copyfd() */
55 #include "syntax.h"
56 #include "options.h"
57 #include "input.h"
58 #include "output.h"
59 #include "var.h"
60 #include "error.h"
61 #include "memalloc.h"
62 #include "mystring.h"
63 #include "alias.h"
64 #include "show.h"
65 #ifndef SMALL
66 #include "myhistedit.h"
67 #endif
68
69 /*
70 * Shell command parser.
71 */
72
73 #define EOFMARKLEN 79
74
75 /* values returned by readtoken */
76 #include "token.h"
77
78
79
/*
 * Bookkeeping for one pending here document (a "<<" redirection).
 * parsefname() appends entries to the tail of heredoclist;
 * parseheredoc() later removes them from the head and attaches the text
 * it reads to the redirection node.
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
86
87
88
/*
 * Parser state shared between the tokenizer (readtoken, xxreadtoken,
 * readtoken1) and the recursive-descent parsing routines in this file.
 */
struct heredoc *heredoclist;	/* list of here documents to read */
int parsebackquote;		/* nonzero if we are inside backquotes */
int doprompt;			/* if set, prompt the user */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* last token read */
MKINIT int tokpushback;		/* last token pushed back */
char *wordtext;			/* text of last word returned by readtoken */
MKINIT int checkkwd;            /* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;	/* backquote command list of the last word read */
union node *redirnode;		/* node built by readtoken1 for the last TREDIR */
struct heredoc *heredoc;	/* record allocated for the last "<<" operator */
int quoteflag;			/* set if (part of) last token was quoted */
int startlinno;			/* line # where last token started */
102
103
/*
 * Forward declarations for the file-local parsing routines.  synexpect()
 * and synerror() are declared noreturn: callers rely on them never
 * coming back after a syntax error.
 */
STATIC union node *list __P((int));
STATIC union node *andor __P((void));
STATIC union node *pipeline __P((void));
STATIC union node *command __P((void));
STATIC union node *simplecmd __P((union node **, union node *));
STATIC union node *makename __P((void));
STATIC void parsefname __P((void));
STATIC void parseheredoc __P((void));
STATIC int peektoken __P((void));
STATIC int readtoken __P((void));
STATIC int xxreadtoken __P((void));
STATIC int readtoken1 __P((int, char const *, char *, int));
STATIC int noexpand __P((char *));
STATIC void synexpect __P((int)) __attribute__((noreturn));
STATIC void synerror __P((const char *)) __attribute__((noreturn));
STATIC void setprompt __P((int));
120
121
122 /*
123 * Read and parse a command. Returns NEOF on end of file. (NULL is a
124 * valid parse tree indicating a blank line.)
125 */
126
127 union node *
128 parsecmd(interact)
129 int interact;
130 {
131 int t;
132
133 doprompt = interact;
134 if (doprompt)
135 setprompt(1);
136 else
137 setprompt(0);
138 needprompt = 0;
139 t = readtoken();
140 if (t == TEOF)
141 return NEOF;
142 if (t == TNL)
143 return NULL;
144 tokpushback++;
145 return list(1);
146 }
147
148
149 STATIC union node *
150 list(nlflag)
151 int nlflag;
152 {
153 union node *n1, *n2, *n3;
154 int tok;
155
156 checkkwd = 2;
157 if (nlflag == 0 && tokendlist[peektoken()])
158 return NULL;
159 n1 = NULL;
160 for (;;) {
161 n2 = andor();
162 tok = readtoken();
163 if (tok == TBACKGND) {
164 if (n2->type == NCMD || n2->type == NPIPE) {
165 n2->ncmd.backgnd = 1;
166 } else if (n2->type == NREDIR) {
167 n2->type = NBACKGND;
168 } else {
169 n3 = (union node *)stalloc(sizeof (struct nredir));
170 n3->type = NBACKGND;
171 n3->nredir.n = n2;
172 n3->nredir.redirect = NULL;
173 n2 = n3;
174 }
175 }
176 if (n1 == NULL) {
177 n1 = n2;
178 }
179 else {
180 n3 = (union node *)stalloc(sizeof (struct nbinary));
181 n3->type = NSEMI;
182 n3->nbinary.ch1 = n1;
183 n3->nbinary.ch2 = n2;
184 n1 = n3;
185 }
186 switch (tok) {
187 case TBACKGND:
188 case TSEMI:
189 tok = readtoken();
190 /* fall through */
191 case TNL:
192 if (tok == TNL) {
193 parseheredoc();
194 if (nlflag)
195 return n1;
196 } else {
197 tokpushback++;
198 }
199 checkkwd = 2;
200 if (tokendlist[peektoken()])
201 return n1;
202 break;
203 case TEOF:
204 if (heredoclist)
205 parseheredoc();
206 else
207 pungetc(); /* push back EOF on input */
208 return n1;
209 default:
210 if (nlflag)
211 synexpect(-1);
212 tokpushback++;
213 return n1;
214 }
215 }
216 }
217
218
219
220 STATIC union node *
221 andor() {
222 union node *n1, *n2, *n3;
223 int t;
224
225 n1 = pipeline();
226 for (;;) {
227 if ((t = readtoken()) == TAND) {
228 t = NAND;
229 } else if (t == TOR) {
230 t = NOR;
231 } else {
232 tokpushback++;
233 return n1;
234 }
235 n2 = pipeline();
236 n3 = (union node *)stalloc(sizeof (struct nbinary));
237 n3->type = t;
238 n3->nbinary.ch1 = n1;
239 n3->nbinary.ch2 = n2;
240 n1 = n3;
241 }
242 }
243
244
245
246 STATIC union node *
247 pipeline() {
248 union node *n1, *pipenode;
249 struct nodelist *lp, *prev;
250
251 TRACE(("pipeline: entered\n"));
252 n1 = command();
253 if (readtoken() == TPIPE) {
254 pipenode = (union node *)stalloc(sizeof (struct npipe));
255 pipenode->type = NPIPE;
256 pipenode->npipe.backgnd = 0;
257 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
258 pipenode->npipe.cmdlist = lp;
259 lp->n = n1;
260 do {
261 prev = lp;
262 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
263 lp->n = command();
264 prev->next = lp;
265 } while (readtoken() == TPIPE);
266 lp->next = NULL;
267 n1 = pipenode;
268 }
269 tokpushback++;
270 return n1;
271 }
272
273
274
275 STATIC union node *
276 command() {
277 union node *n1, *n2;
278 union node *ap, **app;
279 union node *cp, **cpp;
280 union node *redir, **rpp;
281 int t, negate = 0;
282
283 checkkwd = 2;
284 redir = NULL;
285 n1 = NULL;
286 rpp = &redir;
287
288 /* Check for redirection which may precede command */
289 while (readtoken() == TREDIR) {
290 *rpp = n2 = redirnode;
291 rpp = &n2->nfile.next;
292 parsefname();
293 }
294 tokpushback++;
295
296 while (readtoken() == TNOT) {
297 TRACE(("command: TNOT recognized\n"));
298 negate = !negate;
299 }
300 tokpushback++;
301
302 switch (readtoken()) {
303 case TIF:
304 n1 = (union node *)stalloc(sizeof (struct nif));
305 n1->type = NIF;
306 n1->nif.test = list(0);
307 if (readtoken() != TTHEN)
308 synexpect(TTHEN);
309 n1->nif.ifpart = list(0);
310 n2 = n1;
311 while (readtoken() == TELIF) {
312 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
313 n2 = n2->nif.elsepart;
314 n2->type = NIF;
315 n2->nif.test = list(0);
316 if (readtoken() != TTHEN)
317 synexpect(TTHEN);
318 n2->nif.ifpart = list(0);
319 }
320 if (lasttoken == TELSE)
321 n2->nif.elsepart = list(0);
322 else {
323 n2->nif.elsepart = NULL;
324 tokpushback++;
325 }
326 if (readtoken() != TFI)
327 synexpect(TFI);
328 checkkwd = 1;
329 break;
330 case TWHILE:
331 case TUNTIL: {
332 int got;
333 n1 = (union node *)stalloc(sizeof (struct nbinary));
334 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
335 n1->nbinary.ch1 = list(0);
336 if ((got=readtoken()) != TDO) {
337 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
338 synexpect(TDO);
339 }
340 n1->nbinary.ch2 = list(0);
341 if (readtoken() != TDONE)
342 synexpect(TDONE);
343 checkkwd = 1;
344 break;
345 }
346 case TFOR:
347 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
348 synerror("Bad for loop variable");
349 n1 = (union node *)stalloc(sizeof (struct nfor));
350 n1->type = NFOR;
351 n1->nfor.var = wordtext;
352 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
353 app = ≈
354 while (readtoken() == TWORD) {
355 n2 = (union node *)stalloc(sizeof (struct narg));
356 n2->type = NARG;
357 n2->narg.text = wordtext;
358 n2->narg.backquote = backquotelist;
359 *app = n2;
360 app = &n2->narg.next;
361 }
362 *app = NULL;
363 n1->nfor.args = ap;
364 if (lasttoken != TNL && lasttoken != TSEMI)
365 synexpect(-1);
366 } else {
367 static char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
368 '@', '=', '\0'};
369 n2 = (union node *)stalloc(sizeof (struct narg));
370 n2->type = NARG;
371 n2->narg.text = argvars;
372 n2->narg.backquote = NULL;
373 n2->narg.next = NULL;
374 n1->nfor.args = n2;
375 /*
376 * Newline or semicolon here is optional (but note
377 * that the original Bourne shell only allowed NL).
378 */
379 if (lasttoken != TNL && lasttoken != TSEMI)
380 tokpushback++;
381 }
382 checkkwd = 2;
383 if ((t = readtoken()) == TDO)
384 t = TDONE;
385 else if (t == TBEGIN)
386 t = TEND;
387 else
388 synexpect(-1);
389 n1->nfor.body = list(0);
390 if (readtoken() != t)
391 synexpect(t);
392 checkkwd = 1;
393 break;
394 case TCASE:
395 n1 = (union node *)stalloc(sizeof (struct ncase));
396 n1->type = NCASE;
397 if (readtoken() != TWORD)
398 synexpect(TWORD);
399 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
400 n2->type = NARG;
401 n2->narg.text = wordtext;
402 n2->narg.backquote = backquotelist;
403 n2->narg.next = NULL;
404 while (readtoken() == TNL);
405 if (lasttoken != TWORD || ! equal(wordtext, "in"))
406 synerror("expecting \"in\"");
407 cpp = &n1->ncase.cases;
408 checkkwd = 2, readtoken();
409 do {
410 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
411 cp->type = NCLIST;
412 app = &cp->nclist.pattern;
413 for (;;) {
414 *app = ap = (union node *)stalloc(sizeof (struct narg));
415 ap->type = NARG;
416 ap->narg.text = wordtext;
417 ap->narg.backquote = backquotelist;
418 if (checkkwd = 2, readtoken() != TPIPE)
419 break;
420 app = &ap->narg.next;
421 readtoken();
422 }
423 ap->narg.next = NULL;
424 if (lasttoken != TRP)
425 synexpect(TRP);
426 cp->nclist.body = list(0);
427
428 checkkwd = 2;
429 if ((t = readtoken()) != TESAC) {
430 if (t != TENDCASE)
431 synexpect(TENDCASE);
432 else
433 checkkwd = 2, readtoken();
434 }
435 cpp = &cp->nclist.next;
436 } while(lasttoken != TESAC);
437 *cpp = NULL;
438 checkkwd = 1;
439 break;
440 case TLP:
441 n1 = (union node *)stalloc(sizeof (struct nredir));
442 n1->type = NSUBSHELL;
443 n1->nredir.n = list(0);
444 n1->nredir.redirect = NULL;
445 if (readtoken() != TRP)
446 synexpect(TRP);
447 checkkwd = 1;
448 break;
449 case TBEGIN:
450 n1 = list(0);
451 if (readtoken() != TEND)
452 synexpect(TEND);
453 checkkwd = 1;
454 break;
455 /* Handle an empty command like other simple commands. */
456 case TSEMI:
457 /*
458 * An empty command before a ; doesn't make much sense, and
459 * should certainly be disallowed in the case of `if ;'.
460 */
461 if (!redir)
462 synexpect(-1);
463 case TAND:
464 case TOR:
465 case TNL:
466 case TEOF:
467 case TWORD:
468 case TRP:
469 tokpushback++;
470 n1 = simplecmd(rpp, redir);
471 goto checkneg;
472 default:
473 synexpect(-1);
474 /* NOTREACHED */
475 }
476
477 /* Now check for redirection which may follow command */
478 while (readtoken() == TREDIR) {
479 *rpp = n2 = redirnode;
480 rpp = &n2->nfile.next;
481 parsefname();
482 }
483 tokpushback++;
484 *rpp = NULL;
485 if (redir) {
486 if (n1->type != NSUBSHELL) {
487 n2 = (union node *)stalloc(sizeof (struct nredir));
488 n2->type = NREDIR;
489 n2->nredir.n = n1;
490 n1 = n2;
491 }
492 n1->nredir.redirect = redir;
493 }
494
495 checkneg:
496 if (negate) {
497 n2 = (union node *)stalloc(sizeof (struct nnot));
498 n2->type = NNOT;
499 n2->nnot.com = n1;
500 return n2;
501 }
502 else
503 return n1;
504 }
505
506
507 STATIC union node *
508 simplecmd(rpp, redir)
509 union node **rpp, *redir;
510 {
511 union node *args, **app;
512 union node **orig_rpp = rpp;
513 union node *n = NULL, *n2;
514 int negate = 0;
515
516 /* If we don't have any redirections already, then we must reset */
517 /* rpp to be the address of the local redir variable. */
518 if (redir == 0)
519 rpp = &redir;
520
521 args = NULL;
522 app = &args;
523 /*
524 * We save the incoming value, because we need this for shell
525 * functions. There can not be a redirect or an argument between
526 * the function name and the open parenthesis.
527 */
528 orig_rpp = rpp;
529
530 while (readtoken() == TNOT) {
531 TRACE(("command: TNOT recognized\n"));
532 negate = !negate;
533 }
534 tokpushback++;
535
536 for (;;) {
537 if (readtoken() == TWORD) {
538 n = (union node *)stalloc(sizeof (struct narg));
539 n->type = NARG;
540 n->narg.text = wordtext;
541 n->narg.backquote = backquotelist;
542 *app = n;
543 app = &n->narg.next;
544 } else if (lasttoken == TREDIR) {
545 *rpp = n = redirnode;
546 rpp = &n->nfile.next;
547 parsefname(); /* read name of redirection file */
548 } else if (lasttoken == TLP && app == &args->narg.next
549 && rpp == orig_rpp) {
550 /* We have a function */
551 if (readtoken() != TRP)
552 synexpect(TRP);
553 #ifdef notdef
554 if (! goodname(n->narg.text))
555 synerror("Bad function name");
556 #endif
557 n->type = NDEFUN;
558 n->narg.next = command();
559 goto checkneg;
560 } else {
561 tokpushback++;
562 break;
563 }
564 }
565 *app = NULL;
566 *rpp = NULL;
567 n = (union node *)stalloc(sizeof (struct ncmd));
568 n->type = NCMD;
569 n->ncmd.backgnd = 0;
570 n->ncmd.args = args;
571 n->ncmd.redirect = redir;
572
573 checkneg:
574 if (negate) {
575 n2 = (union node *)stalloc(sizeof (struct nnot));
576 n2->type = NNOT;
577 n2->nnot.com = n;
578 return n2;
579 }
580 else
581 return n;
582 }
583
584 STATIC union node *
585 makename() {
586 union node *n;
587
588 n = (union node *)stalloc(sizeof (struct narg));
589 n->type = NARG;
590 n->narg.next = NULL;
591 n->narg.text = wordtext;
592 n->narg.backquote = backquotelist;
593 return n;
594 }
595
596 void fixredir(n, text, err)
597 union node *n;
598 const char *text;
599 int err;
600 {
601 TRACE(("Fix redir %s %d\n", text, err));
602 if (!err)
603 n->ndup.vname = NULL;
604
605 if (is_digit(text[0]) && text[1] == '\0')
606 n->ndup.dupfd = digit_val(text[0]);
607 else if (text[0] == '-' && text[1] == '\0')
608 n->ndup.dupfd = -1;
609 else {
610
611 if (err)
612 synerror("Bad fd number");
613 else
614 n->ndup.vname = makename();
615 }
616 }
617
618
619 STATIC void
620 parsefname() {
621 union node *n = redirnode;
622
623 if (readtoken() != TWORD)
624 synexpect(-1);
625 if (n->type == NHERE) {
626 struct heredoc *here = heredoc;
627 struct heredoc *p;
628 int i;
629
630 if (quoteflag == 0)
631 n->type = NXHERE;
632 TRACE(("Here document %d\n", n->type));
633 if (here->striptabs) {
634 while (*wordtext == '\t')
635 wordtext++;
636 }
637 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
638 synerror("Illegal eof marker for << redirection");
639 rmescapes(wordtext);
640 here->eofmark = wordtext;
641 here->next = NULL;
642 if (heredoclist == NULL)
643 heredoclist = here;
644 else {
645 for (p = heredoclist ; p->next ; p = p->next);
646 p->next = here;
647 }
648 } else if (n->type == NTOFD || n->type == NFROMFD) {
649 fixredir(n, wordtext, 0);
650 } else {
651 n->nfile.fname = makename();
652 }
653 }
654
655
656 /*
657 * Input any here documents.
658 */
659
660 STATIC void
661 parseheredoc() {
662 struct heredoc *here;
663 union node *n;
664
665 while (heredoclist) {
666 here = heredoclist;
667 heredoclist = here->next;
668 if (needprompt) {
669 setprompt(2);
670 needprompt = 0;
671 }
672 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
673 here->eofmark, here->striptabs);
674 n = (union node *)stalloc(sizeof (struct narg));
675 n->narg.type = NARG;
676 n->narg.next = NULL;
677 n->narg.text = wordtext;
678 n->narg.backquote = backquotelist;
679 here->here->nhere.doc = n;
680 }
681 }
682
683 STATIC int
684 peektoken() {
685 int t;
686
687 t = readtoken();
688 tokpushback++;
689 return (t);
690 }
691
692 STATIC int
693 readtoken() {
694 int t;
695 int savecheckkwd = checkkwd;
696 struct alias *ap;
697 #ifdef DEBUG
698 int alreadyseen = tokpushback;
699 #endif
700
701 top:
702 t = xxreadtoken();
703
704 if (checkkwd) {
705 /*
706 * eat newlines
707 */
708 if (checkkwd == 2) {
709 checkkwd = 0;
710 while (t == TNL) {
711 parseheredoc();
712 t = xxreadtoken();
713 }
714 } else
715 checkkwd = 0;
716 /*
717 * check for keywords and aliases
718 */
719 if (t == TWORD && !quoteflag)
720 {
721 const char *const *pp;
722
723 for (pp = parsekwd; *pp; pp++) {
724 if (**pp == *wordtext && equal(*pp, wordtext))
725 {
726 lasttoken = t = pp -
727 parsekwd + KWDOFFSET;
728 TRACE(("keyword %s recognized\n", tokname[t]));
729 goto out;
730 }
731 }
732 if ((ap = lookupalias(wordtext, 1)) != NULL) {
733 pushstring(ap->val, strlen(ap->val), ap);
734 checkkwd = savecheckkwd;
735 goto top;
736 }
737 }
738 out:
739 checkkwd = (t == TNOT) ? savecheckkwd : 0;
740 }
741 #ifdef DEBUG
742 if (!alreadyseen)
743 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
744 else
745 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
746 #endif
747 return (t);
748 }
749
750
751 /*
752 * Read the next input token.
753 * If the token is a word, we set backquotelist to the list of cmds in
754 * backquotes. We set quoteflag to true if any part of the word was
755 * quoted.
756 * If the token is TREDIR, then we set redirnode to a structure containing
757 * the redirection.
758 * In all cases, the variable startlinno is set to the number of the line
759 * on which the token starts.
760 *
761 * [Change comment: here documents and internal procedures]
762 * [Readtoken shouldn't have any arguments. Perhaps we should make the
763 * word parsing code into a separate routine. In this case, readtoken
764 * doesn't need to have any internal procedures, but parseword does.
765 * We could also make parseoperator in essence the main routine, and
766 * have parseword (readtoken1?) handle both words and redirection.]
767 */
768
769 #define RETURN(token) return lasttoken = token
770
771 STATIC int
772 xxreadtoken() {
773 int c;
774
775 if (tokpushback) {
776 tokpushback = 0;
777 return lasttoken;
778 }
779 if (needprompt) {
780 setprompt(2);
781 needprompt = 0;
782 }
783 startlinno = plinno;
784 for (;;) { /* until token or start of word found */
785 c = pgetc_macro();
786 if (c == ' ' || c == '\t')
787 continue; /* quick check for white space first */
788 switch (c) {
789 case ' ': case '\t':
790 continue;
791 case '#':
792 while ((c = pgetc()) != '\n' && c != PEOF);
793 pungetc();
794 continue;
795 case '\\':
796 if (pgetc() == '\n') {
797 startlinno = ++plinno;
798 if (doprompt)
799 setprompt(2);
800 else
801 setprompt(0);
802 continue;
803 }
804 pungetc();
805 goto breakloop;
806 case '\n':
807 plinno++;
808 needprompt = doprompt;
809 RETURN(TNL);
810 case PEOF:
811 RETURN(TEOF);
812 case '&':
813 if (pgetc() == '&')
814 RETURN(TAND);
815 pungetc();
816 RETURN(TBACKGND);
817 case '|':
818 if (pgetc() == '|')
819 RETURN(TOR);
820 pungetc();
821 RETURN(TPIPE);
822 case ';':
823 if (pgetc() == ';')
824 RETURN(TENDCASE);
825 pungetc();
826 RETURN(TSEMI);
827 case '(':
828 RETURN(TLP);
829 case ')':
830 RETURN(TRP);
831 default:
832 goto breakloop;
833 }
834 }
835 breakloop:
836 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
837 #undef RETURN
838 }
839
840
841
842 /*
843 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
844 * is not NULL, read a here document. In the latter case, eofmark is the
845 * word which marks the end of the document and striptabs is true if
846 * leading tabs should be stripped from the document. The argument firstc
847 * is the first character of the input token or document.
848 *
849 * Because C does not have internal subroutines, I have simulated them
850 * using goto's to implement the subroutine linkage. The following macros
851 * will run code that appears at the end of readtoken1.
852 */
853
/*
 * Each macro jumps to a labelled block and plants the label at which
 * execution resumes afterwards.  Because a macro defines a label, it can
 * be expanded only once inside a function.
 */
/* Run the checkend block, which resumes at checkend_return. */
#define CHECKEND() {goto checkend; checkend_return:;}
/* Run the parseredir block, which resumes at parseredir_return. */
#define PARSEREDIR() {goto parseredir; parseredir_return:;}
/* Run the parsesub block, which resumes at parsesub_return. */
#define PARSESUB() {goto parsesub; parsesub_return:;}
/* Run parsebackq with oldstyle set; parsebackq_oldreturn marks the resume point. */
#define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
/* Run parsebackq with oldstyle clear; parsebackq_newreturn marks the resume point. */
#define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
/* Jump to the parsearith label; parsearith_return marks the resume point. */
#define PARSEARITH() {goto parsearith; parsearith_return:;}
860
861 STATIC int
862 readtoken1(firstc, syntax, eofmark, striptabs)
863 int firstc;
864 char const *syntax;
865 char *eofmark;
866 int striptabs;
867 {
868 int c = firstc;
869 char *out;
870 int len;
871 char line[EOFMARKLEN + 1];
872 struct nodelist *bqlist;
873 int quotef;
874 int dblquote;
875 int varnest; /* levels of variables expansion */
876 int arinest; /* levels of arithmetic expansion */
877 int parenlevel; /* levels of parens in arithmetic */
878 int oldstyle;
879 char const *prevsyntax; /* syntax before arithmetic */
880 #if __GNUC__
881 /* Avoid longjmp clobbering */
882 (void) &out;
883 (void) "ef;
884 (void) &dblquote;
885 (void) &varnest;
886 (void) &arinest;
887 (void) &parenlevel;
888 (void) &oldstyle;
889 (void) &prevsyntax;
890 (void) &syntax;
891 #endif
892
893 startlinno = plinno;
894 dblquote = 0;
895 if (syntax == DQSYNTAX)
896 dblquote = 1;
897 quotef = 0;
898 bqlist = NULL;
899 varnest = 0;
900 arinest = 0;
901 parenlevel = 0;
902
903 STARTSTACKSTR(out);
904 loop: { /* for each line, until end of word */
905 #if ATTY
906 if (c == '\034' && doprompt
907 && attyset() && ! equal(termval(), "emacs")) {
908 attyline();
909 if (syntax == BASESYNTAX)
910 return readtoken();
911 c = pgetc();
912 goto loop;
913 }
914 #endif
915 CHECKEND(); /* set c to PEOF if at end of here document */
916 for (;;) { /* until end of line or end of word */
917 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */
918 switch(syntax[c]) {
919 case CNL: /* '\n' */
920 if (syntax == BASESYNTAX)
921 goto endword; /* exit outer loop */
922 USTPUTC(c, out);
923 plinno++;
924 if (doprompt)
925 setprompt(2);
926 else
927 setprompt(0);
928 c = pgetc();
929 goto loop; /* continue outer loop */
930 case CWORD:
931 USTPUTC(c, out);
932 break;
933 case CCTL:
934 if (eofmark == NULL || dblquote)
935 USTPUTC(CTLESC, out);
936 USTPUTC(c, out);
937 break;
938 case CBACK: /* backslash */
939 c = pgetc();
940 if (c == PEOF) {
941 USTPUTC('\\', out);
942 pungetc();
943 } else if (c == '\n') {
944 if (doprompt)
945 setprompt(2);
946 else
947 setprompt(0);
948 } else {
949 if (dblquote && c != '\\' && c != '`' && c != '$'
950 && (c != '"' || eofmark != NULL))
951 USTPUTC('\\', out);
952 if (SQSYNTAX[c] == CCTL)
953 USTPUTC(CTLESC, out);
954 else if (eofmark == NULL)
955 USTPUTC(CTLQUOTEMARK, out);
956 USTPUTC(c, out);
957 quotef++;
958 }
959 break;
960 case CSQUOTE:
961 if (eofmark == NULL)
962 USTPUTC(CTLQUOTEMARK, out);
963 syntax = SQSYNTAX;
964 break;
965 case CDQUOTE:
966 if (eofmark == NULL)
967 USTPUTC(CTLQUOTEMARK, out);
968 syntax = DQSYNTAX;
969 dblquote = 1;
970 break;
971 case CENDQUOTE:
972 if (eofmark != NULL && arinest == 0 &&
973 varnest == 0) {
974 USTPUTC(c, out);
975 } else {
976 if (arinest) {
977 syntax = ARISYNTAX;
978 dblquote = 0;
979 } else if (eofmark == NULL) {
980 syntax = BASESYNTAX;
981 dblquote = 0;
982 }
983 quotef++;
984 }
985 break;
986 case CVAR: /* '$' */
987 PARSESUB(); /* parse substitution */
988 break;
989 case CENDVAR: /* '}' */
990 if (varnest > 0) {
991 varnest--;
992 USTPUTC(CTLENDVAR, out);
993 } else {
994 USTPUTC(c, out);
995 }
996 break;
997 case CLP: /* '(' in arithmetic */
998 parenlevel++;
999 USTPUTC(c, out);
1000 break;
1001 case CRP: /* ')' in arithmetic */
1002 if (parenlevel > 0) {
1003 USTPUTC(c, out);
1004 --parenlevel;
1005 } else {
1006 if (pgetc() == ')') {
1007 if (--arinest == 0) {
1008 USTPUTC(CTLENDARI, out);
1009 syntax = prevsyntax;
1010 if (syntax == DQSYNTAX)
1011 dblquote = 1;
1012 else
1013 dblquote = 0;
1014 } else
1015 USTPUTC(')', out);
1016 } else {
1017 /*
1018 * unbalanced parens
1019 * (don't 2nd guess - no error)
1020 */
1021 pungetc();
1022 USTPUTC(')', out);
1023 }
1024 }
1025 break;
1026 case CBQUOTE: /* '`' */
1027 PARSEBACKQOLD();
1028 break;
1029 case CEOF:
1030 goto endword; /* exit outer loop */
1031 default:
1032 if (varnest == 0)
1033 goto endword; /* exit outer loop */
1034 USTPUTC(c, out);
1035 }
1036 c = pgetc_macro();
1037 }
1038 }
1039 endword:
1040 if (syntax == ARISYNTAX)
1041 synerror("Missing '))'");
1042 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1043 synerror("Unterminated quoted string");
1044 if (varnest != 0) {
1045 startlinno = plinno;
1046 synerror("Missing '}'");
1047 }
1048 USTPUTC('\0', out);
1049 len = out - stackblock();
1050 out = stackblock();
1051 if (eofmark == NULL) {
1052 if ((c == '>' || c == '<')
1053 && quotef == 0
1054 && len <= 2
1055 && (*out == '\0' || is_digit(*out))) {
1056 PARSEREDIR();
1057 return lasttoken = TREDIR;
1058 } else {
1059 pungetc();
1060 }
1061 }
1062 quoteflag = quotef;
1063 backquotelist = bqlist;
1064 grabstackblock(len);
1065 wordtext = out;
1066 return lasttoken = TWORD;
1067 /* end of readtoken routine */
1068
1069
1070
1071 /*
1072 * Check to see whether we are at the end of the here document. When this
1073 * is called, c is set to the first character of the next input line. If
1074 * we are at the end of the here document, this routine sets the c to PEOF.
1075 */
1076
1077 checkend: {
1078 if (eofmark) {
1079 if (striptabs) {
1080 while (c == '\t')
1081 c = pgetc();
1082 }
1083 if (c == *eofmark) {
1084 if (pfgets(line, sizeof line) != NULL) {
1085 char *p, *q;
1086
1087 p = line;
1088 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1089 if (*p == '\n' && *q == '\0') {
1090 c = PEOF;
1091 plinno++;
1092 needprompt = doprompt;
1093 } else {
1094 pushstring(line, strlen(line), NULL);
1095 }
1096 }
1097 }
1098 }
1099 goto checkend_return;
1100 }
1101
1102
1103 /*
1104 * Parse a redirection operator. The variable "out" points to a string
1105 * specifying the fd to be redirected. The variable "c" contains the
1106 * first character of the redirection operator.
1107 */
1108
1109 parseredir: {
1110 char fd = *out;
1111 union node *np;
1112
1113 np = (union node *)stalloc(sizeof (struct nfile));
1114 if (c == '>') {
1115 np->nfile.fd = 1;
1116 c = pgetc();
1117 if (c == '>')
1118 np->type = NAPPEND;
1119 else if (c == '&')
1120 np->type = NTOFD;
1121 else {
1122 np->type = NTO;
1123 pungetc();
1124 }
1125 } else { /* c == '<' */
1126 np->nfile.fd = 0;
1127 switch (c = pgetc()) {
1128 case '<':
1129 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1130 np = (union node *)stalloc(sizeof (struct nhere));
1131 np->nfile.fd = 0;
1132 }
1133 np->type = NHERE;
1134 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1135 heredoc->here = np;
1136 if ((c = pgetc()) == '-') {
1137 heredoc->striptabs = 1;
1138 } else {
1139 heredoc->striptabs = 0;
1140 pungetc();
1141 }
1142 break;
1143
1144 case '&':
1145 np->type = NFROMFD;
1146 break;
1147
1148 case '>':
1149 np->type = NFROMTO;
1150 break;
1151
1152 default:
1153 np->type = NFROM;
1154 pungetc();
1155 break;
1156 }
1157 }
1158 if (fd != '\0')
1159 np->nfile.fd = digit_val(fd);
1160 redirnode = np;
1161 goto parseredir_return;
1162 }
1163
1164
1165 /*
1166 * Parse a substitution. At this point, we have read the dollar sign
1167 * and nothing else.
1168 */
1169
1170 parsesub: {
1171 int subtype;
1172 int typeloc;
1173 int flags;
1174 char *p;
1175 static const char types[] = "}-+?=";
1176
1177 c = pgetc();
1178 if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
1179 USTPUTC('$', out);
1180 pungetc();
1181 } else if (c == '(') { /* $(command) or $((arith)) */
1182 if (pgetc() == '(') {
1183 PARSEARITH();
1184 } else {
1185 pungetc();
1186 PARSEBACKQNEW();
1187 }
1188 } else {
1189 USTPUTC(CTLVAR, out);
1190 typeloc = out - stackblock();
1191 USTPUTC(VSNORMAL, out);
1192 subtype = VSNORMAL;
1193 if (c == '{') {
1194 c = pgetc();
1195 if (c == '#') {
1196 if ((c = pgetc()) == '}')
1197 c = '#';
1198 else
1199 subtype = VSLENGTH;
1200 }
1201 else
1202 subtype = 0;
1203 }
1204 if (is_name(c)) {
1205 do {
1206 STPUTC(c, out);
1207 c = pgetc();
1208 } while (is_in_name(c));
1209 } else if (is_digit(c)) {
1210 do {
1211 USTPUTC(c, out);
1212 c = pgetc();
1213 } while (is_digit(c));
1214 }
1215 else if (is_special(c)) {
1216 USTPUTC(c, out);
1217 c = pgetc();
1218 }
1219 else
1220 badsub: synerror("Bad substitution");
1221
1222 STPUTC('=', out);
1223 flags = 0;
1224 if (subtype == 0) {
1225 switch (c) {
1226 case ':':
1227 flags = VSNUL;
1228 c = pgetc();
1229 /*FALLTHROUGH*/
1230 default:
1231 p = strchr(types, c);
1232 if (p == NULL)
1233 goto badsub;
1234 subtype = p - types + VSNORMAL;
1235 break;
1236 case '%':
1237 case '#':
1238 {
1239 int cc = c;
1240 subtype = c == '#' ? VSTRIMLEFT :
1241 VSTRIMRIGHT;
1242 c = pgetc();
1243 if (c == cc)
1244 subtype++;
1245 else
1246 pungetc();
1247 break;
1248 }
1249 }
1250 } else {
1251 pungetc();
1252 }
1253 if (dblquote || arinest)
1254 flags |= VSQUOTE;
1255 *(stackblock() + typeloc) = subtype | flags;
1256 if (subtype != VSNORMAL)
1257 varnest++;
1258 }
1259 goto parsesub_return;
1260 }
1261
1262
1263 /*
1264 * Called to parse command substitutions. Newstyle is set if the command
1265 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1266 * list of commands (passed by reference), and savelen is the number of
1267 * characters on the top of the stack which must be preserved.
1268 */
1269
1270 parsebackq: {
1271 struct nodelist **nlpp;
1272 int savepbq;
1273 union node *n;
1274 char *volatile str;
1275 struct jmploc jmploc;
1276 struct jmploc *volatile savehandler;
1277 int savelen;
1278 int saveprompt;
1279 #ifdef __GNUC__
1280 (void) &saveprompt;
1281 #endif
1282
1283 savepbq = parsebackquote;
1284 if (setjmp(jmploc.loc)) {
1285 if (str)
1286 ckfree(str);
1287 parsebackquote = 0;
1288 handler = savehandler;
1289 longjmp(handler->loc, 1);
1290 }
1291 INTOFF;
1292 str = NULL;
1293 savelen = out - stackblock();
1294 if (savelen > 0) {
1295 str = ckmalloc(savelen);
1296 memcpy(str, stackblock(), savelen);
1297 }
1298 savehandler = handler;
1299 handler = &jmploc;
1300 INTON;
1301 if (oldstyle) {
1302 /* We must read until the closing backquote, giving special
1303 treatment to some slashes, and then push the string and
1304 reread it as input, interpreting it normally. */
1305 char *pout;
1306 int pc;
1307 int psavelen;
1308 char *pstr;
1309
1310
1311 STARTSTACKSTR(pout);
1312 for (;;) {
1313 if (needprompt) {
1314 setprompt(2);
1315 needprompt = 0;
1316 }
1317 switch (pc = pgetc()) {
1318 case '`':
1319 goto done;
1320
1321 case '\\':
1322 if ((pc = pgetc()) == '\n') {
1323 plinno++;
1324 if (doprompt)
1325 setprompt(2);
1326 else
1327 setprompt(0);
1328 /*
1329 * If eating a newline, avoid putting
1330 * the newline into the new character
1331 * stream (via the STPUTC after the
1332 * switch).
1333 */
1334 continue;
1335 }
1336 if (pc != '\\' && pc != '`' && pc != '$'
1337 && (!dblquote || pc != '"'))
1338 STPUTC('\\', pout);
1339 break;
1340
1341 case '\n':
1342 plinno++;
1343 needprompt = doprompt;
1344 break;
1345
1346 case PEOF:
1347 startlinno = plinno;
1348 synerror("EOF in backquote substitution");
1349 break;
1350
1351 default:
1352 break;
1353 }
1354 STPUTC(pc, pout);
1355 }
1356 done:
1357 STPUTC('\0', pout);
1358 psavelen = pout - stackblock();
1359 if (psavelen > 0) {
1360 pstr = grabstackstr(pout);
1361 setinputstring(pstr, 1);
1362 }
1363 }
1364 nlpp = &bqlist;
1365 while (*nlpp)
1366 nlpp = &(*nlpp)->next;
1367 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1368 (*nlpp)->next = NULL;
1369 parsebackquote = oldstyle;
1370
1371 if (oldstyle) {
1372 saveprompt = doprompt;
1373 doprompt = 0;
1374 }
1375
1376 n = list(0);
1377
1378 if (oldstyle)
1379 doprompt = saveprompt;
1380 else {
1381 if (readtoken() != TRP)
1382 synexpect(TRP);
1383 }
1384
1385 (*nlpp)->n = n;
1386 if (oldstyle) {
1387 /*
1388 * Start reading from old file again, ignoring any pushed back
1389 * tokens left from the backquote parsing
1390 */
1391 popfile();
1392 tokpushback = 0;
1393 }
1394 while (stackblocksize() <= savelen)
1395 growstackblock();
1396 STARTSTACKSTR(out);
1397 if (str) {
1398 memcpy(out, str, savelen);
1399 STADJUST(savelen, out);
1400 INTOFF;
1401 ckfree(str);
1402 str = NULL;
1403 INTON;
1404 }
1405 parsebackquote = savepbq;
1406 handler = savehandler;
1407 if (arinest || dblquote)
1408 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1409 else
1410 USTPUTC(CTLBACKQ, out);
1411 if (oldstyle)
1412 goto parsebackq_oldreturn;
1413 else
1414 goto parsebackq_newreturn;
1415 }
1416
1417 /*
1418 * Parse an arithmetic expansion (indicate start of one and set state)
1419 */
1420 parsearith: {
1421
1422 if (++arinest == 1) {
1423 prevsyntax = syntax;
1424 syntax = ARISYNTAX;
1425 USTPUTC(CTLARI, out);
1426 if (dblquote)
1427 USTPUTC('"',out);
1428 else
1429 USTPUTC(' ',out);
1430 } else {
1431 /*
1432 * we collapse embedded arithmetic expansion to
1433 * parenthesis, which should be equivalent
1434 */
1435 USTPUTC('(', out);
1436 }
1437 goto parsearith_return;
1438 }
1439
1440 } /* end of readtoken */
1441
1442
1443
/*
 * Reset block, present only when "mkinit" is defined: zeroes the
 * checkkwd and tokpushback parser variables.
 * NOTE(review): presumably collected by the mkinit tool into the
 * generated reset routine -- confirm against mkinit.
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1450
1451 /*
1452 * Returns true if the text contains nothing to expand (no dollar signs
1453 * or backquotes).
1454 */
1455
1456 STATIC int
1457 noexpand(text)
1458 char *text;
1459 {
1460 char *p;
1461 char c;
1462
1463 p = text;
1464 while ((c = *p++) != '\0') {
1465 if (c == CTLQUOTEMARK)
1466 continue;
1467 if (c == CTLESC)
1468 p++;
1469 else if (BASESYNTAX[(int)c] == CCTL)
1470 return 0;
1471 }
1472 return 1;
1473 }
1474
1475
/*
 * Check whether name is a legal variable name: a first character
 * accepted by is_name() (a letter or underscore) followed by zero or
 * more characters accepted by is_in_name() (letters, underscores and
 * digits).
 *
 * Returns 1 when the whole string qualifies, 0 otherwise.
 */

int
goodname(name)
	char *name;
{
	char *cursor = name;

	/* The leading character has its own, stricter test. */
	if (!is_name(*cursor))
		return 0;

	for (cursor++; *cursor != '\0'; cursor++) {
		if (!is_in_name(*cursor))
			return 0;
	}
	return 1;
}
1496
1497
1498 /*
1499 * Called when an unexpected token is read during the parse. The argument
1500 * is the token that is expected, or -1 if more than one type of token can
1501 * occur at this point.
1502 */
1503
1504 STATIC void
1505 synexpect(token)
1506 int token;
1507 {
1508 char msg[64];
1509
1510 if (token >= 0) {
1511 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1512 tokname[lasttoken], tokname[token]);
1513 } else {
1514 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1515 }
1516 synerror(msg);
1517 /* NOTREACHED */
1518 }
1519
1520
1521 STATIC void
1522 synerror(msg)
1523 const char *msg;
1524 {
1525 if (commandname)
1526 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1527 outfmt(&errout, "Syntax error: %s\n", msg);
1528 error((char *)NULL);
1529 /* NOTREACHED */
1530 }
1531
1532 STATIC void
1533 setprompt(which)
1534 int which;
1535 {
1536 whichprompt = which;
1537
1538 #ifndef SMALL
1539 if (!el)
1540 #endif
1541 out2str(getprompt(NULL));
1542 }
1543
1544 /*
1545 * called by editline -- any expansions to the prompt
1546 * should be added here.
1547 */
1548 const char *
1549 getprompt(unused)
1550 void *unused;
1551 {
1552 switch (whichprompt) {
1553 case 0:
1554 return "";
1555 case 1:
1556 return ps1val();
1557 case 2:
1558 return ps2val();
1559 default:
1560 return "<internal prompt error>";
1561 }
1562 }
1563