parser.c revision 1.36 1 /* $NetBSD: parser.c,v 1.36 1997/07/04 21:02:19 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
43 #else
44 __RCSID("$NetBSD: parser.c,v 1.36 1997/07/04 21:02:19 christos Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <stdlib.h>
49
50 #include "shell.h"
51 #include "parser.h"
52 #include "nodes.h"
53 #include "expand.h" /* defines rmescapes() */
54 #include "redir.h" /* defines copyfd() */
55 #include "syntax.h"
56 #include "options.h"
57 #include "input.h"
58 #include "output.h"
59 #include "var.h"
60 #include "error.h"
61 #include "memalloc.h"
62 #include "mystring.h"
63 #include "alias.h"
64 #include "show.h"
65 #ifndef SMALL
66 #include "myhistedit.h"
67 #endif
68
69 /*
70 * Shell command parser.
71 */
72
73 #define EOFMARKLEN 79
74
75 /* values returned by readtoken */
76 #include "token.h"
77
78
79
/*
 * Describes one here document (<< redirection) whose body has not been
 * read yet.  Pending documents are chained through "next".
 */
struct heredoc {
	struct heredoc *next;	/* following entry in the pending list */
	union node *here;	/* redirection node the document belongs to */
	char *eofmark;		/* word that terminates the document body */
	int striptabs;		/* nonzero: leading tabs are stripped */
};
86
87
88
89 struct heredoc *heredoclist; /* list of here documents to read */
90 int parsebackquote; /* nonzero if we are inside backquotes */
91 int doprompt; /* if set, prompt the user */
92 int needprompt; /* true if interactive and at start of line */
93 int lasttoken; /* last token read */
94 MKINIT int tokpushback; /* last token pushed back */
95 char *wordtext; /* text of last word returned by readtoken */
96 MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */
97 struct nodelist *backquotelist;
98 union node *redirnode;
99 struct heredoc *heredoc;
100 int quoteflag; /* set if (part of) last token was quoted */
101 int startlinno; /* line # where last token started */
102
103
104 #define GDB_HACK 1 /* avoid local declarations which gdb can't handle */
105 #ifdef GDB_HACK
106 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'};
107 static const char types[] = "}-+?=";
108 #endif
109
110
111 STATIC union node *list __P((int));
112 STATIC union node *andor __P((void));
113 STATIC union node *pipeline __P((void));
114 STATIC union node *command __P((void));
115 STATIC union node *simplecmd __P((union node **, union node *));
116 STATIC union node *makename __P((void));
117 STATIC void parsefname __P((void));
118 STATIC void parseheredoc __P((void));
119 STATIC int peektoken __P((void));
120 STATIC int readtoken __P((void));
121 STATIC int xxreadtoken __P((void));
122 STATIC int readtoken1 __P((int, char const *, char *, int));
123 STATIC int noexpand __P((char *));
124 STATIC void synexpect __P((int));
125 STATIC void synerror __P((char *));
126 STATIC void setprompt __P((int));
127
128
129 /*
130 * Read and parse a command. Returns NEOF on end of file. (NULL is a
131 * valid parse tree indicating a blank line.)
132 */
133
134 union node *
135 parsecmd(interact)
136 int interact;
137 {
138 int t;
139
140 doprompt = interact;
141 if (doprompt)
142 setprompt(1);
143 else
144 setprompt(0);
145 needprompt = 0;
146 t = readtoken();
147 if (t == TEOF)
148 return NEOF;
149 if (t == TNL)
150 return NULL;
151 tokpushback++;
152 return list(1);
153 }
154
155
156 STATIC union node *
157 list(nlflag)
158 int nlflag;
159 {
160 union node *n1, *n2, *n3;
161 int tok;
162
163 checkkwd = 2;
164 if (nlflag == 0 && tokendlist[peektoken()])
165 return NULL;
166 n1 = NULL;
167 for (;;) {
168 n2 = andor();
169 tok = readtoken();
170 if (tok == TBACKGND) {
171 if (n2->type == NCMD || n2->type == NPIPE) {
172 n2->ncmd.backgnd = 1;
173 } else if (n2->type == NREDIR) {
174 n2->type = NBACKGND;
175 } else {
176 n3 = (union node *)stalloc(sizeof (struct nredir));
177 n3->type = NBACKGND;
178 n3->nredir.n = n2;
179 n3->nredir.redirect = NULL;
180 n2 = n3;
181 }
182 }
183 if (n1 == NULL) {
184 n1 = n2;
185 }
186 else {
187 n3 = (union node *)stalloc(sizeof (struct nbinary));
188 n3->type = NSEMI;
189 n3->nbinary.ch1 = n1;
190 n3->nbinary.ch2 = n2;
191 n1 = n3;
192 }
193 switch (tok) {
194 case TBACKGND:
195 case TSEMI:
196 tok = readtoken();
197 /* fall through */
198 case TNL:
199 if (tok == TNL) {
200 parseheredoc();
201 if (nlflag)
202 return n1;
203 } else {
204 tokpushback++;
205 }
206 checkkwd = 2;
207 if (tokendlist[peektoken()])
208 return n1;
209 break;
210 case TEOF:
211 if (heredoclist)
212 parseheredoc();
213 else
214 pungetc(); /* push back EOF on input */
215 return n1;
216 default:
217 if (nlflag)
218 synexpect(-1);
219 tokpushback++;
220 return n1;
221 }
222 }
223 }
224
225
226
227 STATIC union node *
228 andor() {
229 union node *n1, *n2, *n3;
230 int t;
231
232 n1 = pipeline();
233 for (;;) {
234 if ((t = readtoken()) == TAND) {
235 t = NAND;
236 } else if (t == TOR) {
237 t = NOR;
238 } else {
239 tokpushback++;
240 return n1;
241 }
242 n2 = pipeline();
243 n3 = (union node *)stalloc(sizeof (struct nbinary));
244 n3->type = t;
245 n3->nbinary.ch1 = n1;
246 n3->nbinary.ch2 = n2;
247 n1 = n3;
248 }
249 }
250
251
252
253 STATIC union node *
254 pipeline() {
255 union node *n1, *pipenode;
256 struct nodelist *lp, *prev;
257
258 TRACE(("pipeline: entered\n"));
259 n1 = command();
260 if (readtoken() == TPIPE) {
261 pipenode = (union node *)stalloc(sizeof (struct npipe));
262 pipenode->type = NPIPE;
263 pipenode->npipe.backgnd = 0;
264 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
265 pipenode->npipe.cmdlist = lp;
266 lp->n = n1;
267 do {
268 prev = lp;
269 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
270 lp->n = command();
271 prev->next = lp;
272 } while (readtoken() == TPIPE);
273 lp->next = NULL;
274 n1 = pipenode;
275 }
276 tokpushback++;
277 return n1;
278 }
279
280
281
282 STATIC union node *
283 command() {
284 union node *n1, *n2;
285 union node *ap, **app;
286 union node *cp, **cpp;
287 union node *redir, **rpp;
288 int t, negate = 0;
289
290 checkkwd = 2;
291 redir = NULL;
292 n1 = NULL;
293 rpp = &redir;
294
295 /* Check for redirection which may precede command */
296 while (readtoken() == TREDIR) {
297 *rpp = n2 = redirnode;
298 rpp = &n2->nfile.next;
299 parsefname();
300 }
301 tokpushback++;
302
303 while (readtoken() == TNOT) {
304 TRACE(("command: TNOT recognized\n"));
305 negate = !negate;
306 }
307 tokpushback++;
308
309 switch (readtoken()) {
310 case TIF:
311 n1 = (union node *)stalloc(sizeof (struct nif));
312 n1->type = NIF;
313 n1->nif.test = list(0);
314 if (readtoken() != TTHEN)
315 synexpect(TTHEN);
316 n1->nif.ifpart = list(0);
317 n2 = n1;
318 while (readtoken() == TELIF) {
319 n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
320 n2 = n2->nif.elsepart;
321 n2->type = NIF;
322 n2->nif.test = list(0);
323 if (readtoken() != TTHEN)
324 synexpect(TTHEN);
325 n2->nif.ifpart = list(0);
326 }
327 if (lasttoken == TELSE)
328 n2->nif.elsepart = list(0);
329 else {
330 n2->nif.elsepart = NULL;
331 tokpushback++;
332 }
333 if (readtoken() != TFI)
334 synexpect(TFI);
335 checkkwd = 1;
336 break;
337 case TWHILE:
338 case TUNTIL: {
339 int got;
340 n1 = (union node *)stalloc(sizeof (struct nbinary));
341 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
342 n1->nbinary.ch1 = list(0);
343 if ((got=readtoken()) != TDO) {
344 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
345 synexpect(TDO);
346 }
347 n1->nbinary.ch2 = list(0);
348 if (readtoken() != TDONE)
349 synexpect(TDONE);
350 checkkwd = 1;
351 break;
352 }
353 case TFOR:
354 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
355 synerror("Bad for loop variable");
356 n1 = (union node *)stalloc(sizeof (struct nfor));
357 n1->type = NFOR;
358 n1->nfor.var = wordtext;
359 if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
360 app = ≈
361 while (readtoken() == TWORD) {
362 n2 = (union node *)stalloc(sizeof (struct narg));
363 n2->type = NARG;
364 n2->narg.text = wordtext;
365 n2->narg.backquote = backquotelist;
366 *app = n2;
367 app = &n2->narg.next;
368 }
369 *app = NULL;
370 n1->nfor.args = ap;
371 if (lasttoken != TNL && lasttoken != TSEMI)
372 synexpect(-1);
373 } else {
374 #ifndef GDB_HACK
375 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
376 '@', '=', '\0'};
377 #endif
378 n2 = (union node *)stalloc(sizeof (struct narg));
379 n2->type = NARG;
380 n2->narg.text = (char *)argvars;
381 n2->narg.backquote = NULL;
382 n2->narg.next = NULL;
383 n1->nfor.args = n2;
384 /*
385 * Newline or semicolon here is optional (but note
386 * that the original Bourne shell only allowed NL).
387 */
388 if (lasttoken != TNL && lasttoken != TSEMI)
389 tokpushback++;
390 }
391 checkkwd = 2;
392 if ((t = readtoken()) == TDO)
393 t = TDONE;
394 else if (t == TBEGIN)
395 t = TEND;
396 else
397 synexpect(-1);
398 n1->nfor.body = list(0);
399 if (readtoken() != t)
400 synexpect(t);
401 checkkwd = 1;
402 break;
403 case TCASE:
404 n1 = (union node *)stalloc(sizeof (struct ncase));
405 n1->type = NCASE;
406 if (readtoken() != TWORD)
407 synexpect(TWORD);
408 n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
409 n2->type = NARG;
410 n2->narg.text = wordtext;
411 n2->narg.backquote = backquotelist;
412 n2->narg.next = NULL;
413 while (readtoken() == TNL);
414 if (lasttoken != TWORD || ! equal(wordtext, "in"))
415 synerror("expecting \"in\"");
416 cpp = &n1->ncase.cases;
417 checkkwd = 2, readtoken();
418 do {
419 *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
420 cp->type = NCLIST;
421 app = &cp->nclist.pattern;
422 for (;;) {
423 *app = ap = (union node *)stalloc(sizeof (struct narg));
424 ap->type = NARG;
425 ap->narg.text = wordtext;
426 ap->narg.backquote = backquotelist;
427 if (checkkwd = 2, readtoken() != TPIPE)
428 break;
429 app = &ap->narg.next;
430 readtoken();
431 }
432 ap->narg.next = NULL;
433 if (lasttoken != TRP)
434 synexpect(TRP);
435 cp->nclist.body = list(0);
436
437 checkkwd = 2;
438 if ((t = readtoken()) != TESAC) {
439 if (t != TENDCASE)
440 synexpect(TENDCASE);
441 else
442 checkkwd = 2, readtoken();
443 }
444 cpp = &cp->nclist.next;
445 } while(lasttoken != TESAC);
446 *cpp = NULL;
447 checkkwd = 1;
448 break;
449 case TLP:
450 n1 = (union node *)stalloc(sizeof (struct nredir));
451 n1->type = NSUBSHELL;
452 n1->nredir.n = list(0);
453 n1->nredir.redirect = NULL;
454 if (readtoken() != TRP)
455 synexpect(TRP);
456 checkkwd = 1;
457 break;
458 case TBEGIN:
459 n1 = list(0);
460 if (readtoken() != TEND)
461 synexpect(TEND);
462 checkkwd = 1;
463 break;
464 /* Handle an empty command like other simple commands. */
465 case TSEMI:
466 /*
467 * An empty command before a ; doesn't make much sense, and
468 * should certainly be disallowed in the case of `if ;'.
469 */
470 if (!redir)
471 synexpect(-1);
472 case TAND:
473 case TOR:
474 case TNL:
475 case TEOF:
476 case TWORD:
477 case TRP:
478 tokpushback++;
479 n1 = simplecmd(rpp, redir);
480 goto checkneg;
481 default:
482 synexpect(-1);
483 }
484
485 /* Now check for redirection which may follow command */
486 while (readtoken() == TREDIR) {
487 *rpp = n2 = redirnode;
488 rpp = &n2->nfile.next;
489 parsefname();
490 }
491 tokpushback++;
492 *rpp = NULL;
493 if (redir) {
494 if (n1->type != NSUBSHELL) {
495 n2 = (union node *)stalloc(sizeof (struct nredir));
496 n2->type = NREDIR;
497 n2->nredir.n = n1;
498 n1 = n2;
499 }
500 n1->nredir.redirect = redir;
501 }
502
503 checkneg:
504 if (negate) {
505 n2 = (union node *)stalloc(sizeof (struct nnot));
506 n2->type = NNOT;
507 n2->nnot.com = n1;
508 return n2;
509 }
510 else
511 return n1;
512 }
513
514
515 STATIC union node *
516 simplecmd(rpp, redir)
517 union node **rpp, *redir;
518 {
519 union node *args, **app;
520 union node **orig_rpp = rpp;
521 union node *n = NULL, *n2;
522 int negate = 0;
523
524 /* If we don't have any redirections already, then we must reset */
525 /* rpp to be the address of the local redir variable. */
526 if (redir == 0)
527 rpp = &redir;
528
529 args = NULL;
530 app = &args;
531 /*
532 * We save the incoming value, because we need this for shell
533 * functions. There can not be a redirect or an argument between
534 * the function name and the open parenthesis.
535 */
536 orig_rpp = rpp;
537
538 while (readtoken() == TNOT) {
539 TRACE(("command: TNOT recognized\n"));
540 negate = !negate;
541 }
542 tokpushback++;
543
544 for (;;) {
545 if (readtoken() == TWORD) {
546 n = (union node *)stalloc(sizeof (struct narg));
547 n->type = NARG;
548 n->narg.text = wordtext;
549 n->narg.backquote = backquotelist;
550 *app = n;
551 app = &n->narg.next;
552 } else if (lasttoken == TREDIR) {
553 *rpp = n = redirnode;
554 rpp = &n->nfile.next;
555 parsefname(); /* read name of redirection file */
556 } else if (lasttoken == TLP && app == &args->narg.next
557 && rpp == orig_rpp) {
558 /* We have a function */
559 if (readtoken() != TRP)
560 synexpect(TRP);
561 #ifdef notdef
562 if (! goodname(n->narg.text))
563 synerror("Bad function name");
564 #endif
565 n->type = NDEFUN;
566 n->narg.next = command();
567 goto checkneg;
568 } else {
569 tokpushback++;
570 break;
571 }
572 }
573 *app = NULL;
574 *rpp = NULL;
575 n = (union node *)stalloc(sizeof (struct ncmd));
576 n->type = NCMD;
577 n->ncmd.backgnd = 0;
578 n->ncmd.args = args;
579 n->ncmd.redirect = redir;
580
581 checkneg:
582 if (negate) {
583 n2 = (union node *)stalloc(sizeof (struct nnot));
584 n2->type = NNOT;
585 n2->nnot.com = n;
586 return n2;
587 }
588 else
589 return n;
590 }
591
592 STATIC union node *
593 makename() {
594 union node *n;
595
596 n = (union node *)stalloc(sizeof (struct narg));
597 n->type = NARG;
598 n->narg.next = NULL;
599 n->narg.text = wordtext;
600 n->narg.backquote = backquotelist;
601 return n;
602 }
603
604 void fixredir(n, text, err)
605 union node *n;
606 const char *text;
607 int err;
608 {
609 TRACE(("Fix redir %s %d\n", text, err));
610 if (!err)
611 n->ndup.vname = NULL;
612
613 if (is_digit(text[0]) && text[1] == '\0')
614 n->ndup.dupfd = digit_val(text[0]);
615 else if (text[0] == '-' && text[1] == '\0')
616 n->ndup.dupfd = -1;
617 else {
618
619 if (err)
620 synerror("Bad fd number");
621 else
622 n->ndup.vname = makename();
623 }
624 }
625
626
627 STATIC void
628 parsefname() {
629 union node *n = redirnode;
630
631 if (readtoken() != TWORD)
632 synexpect(-1);
633 if (n->type == NHERE) {
634 struct heredoc *here = heredoc;
635 struct heredoc *p;
636 int i;
637
638 if (quoteflag == 0)
639 n->type = NXHERE;
640 TRACE(("Here document %d\n", n->type));
641 if (here->striptabs) {
642 while (*wordtext == '\t')
643 wordtext++;
644 }
645 if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
646 synerror("Illegal eof marker for << redirection");
647 rmescapes(wordtext);
648 here->eofmark = wordtext;
649 here->next = NULL;
650 if (heredoclist == NULL)
651 heredoclist = here;
652 else {
653 for (p = heredoclist ; p->next ; p = p->next);
654 p->next = here;
655 }
656 } else if (n->type == NTOFD || n->type == NFROMFD) {
657 fixredir(n, wordtext, 0);
658 } else {
659 n->nfile.fname = makename();
660 }
661 }
662
663
664 /*
665 * Input any here documents.
666 */
667
668 STATIC void
669 parseheredoc() {
670 struct heredoc *here;
671 union node *n;
672
673 while (heredoclist) {
674 here = heredoclist;
675 heredoclist = here->next;
676 if (needprompt) {
677 setprompt(2);
678 needprompt = 0;
679 }
680 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
681 here->eofmark, here->striptabs);
682 n = (union node *)stalloc(sizeof (struct narg));
683 n->narg.type = NARG;
684 n->narg.next = NULL;
685 n->narg.text = wordtext;
686 n->narg.backquote = backquotelist;
687 here->here->nhere.doc = n;
688 }
689 }
690
691 STATIC int
692 peektoken() {
693 int t;
694
695 t = readtoken();
696 tokpushback++;
697 return (t);
698 }
699
700 STATIC int
701 readtoken() {
702 int t;
703 int savecheckkwd = checkkwd;
704 struct alias *ap;
705 #ifdef DEBUG
706 int alreadyseen = tokpushback;
707 #endif
708
709 top:
710 t = xxreadtoken();
711
712 if (checkkwd) {
713 /*
714 * eat newlines
715 */
716 if (checkkwd == 2) {
717 checkkwd = 0;
718 while (t == TNL) {
719 parseheredoc();
720 t = xxreadtoken();
721 }
722 } else
723 checkkwd = 0;
724 /*
725 * check for keywords and aliases
726 */
727 if (t == TWORD && !quoteflag)
728 {
729 char * const *pp;
730
731 for (pp = (char **)parsekwd; *pp; pp++) {
732 if (**pp == *wordtext && equal(*pp, wordtext))
733 {
734 lasttoken = t = pp - parsekwd + KWDOFFSET;
735 TRACE(("keyword %s recognized\n", tokname[t]));
736 goto out;
737 }
738 }
739 if ((ap = lookupalias(wordtext, 1)) != NULL) {
740 pushstring(ap->val, strlen(ap->val), ap);
741 checkkwd = savecheckkwd;
742 goto top;
743 }
744 }
745 out:
746 checkkwd = (t == TNOT) ? savecheckkwd : 0;
747 }
748 #ifdef DEBUG
749 if (!alreadyseen)
750 TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
751 else
752 TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
753 #endif
754 return (t);
755 }
756
757
758 /*
759 * Read the next input token.
760 * If the token is a word, we set backquotelist to the list of cmds in
761 * backquotes. We set quoteflag to true if any part of the word was
762 * quoted.
763 * If the token is TREDIR, then we set redirnode to a structure containing
764 * the redirection.
765 * In all cases, the variable startlinno is set to the number of the line
766 * on which the token starts.
767 *
768 * [Change comment: here documents and internal procedures]
769 * [Readtoken shouldn't have any arguments. Perhaps we should make the
770 * word parsing code into a separate routine. In this case, readtoken
771 * doesn't need to have any internal procedures, but parseword does.
772 * We could also make parseoperator in essence the main routine, and
773 * have parseword (readtoken1?) handle both words and redirection.]
774 */
775
776 #define RETURN(token) return lasttoken = token
777
778 STATIC int
779 xxreadtoken() {
780 int c;
781
782 if (tokpushback) {
783 tokpushback = 0;
784 return lasttoken;
785 }
786 if (needprompt) {
787 setprompt(2);
788 needprompt = 0;
789 }
790 startlinno = plinno;
791 for (;;) { /* until token or start of word found */
792 c = pgetc_macro();
793 if (c == ' ' || c == '\t')
794 continue; /* quick check for white space first */
795 switch (c) {
796 case ' ': case '\t':
797 continue;
798 case '#':
799 while ((c = pgetc()) != '\n' && c != PEOF);
800 pungetc();
801 continue;
802 case '\\':
803 if (pgetc() == '\n') {
804 startlinno = ++plinno;
805 if (doprompt)
806 setprompt(2);
807 else
808 setprompt(0);
809 continue;
810 }
811 pungetc();
812 goto breakloop;
813 case '\n':
814 plinno++;
815 needprompt = doprompt;
816 RETURN(TNL);
817 case PEOF:
818 RETURN(TEOF);
819 case '&':
820 if (pgetc() == '&')
821 RETURN(TAND);
822 pungetc();
823 RETURN(TBACKGND);
824 case '|':
825 if (pgetc() == '|')
826 RETURN(TOR);
827 pungetc();
828 RETURN(TPIPE);
829 case ';':
830 if (pgetc() == ';')
831 RETURN(TENDCASE);
832 pungetc();
833 RETURN(TSEMI);
834 case '(':
835 RETURN(TLP);
836 case ')':
837 RETURN(TRP);
838 default:
839 goto breakloop;
840 }
841 }
842 breakloop:
843 return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
844 #undef RETURN
845 }
846
847
848
849 /*
850 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
851 * is not NULL, read a here document. In the latter case, eofmark is the
852 * word which marks the end of the document and striptabs is true if
853 * leading tabs should be stripped from the document. The argument firstc
854 * is the first character of the input token or document.
855 *
856 * Because C does not have internal subroutines, I have simulated them
857 * using goto's to implement the subroutine linkage. The following macros
858 * will run code that appears at the end of readtoken1.
859 */
860
/*
 * Linkage for the simulated subroutines of readtoken1.  Each macro jumps
 * to the label that starts the routine and defines the label the routine
 * jumps back to, so each may be expanded only once within a function.
 */
#define CHECKEND()	{goto checkend; checkend_return:;}
#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
#define PARSESUB()	{goto parsesub; parsesub_return:;}
/* The two backquote entry points share one routine and differ in oldstyle. */
#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
#define PARSEARITH()	{goto parsearith; parsearith_return:;}
867
868 STATIC int
869 readtoken1(firstc, syntax, eofmark, striptabs)
870 int firstc;
871 char const *syntax;
872 char *eofmark;
873 int striptabs;
874 {
875 int c = firstc;
876 char *out;
877 int len;
878 char line[EOFMARKLEN + 1];
879 struct nodelist *bqlist;
880 int quotef;
881 int dblquote;
882 int varnest; /* levels of variables expansion */
883 int arinest; /* levels of arithmetic expansion */
884 int parenlevel; /* levels of parens in arithmetic */
885 int oldstyle;
886 char const *prevsyntax; /* syntax before arithmetic */
887 #if __GNUC__
888 /* Avoid longjmp clobbering */
889 (void) &out;
890 (void) "ef;
891 (void) &dblquote;
892 (void) &varnest;
893 (void) &arinest;
894 (void) &parenlevel;
895 (void) &oldstyle;
896 (void) &prevsyntax;
897 (void) &syntax;
898 #endif
899
900 startlinno = plinno;
901 dblquote = 0;
902 if (syntax == DQSYNTAX)
903 dblquote = 1;
904 quotef = 0;
905 bqlist = NULL;
906 varnest = 0;
907 arinest = 0;
908 parenlevel = 0;
909
910 STARTSTACKSTR(out);
911 loop: { /* for each line, until end of word */
912 #if ATTY
913 if (c == '\034' && doprompt
914 && attyset() && ! equal(termval(), "emacs")) {
915 attyline();
916 if (syntax == BASESYNTAX)
917 return readtoken();
918 c = pgetc();
919 goto loop;
920 }
921 #endif
922 CHECKEND(); /* set c to PEOF if at end of here document */
923 for (;;) { /* until end of line or end of word */
924 CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */
925 switch(syntax[c]) {
926 case CNL: /* '\n' */
927 if (syntax == BASESYNTAX)
928 goto endword; /* exit outer loop */
929 USTPUTC(c, out);
930 plinno++;
931 if (doprompt)
932 setprompt(2);
933 else
934 setprompt(0);
935 c = pgetc();
936 goto loop; /* continue outer loop */
937 case CWORD:
938 USTPUTC(c, out);
939 break;
940 case CCTL:
941 if (eofmark == NULL || dblquote)
942 USTPUTC(CTLESC, out);
943 USTPUTC(c, out);
944 break;
945 case CBACK: /* backslash */
946 c = pgetc();
947 if (c == PEOF) {
948 USTPUTC('\\', out);
949 pungetc();
950 } else if (c == '\n') {
951 if (doprompt)
952 setprompt(2);
953 else
954 setprompt(0);
955 } else {
956 if (dblquote && c != '\\' && c != '`' && c != '$'
957 && (c != '"' || eofmark != NULL))
958 USTPUTC('\\', out);
959 if (SQSYNTAX[c] == CCTL)
960 USTPUTC(CTLESC, out);
961 USTPUTC(c, out);
962 quotef++;
963 }
964 break;
965 case CSQUOTE:
966 syntax = SQSYNTAX;
967 break;
968 case CDQUOTE:
969 syntax = DQSYNTAX;
970 dblquote = 1;
971 break;
972 case CENDQUOTE:
973 if (eofmark) {
974 USTPUTC(c, out);
975 } else {
976 if (arinest)
977 syntax = ARISYNTAX;
978 else
979 syntax = BASESYNTAX;
980 quotef++;
981 dblquote = 0;
982 }
983 break;
984 case CVAR: /* '$' */
985 PARSESUB(); /* parse substitution */
986 break;
987 case CENDVAR: /* '}' */
988 if (varnest > 0) {
989 varnest--;
990 USTPUTC(CTLENDVAR, out);
991 } else {
992 USTPUTC(c, out);
993 }
994 break;
995 case CLP: /* '(' in arithmetic */
996 parenlevel++;
997 USTPUTC(c, out);
998 break;
999 case CRP: /* ')' in arithmetic */
1000 if (parenlevel > 0) {
1001 USTPUTC(c, out);
1002 --parenlevel;
1003 } else {
1004 if (pgetc() == ')') {
1005 if (--arinest == 0) {
1006 USTPUTC(CTLENDARI, out);
1007 syntax = prevsyntax;
1008 } else
1009 USTPUTC(')', out);
1010 } else {
1011 /*
1012 * unbalanced parens
1013 * (don't 2nd guess - no error)
1014 */
1015 pungetc();
1016 USTPUTC(')', out);
1017 }
1018 }
1019 break;
1020 case CBQUOTE: /* '`' */
1021 PARSEBACKQOLD();
1022 break;
1023 case CEOF:
1024 goto endword; /* exit outer loop */
1025 default:
1026 if (varnest == 0)
1027 goto endword; /* exit outer loop */
1028 USTPUTC(c, out);
1029 }
1030 c = pgetc_macro();
1031 }
1032 }
1033 endword:
1034 if (syntax == ARISYNTAX)
1035 synerror("Missing '))'");
1036 if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
1037 synerror("Unterminated quoted string");
1038 if (varnest != 0) {
1039 startlinno = plinno;
1040 synerror("Missing '}'");
1041 }
1042 USTPUTC('\0', out);
1043 len = out - stackblock();
1044 out = stackblock();
1045 if (eofmark == NULL) {
1046 if ((c == '>' || c == '<')
1047 && quotef == 0
1048 && len <= 2
1049 && (*out == '\0' || is_digit(*out))) {
1050 PARSEREDIR();
1051 return lasttoken = TREDIR;
1052 } else {
1053 pungetc();
1054 }
1055 }
1056 quoteflag = quotef;
1057 backquotelist = bqlist;
1058 grabstackblock(len);
1059 wordtext = out;
1060 return lasttoken = TWORD;
1061 /* end of readtoken routine */
1062
1063
1064
1065 /*
1066 * Check to see whether we are at the end of the here document. When this
1067 * is called, c is set to the first character of the next input line. If
1068 * we are at the end of the here document, this routine sets the c to PEOF.
1069 */
1070
1071 checkend: {
1072 if (eofmark) {
1073 if (striptabs) {
1074 while (c == '\t')
1075 c = pgetc();
1076 }
1077 if (c == *eofmark) {
1078 if (pfgets(line, sizeof line) != NULL) {
1079 char *p, *q;
1080
1081 p = line;
1082 for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
1083 if (*p == '\n' && *q == '\0') {
1084 c = PEOF;
1085 plinno++;
1086 needprompt = doprompt;
1087 } else {
1088 pushstring(line, strlen(line), NULL);
1089 }
1090 }
1091 }
1092 }
1093 goto checkend_return;
1094 }
1095
1096
1097 /*
1098 * Parse a redirection operator. The variable "out" points to a string
1099 * specifying the fd to be redirected. The variable "c" contains the
1100 * first character of the redirection operator.
1101 */
1102
1103 parseredir: {
1104 char fd = *out;
1105 union node *np;
1106
1107 np = (union node *)stalloc(sizeof (struct nfile));
1108 if (c == '>') {
1109 np->nfile.fd = 1;
1110 c = pgetc();
1111 if (c == '>')
1112 np->type = NAPPEND;
1113 else if (c == '&')
1114 np->type = NTOFD;
1115 else {
1116 np->type = NTO;
1117 pungetc();
1118 }
1119 } else { /* c == '<' */
1120 np->nfile.fd = 0;
1121 c = pgetc();
1122 if (c == '<') {
1123 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1124 np = (union node *)stalloc(sizeof (struct nhere));
1125 np->nfile.fd = 0;
1126 }
1127 np->type = NHERE;
1128 heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1129 heredoc->here = np;
1130 if ((c = pgetc()) == '-') {
1131 heredoc->striptabs = 1;
1132 } else {
1133 heredoc->striptabs = 0;
1134 pungetc();
1135 }
1136 } else if (c == '&')
1137 np->type = NFROMFD;
1138 else {
1139 np->type = NFROM;
1140 pungetc();
1141 }
1142 }
1143 if (fd != '\0')
1144 np->nfile.fd = digit_val(fd);
1145 redirnode = np;
1146 goto parseredir_return;
1147 }
1148
1149
1150 /*
1151 * Parse a substitution. At this point, we have read the dollar sign
1152 * and nothing else.
1153 */
1154
1155 parsesub: {
1156 int subtype;
1157 int typeloc;
1158 int flags;
1159 char *p;
1160 #ifndef GDB_HACK
1161 static const char types[] = "}-+?=";
1162 #endif
1163
1164 c = pgetc();
1165 if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
1166 USTPUTC('$', out);
1167 pungetc();
1168 } else if (c == '(') { /* $(command) or $((arith)) */
1169 if (pgetc() == '(') {
1170 PARSEARITH();
1171 } else {
1172 pungetc();
1173 PARSEBACKQNEW();
1174 }
1175 } else {
1176 USTPUTC(CTLVAR, out);
1177 typeloc = out - stackblock();
1178 USTPUTC(VSNORMAL, out);
1179 subtype = VSNORMAL;
1180 if (c == '{') {
1181 c = pgetc();
1182 if (c == '#') {
1183 if ((c = pgetc()) == '}')
1184 c = '#';
1185 else
1186 subtype = VSLENGTH;
1187 }
1188 else
1189 subtype = 0;
1190 }
1191 if (is_name(c)) {
1192 do {
1193 STPUTC(c, out);
1194 c = pgetc();
1195 } while (is_in_name(c));
1196 } else if (is_digit(c)) {
1197 do {
1198 USTPUTC(c, out);
1199 c = pgetc();
1200 } while (is_digit(c));
1201 }
1202 else if (is_special(c)) {
1203 USTPUTC(c, out);
1204 c = pgetc();
1205 }
1206 else
1207 badsub: synerror("Bad substitution");
1208
1209 STPUTC('=', out);
1210 flags = 0;
1211 if (subtype == 0) {
1212 switch (c) {
1213 case ':':
1214 flags = VSNUL;
1215 c = pgetc();
1216 /*FALLTHROUGH*/
1217 default:
1218 p = strchr(types, c);
1219 if (p == NULL)
1220 goto badsub;
1221 subtype = p - types + VSNORMAL;
1222 break;
1223 case '%':
1224 case '#':
1225 {
1226 int cc = c;
1227 subtype = c == '#' ? VSTRIMLEFT :
1228 VSTRIMRIGHT;
1229 c = pgetc();
1230 if (c == cc)
1231 subtype++;
1232 else
1233 pungetc();
1234 break;
1235 }
1236 }
1237 } else {
1238 pungetc();
1239 }
1240 if (dblquote || arinest)
1241 flags |= VSQUOTE;
1242 *(stackblock() + typeloc) = subtype | flags;
1243 if (subtype != VSNORMAL)
1244 varnest++;
1245 }
1246 goto parsesub_return;
1247 }
1248
1249
1250 /*
1251 * Called to parse command substitutions. Newstyle is set if the command
1252 * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1253 * list of commands (passed by reference), and savelen is the number of
1254 * characters on the top of the stack which must be preserved.
1255 */
1256
1257 parsebackq: {
1258 struct nodelist **nlpp;
1259 int savepbq;
1260 union node *n;
1261 char *volatile str;
1262 struct jmploc jmploc;
1263 struct jmploc *volatile savehandler;
1264 int savelen;
1265 int saveprompt;
1266 #ifdef __GNUC__
1267 (void) &saveprompt;
1268 #endif
1269
1270 savepbq = parsebackquote;
1271 if (setjmp(jmploc.loc)) {
1272 if (str)
1273 ckfree(str);
1274 parsebackquote = 0;
1275 handler = savehandler;
1276 longjmp(handler->loc, 1);
1277 }
1278 INTOFF;
1279 str = NULL;
1280 savelen = out - stackblock();
1281 if (savelen > 0) {
1282 str = ckmalloc(savelen);
1283 memcpy(str, stackblock(), savelen);
1284 }
1285 savehandler = handler;
1286 handler = &jmploc;
1287 INTON;
1288 if (oldstyle) {
1289 /* We must read until the closing backquote, giving special
1290 treatment to some slashes, and then push the string and
1291 reread it as input, interpreting it normally. */
1292 char *out;
1293 int c;
1294 int savelen;
1295 char *str;
1296
1297
1298 STARTSTACKSTR(out);
1299 for (;;) {
1300 if (needprompt) {
1301 setprompt(2);
1302 needprompt = 0;
1303 }
1304 switch (c = pgetc()) {
1305 case '`':
1306 goto done;
1307
1308 case '\\':
1309 if ((c = pgetc()) == '\n') {
1310 plinno++;
1311 if (doprompt)
1312 setprompt(2);
1313 else
1314 setprompt(0);
1315 /*
1316 * If eating a newline, avoid putting
1317 * the newline into the new character
1318 * stream (via the STPUTC after the
1319 * switch).
1320 */
1321 continue;
1322 }
1323 if (c != '\\' && c != '`' && c != '$'
1324 && (!dblquote || c != '"'))
1325 STPUTC('\\', out);
1326 break;
1327
1328 case '\n':
1329 plinno++;
1330 needprompt = doprompt;
1331 break;
1332
1333 case PEOF:
1334 startlinno = plinno;
1335 synerror("EOF in backquote substitution");
1336 break;
1337
1338 default:
1339 break;
1340 }
1341 STPUTC(c, out);
1342 }
1343 done:
1344 STPUTC('\0', out);
1345 savelen = out - stackblock();
1346 if (savelen > 0) {
1347 str = ckmalloc(savelen);
1348 memcpy(str, stackblock(), savelen);
1349 setinputstring(str, 1);
1350 }
1351 }
1352 nlpp = &bqlist;
1353 while (*nlpp)
1354 nlpp = &(*nlpp)->next;
1355 *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1356 (*nlpp)->next = NULL;
1357 parsebackquote = oldstyle;
1358
1359 if (oldstyle) {
1360 saveprompt = doprompt;
1361 doprompt = 0;
1362 }
1363
1364 n = list(0);
1365
1366 if (oldstyle)
1367 doprompt = saveprompt;
1368 else {
1369 if (readtoken() != TRP)
1370 synexpect(TRP);
1371 }
1372
1373 (*nlpp)->n = n;
1374 if (oldstyle) {
1375 /*
1376 * Start reading from old file again, ignoring any pushed back
1377 * tokens left from the backquote parsing
1378 */
1379 popfile();
1380 tokpushback = 0;
1381 }
1382 while (stackblocksize() <= savelen)
1383 growstackblock();
1384 STARTSTACKSTR(out);
1385 if (str) {
1386 memcpy(out, str, savelen);
1387 STADJUST(savelen, out);
1388 INTOFF;
1389 ckfree(str);
1390 str = NULL;
1391 INTON;
1392 }
1393 parsebackquote = savepbq;
1394 handler = savehandler;
1395 if (arinest || dblquote)
1396 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1397 else
1398 USTPUTC(CTLBACKQ, out);
1399 if (oldstyle)
1400 goto parsebackq_oldreturn;
1401 else
1402 goto parsebackq_newreturn;
1403 }
1404
1405 /*
1406 * Parse an arithmetic expansion (indicate start of one and set state)
1407 */
1408 parsearith: {
1409
1410 if (++arinest == 1) {
1411 prevsyntax = syntax;
1412 syntax = ARISYNTAX;
1413 USTPUTC(CTLARI, out);
1414 } else {
1415 /*
1416 * we collapse embedded arithmetic expansion to
1417 * parenthesis, which should be equivalent
1418 */
1419 USTPUTC('(', out);
1420 }
1421 goto parsearith_return;
1422 }
1423
1424 } /* end of readtoken */
1425
1426
1427
/*
 * Reset hook: clears the keyword-check flag and any pushed-back token.
 * NOTE(review): this block is only visible when `mkinit` is defined and
 * is presumably consumed by the mkinit tool rather than compiled in
 * place -- confirm against the build.
 */
#ifdef mkinit
RESET {
	checkkwd = 0;
	tokpushback = 0;
}
#endif
1434
1435 /*
1436 * Returns true if the text contains nothing to expand (no dollar signs
1437 * or backquotes).
1438 */
1439
1440 STATIC int
1441 noexpand(text)
1442 char *text;
1443 {
1444 char *p;
1445 char c;
1446
1447 p = text;
1448 while ((c = *p++) != '\0') {
1449 if (c == CTLESC)
1450 p++;
1451 else if (BASESYNTAX[c] == CCTL)
1452 return 0;
1453 }
1454 return 1;
1455 }
1456
1457
/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 *
 * The first character is checked with is_name(); every following
 * character up to the terminating NUL is checked with is_in_name().
 * Returns 1 when all checks pass, 0 at the first failure.
 */

int
goodname(name)
	char *name;
{
	char *cp = name;

	/* the leading character has its own, stricter, test */
	if (!is_name(*cp))
		return 0;

	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
1478
1479
1480 /*
1481 * Called when an unexpected token is read during the parse. The argument
1482 * is the token that is expected, or -1 if more than one type of token can
1483 * occur at this point.
1484 */
1485
1486 STATIC void
1487 synexpect(token)
1488 int token;
1489 {
1490 char msg[64];
1491
1492 if (token >= 0) {
1493 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1494 tokname[lasttoken], tokname[token]);
1495 } else {
1496 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1497 }
1498 synerror(msg);
1499 }
1500
1501
1502 STATIC void
1503 synerror(msg)
1504 char *msg;
1505 {
1506 if (commandname)
1507 outfmt(&errout, "%s: %d: ", commandname, startlinno);
1508 outfmt(&errout, "Syntax error: %s\n", msg);
1509 error((char *)NULL);
1510 }
1511
1512 STATIC void
1513 setprompt(which)
1514 int which;
1515 {
1516 whichprompt = which;
1517
1518 #ifndef SMALL
1519 if (!el)
1520 #endif
1521 out2str(getprompt(NULL));
1522 }
1523
1524 /*
1525 * called by editline -- any expansions to the prompt
1526 * should be added here.
1527 */
1528 char *
1529 getprompt(unused)
1530 void *unused;
1531 {
1532 switch (whichprompt) {
1533 case 0:
1534 return "";
1535 case 1:
1536 return ps1val();
1537 case 2:
1538 return ps2val();
1539 default:
1540 return "<internal prompt error>";
1541 }
1542 }
1543