/*	$NetBSD: parser.c,v 1.140 2017/06/30 23:02:56 kre Exp $	*/
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.140 2017/06/30 23:02:56 kre Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <limits.h>
47
48 #include "shell.h"
49 #include "parser.h"
50 #include "nodes.h"
51 #include "expand.h" /* defines rmescapes() */
52 #include "eval.h" /* defines commandname */
53 #include "syntax.h"
54 #include "options.h"
55 #include "input.h"
56 #include "output.h"
57 #include "var.h"
58 #include "error.h"
59 #include "memalloc.h"
60 #include "mystring.h"
61 #include "alias.h"
62 #include "show.h"
63 #ifndef SMALL
64 #include "myhistedit.h"
65 #endif
66
67 /*
68 * Shell command parser.
69 */
70
71 /* values returned by readtoken */
72 #include "token.h"
73
74 #define OPENBRACE '{'
75 #define CLOSEBRACE '}'
76
/*
 * One pending here document.  An entry is appended to the pending list
 * when its << redirection is parsed, and removed again once the text of
 * the document has been read.
 */
struct HereDoc {
	struct HereDoc	*next;		/* following entry in the pending list */
	union node	*here;		/* the redirection node it belongs to */
	char		*eofmark;	/* word that terminates the document */
	int		 striptabs;	/* non-zero: leading tabs are removed */
	int		 startline;	/* line number on which << was seen */
};
84
85 MKINIT struct parse_state parse_state;
86 union parse_state_p psp = { .c_current_parser = &parse_state };
87
88 static const struct parse_state init_parse_state = { /* all 0's ... */
89 .ps_noalias = 0,
90 .ps_heredoclist = NULL,
91 .ps_parsebackquote = 0,
92 .ps_doprompt = 0,
93 .ps_needprompt = 0,
94 .ps_lasttoken = 0,
95 .ps_tokpushback = 0,
96 .ps_wordtext = NULL,
97 .ps_checkkwd = 0,
98 .ps_redirnode = NULL,
99 .ps_heredoc = NULL,
100 .ps_quoteflag = 0,
101 .ps_startlinno = 0,
102 .ps_funclinno = 0,
103 .ps_elided_nl = 0,
104 };
105
106 STATIC union node *list(int, int);
107 STATIC union node *andor(void);
108 STATIC union node *pipeline(void);
109 STATIC union node *command(void);
110 STATIC union node *simplecmd(union node **, union node *);
111 STATIC union node *makename(void);
112 STATIC void parsefname(void);
113 STATIC int slurp_heredoc(char *const, const int, const int);
114 STATIC void readheredocs(void);
115 STATIC int peektoken(void);
116 STATIC int readtoken(void);
117 STATIC int xxreadtoken(void);
118 STATIC int readtoken1(int, char const *, int);
119 STATIC int noexpand(char *);
120 STATIC void synexpect(int, const char *) __dead;
121 STATIC void synerror(const char *) __dead;
122 STATIC void setprompt(int);
123 STATIC int pgetc_linecont(void);
124
/* Syntax error text used when input ends inside a here document. */
static const char EOFhere[] = "EOF reading here (<<) document";

#ifdef DEBUG
/* Raised by parsecmd() around its calls into the parser (debug only). */
int parsing = 0;
#endif
130
131 /*
132 * Read and parse a command. Returns NEOF on end of file. (NULL is a
133 * valid parse tree indicating a blank line.)
134 */
135
136 union node *
137 parsecmd(int interact)
138 {
139 int t;
140 union node *n;
141
142 #ifdef DEBUG
143 parsing++;
144 #endif
145 tokpushback = 0;
146 doprompt = interact;
147 if (doprompt)
148 setprompt(1);
149 else
150 setprompt(0);
151 needprompt = 0;
152 t = readtoken();
153 #ifdef DEBUG
154 parsing--;
155 #endif
156 if (t == TEOF)
157 return NEOF;
158 if (t == TNL)
159 return NULL;
160
161 #ifdef DEBUG
162 parsing++;
163 #endif
164 tokpushback++;
165 n = list(1, 0);
166 #ifdef DEBUG
167 parsing--;
168 #endif
169 if (heredoclist)
170 error("%d: Here document (<<%s) expected but not present",
171 heredoclist->startline, heredoclist->eofmark);
172 return n;
173 }
174
175
176 STATIC union node *
177 list(int nlflag, int erflag)
178 {
179 union node *n1, *n2, *n3;
180 int tok;
181
182 CTRACE(DBG_PARSE, ("list(%d,%d): entered @%d\n",nlflag,erflag,plinno));
183
184 checkkwd = 2;
185 if (nlflag == 0 && tokendlist[peektoken()])
186 return NULL;
187 n1 = NULL;
188 for (;;) {
189 n2 = andor();
190 tok = readtoken();
191 if (tok == TBACKGND) {
192 if (n2->type == NCMD || n2->type == NPIPE) {
193 n2->ncmd.backgnd = 1;
194 } else if (n2->type == NREDIR) {
195 n2->type = NBACKGND;
196 } else {
197 n3 = stalloc(sizeof(struct nredir));
198 n3->type = NBACKGND;
199 n3->nredir.n = n2;
200 n3->nredir.redirect = NULL;
201 n2 = n3;
202 }
203 }
204 if (n1 == NULL) {
205 n1 = n2;
206 }
207 else {
208 n3 = stalloc(sizeof(struct nbinary));
209 n3->type = NSEMI;
210 n3->nbinary.ch1 = n1;
211 n3->nbinary.ch2 = n2;
212 n1 = n3;
213 }
214 switch (tok) {
215 case TBACKGND:
216 case TSEMI:
217 tok = readtoken();
218 /* FALLTHROUGH */
219 case TNL:
220 if (tok == TNL) {
221 readheredocs();
222 if (nlflag)
223 return n1;
224 } else {
225 tokpushback++;
226 }
227 checkkwd = 2;
228 if (tokendlist[peektoken()])
229 return n1;
230 break;
231 case TEOF:
232 pungetc(); /* push back EOF on input */
233 return n1;
234 default:
235 if (nlflag || erflag)
236 synexpect(-1, 0);
237 tokpushback++;
238 return n1;
239 }
240 }
241 }
242
243 STATIC union node *
244 andor(void)
245 {
246 union node *n1, *n2, *n3;
247 int t;
248
249 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
250
251 n1 = pipeline();
252 for (;;) {
253 if ((t = readtoken()) == TAND) {
254 t = NAND;
255 } else if (t == TOR) {
256 t = NOR;
257 } else {
258 tokpushback++;
259 return n1;
260 }
261 n2 = pipeline();
262 n3 = stalloc(sizeof(struct nbinary));
263 n3->type = t;
264 n3->nbinary.ch1 = n1;
265 n3->nbinary.ch2 = n2;
266 n1 = n3;
267 }
268 }
269
270 STATIC union node *
271 pipeline(void)
272 {
273 union node *n1, *n2, *pipenode;
274 struct nodelist *lp, *prev;
275 int negate;
276
277 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
278
279 negate = 0;
280 checkkwd = 2;
281 while (readtoken() == TNOT) {
282 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
283 #ifndef BOGUS_NOT_COMMAND
284 if (posix && negate)
285 synerror("2nd \"!\" unexpected");
286 #endif
287 negate++;
288 }
289 tokpushback++;
290 n1 = command();
291 if (readtoken() == TPIPE) {
292 pipenode = stalloc(sizeof(struct npipe));
293 pipenode->type = NPIPE;
294 pipenode->npipe.backgnd = 0;
295 lp = stalloc(sizeof(struct nodelist));
296 pipenode->npipe.cmdlist = lp;
297 lp->n = n1;
298 do {
299 prev = lp;
300 lp = stalloc(sizeof(struct nodelist));
301 lp->n = command();
302 prev->next = lp;
303 } while (readtoken() == TPIPE);
304 lp->next = NULL;
305 n1 = pipenode;
306 }
307 tokpushback++;
308 if (negate) {
309 CTRACE(DBG_PARSE, ("%snegate pipeline\n",
310 (negate&1) ? "" : "double "));
311 n2 = stalloc(sizeof(struct nnot));
312 n2->type = (negate & 1) ? NNOT : NDNOT;
313 n2->nnot.com = n1;
314 return n2;
315 } else
316 return n1;
317 }
318
319
320
321 STATIC union node *
322 command(void)
323 {
324 union node *n1, *n2;
325 union node *ap, **app;
326 union node *cp, **cpp;
327 union node *redir, **rpp;
328 int t;
329 #ifdef BOGUS_NOT_COMMAND
330 int negate = 0;
331 #endif
332
333 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
334
335 checkkwd = 2;
336 redir = NULL;
337 n1 = NULL;
338 rpp = &redir;
339
340 /* Check for redirection which may precede command */
341 while (readtoken() == TREDIR) {
342 *rpp = n2 = redirnode;
343 rpp = &n2->nfile.next;
344 parsefname();
345 }
346 tokpushback++;
347
348 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */
349 while (readtoken() == TNOT) {
350 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
351 negate++;
352 }
353 tokpushback++;
354 #endif
355
356 switch (readtoken()) {
357 case TIF:
358 n1 = stalloc(sizeof(struct nif));
359 n1->type = NIF;
360 n1->nif.test = list(0, 0);
361 if (readtoken() != TTHEN)
362 synexpect(TTHEN, 0);
363 n1->nif.ifpart = list(0, 0);
364 n2 = n1;
365 while (readtoken() == TELIF) {
366 n2->nif.elsepart = stalloc(sizeof(struct nif));
367 n2 = n2->nif.elsepart;
368 n2->type = NIF;
369 n2->nif.test = list(0, 0);
370 if (readtoken() != TTHEN)
371 synexpect(TTHEN, 0);
372 n2->nif.ifpart = list(0, 0);
373 }
374 if (lasttoken == TELSE)
375 n2->nif.elsepart = list(0, 0);
376 else {
377 n2->nif.elsepart = NULL;
378 tokpushback++;
379 }
380 if (readtoken() != TFI)
381 synexpect(TFI, 0);
382 checkkwd = 1;
383 break;
384 case TWHILE:
385 case TUNTIL: {
386 int got;
387
388 n1 = stalloc(sizeof(struct nbinary));
389 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
390 n1->nbinary.ch1 = list(0, 0);
391 if ((got=readtoken()) != TDO) {
392 VTRACE(DBG_PARSE, ("expecting DO got %s %s\n",
393 tokname[got], got == TWORD ? wordtext : ""));
394 synexpect(TDO, 0);
395 }
396 n1->nbinary.ch2 = list(0, 0);
397 if (readtoken() != TDONE)
398 synexpect(TDONE, 0);
399 checkkwd = 1;
400 break;
401 }
402 case TFOR:
403 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
404 synerror("Bad for loop variable");
405 n1 = stalloc(sizeof(struct nfor));
406 n1->type = NFOR;
407 n1->nfor.var = wordtext;
408 if (readtoken()==TWORD && !quoteflag && equal(wordtext,"in")) {
409 app = ≈
410 while (readtoken() == TWORD) {
411 n2 = stalloc(sizeof(struct narg));
412 n2->type = NARG;
413 n2->narg.text = wordtext;
414 n2->narg.backquote = backquotelist;
415 n2->narg.lineno = startlinno;
416 *app = n2;
417 app = &n2->narg.next;
418 }
419 *app = NULL;
420 n1->nfor.args = ap;
421 if (lasttoken != TNL && lasttoken != TSEMI)
422 synexpect(-1, 0);
423 } else {
424 static char argvars[5] = {
425 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
426 };
427
428 n2 = stalloc(sizeof(struct narg));
429 n2->type = NARG;
430 n2->narg.text = argvars;
431 n2->narg.backquote = NULL;
432 n2->narg.next = NULL;
433 n2->narg.lineno = startlinno;
434 n1->nfor.args = n2;
435 /*
436 * Newline or semicolon here is optional (but note
437 * that the original Bourne shell only allowed NL).
438 */
439 if (lasttoken != TNL && lasttoken != TSEMI)
440 tokpushback++;
441 }
442 checkkwd = 2;
443 if ((t = readtoken()) == TDO)
444 t = TDONE;
445 else if (t == TBEGIN)
446 t = TEND;
447 else
448 synexpect(-1, 0);
449 n1->nfor.body = list(0, 0);
450 if (readtoken() != t)
451 synexpect(t, 0);
452 checkkwd = 1;
453 break;
454 case TCASE:
455 n1 = stalloc(sizeof(struct ncase));
456 n1->type = NCASE;
457 n1->ncase.lineno = startlinno - elided_nl;
458 if (readtoken() != TWORD)
459 synexpect(TWORD, 0);
460 n1->ncase.expr = n2 = stalloc(sizeof(struct narg));
461 n2->type = NARG;
462 n2->narg.text = wordtext;
463 n2->narg.backquote = backquotelist;
464 n2->narg.lineno = startlinno;
465 n2->narg.next = NULL;
466 while (readtoken() == TNL);
467 if (lasttoken != TWORD || ! equal(wordtext, "in"))
468 synexpect(-1, "in");
469 cpp = &n1->ncase.cases;
470 noalias = 1;
471 checkkwd = 2, readtoken();
472 /*
473 * Both ksh and bash accept 'case x in esac'
474 * so configure scripts started taking advantage of this.
475 * The page: http://pubs.opengroup.org/onlinepubs/\
476 * 009695399/utilities/xcu_chap02.html contradicts itself,
477 * as to if this is legal; the "Case Conditional Format"
478 * paragraph shows one case is required, but the "Grammar"
479 * section shows a grammar that explicitly allows the no
480 * case option.
481 */
482 while (lasttoken != TESAC) {
483 *cpp = cp = stalloc(sizeof(struct nclist));
484 if (lasttoken == TLP)
485 readtoken();
486 cp->type = NCLIST;
487 app = &cp->nclist.pattern;
488 for (;;) {
489 *app = ap = stalloc(sizeof(struct narg));
490 ap->type = NARG;
491 ap->narg.lineno = startlinno;
492 ap->narg.text = wordtext;
493 ap->narg.backquote = backquotelist;
494 if (checkkwd = 2, readtoken() != TPIPE)
495 break;
496 app = &ap->narg.next;
497 readtoken();
498 }
499 ap->narg.next = NULL;
500 noalias = 0;
501 if (lasttoken != TRP) {
502 synexpect(TRP, 0);
503 }
504 cp->nclist.lineno = startlinno;
505 cp->nclist.body = list(0, 0);
506
507 checkkwd = 2;
508 if ((t = readtoken()) != TESAC) {
509 if (t != TENDCASE && t != TCASEFALL) {
510 noalias = 0;
511 synexpect(TENDCASE, 0);
512 } else {
513 if (t == TCASEFALL)
514 cp->type = NCLISTCONT;
515 noalias = 1;
516 checkkwd = 2;
517 readtoken();
518 }
519 }
520 cpp = &cp->nclist.next;
521 }
522 noalias = 0;
523 *cpp = NULL;
524 checkkwd = 1;
525 break;
526 case TLP:
527 n1 = stalloc(sizeof(struct nredir));
528 n1->type = NSUBSHELL;
529 n1->nredir.n = list(0, 0);
530 n1->nredir.redirect = NULL;
531 if (n1->nredir.n == NULL)
532 synexpect(-1, 0);
533 if (readtoken() != TRP)
534 synexpect(TRP, 0);
535 checkkwd = 1;
536 break;
537 case TBEGIN:
538 n1 = list(0, 0);
539 if (posix && n1 == NULL)
540 synexpect(-1, 0);
541 if (readtoken() != TEND)
542 synexpect(TEND, 0);
543 checkkwd = 1;
544 break;
545
546 case TSEMI:
547 case TAND:
548 case TOR:
549 case TPIPE:
550 case TNL:
551 case TEOF:
552 case TRP:
553 /*
554 * simple commands must have something in them,
555 * either a word (which at this point includes a=b)
556 * or a redirection. If we reached the end of the
557 * command (which one of these tokens indicates)
558 * when we are just starting, and have not had a
559 * redirect, then ...
560 *
561 * nb: it is still possible to end up with empty
562 * simple commands, if the "command" is a var
563 * expansion that produces nothing
564 * X= ; $X && $X
565 * --> &&
566 * I am not sure if this is intended to be legal or not.
567 */
568 if (!redir)
569 synexpect(-1, 0);
570 case TWORD:
571 tokpushback++;
572 n1 = simplecmd(rpp, redir);
573 goto checkneg;
574 case TENDCASE:
575 if (redir) {
576 tokpushback++;
577 goto checkneg;
578 }
579 /* FALLTHROUGH */
580 default:
581 synexpect(-1, 0);
582 /* NOTREACHED */
583 }
584
585 /* Now check for redirection which may follow command */
586 while (readtoken() == TREDIR) {
587 *rpp = n2 = redirnode;
588 rpp = &n2->nfile.next;
589 parsefname();
590 }
591 tokpushback++;
592 *rpp = NULL;
593 if (redir) {
594 if (n1->type != NSUBSHELL) {
595 n2 = stalloc(sizeof(struct nredir));
596 n2->type = NREDIR;
597 n2->nredir.n = n1;
598 n1 = n2;
599 }
600 n1->nredir.redirect = redir;
601 }
602
603 checkneg:
604 #ifdef BOGUS_NOT_COMMAND
605 if (negate) {
606 VTRACE(DBG_PARSE, ("bogus %snegate command\n",
607 (negate&1) ? "" : "double "));
608 n2 = stalloc(sizeof(struct nnot));
609 n2->type = (negate & 1) ? NNOT : NDNOT;
610 n2->nnot.com = n1;
611 return n2;
612 }
613 else
614 #endif
615 return n1;
616 }
617
618
619 STATIC union node *
620 simplecmd(union node **rpp, union node *redir)
621 {
622 union node *args, **app;
623 union node *n = NULL;
624 int line = 0;
625 #ifdef BOGUS_NOT_COMMAND
626 union node *n2;
627 int negate = 0;
628 #endif
629
630 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
631 redir ? "" : "out", plinno));
632
633 /* If we don't have any redirections already, then we must reset */
634 /* rpp to be the address of the local redir variable. */
635 if (redir == 0)
636 rpp = &redir;
637
638 args = NULL;
639 app = &args;
640
641 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */
642 while (readtoken() == TNOT) {
643 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
644 negate++;
645 }
646 tokpushback++;
647 #endif
648
649 for (;;) {
650 if (readtoken() == TWORD) {
651 if (line == 0)
652 line = startlinno;
653 n = stalloc(sizeof(struct narg));
654 n->type = NARG;
655 n->narg.text = wordtext;
656 n->narg.backquote = backquotelist;
657 n->narg.lineno = startlinno;
658 *app = n;
659 app = &n->narg.next;
660 } else if (lasttoken == TREDIR) {
661 if (line == 0)
662 line = startlinno;
663 *rpp = n = redirnode;
664 rpp = &n->nfile.next;
665 parsefname(); /* read name of redirection file */
666 } else if (lasttoken == TLP && app == &args->narg.next
667 && redir == 0) {
668 /* We have a function */
669 if (readtoken() != TRP)
670 synexpect(TRP, 0);
671 funclinno = plinno;
672 rmescapes(n->narg.text);
673 if (strchr(n->narg.text, '/'))
674 synerror("Bad function name");
675 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n",
676 n->narg.text, plinno));
677 n->type = NDEFUN;
678 n->narg.lineno = plinno - elided_nl;
679 n->narg.next = command();
680 funclinno = 0;
681 goto checkneg;
682 } else {
683 tokpushback++;
684 break;
685 }
686 }
687
688 if (args == NULL && redir == NULL)
689 synexpect(-1, 0);
690 *app = NULL;
691 *rpp = NULL;
692 n = stalloc(sizeof(struct ncmd));
693 n->type = NCMD;
694 n->ncmd.lineno = line - elided_nl;
695 n->ncmd.backgnd = 0;
696 n->ncmd.args = args;
697 n->ncmd.redirect = redir;
698 n->ncmd.lineno = startlinno;
699
700 checkneg:
701 #ifdef BOGUS_NOT_COMMAND
702 if (negate) {
703 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
704 (negate&1) ? "" : "double "));
705 n2 = stalloc(sizeof(struct nnot));
706 n2->type = (negate & 1) ? NNOT : NDNOT;
707 n2->nnot.com = n;
708 return n2;
709 }
710 else
711 #endif
712 return n;
713 }
714
715 STATIC union node *
716 makename(void)
717 {
718 union node *n;
719
720 n = stalloc(sizeof(struct narg));
721 n->type = NARG;
722 n->narg.next = NULL;
723 n->narg.text = wordtext;
724 n->narg.lineno = startlinno;
725 n->narg.backquote = backquotelist;
726 n->narg.lineno = startlinno - elided_nl;
727 return n;
728 }
729
730 void
731 fixredir(union node *n, const char *text, int err)
732 {
733
734 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
735 if (!err)
736 n->ndup.vname = NULL;
737
738 if (is_number(text))
739 n->ndup.dupfd = number(text);
740 else if (text[0] == '-' && text[1] == '\0')
741 n->ndup.dupfd = -1;
742 else {
743
744 if (err)
745 synerror("Bad fd number");
746 else
747 n->ndup.vname = makename();
748 }
749 }
750
751
752 STATIC void
753 parsefname(void)
754 {
755 union node *n = redirnode;
756
757 if (readtoken() != TWORD)
758 synexpect(-1, 0);
759 if (n->type == NHERE) {
760 struct HereDoc *here = heredoc;
761 struct HereDoc *p;
762
763 if (quoteflag == 0)
764 n->type = NXHERE;
765 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
766 if (here->striptabs) {
767 while (*wordtext == '\t')
768 wordtext++;
769 }
770
771 /*
772 * this test is not really necessary, we are not
773 * required to expand wordtext, but there's no reason
774 * it cannot be $$ or something like that - that would
775 * not mean the pid, but literally two '$' characters.
776 * There is no need for limits on what the word can be.
777 * However, it needs to stay literal as entered, not
778 * have $ converted to CTLVAR or something, which as
779 * the parser is, at the minute, is impossible to prevent.
780 * So, leave it like this until the rest of the parser is fixed.
781 */
782 if (!noexpand(wordtext))
783 synerror("Illegal eof marker for << redirection");
784
785 rmescapes(wordtext);
786 here->eofmark = wordtext;
787 here->next = NULL;
788 if (heredoclist == NULL)
789 heredoclist = here;
790 else {
791 for (p = heredoclist ; p->next ; p = p->next)
792 continue;
793 p->next = here;
794 }
795 } else if (n->type == NTOFD || n->type == NFROMFD) {
796 fixredir(n, wordtext, 0);
797 } else {
798 n->nfile.fname = makename();
799 }
800 }
801
802 /*
803 * Check to see whether we are at the end of the here document. When this
804 * is called, c is set to the first character of the next input line. If
805 * we are at the end of the here document, this routine sets the c to PEOF.
806 * The new value of c is returned.
807 */
808
809 static int
810 checkend(int c, char * const eofmark, const int striptabs)
811 {
812
813 if (striptabs) {
814 while (c == '\t')
815 c = pgetc();
816 }
817 if (c == PEOF) {
818 if (*eofmark == '\0')
819 return (c);
820 synerror(EOFhere);
821 }
822 if (c == *eofmark) {
823 int c2;
824 char *q;
825
826 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
827 if (c2 == '\n') {
828 plinno++;
829 needprompt = doprompt;
830 }
831 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
832 c = PEOF;
833 if (c2 == '\n') {
834 plinno++;
835 needprompt = doprompt;
836 }
837 } else {
838 pungetc();
839 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
840 }
841 } else if (c == '\n' && *eofmark == '\0') {
842 c = PEOF;
843 plinno++;
844 needprompt = doprompt;
845 }
846 return (c);
847 }
848
849
850 /*
851 * Input any here documents.
852 */
853
854 STATIC int
855 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
856 {
857 int c;
858 char *out;
859 int lines = plinno;
860
861 c = pgetc();
862
863 /*
864 * If we hit EOF on the input, and the eofmark is a null string ('')
865 * we consider this empty line to be the eofmark, and exit without err.
866 */
867 if (c == PEOF && *eofmark != '\0')
868 synerror(EOFhere);
869
870 STARTSTACKSTR(out);
871
872 while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
873 do {
874 if (sq) {
875 /*
876 * in single quoted mode (eofmark quoted)
877 * all we look for is \n so we can check
878 * for the epfmark - everything saved literally.
879 */
880 STPUTC(c, out);
881 if (c == '\n') {
882 plinno++;
883 break;
884 }
885 continue;
886 }
887 /*
888 * In double quoted (non-quoted eofmark)
889 * we must handle \ followed by \n here
890 * otherwise we can mismatch the end mark.
891 * All other uses of \ will be handled later
892 * when the here doc is expanded.
893 *
894 * This also makes sure \\ followed by \n does
895 * not suppress the newline (the \ quotes itself)
896 */
897 if (c == '\\') { /* A backslash */
898 STPUTC(c, out);
899 c = pgetc(); /* followed by */
900 if (c == '\n') { /* a newline? */
901 STPUTC(c, out);
902 plinno++;
903 continue; /* don't break */
904 }
905 }
906 STPUTC(c, out); /* keep the char */
907 if (c == '\n') { /* at end of line */
908 plinno++;
909 break; /* look for eofmark */
910 }
911 } while ((c = pgetc()) != PEOF);
912
913 /*
914 * If we have read a line, and reached EOF, without
915 * finding the eofmark, whether the EOF comes before
916 * or immediately after the \n, that is an error.
917 */
918 if (c == PEOF || (c = pgetc()) == PEOF)
919 synerror(EOFhere);
920 }
921 STPUTC('\0', out);
922
923 c = out - stackblock();
924 out = stackblock();
925 grabstackblock(c);
926 wordtext = out;
927
928 VTRACE(DBG_PARSE,
929 ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
930 plinno - lines, sq ? "quoted " : "", eofmark,
931 striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
932 wordtext, (c > 16 ? "..." : ""), plinno));
933
934 return (plinno - lines);
935 }
936
937 static char *
938 insert_elided_nl(char *str)
939 {
940 while (elided_nl > 0) {
941 STPUTC(CTLNONL, str);
942 elided_nl--;
943 }
944 return str;
945 }
946
947 STATIC void
948 readheredocs(void)
949 {
950 struct HereDoc *here;
951 union node *n;
952 int line, l;
953
954 line = 0; /*XXX - gcc! obviously unneeded */
955 if (heredoclist)
956 line = heredoclist->startline + 1;
957 l = 0;
958 while (heredoclist) {
959 line += l;
960 here = heredoclist;
961 heredoclist = here->next;
962 if (needprompt) {
963 setprompt(2);
964 needprompt = 0;
965 }
966
967 l = slurp_heredoc(here->eofmark, here->striptabs,
968 here->here->nhere.type == NHERE);
969
970 n = stalloc(sizeof(struct narg));
971 n->narg.type = NARG;
972 n->narg.next = NULL;
973 n->narg.text = wordtext;
974 n->narg.lineno = line;
975 n->narg.backquote = backquotelist;
976 here->here->nhere.doc = n;
977
978 if (here->here->nhere.type == NHERE)
979 continue;
980
981 /*
982 * Now "parse" here docs that have unquoted eofmarkers.
983 */
984 setinputstring(wordtext, 1, line);
985 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
986 l, line));
987 readtoken1(pgetc(), DQSYNTAX, 1);
988 n->narg.text = wordtext;
989 n->narg.backquote = backquotelist;
990 popfile();
991 }
992 }
993
994 STATIC int
995 peektoken(void)
996 {
997 int t;
998
999 t = readtoken();
1000 tokpushback++;
1001 return (t);
1002 }
1003
1004 STATIC int
1005 readtoken(void)
1006 {
1007 int t;
1008 int savecheckkwd = checkkwd;
1009 #ifdef DEBUG
1010 int alreadyseen = tokpushback;
1011 #endif
1012 struct alias *ap;
1013
1014 top:
1015 t = xxreadtoken();
1016
1017 if (checkkwd) {
1018 /*
1019 * eat newlines
1020 */
1021 if (checkkwd == 2) {
1022 checkkwd = 0;
1023 while (t == TNL) {
1024 readheredocs();
1025 t = xxreadtoken();
1026 }
1027 } else
1028 checkkwd = 0;
1029 /*
1030 * check for keywords and aliases
1031 */
1032 if (t == TWORD && !quoteflag) {
1033 const char *const *pp;
1034
1035 for (pp = parsekwd; *pp; pp++) {
1036 if (**pp == *wordtext && equal(*pp, wordtext)) {
1037 lasttoken = t = pp -
1038 parsekwd + KWDOFFSET;
1039 VTRACE(DBG_PARSE,
1040 ("keyword %s recognized @%d\n",
1041 tokname[t], plinno));
1042 goto out;
1043 }
1044 }
1045 if (!noalias &&
1046 (ap = lookupalias(wordtext, 1)) != NULL) {
1047 VTRACE(DBG_PARSE,
1048 ("alias '%s' recognized -> <:%s:>\n",
1049 wordtext, ap->val));
1050 pushstring(ap->val, strlen(ap->val), ap);
1051 checkkwd = savecheckkwd;
1052 goto top;
1053 }
1054 }
1055 out:
1056 checkkwd = (t == TNOT) ? savecheckkwd : 0;
1057 }
1058 VTRACE(DBG_PARSE, ("%stoken %s %s @%d\n", alreadyseen ? "reread " : "",
1059 tokname[t], t == TWORD ? wordtext : "", plinno));
1060 return (t);
1061 }
1062
1063
1064 /*
1065 * Read the next input token.
1066 * If the token is a word, we set backquotelist to the list of cmds in
1067 * backquotes. We set quoteflag to true if any part of the word was
1068 * quoted.
1069 * If the token is TREDIR, then we set redirnode to a structure containing
1070 * the redirection.
1071 * In all cases, the variable startlinno is set to the number of the line
1072 * on which the token starts.
1073 *
1074 * [Change comment: here documents and internal procedures]
1075 * [Readtoken shouldn't have any arguments. Perhaps we should make the
1076 * word parsing code into a separate routine. In this case, readtoken
1077 * doesn't need to have any internal procedures, but parseword does.
1078 * We could also make parseoperator in essence the main routine, and
1079 * have parseword (readtoken1?) handle both words and redirection.]
1080 */
1081
1082 #define RETURN(token) return lasttoken = token
1083
1084 STATIC int
1085 xxreadtoken(void)
1086 {
1087 int c;
1088
1089 if (tokpushback) {
1090 tokpushback = 0;
1091 return lasttoken;
1092 }
1093 if (needprompt) {
1094 setprompt(2);
1095 needprompt = 0;
1096 }
1097 elided_nl = 0;
1098 startlinno = plinno;
1099 for (;;) { /* until token or start of word found */
1100 c = pgetc_macro();
1101 switch (c) {
1102 case ' ': case '\t':
1103 continue;
1104 case '#':
1105 while ((c = pgetc()) != '\n' && c != PEOF)
1106 continue;
1107 pungetc();
1108 continue;
1109
1110 case '\n':
1111 plinno++;
1112 needprompt = doprompt;
1113 RETURN(TNL);
1114 case PEOF:
1115 RETURN(TEOF);
1116
1117 case '&':
1118 if (pgetc_linecont() == '&')
1119 RETURN(TAND);
1120 pungetc();
1121 RETURN(TBACKGND);
1122 case '|':
1123 if (pgetc_linecont() == '|')
1124 RETURN(TOR);
1125 pungetc();
1126 RETURN(TPIPE);
1127 case ';':
1128 switch (pgetc_linecont()) {
1129 case ';':
1130 RETURN(TENDCASE);
1131 case '&':
1132 RETURN(TCASEFALL);
1133 default:
1134 pungetc();
1135 RETURN(TSEMI);
1136 }
1137 case '(':
1138 RETURN(TLP);
1139 case ')':
1140 RETURN(TRP);
1141
1142 case '\\':
1143 switch (pgetc()) {
1144 case '\n':
1145 startlinno = ++plinno;
1146 if (doprompt)
1147 setprompt(2);
1148 else
1149 setprompt(0);
1150 continue;
1151 case PEOF:
1152 RETURN(TEOF);
1153 default:
1154 pungetc();
1155 break;
1156 }
1157 /* FALLTHROUGH */
1158 default:
1159 return readtoken1(c, BASESYNTAX, 0);
1160 }
1161 }
1162 #undef RETURN
1163 }
1164
1165
1166
1167 /*
1168 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1169 * is not NULL, read a here document. In the latter case, eofmark is the
1170 * word which marks the end of the document and striptabs is true if
1171 * leading tabs should be stripped from the document. The argument firstc
1172 * is the first character of the input token or document.
1173 *
1174 * Because C does not have internal subroutines, I have simulated them
1175 * using goto's to implement the subroutine linkage. The following macros
1176 * will run code that appears at the end of readtoken1.
1177 */
1178
1179 /*
1180 * We used to remember only the current syntax, variable nesting level,
1181 * double quote state for each var nesting level, and arith nesting
1182 * level (unrelated to var nesting) and one prev syntax when in arith
1183 * syntax. This worked for simple cases, but can't handle arith inside
1184 * var expansion inside arith inside var with some quoted and some not.
1185 *
1186 * Inspired by FreeBSD's implementation (though it was the obvious way)
1187 * though implemented differently, we now have a stack that keeps track
1188 * of what we are doing now, and what we were doing previously.
1189 * Every time something changes, which will eventually end and should
1190 * revert to the previous state, we push this stack, and then pop it
1191 * again later (that is every ${} with an operator (to parse the word
1192 * or pattern that follows) ${x} and $x are too simple to need it)
1193 * $(( )) $( ) and "...". Always. Really, always!
1194 *
1195 * The stack is implemented as one static (on the C stack) base block
1196 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1197 * enough for the vast majority of cases. For torture tests, we
1198 * malloc more blocks as needed. All accesses through the inline
1199 * functions below.
1200 */
1201
1202 /*
1203 * varnest & arinest will typically be 0 or 1
1204 * (varnest can increment in usages like ${x=${y}} but probably
1205 * does not really need to)
1206 * parenlevel allows balancing parens inside a $(( )), it is reset
1207 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
1208 * quoted is special - we need to know 2 things ... are we inside "..."
1209 * (even if inherited from some previous nesting level) and was there
1210 * an opening '"' at this level (so the next will be closing).
1211 * "..." can span nesting levels, but cannot be opened in one and
1212 * closed in a different one.
1213 * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1214 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1215 * is really so special that this setting is not very important)
1216 * and 0x10 that indicates that an opening quote has been seen.
1217 * The bottom 4 bits are inherited, the 0x10 bit is not.
1218 */
/* What is recorded for each level of nesting while reading a word. */
struct tokenstate {
	const char	*ts_syntax;	/* syntax table in use at this level */
	unsigned short	 ts_parenlevel;	/* counters */
	unsigned short	 ts_varnest;	/* 64000 levels should be enough! */
	unsigned short	 ts_arinest;
	unsigned short	 ts_quoted;	/* 1 -> single, 2 -> double */
};
1226
/*
 * Values for ts_quoted: a quoting kind in the low (QF) bits,
 * optionally or'd with QS.
 */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	QF	0x0F	/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */
1232
/* Number of tokenstate entries held by each block of the state stack */
#define	LEVELS_PER_BLOCK	8
/* Shorthand for the block type, used throughout the code below */
#define	VSS			struct statestack

/*
 * One block of the state stack.  Blocks are chained through "prev";
 * the block with prev == NULL is the base of the stack.
 */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1241
1242 static inline struct tokenstate *
1243 currentstate(VSS *stack)
1244 {
1245 return &stack->tokenstate[stack->cur];
1246 }
1247
1248 static inline struct tokenstate *
1249 prevstate(VSS *stack)
1250 {
1251 if (stack->cur != 0)
1252 return &stack->tokenstate[stack->cur - 1];
1253 if (stack->prev == NULL) /* cannot drop below base */
1254 return &stack->tokenstate[0];
1255 return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
1256 }
1257
1258 static inline VSS *
1259 bump_state_level(VSS *stack)
1260 {
1261 struct tokenstate *os, *ts;
1262
1263 os = currentstate(stack);
1264
1265 if (++stack->cur >= LEVELS_PER_BLOCK) {
1266 VSS *ss;
1267
1268 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1269 ss->cur = 0;
1270 ss->prev = stack;
1271 stack = ss;
1272 }
1273
1274 ts = currentstate(stack);
1275
1276 ts->ts_parenlevel = 0; /* parens inside never match outside */
1277
1278 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1279 ts->ts_varnest = os->ts_varnest;
1280 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1281 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1282
1283 return stack;
1284 }
1285
1286 static inline VSS *
1287 drop_state_level(VSS *stack)
1288 {
1289 if (stack->cur == 0) {
1290 VSS *ss;
1291
1292 ss = stack;
1293 stack = ss->prev;
1294 if (stack == NULL)
1295 return ss;
1296 ckfree(ss);
1297 }
1298 --stack->cur;
1299 return stack;
1300 }
1301
1302 static inline void
1303 cleanup_state_stack(VSS *stack)
1304 {
1305 while (stack->prev != NULL) {
1306 stack->cur = 0;
1307 stack = drop_state_level(stack);
1308 }
1309 }
1310
/*
 * "Call" one of the code fragments at the parsesub: / parsearith: labels.
 * Each macro jumps to the fragment and also supplies the label that the
 * fragment jumps back to when it is finished, so each macro may be used
 * only once within a function.
 */
#define	PARSESUB()	do { goto parsesub; parsesub_return: ; } while (0)
#define	PARSEARITH()	do { goto parsearith; parsearith_return: ; } while (0)
1313
/*
 * The following macros all assume the existence of a local var "stack"
 * which contains a pointer to the current struct statestack
 */
1318
1319 /*
1320 * These are macros rather than inline funcs to avoid code churn as much
1321 * as possible - they replace macros of the same name used previously.
1322 */
/* Non-zero when a quote was opened at the current level (QS bit set) */
#define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
/* Record that a double quote was opened at the current level */
#define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = (QS | DQ))
/*
 * Forget any quote opened at the current level: fall back to the quoting
 * kind (QF bits) of the level below, or to 0 at the very bottom of the
 * stack where there is no level below.
 */
#define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
	    (stack->cur != 0 || stack->prev != NULL) ?			\
		(prevstate(stack)->ts_quoted & QF) : 0)
1328
/*
 * These are just to avoid excess typing and line lengths...
 * The ones that "look like" var names must be implemented to be lvalues
 * (each expands to a field of the current tokenstate, found through the
 * local var "stack").
 */
#define	syntax		(currentstate(stack)->ts_syntax)
#define	parenlevel	(currentstate(stack)->ts_parenlevel)
#define	varnest		(currentstate(stack)->ts_varnest)
#define	arinest		(currentstate(stack)->ts_arinest)
#define	quoted		(currentstate(stack)->ts_quoted)
/* Push / pop one level, keeping "stack" pointing at the top block */
#define	TS_PUSH()	(stack = bump_state_level(stack))
#define	TS_POP()	(stack = drop_state_level(stack))
1340
1341 /*
1342 * Called to parse command substitutions. oldstyle is true if the command
1343 * is enclosed inside `` (otherwise it was enclosed in "$( )")
1344 *
1345 * Internally nlpp is a pointer to the head of the linked
1346 * list of commands (passed by reference), and savelen is the number of
1347 * characters on the top of the stack which must be preserved.
1348 */
1349 static char *
1350 parsebackq(VSS *const stack, char * const in,
1351 struct nodelist **const pbqlist, const int oldstyle, const int magicq)
1352 {
1353 struct nodelist **nlpp;
1354 const int savepbq = parsebackquote;
1355 union node *n;
1356 char *out;
1357 char *str = NULL;
1358 char *volatile sstr = str;
1359 struct jmploc jmploc;
1360 struct jmploc *const savehandler = handler;
1361 const int savelen = in - stackblock();
1362 int saveprompt;
1363 int lno;
1364
1365 if (setjmp(jmploc.loc)) {
1366 if (sstr)
1367 ckfree(__UNVOLATILE(sstr));
1368 cleanup_state_stack(stack);
1369 parsebackquote = 0;
1370 handler = savehandler;
1371 longjmp(handler->loc, 1);
1372 }
1373 INTOFF;
1374 sstr = str = NULL;
1375 if (savelen > 0) {
1376 sstr = str = ckmalloc(savelen);
1377 memcpy(str, stackblock(), savelen);
1378 }
1379 handler = &jmploc;
1380 INTON;
1381 if (oldstyle) {
1382 /*
1383 * We must read until the closing backquote, giving special
1384 * treatment to some slashes, and then push the string and
1385 * reread it as input, interpreting it normally.
1386 */
1387 int pc;
1388 int psavelen;
1389 char *pstr;
1390 int line1 = plinno;
1391
1392 VTRACE(DBG_PARSE, ("parsebackq: repackaging `` as $( )"));
1393 /*
1394 * Because the entire `...` is read here, we don't
1395 * need to bother the state stack. That will be used
1396 * (as appropriate) when the processed string is re-read.
1397 */
1398 STARTSTACKSTR(out);
1399 #ifdef DEBUG
1400 for (psavelen = 0;;psavelen++) {
1401 #else
1402 for (;;) {
1403 #endif
1404 if (needprompt) {
1405 setprompt(2);
1406 needprompt = 0;
1407 }
1408 pc = pgetc();
1409 if (pc == '`')
1410 break;
1411 switch (pc) {
1412 case '\\':
1413 pc = pgetc();
1414 #ifdef DEBUG
1415 psavelen++;
1416 #endif
1417 if (pc == '\n') { /* keep \ \n for later */
1418 plinno++;
1419 needprompt = doprompt;
1420 }
1421 if (pc != '\\' && pc != '`' && pc != '$'
1422 && (!ISDBLQUOTE() || pc != '"'))
1423 STPUTC('\\', out);
1424 break;
1425
1426 case '\n':
1427 plinno++;
1428 needprompt = doprompt;
1429 break;
1430
1431 case PEOF:
1432 startlinno = line1;
1433 synerror("EOF in backquote substitution");
1434 break;
1435
1436 default:
1437 break;
1438 }
1439 STPUTC(pc, out);
1440 }
1441 STPUTC('\0', out);
1442 VTRACE(DBG_PARSE, (" read %d", psavelen));
1443 psavelen = out - stackblock();
1444 VTRACE(DBG_PARSE, (" produced %d\n", psavelen));
1445 if (psavelen > 0) {
1446 pstr = grabstackstr(out);
1447 setinputstring(pstr, 1, line1);
1448 }
1449 }
1450 nlpp = pbqlist;
1451 while (*nlpp)
1452 nlpp = &(*nlpp)->next;
1453 *nlpp = stalloc(sizeof(struct nodelist));
1454 (*nlpp)->next = NULL;
1455 parsebackquote = oldstyle;
1456
1457 if (oldstyle) {
1458 saveprompt = doprompt;
1459 doprompt = 0;
1460 } else
1461 saveprompt = 0;
1462
1463 lno = -plinno;
1464 n = list(0, oldstyle);
1465 lno += plinno;
1466
1467 if (oldstyle)
1468 doprompt = saveprompt;
1469 else {
1470 if (readtoken() != TRP) {
1471 cleanup_state_stack(stack);
1472 synexpect(TRP, 0);
1473 }
1474 }
1475
1476 (*nlpp)->n = n;
1477 if (oldstyle) {
1478 /*
1479 * Start reading from old file again, ignoring any pushed back
1480 * tokens left from the backquote parsing
1481 */
1482 popfile();
1483 tokpushback = 0;
1484 }
1485
1486 while (stackblocksize() <= savelen)
1487 growstackblock();
1488 STARTSTACKSTR(out);
1489 if (str) {
1490 memcpy(out, str, savelen);
1491 STADJUST(savelen, out);
1492 INTOFF;
1493 ckfree(str);
1494 sstr = str = NULL;
1495 INTON;
1496 }
1497 parsebackquote = savepbq;
1498 handler = savehandler;
1499 if (arinest || ISDBLQUOTE()) {
1500 STPUTC(CTLBACKQ | CTLQUOTE, out);
1501 while (--lno >= 0)
1502 STPUTC(CTLNONL, out);
1503 } else
1504 STPUTC(CTLBACKQ, out);
1505
1506 return out;
1507 }
1508
1509 /*
1510 * Parse a redirection operator. The parameter "out" points to a string
1511 * specifying the fd to be redirected. It is guaranteed to be either ""
1512 * or a numeric string (for now anyway). The parameter "c" contains the
1513 * first character of the redirection operator.
1514 *
1515 * Note the string "out" is on the stack, which we are about to clobber,
1516 * so process it first...
1517 */
1518
1519 static void
1520 parseredir(const char *out, int c)
1521 {
1522 union node *np;
1523 int fd;
1524
1525 fd = (*out == '\0') ? -1 : atoi(out);
1526
1527 np = stalloc(sizeof(struct nfile));
1528 if (c == '>') {
1529 if (fd < 0)
1530 fd = 1;
1531 c = pgetc_linecont();
1532 if (c == '>')
1533 np->type = NAPPEND;
1534 else if (c == '|')
1535 np->type = NCLOBBER;
1536 else if (c == '&')
1537 np->type = NTOFD;
1538 else {
1539 np->type = NTO;
1540 pungetc();
1541 }
1542 } else { /* c == '<' */
1543 if (fd < 0)
1544 fd = 0;
1545 switch (c = pgetc_linecont()) {
1546 case '<':
1547 if (sizeof (struct nfile) != sizeof (struct nhere)) {
1548 np = stalloc(sizeof(struct nhere));
1549 np->nfile.fd = 0;
1550 }
1551 np->type = NHERE;
1552 heredoc = stalloc(sizeof(struct HereDoc));
1553 heredoc->here = np;
1554 heredoc->startline = plinno;
1555 if ((c = pgetc_linecont()) == '-') {
1556 heredoc->striptabs = 1;
1557 } else {
1558 heredoc->striptabs = 0;
1559 pungetc();
1560 }
1561 break;
1562
1563 case '&':
1564 np->type = NFROMFD;
1565 break;
1566
1567 case '>':
1568 np->type = NFROMTO;
1569 break;
1570
1571 default:
1572 np->type = NFROM;
1573 pungetc();
1574 break;
1575 }
1576 }
1577 np->nfile.fd = fd;
1578
1579 redirnode = np; /* this is the "value" of TRENODE */
1580 }
1581
1582
1583 /*
1584 * The lowest level basic tokenizer.
1585 *
1586 * The next input byte (character) is in firstc, syn says which
1587 * syntax tables we are to use (basic, single or double quoted, or arith)
1588 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1589 * quote character itself is not special (used parsing here docs and similar)
1590 *
1591 * The result is the type of the next token (its value, when there is one,
1592 * is saved in the relevant global var - must fix that someday!) which is
1593 * also saved for re-reading ("lasttoken").
1594 *
1595 * Overall, this routine does far more parsing than it is supposed to.
1596 * That will also need fixing, someday...
1597 */
1598 STATIC int
1599 readtoken1(int firstc, char const *syn, int magicq)
1600 {
1601 int c;
1602 char * out;
1603 int len;
1604 struct nodelist *bqlist;
1605 int quotef;
1606 VSS static_stack;
1607 VSS *stack = &static_stack;
1608
1609 stack->prev = NULL;
1610 stack->cur = 0;
1611
1612 syntax = syn;
1613
1614 startlinno = plinno;
1615 varnest = 0;
1616 quoted = 0;
1617 if (syntax == DQSYNTAX)
1618 SETDBLQUOTE();
1619 quotef = 0;
1620 bqlist = NULL;
1621 arinest = 0;
1622 parenlevel = 0;
1623 elided_nl = 0;
1624
1625 STARTSTACKSTR(out);
1626
1627 for (c = firstc ;; c = pgetc_macro()) { /* until of token */
1628 if (syntax == ARISYNTAX)
1629 out = insert_elided_nl(out);
1630 CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
1631 switch (syntax[c]) {
1632 case CNL: /* '\n' */
1633 if (syntax == BASESYNTAX)
1634 break; /* exit loop */
1635 USTPUTC(c, out);
1636 plinno++;
1637 if (doprompt)
1638 setprompt(2);
1639 else
1640 setprompt(0);
1641 continue;
1642
1643 case CWORD:
1644 USTPUTC(c, out);
1645 continue;
1646 case CCTL:
1647 if (!magicq || ISDBLQUOTE())
1648 USTPUTC(CTLESC, out);
1649 USTPUTC(c, out);
1650 continue;
1651 case CBACK: /* backslash */
1652 c = pgetc();
1653 if (c == PEOF) {
1654 USTPUTC('\\', out);
1655 pungetc();
1656 continue;
1657 }
1658 if (c == '\n') {
1659 plinno++;
1660 elided_nl++;
1661 if (doprompt)
1662 setprompt(2);
1663 else
1664 setprompt(0);
1665 continue;
1666 }
1667 quotef = 1; /* current token is quoted */
1668 if (ISDBLQUOTE() && c != '\\' && c != '`' &&
1669 c != '$' && (c != '"' || magicq))
1670 USTPUTC('\\', out);
1671 if (SQSYNTAX[c] == CCTL)
1672 USTPUTC(CTLESC, out);
1673 else if (!magicq) {
1674 USTPUTC(CTLQUOTEMARK, out);
1675 USTPUTC(c, out);
1676 if (varnest != 0)
1677 USTPUTC(CTLQUOTEEND, out);
1678 continue;
1679 }
1680 USTPUTC(c, out);
1681 continue;
1682 case CSQUOTE:
1683 if (syntax != SQSYNTAX) {
1684 if (!magicq)
1685 USTPUTC(CTLQUOTEMARK, out);
1686 quotef = 1;
1687 TS_PUSH();
1688 syntax = SQSYNTAX;
1689 quoted = SQ;
1690 continue;
1691 }
1692 if (magicq && arinest == 0 && varnest == 0) {
1693 /* Ignore inside quoted here document */
1694 USTPUTC(c, out);
1695 continue;
1696 }
1697 /* End of single quotes... */
1698 TS_POP();
1699 if (syntax == BASESYNTAX && varnest != 0)
1700 USTPUTC(CTLQUOTEEND, out);
1701 continue;
1702 case CDQUOTE:
1703 if (magicq && arinest == 0 && varnest == 0) {
1704 /* Ignore inside here document */
1705 USTPUTC(c, out);
1706 continue;
1707 }
1708 quotef = 1;
1709 if (arinest) {
1710 if (ISDBLQUOTE()) {
1711 TS_POP();
1712 } else {
1713 TS_PUSH();
1714 syntax = DQSYNTAX;
1715 SETDBLQUOTE();
1716 USTPUTC(CTLQUOTEMARK, out);
1717 }
1718 continue;
1719 }
1720 if (magicq)
1721 continue;
1722 if (ISDBLQUOTE()) {
1723 TS_POP();
1724 if (varnest != 0)
1725 USTPUTC(CTLQUOTEEND, out);
1726 } else {
1727 TS_PUSH();
1728 syntax = DQSYNTAX;
1729 SETDBLQUOTE();
1730 USTPUTC(CTLQUOTEMARK, out);
1731 }
1732 continue;
1733 case CVAR: /* '$' */
1734 out = insert_elided_nl(out);
1735 PARSESUB(); /* parse substitution */
1736 continue;
1737 case CENDVAR: /* CLOSEBRACE */
1738 if (varnest > 0 && !ISDBLQUOTE()) {
1739 TS_POP();
1740 USTPUTC(CTLENDVAR, out);
1741 } else {
1742 USTPUTC(c, out);
1743 }
1744 out = insert_elided_nl(out);
1745 continue;
1746 case CLP: /* '(' in arithmetic */
1747 parenlevel++;
1748 USTPUTC(c, out);
1749 continue;;
1750 case CRP: /* ')' in arithmetic */
1751 if (parenlevel > 0) {
1752 USTPUTC(c, out);
1753 --parenlevel;
1754 } else {
1755 if (pgetc_linecont() == /*(*/ ')') {
1756 out = insert_elided_nl(out);
1757 if (--arinest == 0) {
1758 TS_POP();
1759 USTPUTC(CTLENDARI, out);
1760 } else
1761 USTPUTC(/*(*/ ')', out);
1762 } else {
1763 /*
1764 * unbalanced parens
1765 * (don't 2nd guess - no error)
1766 */
1767 pungetc();
1768 USTPUTC(/*(*/ ')', out);
1769 }
1770 }
1771 continue;
1772 case CBQUOTE: /* '`' */
1773 out = parsebackq(stack, out, &bqlist, 1, magicq);
1774 continue;
1775 case CEOF: /* --> c == PEOF */
1776 break; /* will exit loop */
1777 default:
1778 if (varnest == 0 && !ISDBLQUOTE())
1779 break; /* exit loop */
1780 USTPUTC(c, out);
1781 continue;
1782 }
1783 break; /* break from switch -> break from for loop too */
1784 }
1785
1786 if (syntax == ARISYNTAX) {
1787 cleanup_state_stack(stack);
1788 synerror(/*((*/ "Missing '))'");
1789 }
1790 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
1791 cleanup_state_stack(stack);
1792 synerror("Unterminated quoted string");
1793 }
1794 if (varnest != 0) {
1795 cleanup_state_stack(stack);
1796 startlinno = plinno;
1797 /* { */
1798 synerror("Missing '}'");
1799 }
1800
1801 STPUTC('\0', out);
1802 len = out - stackblock();
1803 out = stackblock();
1804
1805 if (!magicq) {
1806 if ((c == '<' || c == '>')
1807 && quotef == 0 && (*out == '\0' || is_number(out))) {
1808 parseredir(out, c);
1809 cleanup_state_stack(stack);
1810 return lasttoken = TREDIR;
1811 } else {
1812 pungetc();
1813 }
1814 }
1815
1816 VTRACE(DBG_PARSE,
1817 ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
1818 (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
1819 len, elided_nl));
1820
1821 quoteflag = quotef;
1822 backquotelist = bqlist;
1823 grabstackblock(len);
1824 wordtext = out;
1825 cleanup_state_stack(stack);
1826 return lasttoken = TWORD;
1827 /* end of readtoken routine */
1828
1829
1830 /*
1831 * Parse a substitution. At this point, we have read the dollar sign
1832 * and nothing else.
1833 */
1834
1835 parsesub: {
1836 int subtype;
1837 int typeloc;
1838 int flags;
1839 char *p;
1840 static const char types[] = "}-+?=";
1841
1842 c = pgetc_linecont();
1843 if (c != '('/*)*/ && c != OPENBRACE && !is_name(c) && !is_special(c)) {
1844 USTPUTC('$', out);
1845 pungetc();
1846 } else if (c == '('/*)*/) { /* $(command) or $((arith)) */
1847 if (pgetc_linecont() == '(' /*')'*/ ) {
1848 out = insert_elided_nl(out);
1849 PARSEARITH();
1850 } else {
1851 out = insert_elided_nl(out);
1852 pungetc();
1853 out = parsebackq(stack, out, &bqlist, 0, magicq);
1854 }
1855 } else {
1856 USTPUTC(CTLVAR, out);
1857 typeloc = out - stackblock();
1858 USTPUTC(VSNORMAL, out);
1859 subtype = VSNORMAL;
1860 flags = 0;
1861 if (c == OPENBRACE) {
1862 c = pgetc_linecont();
1863 if (c == '#') {
1864 if ((c = pgetc_linecont()) == CLOSEBRACE)
1865 c = '#';
1866 else if (is_name(c) || isdigit(c))
1867 subtype = VSLENGTH;
1868 else if (is_special(c)) {
1869 /*
1870 * ${#} is $# - the number of sh params
1871 * ${##} is the length of ${#}
1872 * ${###} is ${#} with as much nothing
1873 * as possible removed from start
1874 * ${##1} is ${#} with leading 1 gone
1875 * ${##\#} is ${#} with leading # gone
1876 *
1877 * this stuff is UGLY!
1878 */
1879 if (pgetc_linecont() == CLOSEBRACE) {
1880 pungetc();
1881 subtype = VSLENGTH;
1882 } else {
1883 static char cbuf[2];
1884
1885 pungetc(); /* would like 2 */
1886 cbuf[0] = c; /* so ... */
1887 cbuf[1] = '\0';
1888 pushstring(cbuf, 1, NULL);
1889 c = '#'; /* ${#:...} */
1890 subtype = 0; /* .. or similar */
1891 }
1892 } else {
1893 pungetc();
1894 c = '#';
1895 subtype = 0;
1896 }
1897 }
1898 else
1899 subtype = 0;
1900 }
1901 if (is_name(c)) {
1902 p = out;
1903 do {
1904 STPUTC(c, out);
1905 c = pgetc_linecont();
1906 } while (is_in_name(c));
1907 #if 0
1908 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
1909 int i;
1910 int linno;
1911 char buf[10];
1912
1913 /*
1914 * The "LINENO hack"
1915 *
1916 * Replace the variable name with the
1917 * current line number.
1918 */
1919 linno = plinno;
1920 if (funclinno != 0)
1921 linno -= funclinno - 1;
1922 snprintf(buf, sizeof(buf), "%d", linno);
1923 STADJUST(-6, out);
1924 for (i = 0; buf[i] != '\0'; i++)
1925 STPUTC(buf[i], out);
1926 flags |= VSLINENO;
1927 }
1928 #endif
1929 } else if (is_digit(c)) {
1930 do {
1931 STPUTC(c, out);
1932 c = pgetc_linecont();
1933 } while (subtype != VSNORMAL && is_digit(c));
1934 }
1935 else if (is_special(c)) {
1936 USTPUTC(c, out);
1937 c = pgetc_linecont();
1938 }
1939 else {
1940 badsub:
1941 cleanup_state_stack(stack);
1942 synerror("Bad substitution");
1943 }
1944
1945 STPUTC('=', out);
1946 if (subtype == 0) {
1947 switch (c) {
1948 case ':':
1949 flags |= VSNUL;
1950 c = pgetc_linecont();
1951 /*FALLTHROUGH*/
1952 default:
1953 p = strchr(types, c);
1954 if (p == NULL)
1955 goto badsub;
1956 subtype = p - types + VSNORMAL;
1957 break;
1958 case '%':
1959 case '#':
1960 {
1961 int cc = c;
1962 subtype = c == '#' ? VSTRIMLEFT :
1963 VSTRIMRIGHT;
1964 c = pgetc_linecont();
1965 if (c == cc)
1966 subtype++;
1967 else
1968 pungetc();
1969 break;
1970 }
1971 }
1972 } else {
1973 if (subtype == VSLENGTH && c != /*{*/ '}')
1974 synerror("no modifiers allowed with ${#var}");
1975 pungetc();
1976 }
1977 if (ISDBLQUOTE() || arinest)
1978 flags |= VSQUOTE;
1979 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
1980 flags |= VSPATQ;
1981 *(stackblock() + typeloc) = subtype | flags;
1982 if (subtype != VSNORMAL) {
1983 TS_PUSH();
1984 varnest++;
1985 arinest = 0;
1986 if (subtype > VSASSIGN) { /* # ## % %% */
1987 syntax = BASESYNTAX;
1988 CLRDBLQUOTE();
1989 }
1990 }
1991 }
1992 goto parsesub_return;
1993 }
1994
1995
1996 /*
1997 * Parse an arithmetic expansion (indicate start of one and set state)
1998 */
1999 parsearith: {
2000
2001 #if 0
2002 if (syntax == ARISYNTAX) {
2003 /*
2004 * we collapse embedded arithmetic expansion to
2005 * parentheses, which should be equivalent
2006 *
2007 * XXX It isn't, must fix, soonish...
2008 */
2009 USTPUTC('(' /*)*/, out);
2010 USTPUTC('(' /*)*/, out);
2011 /*
2012 * Need 2 of them because there will (should be)
2013 * two closing ))'s to follow later.
2014 */
2015 parenlevel += 2;
2016 } else
2017 #endif
2018 {
2019 USTPUTC(CTLARI, out);
2020 if (ISDBLQUOTE())
2021 USTPUTC('"',out);
2022 else
2023 USTPUTC(' ',out);
2024
2025 TS_PUSH();
2026 syntax = ARISYNTAX;
2027 arinest = 1;
2028 varnest = 0;
2029 }
2030 goto parsearith_return;
2031 }
2032
2033 } /* end of readtoken */
2034
2035
2036
2037
/*
 * NOTE(review): the section below is only seen when "mkinit" is defined,
 * and its INCLUDE / RESET syntax is not plain C - presumably it is input
 * for a separate mkinit generator step; confirm against the build.
 * It makes the global parse_state the current parser again and clears
 * its pushed back token flag, keyword checking and here document list.
 */
#ifdef mkinit
INCLUDE "parser.h"

RESET {
	psp.v_current_parser = &parse_state;

	parse_state.ps_tokpushback = 0;
	parse_state.ps_checkkwd = 0;
	parse_state.ps_heredoclist = NULL;
}
#endif
2049
2050 /*
2051 * Returns true if the text contains nothing to expand (no dollar signs
2052 * or backquotes).
2053 */
2054
2055 STATIC int
2056 noexpand(char *text)
2057 {
2058 char *p;
2059 char c;
2060
2061 p = text;
2062 while ((c = *p++) != '\0') {
2063 if (c == CTLQUOTEMARK)
2064 continue;
2065 if (c == CTLESC)
2066 p++;
2067 else if (BASESYNTAX[(int)c] == CCTL)
2068 return 0;
2069 }
2070 return 1;
2071 }
2072
2073
/*
 * Return true (1) if the argument is a legal variable name, false (0)
 * if not.  The first character must satisfy is_name() and every one
 * after it is_in_name() (a letter or underscore followed by zero or
 * more letters, underscores, and digits).
 */

int
goodname(char *name)
{
	char *p;

	if (!is_name(*name))
		return 0;

	for (p = name + 1; *p != '\0'; p++) {
		if (!is_in_name(*p))
			return 0;
	}
	return 1;
}
2093
2094
2095 /*
2096 * Called when an unexpected token is read during the parse. The argument
2097 * is the token that is expected, or -1 if more than one type of token can
2098 * occur at this point.
2099 */
2100
2101 STATIC void
2102 synexpect(int token, const char *text)
2103 {
2104 char msg[64];
2105 char *p;
2106
2107 if (lasttoken == TWORD) {
2108 size_t len = strlen(wordtext);
2109
2110 if (len <= 13)
2111 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2112 else
2113 fmtstr(msg, 34,
2114 "Word \"%.10s...\" unexpected", wordtext);
2115 } else
2116 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2117
2118 p = strchr(msg, '\0');
2119 if (text)
2120 fmtstr(p, 30, " (expecting \"%.10s\")", text);
2121 else if (token >= 0)
2122 fmtstr(p, 30, " (expecting %s)", tokname[token]);
2123
2124 synerror(msg);
2125 /* NOTREACHED */
2126 }
2127
2128
/*
 * Report a syntax error: hands "msg", prefixed by the line number in
 * "startlinno", to error().
 */
STATIC void
synerror(const char *msg)
{
	error("%d: Syntax error: %s", startlinno, msg);
	/* NOTREACHED */
}
2135
2136 STATIC void
2137 setprompt(int which)
2138 {
2139 whichprompt = which;
2140
2141 #ifndef SMALL
2142 if (!el)
2143 #endif
2144 out2str(getprompt(NULL));
2145 }
2146
2147 /*
2148 * handle getting the next character, while ignoring \ \n
2149 * (which is a little tricky as we only have one char of pushback
2150 * and we need that one elsewhere).
2151 */
2152 STATIC int
2153 pgetc_linecont(void)
2154 {
2155 int c;
2156
2157 while ((c = pgetc_macro()) == '\\') {
2158 c = pgetc();
2159 if (c == '\n') {
2160 plinno++;
2161 elided_nl++;
2162 if (doprompt)
2163 setprompt(2);
2164 else
2165 setprompt(0);
2166 } else {
2167 pungetc();
2168 /* Allow the backslash to be pushed back. */
2169 pushstring("\\", 1, NULL);
2170 return (pgetc());
2171 }
2172 }
2173 return (c);
2174 }
2175
2176 /*
2177 * called by editline -- any expansions to the prompt
2178 * should be added here.
2179 */
2180 const char *
2181 getprompt(void *unused)
2182 {
2183 char *p;
2184 const char *cp;
2185
2186 if (!doprompt)
2187 return "";
2188
2189 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt));
2190
2191 switch (whichprompt) {
2192 case 0:
2193 return "";
2194 case 1:
2195 p = ps1val();
2196 break;
2197 case 2:
2198 p = ps2val();
2199 break;
2200 default:
2201 return "<internal prompt error>";
2202 }
2203 if (p == NULL)
2204 return "";
2205
2206 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p));
2207
2208 cp = expandstr(p, plinno);
2209
2210 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp));
2211
2212 return cp;
2213 }
2214
/*
 * Expand a string ... used for expanding prompts (PS1...)
 *
 * ps is the string to expand, lineno the line number given to the
 * input string and to the node that is expanded.
 *
 * Never return NULL, always some string (return input string if invalid)
 *
 * The string is tokenized and expanded using a temporary parser state,
 * under a local error handler; everything that was changed (parser,
 * xflag, input files, handler, stack) is put back before returning,
 * whether or not an error occurred.
 */
const char *
expandstr(char *ps, int lineno)
{
	union node n;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	struct parsefile *const savetopfile = getcurrentfile();
	const int save_x = xflag;
	struct parse_state new_state = init_parse_state;
	struct parse_state *const saveparser = psp.v_current_parser;
	struct stackmark smark;
	const char *result = NULL;

	setstackmark(&smark);
	/*
	 * At this point we anticipate that there may be a string
	 * growing on the stack, but we have no idea how big it is.
	 * However we know that it cannot be bigger than the current
	 * allocated stack block, so simply reserve the whole thing,
	 * then we can use the stack without barfing all over what
	 * is there already...   (the stack mark undoes this later.)
	 */
	(void) stalloc(stackblocksize());

	/*
	 * Normal path.  If an error is raised anywhere in here control
	 * comes back from setjmp() with a non-zero value, this block is
	 * skipped, and result is still NULL.
	 */
	if (!setjmp(jmploc.loc)) {
		handler = &jmploc;

		psp.v_current_parser = &new_state;
		setinputstring(ps, 1, lineno);

		/* read the whole string as one double quoted word */
		readtoken1(pgetc(), DQSYNTAX, 1);
		if (backquotelist != NULL && !promptcmds)
			/* command substitutions found, but not permitted */
			result = "-o promptcmds not set: ";
		else {
			n.narg.type = NARG;
			n.narg.next = NULL;
			n.narg.text = wordtext;
			n.narg.lineno = lineno;
			n.narg.backquote = backquotelist;

			xflag = 0;	/* we might be expanding PS4 ... */
			expandarg(&n, NULL, 0);
			/*
			 * NOTE(review): this points into the stack block,
			 * which popstackmark() below releases - presumably
			 * callers use the result before the stack is used
			 * again; confirm.
			 */
			result = stackblock();
		}
		/* matched by the INTON below, reached when result != NULL */
		INTOFF;
	}
	/* common exit: undo everything, success or failure */
	psp.v_current_parser = saveparser;
	xflag = save_x;
	popfilesupto(savetopfile);
	handler = savehandler;
	popstackmark(&smark);

	if (result != NULL) {
		INTON;
	} else {
		/*
		 * An error occurred.  Interrupts are passed on
		 * (NOTE(review): exraise() is given SIGINT here although
		 * the test is against EXINT - confirm that is intended);
		 * for anything else the unexpanded input is returned.
		 */
		if (exception == EXINT)
			exraise(SIGINT);
		result = ps;
	}

	return result;
}
2282