/*	$NetBSD: parser.c,v 1.170 2020/05/14 08:34:17 msaitoh Exp $	*/
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.170 2020/05/14 08:34:17 msaitoh Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <limits.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48
49 #include "shell.h"
50 #include "parser.h"
51 #include "nodes.h"
52 #include "expand.h" /* defines rmescapes() */
53 #include "eval.h" /* defines commandname */
54 #include "syntax.h"
55 #include "options.h"
56 #include "input.h"
57 #include "output.h"
58 #include "var.h"
59 #include "error.h"
60 #include "memalloc.h"
61 #include "mystring.h"
62 #include "alias.h"
63 #include "show.h"
64 #ifndef SMALL
65 #include "myhistedit.h"
66 #endif
67 #ifdef DEBUG
68 #include "nodenames.h"
69 #endif
70
71 /*
72 * Shell command parser.
73 */
74
75 /* values returned by readtoken */
76 #include "token.h"
77
78 #define OPENBRACE '{'
79 #define CLOSEBRACE '}'
80
/*
 * One pending here document.  parsefname() fills in eofmark and appends
 * the entry to heredoclist; readheredocs() later removes it from that
 * list and reads the document body.
 */
struct HereDoc {
	struct HereDoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
	int startline;		/* line number where << seen */
};
88
/* Parser state storage; psp initially points at it. */
MKINIT struct parse_state parse_state;
union parse_state_p psp = { .c_current_parser = &parse_state };

/*
 * A parse state with every field explicitly set to zero / NULL.
 * NOTE(review): presumably copied to reset a parser state; the user
 * is not within this part of the file - confirm.
 */
static const struct parse_state init_parse_state = {	/* all 0's ... */
	.ps_heredoclist = NULL,
	.ps_parsebackquote = 0,
	.ps_doprompt = 0,
	.ps_needprompt = 0,
	.ps_lasttoken = 0,
	.ps_tokpushback = 0,
	.ps_wordtext = NULL,
	.ps_checkkwd = 0,
	.ps_redirnode = NULL,
	.ps_heredoc = NULL,
	.ps_quoteflag = 0,
	.ps_startlinno = 0,
	.ps_funclinno = 0,
	.ps_elided_nl = 0,
};
108
/* Forward declarations of the routines that make up the parser. */
STATIC union node *list(int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makeword(int);
STATIC void parsefname(void);
STATIC int slurp_heredoc(char *const, const int, const int);
STATIC void readheredocs(void);
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, int);
STATIC int noexpand(char *);
STATIC void linebreak(void);
STATIC void consumetoken(int);
STATIC void synexpect(int, const char *) __dead;
STATIC void synerror(const char *) __dead;
STATIC void setprompt(int);
STATIC int pgetc_linecont(void);

/* Message used whenever end of input is hit while reading a here document */
static const char EOFhere[] = "EOF reading here (<<) document";

#ifdef DEBUG
/* Incremented while parsecmd() is reading tokens / parsing, then decremented */
int parsing = 0;
#endif
135
136 /*
137 * Read and parse a command. Returns NEOF on end of file. (NULL is a
138 * valid parse tree indicating a blank line.)
139 */
140
141 union node *
142 parsecmd(int interact)
143 {
144 int t;
145 union node *n;
146
147 #ifdef DEBUG
148 parsing++;
149 #endif
150 tokpushback = 0;
151 checkkwd = 0;
152 doprompt = interact;
153 if (doprompt)
154 setprompt(1);
155 else
156 setprompt(0);
157 needprompt = 0;
158 t = readtoken();
159 #ifdef DEBUG
160 parsing--;
161 #endif
162 if (t == TEOF)
163 return NEOF;
164 if (t == TNL)
165 return NULL;
166
167 #ifdef DEBUG
168 parsing++;
169 #endif
170 tokpushback++;
171 n = list(1);
172 #ifdef DEBUG
173 parsing--;
174 #endif
175 if (heredoclist)
176 error("%d: Here document (<<%s) expected but not present",
177 heredoclist->startline, heredoclist->eofmark);
178 return n;
179 }
180
181
182 STATIC union node *
183 list(int nlflag)
184 {
185 union node *ntop, *n1, *n2, *n3;
186 int tok;
187
188 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno));
189
190 checkkwd = CHKNL | CHKKWD | CHKALIAS;
191 if (nlflag == 0 && tokendlist[peektoken()])
192 return NULL;
193 ntop = n1 = NULL;
194 for (;;) {
195 n2 = andor();
196 tok = readtoken();
197 if (tok == TBACKGND) {
198 if (n2->type == NCMD || n2->type == NPIPE)
199 n2->ncmd.backgnd = 1;
200 else if (n2->type == NREDIR)
201 n2->type = NBACKGND;
202 else {
203 n3 = stalloc(sizeof(struct nredir));
204 n3->type = NBACKGND;
205 n3->nredir.n = n2;
206 n3->nredir.redirect = NULL;
207 n2 = n3;
208 }
209 }
210
211 if (ntop == NULL)
212 ntop = n2;
213 else if (n1 == NULL) {
214 n1 = stalloc(sizeof(struct nbinary));
215 n1->type = NSEMI;
216 n1->nbinary.ch1 = ntop;
217 n1->nbinary.ch2 = n2;
218 ntop = n1;
219 } else {
220 n3 = stalloc(sizeof(struct nbinary));
221 n3->type = NSEMI;
222 n3->nbinary.ch1 = n1->nbinary.ch2;
223 n3->nbinary.ch2 = n2;
224 n1->nbinary.ch2 = n3;
225 n1 = n3;
226 }
227
228 switch (tok) {
229 case TBACKGND:
230 case TSEMI:
231 tok = readtoken();
232 /* FALLTHROUGH */
233 case TNL:
234 if (tok == TNL) {
235 readheredocs();
236 if (nlflag)
237 return ntop;
238 } else if (tok == TEOF && nlflag)
239 return ntop;
240 else
241 tokpushback++;
242
243 checkkwd = CHKNL | CHKKWD | CHKALIAS;
244 if (!nlflag && tokendlist[peektoken()])
245 return ntop;
246 break;
247 case TEOF:
248 pungetc(); /* push back EOF on input */
249 return ntop;
250 default:
251 if (nlflag)
252 synexpect(-1, 0);
253 tokpushback++;
254 return ntop;
255 }
256 }
257 }
258
259 STATIC union node *
260 andor(void)
261 {
262 union node *n1, *n2, *n3;
263 int t;
264
265 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
266
267 n1 = pipeline();
268 for (;;) {
269 if ((t = readtoken()) == TAND) {
270 t = NAND;
271 } else if (t == TOR) {
272 t = NOR;
273 } else {
274 tokpushback++;
275 return n1;
276 }
277 n2 = pipeline();
278 n3 = stalloc(sizeof(struct nbinary));
279 n3->type = t;
280 n3->nbinary.ch1 = n1;
281 n3->nbinary.ch2 = n2;
282 n1 = n3;
283 }
284 }
285
286 STATIC union node *
287 pipeline(void)
288 {
289 union node *n1, *n2, *pipenode;
290 struct nodelist *lp, *prev;
291 int negate;
292
293 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
294
295 negate = 0;
296 checkkwd = CHKNL | CHKKWD | CHKALIAS;
297 while (readtoken() == TNOT) {
298 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
299 #ifndef BOGUS_NOT_COMMAND
300 if (posix && negate)
301 synerror("2nd \"!\" unexpected");
302 #endif
303 negate++;
304 }
305 tokpushback++;
306 n1 = command();
307 if (readtoken() == TPIPE) {
308 pipenode = stalloc(sizeof(struct npipe));
309 pipenode->type = NPIPE;
310 pipenode->npipe.backgnd = 0;
311 lp = stalloc(sizeof(struct nodelist));
312 pipenode->npipe.cmdlist = lp;
313 lp->n = n1;
314 do {
315 prev = lp;
316 lp = stalloc(sizeof(struct nodelist));
317 lp->n = command();
318 prev->next = lp;
319 } while (readtoken() == TPIPE);
320 lp->next = NULL;
321 n1 = pipenode;
322 }
323 tokpushback++;
324 if (negate) {
325 CTRACE(DBG_PARSE, ("%snegate pipeline\n",
326 (negate&1) ? "" : "double "));
327 n2 = stalloc(sizeof(struct nnot));
328 n2->type = (negate & 1) ? NNOT : NDNOT;
329 n2->nnot.com = n1;
330 return n2;
331 } else
332 return n1;
333 }
334
335
336
337 STATIC union node *
338 command(void)
339 {
340 union node *n1, *n2;
341 union node *ap, **app;
342 union node *cp, **cpp;
343 union node *redir, **rpp;
344 int t;
345 #ifdef BOGUS_NOT_COMMAND
346 int negate = 0;
347 #endif
348
349 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
350
351 checkkwd = CHKNL | CHKKWD | CHKALIAS;
352 redir = NULL;
353 n1 = NULL;
354 rpp = &redir;
355
356 /* Check for redirection which may precede command */
357 while (readtoken() == TREDIR) {
358 *rpp = n2 = redirnode;
359 rpp = &n2->nfile.next;
360 parsefname();
361 }
362 tokpushback++;
363
364 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */
365 while (readtoken() == TNOT) {
366 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
367 negate++;
368 }
369 tokpushback++;
370 #endif
371
372 switch (readtoken()) {
373 case TIF:
374 n1 = stalloc(sizeof(struct nif));
375 n1->type = NIF;
376 n1->nif.test = list(0);
377 consumetoken(TTHEN);
378 n1->nif.ifpart = list(0);
379 n2 = n1;
380 while (readtoken() == TELIF) {
381 n2->nif.elsepart = stalloc(sizeof(struct nif));
382 n2 = n2->nif.elsepart;
383 n2->type = NIF;
384 n2->nif.test = list(0);
385 consumetoken(TTHEN);
386 n2->nif.ifpart = list(0);
387 }
388 if (lasttoken == TELSE)
389 n2->nif.elsepart = list(0);
390 else {
391 n2->nif.elsepart = NULL;
392 tokpushback++;
393 }
394 consumetoken(TFI);
395 checkkwd = CHKKWD | CHKALIAS;
396 break;
397 case TWHILE:
398 case TUNTIL:
399 n1 = stalloc(sizeof(struct nbinary));
400 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
401 n1->nbinary.ch1 = list(0);
402 consumetoken(TDO);
403 n1->nbinary.ch2 = list(0);
404 consumetoken(TDONE);
405 checkkwd = CHKKWD | CHKALIAS;
406 break;
407 case TFOR:
408 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
409 synerror("Bad for loop variable");
410 n1 = stalloc(sizeof(struct nfor));
411 n1->type = NFOR;
412 n1->nfor.var = wordtext;
413 linebreak();
414 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) {
415 app = ≈
416 while (readtoken() == TWORD) {
417 n2 = makeword(startlinno);
418 *app = n2;
419 app = &n2->narg.next;
420 }
421 *app = NULL;
422 n1->nfor.args = ap;
423 if (lasttoken != TNL && lasttoken != TSEMI)
424 synexpect(TSEMI, 0);
425 } else {
426 static char argvars[5] = {
427 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
428 };
429
430 n2 = stalloc(sizeof(struct narg));
431 n2->type = NARG;
432 n2->narg.text = argvars;
433 n2->narg.backquote = NULL;
434 n2->narg.next = NULL;
435 n2->narg.lineno = startlinno;
436 n1->nfor.args = n2;
437 /*
438 * Newline or semicolon here is optional (but note
439 * that the original Bourne shell only allowed NL).
440 */
441 if (lasttoken != TNL && lasttoken != TSEMI)
442 tokpushback++;
443 }
444 checkkwd = CHKNL | CHKKWD | CHKALIAS;
445 if ((t = readtoken()) == TDO)
446 t = TDONE;
447 else if (t == TBEGIN)
448 t = TEND;
449 else
450 synexpect(TDO, 0);
451 n1->nfor.body = list(0);
452 consumetoken(t);
453 checkkwd = CHKKWD | CHKALIAS;
454 break;
455 case TCASE:
456 n1 = stalloc(sizeof(struct ncase));
457 n1->type = NCASE;
458 n1->ncase.lineno = startlinno - elided_nl;
459 consumetoken(TWORD);
460 n1->ncase.expr = makeword(startlinno);
461 linebreak();
462 if (lasttoken != TWORD || !equal(wordtext, "in"))
463 synexpect(-1, "in");
464 cpp = &n1->ncase.cases;
465 checkkwd = CHKNL | CHKKWD;
466 readtoken();
467 /*
468 * Both ksh and bash accept 'case x in esac'
469 * so configure scripts started taking advantage of this.
470 * The page: http://pubs.opengroup.org/onlinepubs/\
471 * 009695399/utilities/xcu_chap02.html contradicts itself,
472 * as to if this is legal; the "Case Conditional Format"
473 * paragraph shows one case is required, but the "Grammar"
474 * section shows a grammar that explicitly allows the no
475 * case option.
476 *
477 * The standard also says (section 2.10):
478 * This formal syntax shall take precedence over the
479 * preceding text syntax description.
480 * ie: the "Grammar" section wins. The text is just
481 * a rough guide (introduction to the common case.)
482 */
483 while (lasttoken != TESAC) {
484 *cpp = cp = stalloc(sizeof(struct nclist));
485 cp->type = NCLIST;
486 app = &cp->nclist.pattern;
487 if (lasttoken == TLP)
488 readtoken();
489 for (;;) {
490 if (lasttoken < TWORD)
491 synexpect(TWORD, 0);
492 *app = ap = makeword(startlinno);
493 checkkwd = CHKNL | CHKKWD;
494 if (readtoken() != TPIPE)
495 break;
496 app = &ap->narg.next;
497 readtoken();
498 }
499 if (lasttoken != TRP)
500 synexpect(TRP, 0);
501 cp->nclist.lineno = startlinno;
502 cp->nclist.body = list(0);
503
504 checkkwd = CHKNL | CHKKWD | CHKALIAS;
505 if ((t = readtoken()) != TESAC) {
506 if (t != TENDCASE && t != TCASEFALL) {
507 synexpect(TENDCASE, 0);
508 } else {
509 if (t == TCASEFALL)
510 cp->type = NCLISTCONT;
511 checkkwd = CHKNL | CHKKWD;
512 readtoken();
513 }
514 }
515 cpp = &cp->nclist.next;
516 }
517 *cpp = NULL;
518 checkkwd = CHKKWD | CHKALIAS;
519 break;
520 case TLP:
521 n1 = stalloc(sizeof(struct nredir));
522 n1->type = NSUBSHELL;
523 n1->nredir.n = list(0);
524 n1->nredir.redirect = NULL;
525 if (n1->nredir.n == NULL)
526 synexpect(-1, 0);
527 consumetoken(TRP);
528 checkkwd = CHKKWD | CHKALIAS;
529 break;
530 case TBEGIN:
531 n1 = list(0);
532 if (posix && n1 == NULL)
533 synexpect(-1, 0);
534 consumetoken(TEND);
535 checkkwd = CHKKWD | CHKALIAS;
536 break;
537
538 case TBACKGND:
539 case TSEMI:
540 case TAND:
541 case TOR:
542 case TPIPE:
543 case TNL:
544 case TEOF:
545 case TRP:
546 case TENDCASE:
547 case TCASEFALL:
548 /*
549 * simple commands must have something in them,
550 * either a word (which at this point includes a=b)
551 * or a redirection. If we reached the end of the
552 * command (which one of these tokens indicates)
553 * when we are just starting, and have not had a
554 * redirect, then ...
555 *
556 * nb: it is still possible to end up with empty
557 * simple commands, if the "command" is a var
558 * expansion that produces nothing:
559 * X= ; $X && $X
560 * --> &&
561 * That is OK and is handled after word expansions.
562 */
563 if (!redir)
564 synexpect(-1, 0);
565 /*
566 * continue to build a node containing the redirect.
567 * the tokpushback means that our ending token will be
568 * read again in simplecmd, causing it to terminate,
569 * so only the redirect(s) will be contained in the
570 * returned n1
571 */
572 /* FALLTHROUGH */
573 case TWORD:
574 tokpushback++;
575 n1 = simplecmd(rpp, redir);
576 goto checkneg;
577 default:
578 synexpect(-1, 0);
579 /* NOTREACHED */
580 }
581
582 /* Now check for redirection which may follow command */
583 while (readtoken() == TREDIR) {
584 *rpp = n2 = redirnode;
585 rpp = &n2->nfile.next;
586 parsefname();
587 }
588 tokpushback++;
589 *rpp = NULL;
590 if (redir) {
591 if (n1 == NULL || n1->type != NSUBSHELL) {
592 n2 = stalloc(sizeof(struct nredir));
593 n2->type = NREDIR;
594 n2->nredir.n = n1;
595 n1 = n2;
596 }
597 n1->nredir.redirect = redir;
598 }
599
600 checkneg:
601 #ifdef BOGUS_NOT_COMMAND
602 if (negate) {
603 VTRACE(DBG_PARSE, ("bogus %snegate command\n",
604 (negate&1) ? "" : "double "));
605 n2 = stalloc(sizeof(struct nnot));
606 n2->type = (negate & 1) ? NNOT : NDNOT;
607 n2->nnot.com = n1;
608 return n2;
609 }
610 else
611 #endif
612 return n1;
613 }
614
615
616 STATIC union node *
617 simplecmd(union node **rpp, union node *redir)
618 {
619 union node *args, **app;
620 union node *n = NULL;
621 int line = 0;
622 int savecheckkwd;
623 #ifdef BOGUS_NOT_COMMAND
624 union node *n2;
625 int negate = 0;
626 #endif
627
628 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
629 redir ? "" : "out", plinno));
630
631 /* If we don't have any redirections already, then we must reset */
632 /* rpp to be the address of the local redir variable. */
633 if (redir == 0)
634 rpp = &redir;
635
636 args = NULL;
637 app = &args;
638
639 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */
640 while (readtoken() == TNOT) {
641 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
642 negate++;
643 }
644 tokpushback++;
645 #endif
646
647 savecheckkwd = CHKALIAS;
648 for (;;) {
649 checkkwd = savecheckkwd;
650 if (readtoken() == TWORD) {
651 if (line == 0)
652 line = startlinno;
653 n = makeword(startlinno);
654 *app = n;
655 app = &n->narg.next;
656 if (savecheckkwd != 0 && !isassignment(wordtext))
657 savecheckkwd = 0;
658 } else if (lasttoken == TREDIR) {
659 if (line == 0)
660 line = startlinno;
661 *rpp = n = redirnode;
662 rpp = &n->nfile.next;
663 parsefname(); /* read name of redirection file */
664 } else if (lasttoken == TLP && app == &args->narg.next
665 && redir == 0) {
666 /* We have a function */
667 consumetoken(TRP);
668 funclinno = plinno;
669 rmescapes(n->narg.text);
670 if (strchr(n->narg.text, '/'))
671 synerror("Bad function name");
672 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n",
673 n->narg.text, plinno));
674 n->type = NDEFUN;
675 n->narg.lineno = plinno - elided_nl;
676 n->narg.next = command();
677 funclinno = 0;
678 goto checkneg;
679 } else {
680 tokpushback++;
681 break;
682 }
683 }
684
685 if (args == NULL && redir == NULL)
686 synexpect(-1, 0);
687 *app = NULL;
688 *rpp = NULL;
689 n = stalloc(sizeof(struct ncmd));
690 n->type = NCMD;
691 n->ncmd.lineno = line - elided_nl;
692 n->ncmd.backgnd = 0;
693 n->ncmd.args = args;
694 n->ncmd.redirect = redir;
695 n->ncmd.lineno = startlinno;
696
697 checkneg:
698 #ifdef BOGUS_NOT_COMMAND
699 if (negate) {
700 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
701 (negate&1) ? "" : "double "));
702 n2 = stalloc(sizeof(struct nnot));
703 n2->type = (negate & 1) ? NNOT : NDNOT;
704 n2->nnot.com = n;
705 return n2;
706 }
707 else
708 #endif
709 return n;
710 }
711
712 STATIC union node *
713 makeword(int lno)
714 {
715 union node *n;
716
717 n = stalloc(sizeof(struct narg));
718 n->type = NARG;
719 n->narg.next = NULL;
720 n->narg.text = wordtext;
721 n->narg.backquote = backquotelist;
722 n->narg.lineno = lno;
723 return n;
724 }
725
726 void
727 fixredir(union node *n, const char *text, int err)
728 {
729
730 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
731 if (!err)
732 n->ndup.vname = NULL;
733
734 if (is_number(text))
735 n->ndup.dupfd = number(text);
736 else if (text[0] == '-' && text[1] == '\0')
737 n->ndup.dupfd = -1;
738 else {
739
740 if (err)
741 synerror("Bad fd number");
742 else
743 n->ndup.vname = makeword(startlinno - elided_nl);
744 }
745 }
746
747
748 STATIC void
749 parsefname(void)
750 {
751 union node *n = redirnode;
752
753 if (readtoken() != TWORD)
754 synexpect(-1, 0);
755 if (n->type == NHERE) {
756 struct HereDoc *here = heredoc;
757 struct HereDoc *p;
758
759 if (quoteflag == 0)
760 n->type = NXHERE;
761 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
762 if (here->striptabs) {
763 while (*wordtext == '\t')
764 wordtext++;
765 }
766
767 /*
768 * this test is not really necessary, we are not
769 * required to expand wordtext, but there's no reason
770 * it cannot be $$ or something like that - that would
771 * not mean the pid, but literally two '$' characters.
772 * There is no need for limits on what the word can be.
773 * However, it needs to stay literal as entered, not
774 * have $ converted to CTLVAR or something, which as
775 * the parser is, at the minute, is impossible to prevent.
776 * So, leave it like this until the rest of the parser is fixed.
777 */
778 if (!noexpand(wordtext))
779 synerror("Illegal eof marker for << redirection");
780
781 rmescapes(wordtext);
782 here->eofmark = wordtext;
783 here->next = NULL;
784 if (heredoclist == NULL)
785 heredoclist = here;
786 else {
787 for (p = heredoclist ; p->next ; p = p->next)
788 continue;
789 p->next = here;
790 }
791 } else if (n->type == NTOFD || n->type == NFROMFD) {
792 fixredir(n, wordtext, 0);
793 } else {
794 n->nfile.fname = makeword(startlinno - elided_nl);
795 }
796 }
797
798 /*
799 * Check to see whether we are at the end of the here document. When this
800 * is called, c is set to the first character of the next input line. If
801 * we are at the end of the here document, this routine sets the c to PEOF.
802 * The new value of c is returned.
803 */
804
805 static int
806 checkend(int c, char * const eofmark, const int striptabs)
807 {
808
809 if (striptabs) {
810 while (c == '\t')
811 c = pgetc();
812 }
813 if (c == PEOF) {
814 if (*eofmark == '\0')
815 return (c);
816 synerror(EOFhere);
817 }
818 if (c == *eofmark) {
819 int c2;
820 char *q;
821
822 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
823 if (c2 == '\n') {
824 plinno++;
825 needprompt = doprompt;
826 }
827 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
828 c = PEOF;
829 if (c2 == '\n') {
830 plinno++;
831 needprompt = doprompt;
832 }
833 } else {
834 pungetc();
835 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
836 }
837 } else if (c == '\n' && *eofmark == '\0') {
838 c = PEOF;
839 plinno++;
840 needprompt = doprompt;
841 }
842 return (c);
843 }
844
845
846 /*
847 * Input any here documents.
848 */
849
/*
 * Read the body of one here document from the input, up to (and
 * consuming) the line that matches eofmark.
 *
 * eofmark:	the end marker.
 * striptabs:	passed to checkend() so it skips leading tabs.
 * sq:		non-zero if the end marker was quoted, in which case the
 *		text is saved literally; otherwise a backslash followed
 *		by a newline does not end the line being collected.
 *
 * The collected text is left as a stack string in wordtext.
 * Returns the number of lines read (the amount plinno advanced).
 */
STATIC int
slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
{
	int c;
	char *out;
	int lines = plinno;	/* line number on entry, for the line count */

	c = pgetc();

	/*
	 * If we hit EOF on the input, and the eofmark is a null string ('')
	 * we consider this empty line to be the eofmark, and exit without err.
	 */
	if (c == PEOF && *eofmark != '\0')
		synerror(EOFhere);

	STARTSTACKSTR(out);

	/* one iteration per line, checkend() sees each line's first char */
	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
		do {
			if (sq) {
				/*
				 * in single quoted mode (eofmark quoted)
				 * all we look for is \n so we can check
				 * for the eofmark - everything saved literally.
				 */
				STPUTC(c, out);
				if (c == '\n') {
					plinno++;
					break;
				}
				continue;
			}
			/*
			 * In double quoted (non-quoted eofmark)
			 * we must handle \ followed by \n here
			 * otherwise we can mismatch the end mark.
			 * All other uses of \ will be handled later
			 * when the here doc is expanded.
			 *
			 * This also makes sure \\ followed by \n does
			 * not suppress the newline (the \ quotes itself)
			 */
			if (c == '\\') {		/* A backslash */
				STPUTC(c, out);
				c = pgetc();		/* followed by */
				if (c == '\n') {	/* a newline?  */
					STPUTC(c, out);
					plinno++;
					continue;	/* don't break */
				}
			}
			STPUTC(c, out);			/* keep the char */
			if (c == '\n') {		/* at end of line */
				plinno++;
				break;			/* look for eofmark */
			}
		} while ((c = pgetc()) != PEOF);

		/*
		 * If we have read a line, and reached EOF, without
		 * finding the eofmark, whether the EOF comes before
		 * or immediately after the \n, that is an error.
		 */
		if (c == PEOF || (c = pgetc()) == PEOF)
			synerror(EOFhere);
	}
	STPUTC('\0', out);

	/* c is reused here as the length of the text (including the \0) */
	c = out - stackblock();
	out = stackblock();
	grabstackblock(c);
	wordtext = out;

	VTRACE(DBG_PARSE,
	   ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
		plinno - lines, sq ? "quoted " : "", eofmark,
		striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
		wordtext, (c > 16 ? "..." : ""), plinno));

	return (plinno - lines);
}
932
933 static char *
934 insert_elided_nl(char *str)
935 {
936 while (elided_nl > 0) {
937 STPUTC(CTLNONL, str);
938 elided_nl--;
939 }
940 return str;
941 }
942
943 STATIC void
944 readheredocs(void)
945 {
946 struct HereDoc *here;
947 union node *n;
948 int line, l;
949
950 line = 0; /*XXX - gcc! obviously unneeded */
951 if (heredoclist)
952 line = heredoclist->startline + 1;
953 l = 0;
954 while (heredoclist) {
955 line += l;
956 here = heredoclist;
957 heredoclist = here->next;
958 if (needprompt) {
959 setprompt(2);
960 needprompt = 0;
961 }
962
963 l = slurp_heredoc(here->eofmark, here->striptabs,
964 here->here->nhere.type == NHERE);
965
966 here->here->nhere.doc = n = makeword(line);
967
968 if (here->here->nhere.type == NHERE)
969 continue;
970
971 /*
972 * Now "parse" here docs that have unquoted eofmarkers.
973 */
974 setinputstring(wordtext, 1, line);
975 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
976 l, line));
977 readtoken1(pgetc(), DQSYNTAX, 1);
978 n->narg.text = wordtext;
979 n->narg.backquote = backquotelist;
980 popfile();
981 }
982 }
983
984 STATIC int
985 peektoken(void)
986 {
987 int t;
988
989 t = readtoken();
990 tokpushback++;
991 return (t);
992 }
993
994 STATIC int
995 readtoken(void)
996 {
997 int t;
998 #ifdef DEBUG
999 int alreadyseen = tokpushback;
1000 int savecheckkwd = checkkwd;
1001 #endif
1002 struct alias *ap;
1003
1004 top:
1005 t = xxreadtoken();
1006
1007 if (checkkwd & CHKNL) {
1008 while (t == TNL) {
1009 readheredocs();
1010 t = xxreadtoken();
1011 }
1012 }
1013
1014 /*
1015 * check for keywords and aliases
1016 */
1017 if (t == TWORD && !quoteflag) {
1018 const char *const *pp;
1019
1020 if (checkkwd & CHKKWD)
1021 for (pp = parsekwd; *pp; pp++) {
1022 if (**pp == *wordtext && equal(*pp, wordtext)) {
1023 lasttoken = t = pp -
1024 parsekwd + KWDOFFSET;
1025 VTRACE(DBG_PARSE,
1026 ("keyword %s recognized @%d\n",
1027 tokname[t], plinno));
1028 goto out;
1029 }
1030 }
1031
1032 if (checkkwd & CHKALIAS &&
1033 (ap = lookupalias(wordtext, 1)) != NULL) {
1034 VTRACE(DBG_PARSE,
1035 ("alias '%s' recognized -> <:%s:>\n",
1036 wordtext, ap->val));
1037 pushstring(ap->val, strlen(ap->val), ap);
1038 goto top;
1039 }
1040 }
1041 out:
1042 if (t != TNOT)
1043 checkkwd = 0;
1044
1045 VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n",
1046 alreadyseen ? "reread " : "", tokname[t],
1047 t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd));
1048 return (t);
1049 }
1050
1051
1052 /*
1053 * Read the next input token.
1054 * If the token is a word, we set backquotelist to the list of cmds in
1055 * backquotes. We set quoteflag to true if any part of the word was
1056 * quoted.
1057 * If the token is TREDIR, then we set redirnode to a structure containing
1058 * the redirection.
1059 * In all cases, the variable startlinno is set to the number of the line
1060 * on which the token starts.
1061 *
1062 * [Change comment: here documents and internal procedures]
1063 * [Readtoken shouldn't have any arguments. Perhaps we should make the
1064 * word parsing code into a separate routine. In this case, readtoken
1065 * doesn't need to have any internal procedures, but parseword does.
1066 * We could also make parseoperator in essence the main routine, and
1067 * have parseword (readtoken1?) handle both words and redirection.]
1068 */
1069
/* Return a token from xxreadtoken(), remembering it in lasttoken. */
#define RETURN(token)	return lasttoken = (token)

/*
 * The low level tokeniser.
 *
 * If a token was pushed back (tokpushback set), that token (lasttoken)
 * is simply returned again.  Otherwise blanks, comments, and
 * backslash-newline pairs are skipped, and then either an operator
 * token (TNL, TEOF, TAND, TBACKGND, TOR, TPIPE, TENDCASE, TCASEFALL,
 * TSEMI, TLP, TRP) is returned, or readtoken1() is called to read a
 * word starting with the current character.
 */
STATIC int
xxreadtoken(void)
{
	int c;

	if (tokpushback) {
		tokpushback = 0;
		CTRACE(DBG_LEXER,
		    ("xxreadtoken() returns %s (%d) again\n",
			tokname[lasttoken], lasttoken));
		return lasttoken;
	}
	if (needprompt) {
		setprompt(2);
		needprompt = 0;
	}
	elided_nl = 0;
	startlinno = plinno;
	for (;;) {	/* until token or start of word found */
		c = pgetc_macro();
		CTRACE(DBG_LEXER, ("xxreadtoken() sees '%c' (%#.2x) ",
		    c&0xFF, c&0x1FF));
		switch (c) {
		case ' ': case '\t': case PFAKE:
			CTRACE(DBG_LEXER, (" ignored\n"));
			continue;
		case '#':
			/* a comment runs to the end of the line */
			while ((c = pgetc()) != '\n' && c != PEOF)
				continue;
			CTRACE(DBG_LEXER,
			    ("skipped comment to (not incl) \\n\n"));
			/* the \n (or EOF) is seen again by the next loop */
			pungetc();
			continue;

		case '\n':
			plinno++;
			CTRACE(DBG_LEXER, ("newline now @%d\n", plinno));
			needprompt = doprompt;
			RETURN(TNL);
		case PEOF:
			CTRACE(DBG_LEXER, ("EOF -> TEOF (return)\n"));
			RETURN(TEOF);

		case '&':
			if (pgetc_linecont() == '&') {
				CTRACE(DBG_LEXER,
				    ("and another -> TAND (return)\n"));
				RETURN(TAND);
			}
			pungetc();
			CTRACE(DBG_LEXER, (" -> TBACKGND (return)\n"));
			RETURN(TBACKGND);
		case '|':
			if (pgetc_linecont() == '|') {
				CTRACE(DBG_LEXER,
				    ("and another -> TOR (return)\n"));
				RETURN(TOR);
			}
			pungetc();
			CTRACE(DBG_LEXER, (" -> TPIPE (return)\n"));
			RETURN(TPIPE);
		case ';':
			switch (pgetc_linecont()) {
			case ';':
				CTRACE(DBG_LEXER,
				    ("and another -> TENDCASE (return)\n"));
				RETURN(TENDCASE);
			case '&':
				CTRACE(DBG_LEXER,
				    ("and '&' -> TCASEFALL (return)\n"));
				RETURN(TCASEFALL);
			default:
				pungetc();
				CTRACE(DBG_LEXER, (" -> TSEMI (return)\n"));
				RETURN(TSEMI);
			}
		case '(':
			CTRACE(DBG_LEXER, (" -> TLP (return)\n"));
			RETURN(TLP);
		case ')':
			CTRACE(DBG_LEXER, (" -> TRP (return)\n"));
			RETURN(TRP);

		case '\\':
			switch (pgetc()) {
			case '\n':
				/* line continuation, token starts later */
				startlinno = ++plinno;
				CTRACE(DBG_LEXER, ("\\\n ignored, now @%d\n",
				    plinno));
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
				continue;
			case PEOF:
				CTRACE(DBG_LEXER,
				  ("then EOF -> TEOF (return) '\\' dropped\n"));
				RETURN(TEOF);
			default:
				/* the \ (still in c) starts a word */
				CTRACE(DBG_LEXER, ("not \\\n or EOF: "));
				pungetc();
				break;
			}
			/* FALLTHROUGH */
		default:
			CTRACE(DBG_LEXER, ("getting a word\n"));
			return readtoken1(c, BASESYNTAX, 0);
		}
	}
#undef RETURN
}
1183
1184
1185
1186 /*
1187 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1188 * is not NULL, read a here document. In the latter case, eofmark is the
1189 * word which marks the end of the document and striptabs is true if
1190 * leading tabs should be stripped from the document. The argument firstc
1191 * is the first character of the input token or document.
1192 *
1193 * Because C does not have internal subroutines, I have simulated them
1194 * using goto's to implement the subroutine linkage. The following macros
1195 * will run code that appears at the end of readtoken1.
1196 */
1197
1198 /*
1199 * We used to remember only the current syntax, variable nesting level,
1200 * double quote state for each var nesting level, and arith nesting
1201 * level (unrelated to var nesting) and one prev syntax when in arith
1202 * syntax. This worked for simple cases, but can't handle arith inside
1203 * var expansion inside arith inside var with some quoted and some not.
1204 *
1205 * Inspired by FreeBSD's implementation (though it was the obvious way)
1206 * though implemented differently, we now have a stack that keeps track
1207 * of what we are doing now, and what we were doing previously.
1208 * Every time something changes, which will eventually end and should
1209 * revert to the previous state, we push this stack, and then pop it
1210 * again later (that is every ${} with an operator (to parse the word
1211 * or pattern that follows) ${x} and $x are too simple to need it)
1212 * $(( )) $( ) and "...". Always. Really, always!
1213 *
1214 * The stack is implemented as one static (on the C stack) base block
1215 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1216 * enough for the vast majority of cases. For torture tests, we
1217 * malloc more blocks as needed. All accesses through the inline
1218 * functions below.
1219 */
1220
1221 /*
1222 * varnest & arinest will typically be 0 or 1
1223 * (varnest can increment in usages like ${x=${y}} but probably
1224 * does not really need to)
1225 * parenlevel allows balancing parens inside a $(( )), it is reset
1226 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
1227 * quoted is special - we need to know 2 things ... are we inside "..."
1228 * (even if inherited from some previous nesting level) and was there
1229 * an opening '"' at this level (so the next will be closing).
1230 * "..." can span nesting levels, but cannot be opened in one and
1231 * closed in a different one.
1232 * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1233 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1234 * is really so special that this setting is not very important)
1235 * and 0x10 that indicates that an opening quote has been seen.
1236 * The bottom 4 bits are inherited, the 0x10 bit is not.
1237 */
/*
 * One level of tokenizer state.  readtoken1() keeps a stack of these and
 * pushes a new level (copying most fields from the level below, see
 * bump_state_level()) each time it enters a nested quoting, variable or
 * arithmetic context.
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax table used to classify chars */
	unsigned short ts_parenlevel;	/* counters */
					/* (open parens; reset to 0 on push) */
	unsigned short ts_varnest;	/* 64000 levels should be enough! */
	unsigned short ts_arinest;	/* non-zero while inside $(( )) */
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
					/* (NQ/SQ/DQ/CQ, or'ed with QS) */
	unsigned short ts_magicq;	/* heredoc or word expand */
};
1246
/*
 * Values stored in ts_quoted: one quoting kind in the low bits (extract
 * with QF), optionally or'ed with QS.
 */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	CQ	0x03	/* C style Single Quotes */
#define	QF	0x0F		/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

/* Number of tokenstate entries held by each struct statestack block */
#define	LEVELS_PER_BLOCK	8
/* Shorthand for the state stack block type */
#define	VSS			struct statestack
1256
/*
 * One block of the tokenizer state stack.  Blocks are chained through
 * "prev"; the block a VSS pointer refers to is the top one, and "cur"
 * indexes the tokenstate in it that is currently in use.
 */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1262
1263 static inline struct tokenstate *
1264 currentstate(VSS *stack)
1265 {
1266 return &stack->tokenstate[stack->cur];
1267 }
1268
#ifdef notdef
/*
 * Return the tokenstate one level below the current one.  When the
 * current entry is the first of its block, that is the last entry of
 * the previous block; at the very bottom of the stack the base entry
 * itself is returned.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	if (stack->cur == 0) {
		if (stack->prev == NULL)	/* cannot drop below base */
			return &stack->tokenstate[0];
		return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
	}
	return &stack->tokenstate[stack->cur - 1];
}
#endif
1280
1281 static inline VSS *
1282 bump_state_level(VSS *stack)
1283 {
1284 struct tokenstate *os, *ts;
1285
1286 os = currentstate(stack);
1287
1288 if (++stack->cur >= LEVELS_PER_BLOCK) {
1289 VSS *ss;
1290
1291 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1292 ss->cur = 0;
1293 ss->prev = stack;
1294 stack = ss;
1295 }
1296
1297 ts = currentstate(stack);
1298
1299 ts->ts_parenlevel = 0; /* parens inside never match outside */
1300
1301 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1302 ts->ts_varnest = os->ts_varnest;
1303 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1304 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1305 ts->ts_magicq = os->ts_magicq;
1306
1307 return stack;
1308 }
1309
1310 static inline VSS *
1311 drop_state_level(VSS *stack)
1312 {
1313 if (stack->cur == 0) {
1314 VSS *ss;
1315
1316 ss = stack;
1317 stack = ss->prev;
1318 if (stack == NULL)
1319 return ss;
1320 ckfree(ss);
1321 }
1322 --stack->cur;
1323 return stack;
1324 }
1325
1326 static inline void
1327 cleanup_state_stack(VSS *stack)
1328 {
1329 while (stack->prev != NULL) {
1330 stack->cur = 0;
1331 stack = drop_state_level(stack);
1332 }
1333 }
1334
/*
 * "Subroutine calls" for readtoken1(): jump to the labelled code near the
 * end of that function, which jumps back to the *_return label defined
 * here.  Since each expansion defines a label, each macro can be used
 * only once in the function.
 */
#define	PARSESUB()	{goto parsesub; parsesub_return:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}

/*
 * The following macros all assume the existence of a local var "stack"
 * which contains a pointer to the current struct statestack
 */

/*
 * These are macros rather than inline funcs to avoid code churn as much
 * as possible - they replace macros of the same name used previously.
 *
 * ISDBLQUOTE() tests the QS bit, SETDBLQUOTE() records a double quote
 * opened at the current level (QS | DQ).
 */
#define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
#define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = QS | DQ)
#ifdef notdef
#define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
    stack->cur != 0 || stack->prev ?					\
	prevstate(stack)->ts_quoted & QF : 0)
#endif

/*
 * This set are just to avoid excess typing and line lengths...
 * The ones that "look like" var names must be implemented to be lvalues
 */
#define	syntax		(currentstate(stack)->ts_syntax)
#define	parenlevel	(currentstate(stack)->ts_parenlevel)
#define	varnest		(currentstate(stack)->ts_varnest)
#define	arinest		(currentstate(stack)->ts_arinest)
#define	quoted		(currentstate(stack)->ts_quoted)
#define	magicq		(currentstate(stack)->ts_magicq)
/* push / pop one level, updating the local "stack" pointer */
#define	TS_PUSH()	(stack = bump_state_level(stack))
#define	TS_POP()	(stack = drop_state_level(stack))
1367
/*
 * Called to parse command substitutions.  oldstyle is true if the command
 * is enclosed inside `` (otherwise it was enclosed in "$( )")
 *
 * Parameters:
 *	stack	 the caller's tokenizer state stack; consulted for the
 *		 current quoting/arith state and cleaned up if an error
 *		 unwinds through here
 *	in	 current end of the word being built in the stack string
 *	pbqlist	 list of command substitutions seen in this word; the newly
 *		 parsed command is appended to it
 *	oldstyle non-zero for `...`, zero for $(...)
 *
 * Returns the new end-of-string pointer for the word being built, after
 * the saved text has been restored and a CTLBACKQ marker appended.
 *
 * Internally nlpp is a pointer to the head of the linked
 * list of commands (passed by reference), and savelen is the number of
 * characters on the top of the stack which must be preserved.
 */
static char *
parsebackq(VSS *const stack, char * const in,
    struct nodelist **const pbqlist, const int oldstyle)
{
	struct nodelist **nlpp;
	const int savepbq = parsebackquote;
	union node *n;
	char *out;
	char *str = NULL;
	/* volatile copy of str, so the error handler below can rely on it */
	char *volatile sstr = str;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	struct parsefile *const savetopfile = getcurrentfile();
	const int savelen = in - stackblock();
	int saveprompt;
	int lno;

	/*
	 * Error unwinding: undo what this function did (pushed input,
	 * malloc'd copy of the partial word, extra state stack blocks)
	 * and then pass the exception on to the previous handler.
	 */
	if (setjmp(jmploc.loc)) {
		popfilesupto(savetopfile);
		if (sstr)
			ckfree(__UNVOLATILE(sstr));
		cleanup_state_stack(stack);
		parsebackquote = 0;
		handler = savehandler;
		CTRACE(DBG_LEXER, ("parsebackq() err (%d), unwinding\n",
		    exception));
		longjmp(handler->loc, 1);
	}
	INTOFF;
	/* keep a private copy of the partial word; it is put back below */
	sstr = str = NULL;
	if (savelen > 0) {
		sstr = str = ckmalloc(savelen);
		memcpy(str, stackblock(), savelen);
	}
	handler = &jmploc;
	INTON;
	if (oldstyle) {
		/*
		 * We must read until the closing backquote, giving special
		 * treatment to some slashes, and then push the string and
		 * reread it as input, interpreting it normally.
		 */
		int pc;
		int psavelen;
		char *pstr;
		int line1 = plinno;

		VTRACE(DBG_PARSE|DBG_LEXER,
		    ("parsebackq: repackaging `` as $( )"));
		/*
		 * Because the entire `...` is read here, we don't
		 * need to bother the state stack.   That will be used
		 * (as appropriate) when the processed string is re-read.
		 */
		STARTSTACKSTR(out);
#ifdef DEBUG
		for (psavelen = 0;;psavelen++) {	/* } */
#else
		for (;;) {
#endif
			if (needprompt) {
				setprompt(2);
				needprompt = 0;
			}
			pc = pgetc();
			VTRACE(DBG_LEXER,
			    ("parsebackq() got '%c'(%#.2x) in `` %s", pc&0xFF,
				pc&0x1FF, pc == '`' ? "terminator\n" : ""));
			if (pc == '`')
				break;
			switch (pc) {
			case '\\':
				pc = pgetc();
				VTRACE(DBG_LEXER, ("then '%c'(%#.2x) ",
				    pc&0xFF, pc&0x1FF));
#ifdef DEBUG
				psavelen++;
#endif
				if (pc == '\n') {   /* keep \ \n for later */
					plinno++;
					VTRACE(DBG_LEXER, ("@%d ", plinno));
					needprompt = doprompt;
				}
				/*
				 * The \ is dropped only before \ ` $ (and
				 * before " when inside double quotes).
				 */
				if (pc != '\\' && pc != '`' && pc != '$'
				    && (!ISDBLQUOTE() || pc != '"')) {
					VTRACE(DBG_LEXER, ("keep '\\' "));
					STPUTC('\\', out);
				}
				break;

			case '\n':
				plinno++;
				VTRACE(DBG_LEXER, ("@%d ", plinno));
				needprompt = doprompt;
				break;

			case PEOF:
				startlinno = line1;
				VTRACE(DBG_LEXER, ("EOF\n", plinno));
				synerror("EOF in backquote substitution");
				break;

			default:
				break;
			}
			VTRACE(DBG_LEXER, (".\n", plinno));
			STPUTC(pc, out);
		}
		STPUTC('\0', out);
		VTRACE(DBG_LEXER, ("parsebackq() ``:"));
		VTRACE(DBG_PARSE|DBG_LEXER, (" read %d", psavelen));
		psavelen = out - stackblock();
		VTRACE(DBG_PARSE|DBG_LEXER, (" produced %d\n", psavelen));
		if (psavelen > 0) {
			/* make the collected text the current input */
			pstr = grabstackstr(out);
			CTRACE(DBG_LEXER,
			    ("parsebackq() reprocessing as $(%s)\n", pstr));
			setinputstring(pstr, 1, line1);
		}
	}
	/* append a new, empty, entry at the tail of the caller's list */
	nlpp = pbqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = stalloc(sizeof(struct nodelist));
	(*nlpp)->next = NULL;
	parsebackquote = oldstyle;

	if (oldstyle) {
		saveprompt = doprompt;
		doprompt = 0;
	} else
		saveprompt = 0;

	/* lno ends up as the number of lines the embedded command spans */
	lno = -plinno;
	CTRACE(DBG_LEXER, ("parsebackq() parsing embedded command list\n"));
	n = list(0);
	CTRACE(DBG_LEXER, ("parsebackq() parsed $() (%d -> %d)\n", -lno,
	    lno + plinno));
	lno += plinno;

	if (oldstyle) {
		if (peektoken() != TEOF)
			synexpect(-1, 0);
		doprompt = saveprompt;
	} else
		consumetoken(TRP);

	(*nlpp)->n = n;
	if (oldstyle) {
		/*
		 * Start reading from old file again, ignoring any pushed back
		 * tokens left from the backquote parsing
		 */
		CTRACE(DBG_LEXER, ("parsebackq() back to previous input\n"));
		popfile();
		tokpushback = 0;
	}

	/* rebuild the partial word at the start of a fresh stack string */
	while (stackblocksize() <= savelen)
		growstackblock();
	STARTSTACKSTR(out);
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
		INTOFF;
		ckfree(str);
		sstr = str = NULL;
		INTON;
	}
	parsebackquote = savepbq;
	handler = savehandler;
	if (arinest || ISDBLQUOTE()) {
		STPUTC(CTLBACKQ | CTLQUOTE, out);
		/* one CTLNONL for each line the command substitution used */
		while (--lno >= 0)
			STPUTC(CTLNONL, out);
	} else
		STPUTC(CTLBACKQ, out);

	return out;
}
1556
1557 /*
1558 * Parse a redirection operator. The parameter "out" points to a string
1559 * specifying the fd to be redirected. It is guaranteed to be either ""
1560 * or a numeric string (for now anyway). The parameter "c" contains the
1561 * first character of the redirection operator.
1562 *
1563 * Note the string "out" is on the stack, which we are about to clobber,
1564 * so process it first...
1565 */
1566
1567 static void
1568 parseredir(const char *out, int c)
1569 {
1570 union node *np;
1571 int fd;
1572
1573 np = stalloc(sizeof(struct nfile));
1574
1575 fd = (*out == '\0') ? -1 : number(out); /* number(out) >= 0 */
1576 np->nfile.fd = fd; /* do this again later with updated fd */
1577 if (fd != np->nfile.fd)
1578 error("file descriptor (%d) out of range", fd);
1579
1580 VTRACE(DBG_LEXER, ("parseredir after '%s%c' ", out, c));
1581 if (c == '>') {
1582 if (fd < 0)
1583 fd = 1;
1584 c = pgetc_linecont();
1585 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1586 if (c == '>')
1587 np->type = NAPPEND;
1588 else if (c == '|')
1589 np->type = NCLOBBER;
1590 else if (c == '&')
1591 np->type = NTOFD;
1592 else {
1593 np->type = NTO;
1594 VTRACE(DBG_LEXER, ("unwanted ", c));
1595 pungetc();
1596 }
1597 } else { /* c == '<' */
1598 if (fd < 0)
1599 fd = 0;
1600 c = pgetc_linecont();
1601 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1602 switch (c) {
1603 case '<':
1604 /* if sizes differ, just discard the old one */
1605 if (sizeof (struct nfile) != sizeof (struct nhere))
1606 np = stalloc(sizeof(struct nhere));
1607 np->type = NHERE;
1608 np->nhere.fd = 0;
1609 heredoc = stalloc(sizeof(struct HereDoc));
1610 heredoc->here = np;
1611 heredoc->startline = plinno;
1612 if ((c = pgetc_linecont()) == '-') {
1613 CTRACE(DBG_LEXER, ("and '%c'(%#.2x) ",
1614 c & 0xFF, c & 0x1FF));
1615 heredoc->striptabs = 1;
1616 } else {
1617 heredoc->striptabs = 0;
1618 pungetc();
1619 }
1620 break;
1621
1622 case '&':
1623 np->type = NFROMFD;
1624 break;
1625
1626 case '>':
1627 np->type = NFROMTO;
1628 break;
1629
1630 default:
1631 np->type = NFROM;
1632 VTRACE(DBG_LEXER, ("unwanted('%c'0#.2x)", c&0xFF,
1633 c&0x1FF));
1634 pungetc();
1635 break;
1636 }
1637 }
1638 np->nfile.fd = fd;
1639
1640 VTRACE(DBG_LEXER, (" ->%"PRIdsNT" fd=%d\n", NODETYPENAME(np->type),fd));
1641
1642 redirnode = np; /* this is the "value" of TRENODE */
1643 }
1644
/*
 * Called to parse a backslash escape sequence inside $'...'.
 * The backslash has already been read.
 *
 * "out" is the current end of the stack string being built; the byte(s)
 * the escape stands for are appended there (with CTLESC where needed)
 * and the updated end pointer is returned.  Nothing is appended for
 * \<newline>, nor when the escape produces a NUL (see below).
 * Malformed or unterminated sequences are reported with synerror().
 */
static char *
readcstyleesc(char *out)
{
	int c, vc, i, n;
	unsigned int v;

	c = pgetc();
	VTRACE(DBG_LEXER, ("CSTR(\\%c)(\\%#x)", c&0xFF, c&0x1FF));
	switch (c) {
	case '\0':
	case PEOF:
		synerror("Unterminated quoted string ($'...)");
	case '\n':
		/* \<newline>: nothing is added to the word */
		plinno++;
		VTRACE(DBG_LEXER, ("@%d ", plinno));
		if (doprompt)
			setprompt(2);
		else
			setprompt(0);
		return out;

	case '\\':
	case '\'':
	case '"':
		v = c;
		break;

	case 'a': v = '\a'; break;
	case 'b': v = '\b'; break;
	case 'e': v = '\033'; break;
	case 'f': v = '\f'; break;
	case 'n': v = '\n'; break;
	case 'r': v = '\r'; break;
	case 't': v = '\t'; break;
	case 'v': v = '\v'; break;

	/* octal: one to three digits */
	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
		v = c - '0';
		c = pgetc();
		if (c >= '0' && c <= '7') {
			v <<= 3;
			v += c - '0';
			c = pgetc();
			if (c >= '0' && c <= '7') {
				v <<= 3;
				v += c - '0';
			} else
				pungetc();
		} else
			pungetc();
		break;

	/* \cX: control character; a \ as X must itself be doubled */
	case 'c':
		c = pgetc();
		if (c < 0x3f || c > 0x7a || c == 0x60)
			synerror("Bad \\c escape sequence");
		if (c == '\\' && pgetc() != '\\')
			synerror("Bad \\c\\ escape sequence");
		if (c == '?')
			v = 127;
		else
			v = c & 0x1f;
		break;

	/* hex: up to 2 (\x), 4 (\u) or 8 (\U) digits */
	case 'x':
		n = 2;
		goto hexval;
	case 'u':
		n = 4;
		goto hexval;
	case 'U':
		n = 8;
	hexval:
		/* with no hex digits at all v stays 0, treated as NUL below */
		v = 0;
		for (i = 0; i < n; i++) {
			c = pgetc();
			if (c >= '0' && c <= '9')
				v = (v << 4) + c - '0';
			else if (c >= 'A' && c <= 'F')
				v = (v << 4) + c - 'A' + 10;
			else if (c >= 'a' && c <= 'f')
				v = (v << 4) + c - 'a' + 10;
			else {
				pungetc();
				break;
			}
		}
		/* \u and \U values above 127 are emitted as UTF-8 sequences */
		if (n > 2 && v > 127) {
			if (v >= 0xd800 && v <= 0xdfff)
				synerror("Invalid \\u escape sequence");

			/* XXX should we use iconv here. What locale? */
			/* up to 6 bytes, each possibly preceded by CTLESC */
			CHECKSTRSPACE(12, out);

/*
 * Add a byte to output string, while checking if it needs to
 * be escaped -- if its value happens to match the value of one
 * of our internal CTL* chars - which would (at a minimum) be
 * summarily removed later, if not escaped.
 *
 * The current definition of ISCTL() allows the compiler to
 * optimise away either half, or all, of the test in most of
 * the cases here (0xc0 | anything) cannot be between 0x80 and 0x9f
 * for example, so there a test is not needed).
 *
 * Which tests can be removed depends upon the actual values
 * selected for the CTL* chars.
 */
#define	ESC_USTPUTC(c, o) do {				\
		char _ch = (c);				\
							\
		if (ISCTL(_ch))				\
			USTPUTC(CTLESC, o);		\
		USTPUTC(_ch, o);			\
	} while (0)

			VTRACE(DBG_LEXER, ("CSTR(\\%c%8.8x)", n==4?'u':'U', v));
			if (v <= 0x7ff) {
				ESC_USTPUTC(0xc0 | v >> 6, out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			} else if (v <= 0xffff) {
				ESC_USTPUTC(0xe0 | v >> 12, out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			} else if (v <= 0x10ffff) {
				ESC_USTPUTC(0xf0 | v >> 18, out);
				ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;

	/* these next two are not very likely, but we may as well be complete */
			} else if (v <= 0x3FFFFFF) {
				ESC_USTPUTC(0xf8 | v >> 24, out);
				ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			} else if (v <= 0x7FFFFFFF) {
				ESC_USTPUTC(0xfC | v >> 30, out);
				ESC_USTPUTC(0x80 | ((v >> 24) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
				ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
				ESC_USTPUTC(0x80 | (v & 0x3f), out);
				return out;
			}
			/* too big to encode at all */
			if (v > 127)
				v = '?';
		}
		break;
	default:
		synerror("Unknown $'' escape sequence");
	}
	vc = (char)v;
	VTRACE(DBG_LEXER, ("->%u(%#x)['%c']", v, v, vc&0xFF));

	/*
	 * If we managed to create a \n from a \ sequence (no matter how)
	 * then we replace it with the magic CTLCNL control char, which
	 * will turn into a \n again later, but in the meantime, never
	 * causes LINENO increments.
	 */
	if (vc == '\n') {
		VTRACE(DBG_LEXER, ("CTLCNL."));
		USTPUTC(CTLCNL, out);
		return out;
	}

	/*
	 * We can't handle NUL bytes.
	 * POSIX says we should skip till the closing quote.
	 */
	if (vc == '\0') {
		CTRACE(DBG_LEXER, ("\\0: skip to '", v, v, vc&0xFF));
		while ((c = pgetc()) != '\'') {
			/* a \ hides the next char, so \' does not end it */
			if (c == '\\')
				c = pgetc();
			if (c == PEOF)
				synerror("Unterminated quoted string ($'...)");
			if (c == '\n') {
				plinno++;
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
			}
		}
		/* leave the closing quote for the caller to see */
		pungetc();
		return out;
	}
	CVTRACE(DBG_LEXER, NEEDESC(vc), ("CTLESC-"));
	VTRACE(DBG_LEXER, ("'%c'(%#.2x)", vc&0xFF, vc&0x1FF));
	if (NEEDESC(vc))
		USTPUTC(CTLESC, out);
	USTPUTC(vc, out);
	return out;
}
1851
/*
 * The lowest level basic tokenizer.
 *
 * The next input byte (character) is in firstc, syn says which
 * syntax tables we are to use (basic, single or double quoted, or arith)
 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
 * quote character itself is not special (used parsing here docs and similar)
 * (magicq is initialised from the "oneword" parameter).
 *
 * The result is the type of the next token (its value, when there is one,
 * is saved in the relevant global var - must fix that someday!) which is
 * also saved for re-reading ("lasttoken").
 *
 * Concretely: TREDIR is returned (with "redirnode" set by parseredir())
 * when an unquoted, empty or all-numeric, word is ended by '<' or '>'
 * and magicq is not set; otherwise TWORD is returned with "wordtext",
 * "quoteflag" and "backquotelist" set.  Syntax errors are reported with
 * synerror() after any extra state stack blocks have been released.
 *
 * Overall, this routine does far more parsing than it is supposed to.
 * That will also need fixing, someday...
 */
STATIC int
readtoken1(int firstc, char const *syn, int oneword)
{
	int c;
	char * out;
	int len;
	struct nodelist *bqlist;
	int quotef;
	/* base block of the state stack; more are malloc'd only if needed */
	VSS static_stack;
	VSS *stack = &static_stack;

	stack->prev = NULL;
	stack->cur = 0;

	/* from here on syntax, quoted, varnest, ... refer to the top state */
	syntax = syn;

#ifdef DEBUG
#define SYNTAX ( syntax == BASESYNTAX ? "BASE" :	\
		 syntax == DQSYNTAX ? "DQ" :		\
		 syntax == SQSYNTAX ? "SQ" :		\
		 syntax == ARISYNTAX ? "ARI" :		\
		"???" )
#endif

	startlinno = plinno;
	varnest = 0;
	quoted = 0;
	if (syntax == DQSYNTAX)
		SETDBLQUOTE();
	quotef = 0;
	bqlist = NULL;
	arinest = 0;
	parenlevel = 0;
	elided_nl = 0;
	magicq = oneword;

	CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n",
	    firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "",
	    ISDBLQUOTE()?" ISDBLQUOTE":"", quoted));

	STARTSTACKSTR(out);

	/*
	 * In the switch below, "continue" goes on to the next input char,
	 * "break" leaves the switch and then the loop, ending the token.
	 */
	for (c = firstc ;; c = pgetc_macro()) {	/* until of token */
		if (syntax == ARISYNTAX)
			out = insert_elided_nl(out);
		CHECKSTRSPACE(6, out);	/* permit 6 calls to USTPUTC */
		switch (syntax[c]) {
		case CFAKE:
			VTRACE(DBG_LEXER, ("CFAKE"));
			if (syntax == BASESYNTAX && varnest == 0)
				break;
			VTRACE(DBG_LEXER, (","));
			continue;
		case CNL:	/* '\n' */
			VTRACE(DBG_LEXER, ("CNL"));
			if (syntax == BASESYNTAX && varnest == 0)
				break;	/* exit loop */
			USTPUTC(c, out);
			plinno++;
			VTRACE(DBG_LEXER, ("@%d,", plinno));
			if (doprompt)
				setprompt(2);
			else
				setprompt(0);
			continue;

		case CSBACK:	/* single quoted backslash */
			/* only inside $'...' is the \ an escape introducer */
			if ((quoted & QF) == CQ) {
				out = readcstyleesc(out);
				continue;
			}
			VTRACE(DBG_LEXER, ("ESC:"));
			USTPUTC(CTLESC, out);
			/* FALLTHROUGH */
		case CWORD:
			VTRACE(DBG_LEXER, ("'%c'", c));
			USTPUTC(c, out);
			continue;

		case CCTL:
			CVTRACE(DBG_LEXER, !magicq || ISDBLQUOTE(),
			    ("%s%sESC:",!magicq?"!m":"",ISDBLQUOTE()?"DQ":""));
			if (!magicq || ISDBLQUOTE())
				USTPUTC(CTLESC, out);
			VTRACE(DBG_LEXER, ("'%c'", c));
			USTPUTC(c, out);
			continue;
		case CBACK:	/* backslash */
			c = pgetc();
			VTRACE(DBG_LEXER, ("\\'%c'(%#.2x)", c&0xFF, c&0x1FF));
			if (c == PEOF) {
				VTRACE(DBG_LEXER, ("EOF, keep \\ "));
				USTPUTC('\\', out);
				pungetc();
				continue;
			}
			if (c == '\n') {
				/* \<newline> vanishes, but is counted */
				plinno++;
				elided_nl++;
				VTRACE(DBG_LEXER, ("eli \\n (%d) @%d ",
				    elided_nl, plinno));
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
				continue;
			}
			CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
			quotef = 1;	/* current token is quoted */
			if (quoted && c != '\\' && c != '`' &&
			    (c != '}' || varnest == 0) &&
			    c != '$' && (c != '"' || magicq)) {
				/*
				 * retain the \ (which we *know* needs CTLESC)
				 * when in "..." and the following char is
				 * not one of the magic few.)
				 * Otherwise the \ has done its work, and
				 * is dropped.
				 */
				VTRACE(DBG_LEXER, ("ESC:'\\'"));
				USTPUTC(CTLESC, out);
				USTPUTC('\\', out);
			}
			CVTRACE(DBG_LEXER, NEEDESC(c) || !magicq,
			    ("%sESC:", NEEDESC(c) ? "+" : "m"));
			VTRACE(DBG_LEXER, ("'%c'(%#.2x)", c&0xFF, c&0x1FF));
			if (NEEDESC(c))
				USTPUTC(CTLESC, out);
			else if (!magicq) {
				USTPUTC(CTLESC, out);
				USTPUTC(c, out);
				continue;
			}
			USTPUTC(c, out);
			continue;
		case CSQUOTE:
			if (syntax != SQSYNTAX) {
				/* opening ' : push a single quoted level */
				CVTRACE(DBG_LEXER, !magicq, (" CQM "));
				if (!magicq)
					USTPUTC(CTLQUOTEMARK, out);
				CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
				quotef = 1;
				TS_PUSH();
				syntax = SQSYNTAX;
				quoted = SQ;
				VTRACE(DBG_LEXER, (" TS_PUSH(SQ)"));
				continue;
			}
			if (magicq && arinest == 0 && varnest == 0) {
				/* Ignore inside quoted here document */
				VTRACE(DBG_LEXER, ("<<'>>"));
				USTPUTC(c, out);
				continue;
			}
			/* End of single quotes... */
			TS_POP();
			VTRACE(DBG_LEXER, ("SQ TS_POP->%s ", SYNTAX));
			CVTRACE(DBG_LEXER, syntax == BASESYNTAX, (" CQE "));
			if (syntax == BASESYNTAX)
				USTPUTC(CTLQUOTEEND, out);
			continue;
		case CDQUOTE:
			if (magicq && arinest == 0 /* && varnest == 0 */) {
				VTRACE(DBG_LEXER, ("<<\">>"));
				/* Ignore inside here document */
				USTPUTC(c, out);
				continue;
			}
			CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
			quotef = 1;
			if (arinest) {
				/* " inside $(( )): closes or opens a level */
				if (ISDBLQUOTE()) {
					VTRACE(DBG_LEXER,
					    (" CQE ari(%d", arinest));
					USTPUTC(CTLQUOTEEND, out);
					TS_POP();
					VTRACE(DBG_LEXER, ("%d)TS_POP->%s ",
					    arinest, SYNTAX));
				} else {
					VTRACE(DBG_LEXER,
					    (" ari(%d) %s TS_PUSH->DQ CQM ",
					    arinest, SYNTAX));
					TS_PUSH();
					syntax = DQSYNTAX;
					SETDBLQUOTE();
					USTPUTC(CTLQUOTEMARK, out);
				}
				continue;
			}
			CVTRACE(DBG_LEXER, magicq, (" MQignDQ "));
			if (magicq)
				continue;
			if (ISDBLQUOTE()) {
				/* closing " of the quote opened at this level */
				TS_POP();
				VTRACE(DBG_LEXER,
				    (" DQ TS_POP->%s CQE ", SYNTAX));
				USTPUTC(CTLQUOTEEND, out);
			} else {
				/* opening " : push a double quoted level */
				VTRACE(DBG_LEXER,
				    (" %s TS_POP->DQ CQM ", SYNTAX));
				TS_PUSH();
				syntax = DQSYNTAX;
				SETDBLQUOTE();
				USTPUTC(CTLQUOTEMARK, out);
			}
			continue;
		case CVAR:	/* '$' */
			VTRACE(DBG_LEXER, ("'$'..."));
			out = insert_elided_nl(out);
			PARSESUB();		/* parse substitution */
			continue;
		case CENDVAR:	/* CLOSEBRACE */
			if (varnest > 0 && !ISDBLQUOTE()) {
				VTRACE(DBG_LEXER, ("vn=%d !DQ", varnest));
				TS_POP();
				VTRACE(DBG_LEXER, (" TS_POP->%s CEV ", SYNTAX));
				USTPUTC(CTLENDVAR, out);
			} else {
				VTRACE(DBG_LEXER, ("'%c'", c));
				USTPUTC(c, out);
			}
			out = insert_elided_nl(out);
			continue;
		case CLP:	/* '(' in arithmetic */
			parenlevel++;
			VTRACE(DBG_LEXER, ("'('(%d)", parenlevel));
			USTPUTC(c, out);
			continue;
		case CRP:	/* ')' in arithmetic */
			if (parenlevel > 0) {
				USTPUTC(c, out);
				--parenlevel;
				VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
			} else {
				/* unmatched ')': must be first of the "))" */
				VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
				if (pgetc_linecont() == /*(*/ ')') {
					out = insert_elided_nl(out);
					if (--arinest == 0) {
						TS_POP();
						USTPUTC(CTLENDARI, out);
					} else
						USTPUTC(/*(*/ ')', out);
				} else {
					break;	/* to synerror() just below */
#if 0	/* the old way, causes weird errors on bad input */
					/*
					 * unbalanced parens
					 *  (don't 2nd guess - no error)
					 */
					pungetc();
					USTPUTC(/*(*/ ')', out);
#endif
				}
			}
			continue;
		case CBQUOTE:	/* '`' */
			VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n"));
			out = parsebackq(stack, out, &bqlist, 1);
			VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: "));
			continue;
		case CEOF:		/* --> c == PEOF */
			VTRACE(DBG_LEXER, ("EOF "));
			break;		/* will exit loop */
		default:
			VTRACE(DBG_LEXER, ("['%c'(%#.2x)]", c&0xFF, c&0x1FF));
			if (varnest == 0 && !ISDBLQUOTE())
				break;	/* exit loop */
			USTPUTC(c, out);
			VTRACE(DBG_LEXER, (","));
			continue;
		}
		VTRACE(DBG_LEXER, (" END TOKEN\n", c&0xFF, c&0x1FF));
		break;	/* break from switch -> break from for loop too */
	}

	/* the token ended: any state still open at this point is an error */
	if (syntax == ARISYNTAX) {
		cleanup_state_stack(stack);
		synerror(/*((*/ "Missing '))'");
	}
	if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
		cleanup_state_stack(stack);
		synerror("Unterminated quoted string");
	}
	if (varnest != 0) {
		cleanup_state_stack(stack);
		startlinno = plinno;
		/* { */
		synerror("Missing '}'");
	}

	STPUTC('\0', out);
	len = out - stackblock();
	out = stackblock();

	if (!magicq) {
		/* an unquoted "" or number followed by < or > is a redirect */
		if ((c == '<' || c == '>')
		 && quotef == 0 && (*out == '\0' || is_number(out))) {
			parseredir(out, c);
			cleanup_state_stack(stack);
			return lasttoken = TREDIR;
		} else {
			/* the terminating char is not part of this word */
			pungetc();
		}
	}

	VTRACE(DBG_PARSE|DBG_LEXER,
	    ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
	    (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
	    len, elided_nl));

	quoteflag = quotef;
	backquotelist = bqlist;
	grabstackblock(len);
	wordtext = out;
	cleanup_state_stack(stack);
	return lasttoken = TWORD;
/* end of readtoken routine */


/*
 * Parse a substitution.  At this point, we have read the dollar sign
 * and nothing else.
 *
 * Reached only via PARSESUB(), and finishes by jumping back to the
 * parsesub_return label which that macro defines.
 */

parsesub: {
	int subtype;
	int typeloc;
	int flags;
	char *p;
	/* operator chars, in the order of the VS* values from VSNORMAL up */
	static const char types[] = "}-+?=";

	c = pgetc_linecont();
	VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF));
	if (c == '(' /*)*/) {	/* $(command) or $((arith)) */
		if (pgetc_linecont() == '(' /*')'*/ ) {
			VTRACE(DBG_LEXER, ("\"$((\" ARITH "));
			out = insert_elided_nl(out);
			PARSEARITH();
		} else {
			VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n"));
			out = insert_elided_nl(out);
			pungetc();
			out = parsebackq(stack, out, &bqlist, 0);
			VTRACE(DBG_LEXER, ("parseback()->readtoken1(): "));
		}
	} else if (c == OPENBRACE || is_name(c) || is_special(c)) {
		VTRACE(DBG_LEXER, (" $EXP:CTLVAR "));
		USTPUTC(CTLVAR, out);
		/* remember where the type byte is, it is filled in later */
		typeloc = out - stackblock();
		USTPUTC(VSNORMAL, out);
		subtype = VSNORMAL;
		flags = 0;
		if (c == OPENBRACE) {
			c = pgetc_linecont();
			if (c == '#') {
				if ((c = pgetc_linecont()) == CLOSEBRACE)
					c = '#';
				else if (is_name(c) || isdigit(c))
					subtype = VSLENGTH;
				else if (is_special(c)) {
					/*
					 * ${#} is $# - the number of sh params
					 * ${##} is the length of ${#}
					 * ${###} is ${#} with as much nothing
					 *        as possible removed from start
					 * ${##1} is ${#} with leading 1 gone
					 * ${##\#} is ${#} with leading # gone
					 *
					 * this stuff is UGLY!
					 */
					if (pgetc_linecont() == CLOSEBRACE) {
						pungetc();
						subtype = VSLENGTH;
					} else {
						static char cbuf[2];

						pungetc();   /* would like 2 */
						cbuf[0] = c; /* so ... */
						cbuf[1] = '\0';
						pushstring(cbuf, 1, NULL);
						c = '#';     /* ${#:...} */
						subtype = 0; /* .. or similar */
					}
				} else {
					pungetc();
					c = '#';
					subtype = 0;
				}
			}
			else
				subtype = 0;
			VTRACE(DBG_LEXER, ("${ st=%d ", subtype));
		}
		/* copy the parameter: a name, a number, or a special char */
		if (is_name(c)) {
			p = out;
			do {
				VTRACE(DBG_LEXER, ("%c", c));
				STPUTC(c, out);
				c = pgetc_linecont();
			} while (is_in_name(c));

#if 0
			if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
				int i;
				int linno;
				char buf[10];

				/*
				 * The "LINENO hack"
				 *
				 * Replace the variable name with the
				 * current line number.
				 */
				linno = plinno;
				if (funclinno != 0)
					linno -= funclinno - 1;
				snprintf(buf, sizeof(buf), "%d", linno);
				STADJUST(-6, out);
				for (i = 0; buf[i] != '\0'; i++)
					STPUTC(buf[i], out);
				flags |= VSLINENO;
			}
#endif
		} else if (is_digit(c)) {
			/* without braces ($1) only one digit is taken */
			do {
				VTRACE(DBG_LEXER, ("%c", c));
				STPUTC(c, out);
				c = pgetc_linecont();
			} while (subtype != VSNORMAL && is_digit(c));
		}
		else if (is_special(c)) {
			VTRACE(DBG_LEXER, ("\"$%c", c));
			USTPUTC(c, out);
			c = pgetc_linecont();
		}
		else {
			VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF));
 badsub:
			cleanup_state_stack(stack);
			synerror("Bad substitution");
		}

		STPUTC('=', out);
		if (subtype == 0) {
			/* inside ${ }: c is (the start of) the operator */
			switch (c) {
			case ':':
				flags |= VSNUL;
				c = pgetc_linecont();
				/*FALLTHROUGH*/
			default:
				p = strchr(types, c);
				if (p == NULL)
					goto badsub;
				subtype = p - types + VSNORMAL;
				break;
			case '%':
			case '#':
				{
					int cc = c;
					subtype = c == '#' ? VSTRIMLEFT :
							     VSTRIMRIGHT;
					/* doubled (## or %%) is the next type */
					c = pgetc_linecont();
					if (c == cc)
						subtype++;
					else
						pungetc();
					break;
				}
			}
		} else {
			if (subtype == VSLENGTH && c != /*{*/ '}')
				synerror("no modifiers allowed with ${#var}");
			pungetc();
		}
		if (quoted || arinest)
			flags |= VSQUOTE;
		if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
			flags |= VSPATQ;
		VTRACE(DBG_LEXER, (" st%d:%x", subtype, flags));
		*(stackblock() + typeloc) = subtype | flags;
		if (subtype != VSNORMAL) {
			/* a word or pattern follows, parse it at a new level */
			TS_PUSH();
			varnest++;
			arinest = 0;
			if (subtype > VSASSIGN) {	/* # ## % %% */
				syntax = BASESYNTAX;
				quoted = 0;
				magicq = 0;
			}
			VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ",
			    SYNTAX, varnest, quoted ? " Q" : ""));
		}
	} else if (c == '\'' && syntax == BASESYNTAX) {
		/* $'...' : single quote syntax with C style escapes (CQ) */
		USTPUTC(CTLQUOTEMARK, out);
		VTRACE(DBG_LEXER, (" CSTR \"$'\" CQM "));
		CVTRACE(DBG_LEXER, quotef==0, ("QF=1 "));
		quotef = 1;
		TS_PUSH();
		syntax = SQSYNTAX;
		quoted = CQ;
		VTRACE(DBG_LEXER, ("%s->TS_PUSH()->SQ ", SYNTAX));
	} else {
		/* a '$' which introduces nothing is just a '$' */
		VTRACE(DBG_LEXER, ("$unk -> '$' (pushback '%c'%#.2x)",
		    c & 0xFF, c & 0x1FF));
		USTPUTC('$', out);
		pungetc();
	}
	goto parsesub_return;
}


/*
 * Parse an arithmetic expansion (indicate start of one and set state)
 *
 * Reached only via PARSEARITH(), and finishes by jumping back to the
 * parsearith_return label which that macro defines.
 */
parsearith: {

#if 0
	if (syntax == ARISYNTAX) {
		/*
		 * we collapse embedded arithmetic expansion to
		 * parentheses, which should be equivalent
		 *
		 *	XXX It isn't, must fix, soonish...
		 */
		USTPUTC('(' /*)*/, out);
		USTPUTC('(' /*)*/, out);
		/*
		 * Need 2 of them because there will (should be)
		 * two closing ))'s to follow later.
		 */
		parenlevel += 2;
	} else
#endif
	{
		VTRACE(DBG_LEXER, (" CTLARI%c ", ISDBLQUOTE()?'"':'_'));
		USTPUTC(CTLARI, out);
		/* the byte after CTLARI records whether we were in "..." */
		if (ISDBLQUOTE())
			USTPUTC('"',out);
		else
			USTPUTC(' ',out);

		VTRACE(DBG_LEXER, ("%s->TS_PUSH->ARI(1)", SYNTAX));
		TS_PUSH();
		syntax = ARISYNTAX;
		arinest = 1;
		varnest = 0;
		magicq = 1;
	}
	goto parsearith_return;
}

} /* end of readtoken */
2419
2420
2421
2422
/*
 * Section guarded by the mkinit symbol, so it takes no part in an
 * ordinary compile of this file.
 *
 * The RESET body points psp.v_current_parser back at the global
 * parse_state, then sets ps_heredoclist to NULL and zeroes
 * ps_checkkwd and ps_tokpushback in that structure.
 *
 * NOTE(review): presumably this text is collected by the mkinit tool
 * into the shell's reset code -- confirm against the mkinit sources.
 */
#ifdef mkinit
INCLUDE "parser.h"

RESET {
	psp.v_current_parser = &parse_state;

	parse_state.ps_heredoclist = NULL;
	parse_state.ps_checkkwd = 0;
	parse_state.ps_tokpushback = 0;
}
#endif
2434
2435 /*
2436 * Returns true if the text contains nothing to expand (no dollar signs
2437 * or backquotes).
2438 */
2439
2440 STATIC int
2441 noexpand(char *text)
2442 {
2443 char *p;
2444 char c;
2445
2446 p = text;
2447 while ((c = *p++) != '\0') {
2448 if (c == CTLQUOTEMARK || c == CTLQUOTEEND)
2449 continue;
2450 if (c == CTLESC)
2451 p++;
2452 else if (ISCTL(c))
2453 return 0;
2454 }
2455 return 1;
2456 }
2457
2458
/*
 * Check that "name" is a legal variable name: the first character must
 * satisfy is_name() (a letter or underscore) and every following
 * character, up to the terminating NUL, must satisfy is_in_name()
 * (letters, underscores and digits).
 *
 * Returns 1 for a legal name, 0 otherwise.
 */

int
goodname(const char *name)
{
	const char *cp = name;

	if (!is_name(*cp))
		return 0;

	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
2478
/*
 * Check whether the string at "p" has the form of a variable assignment:
 * a first character accepted by is_name(), then zero or more characters
 * accepted by is_in_name(), then an '='.
 *
 * Returns 1 if the '=' is reached, 0 if the string ends or a character
 * that cannot be part of a name is seen first.
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;

	for (;;) {
		p++;
		if (*p == '=')
			return 1;
		if (*p == '\0' || !is_in_name(*p))
			return 0;
	}
}
2489
2490 /*
2491 * skip past any \n's, and leave lasttoken set to whatever follows
2492 */
2493 STATIC void
2494 linebreak(void)
2495 {
2496 while (readtoken() == TNL)
2497 ;
2498 }
2499
2500 /*
2501 * The next token must be "token" -- check, then move past it
2502 */
2503 STATIC void
2504 consumetoken(int token)
2505 {
2506 if (readtoken() != token) {
2507 VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s",
2508 token, tokname[token], tokname[lasttoken]));
2509 CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext));
2510 VTRACE(DBG_PARSE, ("\n"));
2511 synexpect(token, NULL);
2512 }
2513 }
2514
2515 /*
2516 * Called when an unexpected token is read during the parse. The argument
2517 * is the token that is expected, or -1 if more than one type of token can
2518 * occur at this point.
2519 */
2520
2521 STATIC void
2522 synexpect(int token, const char *text)
2523 {
2524 char msg[64];
2525 char *p;
2526
2527 if (lasttoken == TWORD) {
2528 size_t len = strlen(wordtext);
2529
2530 if (len <= 13)
2531 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2532 else
2533 fmtstr(msg, 34,
2534 "Word \"%.10s...\" unexpected", wordtext);
2535 } else
2536 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2537
2538 p = strchr(msg, '\0');
2539 if (text)
2540 fmtstr(p, 30, " (expecting \"%.10s\")", text);
2541 else if (token >= 0)
2542 fmtstr(p, 30, " (expecting %s)", tokname[token]);
2543
2544 synerror(msg);
2545 /* NOTREACHED */
2546 }
2547
2548
2549 STATIC void
2550 synerror(const char *msg)
2551 {
2552 error("%d: Syntax error: %s", startlinno, msg);
2553 /* NOTREACHED */
2554 }
2555
2556 STATIC void
2557 setprompt(int which)
2558 {
2559 whichprompt = which;
2560
2561 #ifndef SMALL
2562 if (!el)
2563 #endif
2564 out2str(getprompt(NULL));
2565 }
2566
2567 /*
2568 * handle getting the next character, while ignoring \ \n
2569 * (which is a little tricky as we only have one char of pushback
2570 * and we need that one elsewhere).
2571 */
2572 STATIC int
2573 pgetc_linecont(void)
2574 {
2575 int c;
2576
2577 while ((c = pgetc()) == '\\') {
2578 c = pgetc();
2579 if (c == '\n') {
2580 plinno++;
2581 elided_nl++;
2582 VTRACE(DBG_LEXER, ("\"\\n\"drop(el=%d@%d)",
2583 elided_nl, plinno));
2584 if (doprompt)
2585 setprompt(2);
2586 else
2587 setprompt(0);
2588 } else {
2589 pungetc();
2590 /* Allow the backslash to be pushed back. */
2591 pushstring("\\", 1, NULL);
2592 return (pgetc());
2593 }
2594 }
2595 return (c);
2596 }
2597
2598 /*
2599 * called by editline -- any expansions to the prompt
2600 * should be added here.
2601 */
2602 const char *
2603 getprompt(void *unused)
2604 {
2605 char *p;
2606 const char *cp;
2607 int wp;
2608
2609 if (!doprompt)
2610 return "";
2611
2612 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt));
2613
2614 switch (wp = whichprompt) {
2615 case 0:
2616 return "";
2617 case 1:
2618 p = ps1val();
2619 break;
2620 case 2:
2621 p = ps2val();
2622 break;
2623 default:
2624 return "<internal prompt error>";
2625 }
2626 if (p == NULL)
2627 return "";
2628
2629 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p));
2630
2631 cp = expandstr(p, plinno);
2632 whichprompt = wp; /* history depends on it not changing */
2633
2634 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp));
2635
2636 return cp;
2637 }
2638
2639 /*
2640 * Expand a string ... used for expanding prompts (PS1...)
2641 *
2642 * Never return NULL, always some string (return input string if invalid)
2643 *
2644 * The internal routine does the work, leaving the result on the
2645 * stack (or in a static string, or even the input string) and
2646 * handles parser recursion, and cleanup after an error while parsing.
2647 *
2648 * The visible interface copies the result off the stack (if it is there),
2649 * and handles stack management, leaving the stack in the exact same
2650 * state it was when expandstr() was called (so it can be used part way
2651 * through building a stack data structure - as in when PS2 is being
2652 * expanded half way through reading a "command line")
2653 *
2654 * on error, expandonstack() cleans up the parser state, but then
2655 * simply jumps out through expandstr() withut doing any stack cleanup,
2656 * which is OK, as the error handler must deal with that anyway.
2657 *
2658 * The split into two funcs is to avoid problems with setjmp/longjmp
2659 * and local variables which could otherwise be optimised into bizarre
2660 * behaviour.
2661 */
2662 static const char *
2663 expandonstack(char *ps, int cmdsub, int lineno)
2664 {
2665 union node n;
2666 struct jmploc jmploc;
2667 struct jmploc *const savehandler = handler;
2668 struct parsefile *const savetopfile = getcurrentfile();
2669 const int save_x = xflag;
2670 const int save_e_s = errors_suppressed;
2671 struct parse_state new_state = init_parse_state;
2672 struct parse_state *const saveparser = psp.v_current_parser;
2673 const char *result = NULL;
2674
2675 if (!setjmp(jmploc.loc)) {
2676 handler = &jmploc;
2677 errors_suppressed = 1;
2678
2679 psp.v_current_parser = &new_state;
2680 setinputstring(ps, 1, lineno);
2681
2682 readtoken1(pgetc(), DQSYNTAX, 1);
2683 if (backquotelist != NULL) {
2684 if (!cmdsub)
2685 result = ps;
2686 else if (!promptcmds)
2687 result = "-o promptcmds not set: ";
2688 }
2689 if (result == NULL) {
2690 n.narg.type = NARG;
2691 n.narg.next = NULL;
2692 n.narg.text = wordtext;
2693 n.narg.lineno = lineno;
2694 n.narg.backquote = backquotelist;
2695
2696 xflag = 0; /* we might be expanding PS4 ... */
2697 expandarg(&n, NULL, 0);
2698 result = stackblock();
2699 }
2700 } else {
2701 psp.v_current_parser = saveparser;
2702 xflag = save_x;
2703 popfilesupto(savetopfile);
2704 handler = savehandler;
2705 errors_suppressed = save_e_s;
2706
2707 if (exception == EXEXIT)
2708 longjmp(handler->loc, 1);
2709 if (exception == EXINT)
2710 exraise(SIGINT);
2711 return "";
2712 }
2713 psp.v_current_parser = saveparser;
2714 xflag = save_x;
2715 popfilesupto(savetopfile);
2716 handler = savehandler;
2717 errors_suppressed = save_e_s;
2718
2719 if (result == NULL)
2720 result = ps;
2721
2722 return result;
2723 }
2724
2725 const char *
2726 expandstr(char *ps, int lineno)
2727 {
2728 const char *result = NULL;
2729 struct stackmark smark;
2730 static char *buffer = NULL; /* storage for prompt, never freed */
2731 static size_t bufferlen = 0;
2732
2733 setstackmark(&smark);
2734 /*
2735 * At this point we anticipate that there may be a string
2736 * growing on the stack, but we have no idea how big it is.
2737 * However we know that it cannot be bigger than the current
2738 * allocated stack block, so simply reserve the whole thing,
2739 * then we can use the stack without barfing all over what
2740 * is there already... (the stack mark undoes this later.)
2741 */
2742 (void) stalloc(stackblocksize());
2743
2744 result = expandonstack(ps, 1, lineno);
2745
2746 if (__predict_true(result == stackblock())) {
2747 size_t len = strlen(result) + 1;
2748
2749 /*
2750 * the result (usual case) is on the stack, which we
2751 * are just about to discard (popstackmark()) so we
2752 * need to move it somewhere safe first.
2753 */
2754
2755 if (__predict_false(len > bufferlen)) {
2756 char *new;
2757 size_t newlen = bufferlen;
2758
2759 if (__predict_false(len > (SIZE_MAX >> 4))) {
2760 result = "huge prompt: ";
2761 goto getout;
2762 }
2763
2764 if (newlen == 0)
2765 newlen = 32;
2766 while (newlen <= len)
2767 newlen <<= 1;
2768
2769 new = (char *)realloc(buffer, newlen);
2770
2771 if (__predict_false(new == NULL)) {
2772 /*
2773 * this should rarely (if ever) happen
2774 * but we must do something when it does...
2775 */
2776 result = "No mem for prompt: ";
2777 goto getout;
2778 } else {
2779 buffer = new;
2780 bufferlen = newlen;
2781 }
2782 }
2783 (void)memcpy(buffer, result, len);
2784 result = buffer;
2785 }
2786
2787 getout:;
2788 popstackmark(&smark);
2789
2790 return result;
2791 }
2792
/*
 * A simpler variant of expandstr() which does no $( ) expansions
 * (expandonstack() is called with cmdsub == 0, and line number 0).
 *
 * It is for use during shell startup, when we know we are not parsing
 * and so the stack is not in use: we can do what we like with it and
 * do not need to clean up (that is handled externally).
 *
 * The result is simply returned, even if it is on the stack.
 */
const char *
expandenv(char *arg)
{
	const char *expanded;

	expanded = expandonstack(arg, 0, 0);
	return expanded;
}
2806