/*	$NetBSD: parser.c,v 1.166 2019/02/09 09:50:31 kre Exp $	*/
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.166 2019/02/09 09:50:31 kre Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <limits.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48
49 #include "shell.h"
50 #include "parser.h"
51 #include "nodes.h"
52 #include "expand.h" /* defines rmescapes() */
53 #include "eval.h" /* defines commandname */
54 #include "syntax.h"
55 #include "options.h"
56 #include "input.h"
57 #include "output.h"
58 #include "var.h"
59 #include "error.h"
60 #include "memalloc.h"
61 #include "mystring.h"
62 #include "alias.h"
63 #include "show.h"
64 #ifndef SMALL
65 #include "myhistedit.h"
66 #endif
67 #ifdef DEBUG
68 #include "nodenames.h"
69 #endif
70
71 /*
72 * Shell command parser.
73 */
74
75 /* values returned by readtoken */
76 #include "token.h"
77
78 #define OPENBRACE '{'
79 #define CLOSEBRACE '}'
80
/*
 * One here document (<<) that has been seen on a command line but whose
 * body has not yet been read.  Entries are chained through 'next'.
 */
struct HereDoc {
	struct HereDoc	*next;		/* following here document in the list */
	union node	*here;		/* the redirection node it belongs to */
	char		*eofmark;	/* string that marks the end of input */
	int		striptabs;	/* non-zero: strip leading tabs */
	int		startline;	/* line number where the << was seen */
};
88
89 MKINIT struct parse_state parse_state;
90 union parse_state_p psp = { .c_current_parser = &parse_state };
91
92 static const struct parse_state init_parse_state = { /* all 0's ... */
93 .ps_heredoclist = NULL,
94 .ps_parsebackquote = 0,
95 .ps_doprompt = 0,
96 .ps_needprompt = 0,
97 .ps_lasttoken = 0,
98 .ps_tokpushback = 0,
99 .ps_wordtext = NULL,
100 .ps_checkkwd = 0,
101 .ps_redirnode = NULL,
102 .ps_heredoc = NULL,
103 .ps_quoteflag = 0,
104 .ps_startlinno = 0,
105 .ps_funclinno = 0,
106 .ps_elided_nl = 0,
107 };
108
109 STATIC union node *list(int);
110 STATIC union node *andor(void);
111 STATIC union node *pipeline(void);
112 STATIC union node *command(void);
113 STATIC union node *simplecmd(union node **, union node *);
114 STATIC union node *makeword(int);
115 STATIC void parsefname(void);
116 STATIC int slurp_heredoc(char *const, const int, const int);
117 STATIC void readheredocs(void);
118 STATIC int peektoken(void);
119 STATIC int readtoken(void);
120 STATIC int xxreadtoken(void);
121 STATIC int readtoken1(int, char const *, int);
122 STATIC int noexpand(char *);
123 STATIC void linebreak(void);
124 STATIC void consumetoken(int);
125 STATIC void synexpect(int, const char *) __dead;
126 STATIC void synerror(const char *) __dead;
127 STATIC void setprompt(int);
128 STATIC int pgetc_linecont(void);
129
/* Message used whenever input ends in the middle of a here document. */
static const char EOFhere[] = "EOF reading here (<<) document";

#ifdef DEBUG
/* Incremented while parsecmd() is reading tokens, decremented afterwards. */
int parsing = 0;
#endif
135
136 /*
137 * Read and parse a command. Returns NEOF on end of file. (NULL is a
138 * valid parse tree indicating a blank line.)
139 */
140
141 union node *
142 parsecmd(int interact)
143 {
144 int t;
145 union node *n;
146
147 #ifdef DEBUG
148 parsing++;
149 #endif
150 tokpushback = 0;
151 checkkwd = 0;
152 doprompt = interact;
153 if (doprompt)
154 setprompt(1);
155 else
156 setprompt(0);
157 needprompt = 0;
158 t = readtoken();
159 #ifdef DEBUG
160 parsing--;
161 #endif
162 if (t == TEOF)
163 return NEOF;
164 if (t == TNL)
165 return NULL;
166
167 #ifdef DEBUG
168 parsing++;
169 #endif
170 tokpushback++;
171 n = list(1);
172 #ifdef DEBUG
173 parsing--;
174 #endif
175 if (heredoclist)
176 error("%d: Here document (<<%s) expected but not present",
177 heredoclist->startline, heredoclist->eofmark);
178 return n;
179 }
180
181
182 STATIC union node *
183 list(int nlflag)
184 {
185 union node *ntop, *n1, *n2, *n3;
186 int tok;
187
188 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno));
189
190 checkkwd = CHKNL | CHKKWD | CHKALIAS;
191 if (nlflag == 0 && tokendlist[peektoken()])
192 return NULL;
193 ntop = n1 = NULL;
194 for (;;) {
195 n2 = andor();
196 tok = readtoken();
197 if (tok == TBACKGND) {
198 if (n2->type == NCMD || n2->type == NPIPE)
199 n2->ncmd.backgnd = 1;
200 else if (n2->type == NREDIR)
201 n2->type = NBACKGND;
202 else {
203 n3 = stalloc(sizeof(struct nredir));
204 n3->type = NBACKGND;
205 n3->nredir.n = n2;
206 n3->nredir.redirect = NULL;
207 n2 = n3;
208 }
209 }
210
211 if (ntop == NULL)
212 ntop = n2;
213 else if (n1 == NULL) {
214 n1 = stalloc(sizeof(struct nbinary));
215 n1->type = NSEMI;
216 n1->nbinary.ch1 = ntop;
217 n1->nbinary.ch2 = n2;
218 ntop = n1;
219 } else {
220 n3 = stalloc(sizeof(struct nbinary));
221 n3->type = NSEMI;
222 n3->nbinary.ch1 = n1->nbinary.ch2;
223 n3->nbinary.ch2 = n2;
224 n1->nbinary.ch2 = n3;
225 n1 = n3;
226 }
227
228 switch (tok) {
229 case TBACKGND:
230 case TSEMI:
231 tok = readtoken();
232 /* FALLTHROUGH */
233 case TNL:
234 if (tok == TNL) {
235 readheredocs();
236 if (nlflag)
237 return ntop;
238 } else if (tok == TEOF && nlflag)
239 return ntop;
240 else
241 tokpushback++;
242
243 checkkwd = CHKNL | CHKKWD | CHKALIAS;
244 if (!nlflag && tokendlist[peektoken()])
245 return ntop;
246 break;
247 case TEOF:
248 pungetc(); /* push back EOF on input */
249 return ntop;
250 default:
251 if (nlflag)
252 synexpect(-1, 0);
253 tokpushback++;
254 return ntop;
255 }
256 }
257 }
258
259 STATIC union node *
260 andor(void)
261 {
262 union node *n1, *n2, *n3;
263 int t;
264
265 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
266
267 n1 = pipeline();
268 for (;;) {
269 if ((t = readtoken()) == TAND) {
270 t = NAND;
271 } else if (t == TOR) {
272 t = NOR;
273 } else {
274 tokpushback++;
275 return n1;
276 }
277 n2 = pipeline();
278 n3 = stalloc(sizeof(struct nbinary));
279 n3->type = t;
280 n3->nbinary.ch1 = n1;
281 n3->nbinary.ch2 = n2;
282 n1 = n3;
283 }
284 }
285
286 STATIC union node *
287 pipeline(void)
288 {
289 union node *n1, *n2, *pipenode;
290 struct nodelist *lp, *prev;
291 int negate;
292
293 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
294
295 negate = 0;
296 checkkwd = CHKNL | CHKKWD | CHKALIAS;
297 while (readtoken() == TNOT) {
298 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
299 #ifndef BOGUS_NOT_COMMAND
300 if (posix && negate)
301 synerror("2nd \"!\" unexpected");
302 #endif
303 negate++;
304 }
305 tokpushback++;
306 n1 = command();
307 if (readtoken() == TPIPE) {
308 pipenode = stalloc(sizeof(struct npipe));
309 pipenode->type = NPIPE;
310 pipenode->npipe.backgnd = 0;
311 lp = stalloc(sizeof(struct nodelist));
312 pipenode->npipe.cmdlist = lp;
313 lp->n = n1;
314 do {
315 prev = lp;
316 lp = stalloc(sizeof(struct nodelist));
317 lp->n = command();
318 prev->next = lp;
319 } while (readtoken() == TPIPE);
320 lp->next = NULL;
321 n1 = pipenode;
322 }
323 tokpushback++;
324 if (negate) {
325 CTRACE(DBG_PARSE, ("%snegate pipeline\n",
326 (negate&1) ? "" : "double "));
327 n2 = stalloc(sizeof(struct nnot));
328 n2->type = (negate & 1) ? NNOT : NDNOT;
329 n2->nnot.com = n1;
330 return n2;
331 } else
332 return n1;
333 }
334
335
336
337 STATIC union node *
338 command(void)
339 {
340 union node *n1, *n2;
341 union node *ap, **app;
342 union node *cp, **cpp;
343 union node *redir, **rpp;
344 int t;
345 #ifdef BOGUS_NOT_COMMAND
346 int negate = 0;
347 #endif
348
349 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
350
351 checkkwd = CHKNL | CHKKWD | CHKALIAS;
352 redir = NULL;
353 n1 = NULL;
354 rpp = &redir;
355
356 /* Check for redirection which may precede command */
357 while (readtoken() == TREDIR) {
358 *rpp = n2 = redirnode;
359 rpp = &n2->nfile.next;
360 parsefname();
361 }
362 tokpushback++;
363
364 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */
365 while (readtoken() == TNOT) {
366 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
367 negate++;
368 }
369 tokpushback++;
370 #endif
371
372 switch (readtoken()) {
373 case TIF:
374 n1 = stalloc(sizeof(struct nif));
375 n1->type = NIF;
376 n1->nif.test = list(0);
377 consumetoken(TTHEN);
378 n1->nif.ifpart = list(0);
379 n2 = n1;
380 while (readtoken() == TELIF) {
381 n2->nif.elsepart = stalloc(sizeof(struct nif));
382 n2 = n2->nif.elsepart;
383 n2->type = NIF;
384 n2->nif.test = list(0);
385 consumetoken(TTHEN);
386 n2->nif.ifpart = list(0);
387 }
388 if (lasttoken == TELSE)
389 n2->nif.elsepart = list(0);
390 else {
391 n2->nif.elsepart = NULL;
392 tokpushback++;
393 }
394 consumetoken(TFI);
395 checkkwd = CHKKWD | CHKALIAS;
396 break;
397 case TWHILE:
398 case TUNTIL:
399 n1 = stalloc(sizeof(struct nbinary));
400 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
401 n1->nbinary.ch1 = list(0);
402 consumetoken(TDO);
403 n1->nbinary.ch2 = list(0);
404 consumetoken(TDONE);
405 checkkwd = CHKKWD | CHKALIAS;
406 break;
407 case TFOR:
408 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
409 synerror("Bad for loop variable");
410 n1 = stalloc(sizeof(struct nfor));
411 n1->type = NFOR;
412 n1->nfor.var = wordtext;
413 linebreak();
414 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) {
415 app = ≈
416 while (readtoken() == TWORD) {
417 n2 = makeword(startlinno);
418 *app = n2;
419 app = &n2->narg.next;
420 }
421 *app = NULL;
422 n1->nfor.args = ap;
423 if (lasttoken != TNL && lasttoken != TSEMI)
424 synexpect(TSEMI, 0);
425 } else {
426 static char argvars[5] = {
427 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
428 };
429
430 n2 = stalloc(sizeof(struct narg));
431 n2->type = NARG;
432 n2->narg.text = argvars;
433 n2->narg.backquote = NULL;
434 n2->narg.next = NULL;
435 n2->narg.lineno = startlinno;
436 n1->nfor.args = n2;
437 /*
438 * Newline or semicolon here is optional (but note
439 * that the original Bourne shell only allowed NL).
440 */
441 if (lasttoken != TNL && lasttoken != TSEMI)
442 tokpushback++;
443 }
444 checkkwd = CHKNL | CHKKWD | CHKALIAS;
445 if ((t = readtoken()) == TDO)
446 t = TDONE;
447 else if (t == TBEGIN)
448 t = TEND;
449 else
450 synexpect(TDO, 0);
451 n1->nfor.body = list(0);
452 consumetoken(t);
453 checkkwd = CHKKWD | CHKALIAS;
454 break;
455 case TCASE:
456 n1 = stalloc(sizeof(struct ncase));
457 n1->type = NCASE;
458 n1->ncase.lineno = startlinno - elided_nl;
459 consumetoken(TWORD);
460 n1->ncase.expr = makeword(startlinno);
461 linebreak();
462 if (lasttoken != TWORD || !equal(wordtext, "in"))
463 synexpect(-1, "in");
464 cpp = &n1->ncase.cases;
465 checkkwd = CHKNL | CHKKWD;
466 readtoken();
467 /*
468 * Both ksh and bash accept 'case x in esac'
469 * so configure scripts started taking advantage of this.
470 * The page: http://pubs.opengroup.org/onlinepubs/\
471 * 009695399/utilities/xcu_chap02.html contradicts itself,
472 * as to if this is legal; the "Case Conditional Format"
473 * paragraph shows one case is required, but the "Grammar"
474 * section shows a grammar that explicitly allows the no
475 * case option.
476 *
477 * The standard also says (section 2.10):
478 * This formal syntax shall take precedence over the
479 * preceding text syntax description.
480 * ie: the "Grammar" section wins. The text is just
481 * a rough guide (introduction to the common case.)
482 */
483 while (lasttoken != TESAC) {
484 *cpp = cp = stalloc(sizeof(struct nclist));
485 cp->type = NCLIST;
486 app = &cp->nclist.pattern;
487 if (lasttoken == TLP)
488 readtoken();
489 for (;;) {
490 if (lasttoken < TWORD)
491 synexpect(TWORD, 0);
492 *app = ap = makeword(startlinno);
493 checkkwd = CHKNL | CHKKWD;
494 if (readtoken() != TPIPE)
495 break;
496 app = &ap->narg.next;
497 readtoken();
498 }
499 if (lasttoken != TRP)
500 synexpect(TRP, 0);
501 cp->nclist.lineno = startlinno;
502 cp->nclist.body = list(0);
503
504 checkkwd = CHKNL | CHKKWD | CHKALIAS;
505 if ((t = readtoken()) != TESAC) {
506 if (t != TENDCASE && t != TCASEFALL) {
507 synexpect(TENDCASE, 0);
508 } else {
509 if (t == TCASEFALL)
510 cp->type = NCLISTCONT;
511 checkkwd = CHKNL | CHKKWD;
512 readtoken();
513 }
514 }
515 cpp = &cp->nclist.next;
516 }
517 *cpp = NULL;
518 checkkwd = CHKKWD | CHKALIAS;
519 break;
520 case TLP:
521 n1 = stalloc(sizeof(struct nredir));
522 n1->type = NSUBSHELL;
523 n1->nredir.n = list(0);
524 n1->nredir.redirect = NULL;
525 if (n1->nredir.n == NULL)
526 synexpect(-1, 0);
527 consumetoken(TRP);
528 checkkwd = CHKKWD | CHKALIAS;
529 break;
530 case TBEGIN:
531 n1 = list(0);
532 if (posix && n1 == NULL)
533 synexpect(-1, 0);
534 consumetoken(TEND);
535 checkkwd = CHKKWD | CHKALIAS;
536 break;
537
538 case TBACKGND:
539 case TSEMI:
540 case TAND:
541 case TOR:
542 case TPIPE:
543 case TNL:
544 case TEOF:
545 case TRP:
546 case TENDCASE:
547 case TCASEFALL:
548 /*
549 * simple commands must have something in them,
550 * either a word (which at this point includes a=b)
551 * or a redirection. If we reached the end of the
552 * command (which one of these tokens indicates)
553 * when we are just starting, and have not had a
554 * redirect, then ...
555 *
556 * nb: it is still possible to end up with empty
557 * simple commands, if the "command" is a var
558 * expansion that produces nothing:
559 * X= ; $X && $X
560 * --> &&
561 * That is OK and is handled after word expansions.
562 */
563 if (!redir)
564 synexpect(-1, 0);
565 /*
566 * continue to build a node containing the redirect.
567 * the tokpushback means that our ending token will be
568 * read again in simplecmd, causing it to terminate,
569 * so only the redirect(s) will be contained in the
570 * returned n1
571 */
572 /* FALLTHROUGH */
573 case TWORD:
574 tokpushback++;
575 n1 = simplecmd(rpp, redir);
576 goto checkneg;
577 default:
578 synexpect(-1, 0);
579 /* NOTREACHED */
580 }
581
582 /* Now check for redirection which may follow command */
583 while (readtoken() == TREDIR) {
584 *rpp = n2 = redirnode;
585 rpp = &n2->nfile.next;
586 parsefname();
587 }
588 tokpushback++;
589 *rpp = NULL;
590 if (redir) {
591 if (n1 == NULL || n1->type != NSUBSHELL) {
592 n2 = stalloc(sizeof(struct nredir));
593 n2->type = NREDIR;
594 n2->nredir.n = n1;
595 n1 = n2;
596 }
597 n1->nredir.redirect = redir;
598 }
599
600 checkneg:
601 #ifdef BOGUS_NOT_COMMAND
602 if (negate) {
603 VTRACE(DBG_PARSE, ("bogus %snegate command\n",
604 (negate&1) ? "" : "double "));
605 n2 = stalloc(sizeof(struct nnot));
606 n2->type = (negate & 1) ? NNOT : NDNOT;
607 n2->nnot.com = n1;
608 return n2;
609 }
610 else
611 #endif
612 return n1;
613 }
614
615
616 STATIC union node *
617 simplecmd(union node **rpp, union node *redir)
618 {
619 union node *args, **app;
620 union node *n = NULL;
621 int line = 0;
622 int savecheckkwd;
623 #ifdef BOGUS_NOT_COMMAND
624 union node *n2;
625 int negate = 0;
626 #endif
627
628 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
629 redir ? "" : "out", plinno));
630
631 /* If we don't have any redirections already, then we must reset */
632 /* rpp to be the address of the local redir variable. */
633 if (redir == 0)
634 rpp = &redir;
635
636 args = NULL;
637 app = &args;
638
639 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */
640 while (readtoken() == TNOT) {
641 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
642 negate++;
643 }
644 tokpushback++;
645 #endif
646
647 savecheckkwd = CHKALIAS;
648 for (;;) {
649 checkkwd = savecheckkwd;
650 if (readtoken() == TWORD) {
651 if (line == 0)
652 line = startlinno;
653 n = makeword(startlinno);
654 *app = n;
655 app = &n->narg.next;
656 if (savecheckkwd != 0 && !isassignment(wordtext))
657 savecheckkwd = 0;
658 } else if (lasttoken == TREDIR) {
659 if (line == 0)
660 line = startlinno;
661 *rpp = n = redirnode;
662 rpp = &n->nfile.next;
663 parsefname(); /* read name of redirection file */
664 } else if (lasttoken == TLP && app == &args->narg.next
665 && redir == 0) {
666 /* We have a function */
667 consumetoken(TRP);
668 funclinno = plinno;
669 rmescapes(n->narg.text);
670 if (strchr(n->narg.text, '/'))
671 synerror("Bad function name");
672 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n",
673 n->narg.text, plinno));
674 n->type = NDEFUN;
675 n->narg.lineno = plinno - elided_nl;
676 n->narg.next = command();
677 funclinno = 0;
678 goto checkneg;
679 } else {
680 tokpushback++;
681 break;
682 }
683 }
684
685 if (args == NULL && redir == NULL)
686 synexpect(-1, 0);
687 *app = NULL;
688 *rpp = NULL;
689 n = stalloc(sizeof(struct ncmd));
690 n->type = NCMD;
691 n->ncmd.lineno = line - elided_nl;
692 n->ncmd.backgnd = 0;
693 n->ncmd.args = args;
694 n->ncmd.redirect = redir;
695 n->ncmd.lineno = startlinno;
696
697 checkneg:
698 #ifdef BOGUS_NOT_COMMAND
699 if (negate) {
700 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
701 (negate&1) ? "" : "double "));
702 n2 = stalloc(sizeof(struct nnot));
703 n2->type = (negate & 1) ? NNOT : NDNOT;
704 n2->nnot.com = n;
705 return n2;
706 }
707 else
708 #endif
709 return n;
710 }
711
712 STATIC union node *
713 makeword(int lno)
714 {
715 union node *n;
716
717 n = stalloc(sizeof(struct narg));
718 n->type = NARG;
719 n->narg.next = NULL;
720 n->narg.text = wordtext;
721 n->narg.backquote = backquotelist;
722 n->narg.lineno = lno;
723 return n;
724 }
725
726 void
727 fixredir(union node *n, const char *text, int err)
728 {
729
730 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
731 if (!err)
732 n->ndup.vname = NULL;
733
734 if (is_number(text))
735 n->ndup.dupfd = number(text);
736 else if (text[0] == '-' && text[1] == '\0')
737 n->ndup.dupfd = -1;
738 else {
739
740 if (err)
741 synerror("Bad fd number");
742 else
743 n->ndup.vname = makeword(startlinno - elided_nl);
744 }
745 }
746
747
748 STATIC void
749 parsefname(void)
750 {
751 union node *n = redirnode;
752
753 if (readtoken() != TWORD)
754 synexpect(-1, 0);
755 if (n->type == NHERE) {
756 struct HereDoc *here = heredoc;
757 struct HereDoc *p;
758
759 if (quoteflag == 0)
760 n->type = NXHERE;
761 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
762 if (here->striptabs) {
763 while (*wordtext == '\t')
764 wordtext++;
765 }
766
767 /*
768 * this test is not really necessary, we are not
769 * required to expand wordtext, but there's no reason
770 * it cannot be $$ or something like that - that would
771 * not mean the pid, but literally two '$' characters.
772 * There is no need for limits on what the word can be.
773 * However, it needs to stay literal as entered, not
774 * have $ converted to CTLVAR or something, which as
775 * the parser is, at the minute, is impossible to prevent.
776 * So, leave it like this until the rest of the parser is fixed.
777 */
778 if (!noexpand(wordtext))
779 synerror("Illegal eof marker for << redirection");
780
781 rmescapes(wordtext);
782 here->eofmark = wordtext;
783 here->next = NULL;
784 if (heredoclist == NULL)
785 heredoclist = here;
786 else {
787 for (p = heredoclist ; p->next ; p = p->next)
788 continue;
789 p->next = here;
790 }
791 } else if (n->type == NTOFD || n->type == NFROMFD) {
792 fixredir(n, wordtext, 0);
793 } else {
794 n->nfile.fname = makeword(startlinno - elided_nl);
795 }
796 }
797
798 /*
799 * Check to see whether we are at the end of the here document. When this
800 * is called, c is set to the first character of the next input line. If
801 * we are at the end of the here document, this routine sets the c to PEOF.
802 * The new value of c is returned.
803 */
804
805 static int
806 checkend(int c, char * const eofmark, const int striptabs)
807 {
808
809 if (striptabs) {
810 while (c == '\t')
811 c = pgetc();
812 }
813 if (c == PEOF) {
814 if (*eofmark == '\0')
815 return (c);
816 synerror(EOFhere);
817 }
818 if (c == *eofmark) {
819 int c2;
820 char *q;
821
822 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
823 if (c2 == '\n') {
824 plinno++;
825 needprompt = doprompt;
826 }
827 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
828 c = PEOF;
829 if (c2 == '\n') {
830 plinno++;
831 needprompt = doprompt;
832 }
833 } else {
834 pungetc();
835 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
836 }
837 } else if (c == '\n' && *eofmark == '\0') {
838 c = PEOF;
839 plinno++;
840 needprompt = doprompt;
841 }
842 return (c);
843 }
844
845
846 /*
847 * Input any here documents.
848 */
849
850 STATIC int
851 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
852 {
853 int c;
854 char *out;
855 int lines = plinno;
856
857 c = pgetc();
858
859 /*
860 * If we hit EOF on the input, and the eofmark is a null string ('')
861 * we consider this empty line to be the eofmark, and exit without err.
862 */
863 if (c == PEOF && *eofmark != '\0')
864 synerror(EOFhere);
865
866 STARTSTACKSTR(out);
867
868 while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
869 do {
870 if (sq) {
871 /*
872 * in single quoted mode (eofmark quoted)
873 * all we look for is \n so we can check
874 * for the epfmark - everything saved literally.
875 */
876 STPUTC(c, out);
877 if (c == '\n') {
878 plinno++;
879 break;
880 }
881 continue;
882 }
883 /*
884 * In double quoted (non-quoted eofmark)
885 * we must handle \ followed by \n here
886 * otherwise we can mismatch the end mark.
887 * All other uses of \ will be handled later
888 * when the here doc is expanded.
889 *
890 * This also makes sure \\ followed by \n does
891 * not suppress the newline (the \ quotes itself)
892 */
893 if (c == '\\') { /* A backslash */
894 STPUTC(c, out);
895 c = pgetc(); /* followed by */
896 if (c == '\n') { /* a newline? */
897 STPUTC(c, out);
898 plinno++;
899 continue; /* don't break */
900 }
901 }
902 STPUTC(c, out); /* keep the char */
903 if (c == '\n') { /* at end of line */
904 plinno++;
905 break; /* look for eofmark */
906 }
907 } while ((c = pgetc()) != PEOF);
908
909 /*
910 * If we have read a line, and reached EOF, without
911 * finding the eofmark, whether the EOF comes before
912 * or immediately after the \n, that is an error.
913 */
914 if (c == PEOF || (c = pgetc()) == PEOF)
915 synerror(EOFhere);
916 }
917 STPUTC('\0', out);
918
919 c = out - stackblock();
920 out = stackblock();
921 grabstackblock(c);
922 wordtext = out;
923
924 VTRACE(DBG_PARSE,
925 ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
926 plinno - lines, sq ? "quoted " : "", eofmark,
927 striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
928 wordtext, (c > 16 ? "..." : ""), plinno));
929
930 return (plinno - lines);
931 }
932
933 static char *
934 insert_elided_nl(char *str)
935 {
936 while (elided_nl > 0) {
937 STPUTC(CTLNONL, str);
938 elided_nl--;
939 }
940 return str;
941 }
942
943 STATIC void
944 readheredocs(void)
945 {
946 struct HereDoc *here;
947 union node *n;
948 int line, l;
949
950 line = 0; /*XXX - gcc! obviously unneeded */
951 if (heredoclist)
952 line = heredoclist->startline + 1;
953 l = 0;
954 while (heredoclist) {
955 line += l;
956 here = heredoclist;
957 heredoclist = here->next;
958 if (needprompt) {
959 setprompt(2);
960 needprompt = 0;
961 }
962
963 l = slurp_heredoc(here->eofmark, here->striptabs,
964 here->here->nhere.type == NHERE);
965
966 here->here->nhere.doc = n = makeword(line);
967
968 if (here->here->nhere.type == NHERE)
969 continue;
970
971 /*
972 * Now "parse" here docs that have unquoted eofmarkers.
973 */
974 setinputstring(wordtext, 1, line);
975 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
976 l, line));
977 readtoken1(pgetc(), DQSYNTAX, 1);
978 n->narg.text = wordtext;
979 n->narg.backquote = backquotelist;
980 popfile();
981 }
982 }
983
984 STATIC int
985 peektoken(void)
986 {
987 int t;
988
989 t = readtoken();
990 tokpushback++;
991 return (t);
992 }
993
994 STATIC int
995 readtoken(void)
996 {
997 int t;
998 #ifdef DEBUG
999 int alreadyseen = tokpushback;
1000 int savecheckkwd = checkkwd;
1001 #endif
1002 struct alias *ap;
1003
1004 top:
1005 t = xxreadtoken();
1006
1007 if (checkkwd & CHKNL) {
1008 while (t == TNL) {
1009 readheredocs();
1010 t = xxreadtoken();
1011 }
1012 }
1013
1014 /*
1015 * check for keywords and aliases
1016 */
1017 if (t == TWORD && !quoteflag) {
1018 const char *const *pp;
1019
1020 if (checkkwd & CHKKWD)
1021 for (pp = parsekwd; *pp; pp++) {
1022 if (**pp == *wordtext && equal(*pp, wordtext)) {
1023 lasttoken = t = pp -
1024 parsekwd + KWDOFFSET;
1025 VTRACE(DBG_PARSE,
1026 ("keyword %s recognized @%d\n",
1027 tokname[t], plinno));
1028 goto out;
1029 }
1030 }
1031
1032 if (checkkwd & CHKALIAS &&
1033 (ap = lookupalias(wordtext, 1)) != NULL) {
1034 VTRACE(DBG_PARSE,
1035 ("alias '%s' recognized -> <:%s:>\n",
1036 wordtext, ap->val));
1037 pushstring(ap->val, strlen(ap->val), ap);
1038 goto top;
1039 }
1040 }
1041 out:
1042 if (t != TNOT)
1043 checkkwd = 0;
1044
1045 VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n",
1046 alreadyseen ? "reread " : "", tokname[t],
1047 t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd));
1048 return (t);
1049 }
1050
1051
1052 /*
1053 * Read the next input token.
1054 * If the token is a word, we set backquotelist to the list of cmds in
1055 * backquotes. We set quoteflag to true if any part of the word was
1056 * quoted.
1057 * If the token is TREDIR, then we set redirnode to a structure containing
1058 * the redirection.
1059 * In all cases, the variable startlinno is set to the number of the line
1060 * on which the token starts.
1061 *
1062 * [Change comment: here documents and internal procedures]
1063 * [Readtoken shouldn't have any arguments. Perhaps we should make the
1064 * word parsing code into a separate routine. In this case, readtoken
1065 * doesn't need to have any internal procedures, but parseword does.
1066 * We could also make parseoperator in essence the main routine, and
1067 * have parseword (readtoken1?) handle both words and redirection.]
1068 */
1069
1070 #define RETURN(token) return lasttoken = (token)
1071
1072 STATIC int
1073 xxreadtoken(void)
1074 {
1075 int c;
1076
1077 if (tokpushback) {
1078 tokpushback = 0;
1079 CTRACE(DBG_LEXER,
1080 ("xxreadtoken() returns %s (%d) again\n",
1081 tokname[lasttoken], lasttoken));
1082 return lasttoken;
1083 }
1084 if (needprompt) {
1085 setprompt(2);
1086 needprompt = 0;
1087 }
1088 elided_nl = 0;
1089 startlinno = plinno;
1090 for (;;) { /* until token or start of word found */
1091 c = pgetc_macro();
1092 CTRACE(DBG_LEXER, ("xxreadtoken() sees '%c' (%#.2x) ",
1093 c&0xFF, c&0x1FF));
1094 switch (c) {
1095 case ' ': case '\t': case PFAKE:
1096 CTRACE(DBG_LEXER, (" ignored\n"));
1097 continue;
1098 case '#':
1099 while ((c = pgetc()) != '\n' && c != PEOF)
1100 continue;
1101 CTRACE(DBG_LEXER,
1102 ("skipped comment to (not incl) \\n\n"));
1103 pungetc();
1104 continue;
1105
1106 case '\n':
1107 plinno++;
1108 CTRACE(DBG_LEXER, ("newline now @%d\n", plinno));
1109 needprompt = doprompt;
1110 RETURN(TNL);
1111 case PEOF:
1112 CTRACE(DBG_LEXER, ("EOF -> TEOF (return)\n"));
1113 RETURN(TEOF);
1114
1115 case '&':
1116 if (pgetc_linecont() == '&') {
1117 CTRACE(DBG_LEXER,
1118 ("and another -> TAND (return)\n"));
1119 RETURN(TAND);
1120 }
1121 pungetc();
1122 CTRACE(DBG_LEXER, (" -> TBACKGND (return)\n"));
1123 RETURN(TBACKGND);
1124 case '|':
1125 if (pgetc_linecont() == '|') {
1126 CTRACE(DBG_LEXER,
1127 ("and another -> TOR (return)\n"));
1128 RETURN(TOR);
1129 }
1130 pungetc();
1131 CTRACE(DBG_LEXER, (" -> TPIPE (return)\n"));
1132 RETURN(TPIPE);
1133 case ';':
1134 switch (pgetc_linecont()) {
1135 case ';':
1136 CTRACE(DBG_LEXER,
1137 ("and another -> TENDCASE (return)\n"));
1138 RETURN(TENDCASE);
1139 case '&':
1140 CTRACE(DBG_LEXER,
1141 ("and '&' -> TCASEFALL (return)\n"));
1142 RETURN(TCASEFALL);
1143 default:
1144 pungetc();
1145 CTRACE(DBG_LEXER, (" -> TSEMI (return)\n"));
1146 RETURN(TSEMI);
1147 }
1148 case '(':
1149 CTRACE(DBG_LEXER, (" -> TLP (return)\n"));
1150 RETURN(TLP);
1151 case ')':
1152 CTRACE(DBG_LEXER, (" -> TRP (return)\n"));
1153 RETURN(TRP);
1154
1155 case '\\':
1156 switch (pgetc()) {
1157 case '\n':
1158 startlinno = ++plinno;
1159 CTRACE(DBG_LEXER, ("\\\n ignored, now @%d\n",
1160 plinno));
1161 if (doprompt)
1162 setprompt(2);
1163 else
1164 setprompt(0);
1165 continue;
1166 case PEOF:
1167 CTRACE(DBG_LEXER,
1168 ("then EOF -> TEOF (return) '\\' dropped\n"));
1169 RETURN(TEOF);
1170 default:
1171 CTRACE(DBG_LEXER, ("not \\\n or EOF: "));
1172 pungetc();
1173 break;
1174 }
1175 /* FALLTHROUGH */
1176 default:
1177 CTRACE(DBG_LEXER, ("getting a word\n"));
1178 return readtoken1(c, BASESYNTAX, 0);
1179 }
1180 }
1181 #undef RETURN
1182 }
1183
1184
1185
1186 /*
1187 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1188 * is not NULL, read a here document. In the latter case, eofmark is the
1189 * word which marks the end of the document and striptabs is true if
1190 * leading tabs should be stripped from the document. The argument firstc
1191 * is the first character of the input token or document.
1192 *
1193 * Because C does not have internal subroutines, I have simulated them
1194 * using goto's to implement the subroutine linkage. The following macros
1195 * will run code that appears at the end of readtoken1.
1196 */
1197
1198 /*
1199 * We used to remember only the current syntax, variable nesting level,
1200 * double quote state for each var nesting level, and arith nesting
1201 * level (unrelated to var nesting) and one prev syntax when in arith
1202 * syntax. This worked for simple cases, but can't handle arith inside
1203 * var expansion inside arith inside var with some quoted and some not.
1204 *
1205 * Inspired by FreeBSD's implementation (though it was the obvious way)
1206 * though implemented differently, we now have a stack that keeps track
1207 * of what we are doing now, and what we were doing previously.
1208 * Every time something changes, which will eventually end and should
1209 * revert to the previous state, we push this stack, and then pop it
1210 * again later (that is every ${} with an operator (to parse the word
1211 * or pattern that follows) ${x} and $x are too simple to need it)
1212 * $(( )) $( ) and "...". Always. Really, always!
1213 *
1214 * The stack is implemented as one static (on the C stack) base block
1215 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1216 * enough for the vast majority of cases. For torture tests, we
1217 * malloc more blocks as needed. All accesses through the inline
1218 * functions below.
1219 */
1220
1221 /*
1222 * varnest & arinest will typically be 0 or 1
1223 * (varnest can increment in usages like ${x=${y}} but probably
1224 * does not really need to)
1225 * parenlevel allows balancing parens inside a $(( )), it is reset
1226 * at each new nesting level ( $(( ( x + 3 ${unset-)} )) does not work.
1227 * quoted is special - we need to know 2 things ... are we inside "..."
1228 * (even if inherited from some previous nesting level) and was there
1229 * an opening '"' at this level (so the next will be closing).
1230 * "..." can span nesting levels, but cannot be opened in one and
1231 * closed in a different one.
1232 * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1233 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1234 * is really so special that this setting is not very important)
1235 * and 0x10 that indicates that an opening quote has been seen.
1236 * The bottom 4 bits are inherited, the 0x10 bit is not.
1237 */
/*
 * One level of tokenizer state.  A new level is pushed (copying most
 * fields from the level below, see bump_state_level()) whenever the
 * lexer enters a construct that will later end and revert to the
 * previous state.
 */
struct tokenstate {
	const char *ts_syntax;		/* syntax table, indexed by input char */
	unsigned short ts_parenlevel;	/* counters */
	unsigned short ts_varnest;	/* 64000 levels should be enough! */
	unsigned short ts_arinest;
	unsigned short ts_quoted;	/* 1 -> single, 2 -> double */
					/* (NQ/SQ/DQ/CQ, optionally | QS) */
	unsigned short ts_magicq;	/* heredoc or word expand */
};
1246
/*
 * Values stored in tokenstate.ts_quoted.  The low bits (mask QF) hold
 * the kind of quoting in effect; QS is or'ed in when the opening quote
 * was seen at the current stack level.
 */
#define	NQ	0x00	/* Unquoted */
#define	SQ	0x01	/* Single Quotes */
#define	DQ	0x02	/* Double Quotes (or equivalent) */
#define	CQ	0x03	/* C style Single Quotes */
#define	QF	0x0F		/* Mask to extract previous values */
#define	QS	0x10	/* Quoting started at this level in stack */

/* number of tokenstate entries held by each statestack block */
#define	LEVELS_PER_BLOCK	8
/* shorthand used for the state stack type throughout this file */
#define	VSS			struct statestack
1256
/*
 * One block of the tokenizer state stack.  Blocks are chained through
 * "prev"; the base block has prev == NULL.  "cur" indexes the active
 * entry of tokenstate[] in this block.
 */
struct statestack {
	VSS *prev;		/* previous block in list */
	int cur;		/* which of our tokenstates is current */
	struct tokenstate tokenstate[LEVELS_PER_BLOCK];
};
1262
1263 static inline struct tokenstate *
1264 currentstate(VSS *stack)
1265 {
1266 return &stack->tokenstate[stack->cur];
1267 }
1268
#ifdef notdef
/*
 * Return the tokenstate entry one level below the current one.
 * When the current entry is the first of its block, that is the last
 * entry of the previous block; at the very bottom of the stack the
 * base entry itself is returned.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	const int idx = stack->cur;

	if (idx == 0) {
		if (stack->prev != NULL)
			return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
		/* cannot drop below base */
		return &stack->tokenstate[0];
	}
	return &stack->tokenstate[idx - 1];
}
#endif
1280
1281 static inline VSS *
1282 bump_state_level(VSS *stack)
1283 {
1284 struct tokenstate *os, *ts;
1285
1286 os = currentstate(stack);
1287
1288 if (++stack->cur >= LEVELS_PER_BLOCK) {
1289 VSS *ss;
1290
1291 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1292 ss->cur = 0;
1293 ss->prev = stack;
1294 stack = ss;
1295 }
1296
1297 ts = currentstate(stack);
1298
1299 ts->ts_parenlevel = 0; /* parens inside never match outside */
1300
1301 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1302 ts->ts_varnest = os->ts_varnest;
1303 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1304 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1305 ts->ts_magicq = os->ts_magicq;
1306
1307 return stack;
1308 }
1309
1310 static inline VSS *
1311 drop_state_level(VSS *stack)
1312 {
1313 if (stack->cur == 0) {
1314 VSS *ss;
1315
1316 ss = stack;
1317 stack = ss->prev;
1318 if (stack == NULL)
1319 return ss;
1320 ckfree(ss);
1321 }
1322 --stack->cur;
1323 return stack;
1324 }
1325
1326 static inline void
1327 cleanup_state_stack(VSS *stack)
1328 {
1329 while (stack->prev != NULL) {
1330 stack->cur = 0;
1331 stack = drop_state_level(stack);
1332 }
1333 }
1334
/*
 * "Subroutine calls" for readtoken1(): jump to the named section at
 * the end of that function, which jumps back to the *_return label
 * placed here.  Each macro may therefore be used only once.
 */
#define	PARSESUB()	{goto parsesub; parsesub_return:;}
#define	PARSEARITH()	{goto parsearith; parsearith_return:;}
1337
/*
 * The following macros all assume the existence of a local var "stack"
 * which contains a pointer to the current struct statestack
 */
1342
/*
 * These are macros rather than inline funcs to avoid code churn as much
 * as possible - they replace macros of the same name used previously.
 *
 * ISDBLQUOTE() tests only the QS bit, that is, whether quoting was
 * opened at the current stack level.  SETDBLQUOTE() marks the current
 * level as double quoted with the quote opened here.
 */
#define	ISDBLQUOTE()	(currentstate(stack)->ts_quoted & QS)
#define	SETDBLQUOTE()	(currentstate(stack)->ts_quoted = QS | DQ)
#ifdef notdef
#define	CLRDBLQUOTE()	(currentstate(stack)->ts_quoted =		\
			    stack->cur != 0 || stack->prev ?		\
				prevstate(stack)->ts_quoted & QF : 0)
#endif

/*
 * This set are just to avoid excess typing and line lengths...
 * The ones that "look like" var names must be implemented to be lvalues
 * (each expands to a field of the current tokenstate entry).
 */
#define	syntax		(currentstate(stack)->ts_syntax)
#define	parenlevel	(currentstate(stack)->ts_parenlevel)
#define	varnest		(currentstate(stack)->ts_varnest)
#define	arinest		(currentstate(stack)->ts_arinest)
#define	quoted		(currentstate(stack)->ts_quoted)
#define	magicq		(currentstate(stack)->ts_magicq)
/* push / pop one level, updating the local "stack" pointer */
#define	TS_PUSH()	(stack = bump_state_level(stack))
#define	TS_POP()	(stack = drop_state_level(stack))
1367
/*
 * Called to parse command substitutions.  oldstyle is true if the command
 * is enclosed inside `` (otherwise it was enclosed in "$( )")
 *
 * Internally nlpp is a pointer to the head of the linked
 * list of commands (passed by reference), and savelen is the number of
 * characters on the top of the stack which must be preserved.
 *
 * Parameters:
 *	stack	 the caller's tokenizer state stack (consulted for the
 *		 quoting / arithmetic state, cleaned up on error)
 *	in	 current end of the word being built in the stack block
 *	pbqlist	 list to which the parsed command node is appended
 *	oldstyle non-zero for `...`, zero for $(...)
 *
 * Returns the new end of the word being built: the first savelen bytes
 * are restored, followed by a CTLBACKQ marker (with CTLQUOTE or'ed in,
 * and one CTLNONL per input line consumed, when in arithmetic or when
 * a double quote was opened at the current state level).
 *
 * On an error raised while parsing the embedded command, the local
 * handler releases the saved copy and the state stack, then re-raises
 * to the previously installed handler.
 */
static char *
parsebackq(VSS *const stack, char * const in,
    struct nodelist **const pbqlist, const int oldstyle)
{
	struct nodelist **nlpp;
	const int savepbq = parsebackquote;
	union node *n;
	char *out;
	char *str = NULL;
	/* volatile copy of str, so it is usable after longjmp() */
	char *volatile sstr = str;
	struct jmploc jmploc;
	struct jmploc *const savehandler = handler;
	struct parsefile *const savetopfile = getcurrentfile();
	const int savelen = in - stackblock();
	int saveprompt;
	int lno;

	if (setjmp(jmploc.loc)) {
		/* error unwinding: undo what we did, then pass it on */
		popfilesupto(savetopfile);
		if (sstr)
			ckfree(__UNVOLATILE(sstr));
		cleanup_state_stack(stack);
		parsebackquote = 0;
		handler = savehandler;
		CTRACE(DBG_LEXER, ("parsebackq() err (%d), unwinding\n",
		    exception));
		longjmp(handler->loc, 1);
	}
	INTOFF;
	/* save the partial word, the stack block is about to be reused */
	sstr = str = NULL;
	if (savelen > 0) {
		sstr = str = ckmalloc(savelen);
		memcpy(str, stackblock(), savelen);
	}
	handler = &jmploc;
	INTON;
	if (oldstyle) {
		/*
		 * We must read until the closing backquote, giving special
		 * treatment to some slashes, and then push the string and
		 * reread it as input, interpreting it normally.
		 */
		int pc;
		int psavelen;
		char *pstr;
		int line1 = plinno;

		VTRACE(DBG_PARSE|DBG_LEXER,
		    ("parsebackq: repackaging `` as $( )"));
		/*
		 * Because the entire `...` is read here, we don't
		 * need to bother the state stack.   That will be used
		 * (as appropriate) when the processed string is re-read.
		 */
		STARTSTACKSTR(out);
#ifdef DEBUG
		for (psavelen = 0;;psavelen++) {	/* } */
#else
		for (;;) {
#endif
			if (needprompt) {
				setprompt(2);
				needprompt = 0;
			}
			pc = pgetc();
			VTRACE(DBG_LEXER,
			    ("parsebackq() got '%c'(%#.2x) in `` %s", pc&0xFF,
				pc&0x1FF, pc == '`' ? "terminator\n" : ""));
			if (pc == '`')
				break;
			switch (pc) {
			case '\\':
				pc = pgetc();
				VTRACE(DBG_LEXER, ("then '%c'(%#.2x) ",
				    pc&0xFF, pc&0x1FF));
#ifdef DEBUG
				psavelen++;
#endif
				if (pc == '\n') {   /* keep \ \n for later */
					plinno++;
					VTRACE(DBG_LEXER, ("@%d ", plinno));
					needprompt = doprompt;
				}
				/*
				 * The \ is dropped only before \ ` $
				 * (and before " when a double quote is open)
				 */
				if (pc != '\\' && pc != '`' && pc != '$'
				    && (!ISDBLQUOTE() || pc != '"')) {
					VTRACE(DBG_LEXER, ("keep '\\' "));
					STPUTC('\\', out);
				}
				break;

			case '\n':
				plinno++;
				VTRACE(DBG_LEXER, ("@%d ", plinno));
				needprompt = doprompt;
				break;

			case PEOF:
				/* report the error at the opening ` */
				startlinno = line1;
				VTRACE(DBG_LEXER, ("EOF\n", plinno));
				synerror("EOF in backquote substitution");
				break;

			default:
				break;
			}
			VTRACE(DBG_LEXER, (".\n", plinno));
			STPUTC(pc, out);
		}
		STPUTC('\0', out);
		VTRACE(DBG_LEXER, ("parsebackq() ``:"));
		VTRACE(DBG_PARSE|DBG_LEXER, (" read %d", psavelen));
		psavelen = out - stackblock();
		VTRACE(DBG_PARSE|DBG_LEXER, (" produced %d\n", psavelen));
		if (psavelen > 0) {
			/* make the collected text the current input */
			pstr = grabstackstr(out);
			CTRACE(DBG_LEXER,
			    ("parsebackq() reprocessing as $(%s)\n", pstr));
			setinputstring(pstr, 1, line1);
		}
	}
	/* append a new, empty, entry at the end of the caller's list */
	nlpp = pbqlist;
	while (*nlpp)
		nlpp = &(*nlpp)->next;
	*nlpp = stalloc(sizeof(struct nodelist));
	(*nlpp)->next = NULL;
	parsebackquote = oldstyle;

	if (oldstyle) {
		/* the text was already read, no prompting while parsing it */
		saveprompt = doprompt;
		doprompt = 0;
	} else
		saveprompt = 0;

	/* lno becomes the number of lines consumed by the command list */
	lno = -plinno;
	CTRACE(DBG_LEXER, ("parsebackq() parsing embedded command list\n"));
	n = list(0);
	CTRACE(DBG_LEXER, ("parsebackq() parsed $() (%d -> %d)\n", -lno,
	    lno + plinno));
	lno += plinno;

	if (oldstyle) {
		if (peektoken() != TEOF)
			synexpect(-1, 0);
		doprompt = saveprompt;
	} else
		consumetoken(TRP);

	(*nlpp)->n = n;
	if (oldstyle) {
		/*
		 * Start reading from old file again, ignoring any pushed back
		 * tokens left from the backquote parsing
		 */
		CTRACE(DBG_LEXER, ("parsebackq() back to previous input\n"));
		popfile();
		tokpushback = 0;
	}

	/* rebuild the partial word that was saved on entry */
	while (stackblocksize() <= savelen)
		growstackblock();
	STARTSTACKSTR(out);
	if (str) {
		memcpy(out, str, savelen);
		STADJUST(savelen, out);
		INTOFF;
		ckfree(str);
		sstr = str = NULL;
		INTON;
	}
	parsebackquote = savepbq;
	handler = savehandler;
	if (arinest || ISDBLQUOTE()) {
		STPUTC(CTLBACKQ | CTLQUOTE, out);
		/* one CTLNONL for each line the command list consumed */
		while (--lno >= 0)
			STPUTC(CTLNONL, out);
	} else
		STPUTC(CTLBACKQ, out);

	return out;
}
1556
1557 /*
1558 * Parse a redirection operator. The parameter "out" points to a string
1559 * specifying the fd to be redirected. It is guaranteed to be either ""
1560 * or a numeric string (for now anyway). The parameter "c" contains the
1561 * first character of the redirection operator.
1562 *
1563 * Note the string "out" is on the stack, which we are about to clobber,
1564 * so process it first...
1565 */
1566
1567 static void
1568 parseredir(const char *out, int c)
1569 {
1570 union node *np;
1571 int fd;
1572
1573 np = stalloc(sizeof(struct nfile));
1574
1575 fd = (*out == '\0') ? -1 : number(out); /* number(out) >= 0 */
1576 np->nfile.fd = fd; /* do this again later with updated fd */
1577 if (fd != np->nfile.fd)
1578 error("file descriptor (%d) out of range", fd);
1579
1580 VTRACE(DBG_LEXER, ("parseredir after '%s%c' ", out, c));
1581 if (c == '>') {
1582 if (fd < 0)
1583 fd = 1;
1584 c = pgetc_linecont();
1585 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1586 if (c == '>')
1587 np->type = NAPPEND;
1588 else if (c == '|')
1589 np->type = NCLOBBER;
1590 else if (c == '&')
1591 np->type = NTOFD;
1592 else {
1593 np->type = NTO;
1594 VTRACE(DBG_LEXER, ("unwanted ", c));
1595 pungetc();
1596 }
1597 } else { /* c == '<' */
1598 if (fd < 0)
1599 fd = 0;
1600 c = pgetc_linecont();
1601 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1602 switch (c) {
1603 case '<':
1604 /* if sizes differ, just discard the old one */
1605 if (sizeof (struct nfile) != sizeof (struct nhere))
1606 np = stalloc(sizeof(struct nhere));
1607 np->type = NHERE;
1608 np->nhere.fd = 0;
1609 heredoc = stalloc(sizeof(struct HereDoc));
1610 heredoc->here = np;
1611 heredoc->startline = plinno;
1612 if ((c = pgetc_linecont()) == '-') {
1613 CTRACE(DBG_LEXER, ("and '%c'(%#.2x) ",
1614 c & 0xFF, c & 0x1FF));
1615 heredoc->striptabs = 1;
1616 } else {
1617 heredoc->striptabs = 0;
1618 pungetc();
1619 }
1620 break;
1621
1622 case '&':
1623 np->type = NFROMFD;
1624 break;
1625
1626 case '>':
1627 np->type = NFROMTO;
1628 break;
1629
1630 default:
1631 np->type = NFROM;
1632 VTRACE(DBG_LEXER, ("unwanted('%c'0#.2x)", c&0xFF,
1633 c&0x1FF));
1634 pungetc();
1635 break;
1636 }
1637 }
1638 np->nfile.fd = fd;
1639
1640 VTRACE(DBG_LEXER, (" ->%"PRIdsNT" fd=%d\n", NODETYPENAME(np->type),fd));
1641
1642 redirnode = np; /* this is the "value" of TRENODE */
1643 }
1644
1645 /*
1646 * Called to parse a backslash escape sequence inside $'...'.
1647 * The backslash has already been read.
1648 */
1649 static char *
1650 readcstyleesc(char *out)
1651 {
1652 int c, vc, i, n;
1653 unsigned int v;
1654
1655 c = pgetc();
1656 VTRACE(DBG_LEXER, ("CSTR(\\%c)(\\%#x)", c&0xFF, c&0x1FF));
1657 switch (c) {
1658 case '\0':
1659 case PEOF:
1660 synerror("Unterminated quoted string");
1661 case '\n':
1662 plinno++;
1663 VTRACE(DBG_LEXER, ("@%d ", plinno));
1664 if (doprompt)
1665 setprompt(2);
1666 else
1667 setprompt(0);
1668 return out;
1669
1670 case '\\':
1671 case '\'':
1672 case '"':
1673 v = c;
1674 break;
1675
1676 case 'a': v = '\a'; break;
1677 case 'b': v = '\b'; break;
1678 case 'e': v = '\033'; break;
1679 case 'f': v = '\f'; break;
1680 case 'n': v = '\n'; break;
1681 case 'r': v = '\r'; break;
1682 case 't': v = '\t'; break;
1683 case 'v': v = '\v'; break;
1684
1685 case '0': case '1': case '2': case '3':
1686 case '4': case '5': case '6': case '7':
1687 v = c - '0';
1688 c = pgetc();
1689 if (c >= '0' && c <= '7') {
1690 v <<= 3;
1691 v += c - '0';
1692 c = pgetc();
1693 if (c >= '0' && c <= '7') {
1694 v <<= 3;
1695 v += c - '0';
1696 } else
1697 pungetc();
1698 } else
1699 pungetc();
1700 break;
1701
1702 case 'c':
1703 c = pgetc();
1704 if (c < 0x3f || c > 0x7a || c == 0x60)
1705 synerror("Bad \\c escape sequence");
1706 if (c == '\\' && pgetc() != '\\')
1707 synerror("Bad \\c\\ escape sequence");
1708 if (c == '?')
1709 v = 127;
1710 else
1711 v = c & 0x1f;
1712 break;
1713
1714 case 'x':
1715 n = 2;
1716 goto hexval;
1717 case 'u':
1718 n = 4;
1719 goto hexval;
1720 case 'U':
1721 n = 8;
1722 hexval:
1723 v = 0;
1724 for (i = 0; i < n; i++) {
1725 c = pgetc();
1726 if (c >= '0' && c <= '9')
1727 v = (v << 4) + c - '0';
1728 else if (c >= 'A' && c <= 'F')
1729 v = (v << 4) + c - 'A' + 10;
1730 else if (c >= 'a' && c <= 'f')
1731 v = (v << 4) + c - 'a' + 10;
1732 else {
1733 pungetc();
1734 break;
1735 }
1736 }
1737 if (n > 2 && v > 127) {
1738 if (v >= 0xd800 && v <= 0xdfff)
1739 synerror("Invalid \\u escape sequence");
1740
1741 /* XXX should we use iconv here. What locale? */
1742 CHECKSTRSPACE(4, out);
1743
1744 if (v <= 0x7ff) {
1745 USTPUTC(0xc0 | v >> 6, out);
1746 USTPUTC(0x80 | (v & 0x3f), out);
1747 return out;
1748 } else if (v <= 0xffff) {
1749 USTPUTC(0xe0 | v >> 12, out);
1750 USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1751 USTPUTC(0x80 | (v & 0x3f), out);
1752 return out;
1753 } else if (v <= 0x10ffff) {
1754 USTPUTC(0xf0 | v >> 18, out);
1755 USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1756 USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1757 USTPUTC(0x80 | (v & 0x3f), out);
1758 return out;
1759 }
1760 if (v > 127)
1761 v = '?';
1762 }
1763 break;
1764 default:
1765 synerror("Unknown $'' escape sequence");
1766 }
1767 vc = (char)v;
1768 VTRACE(DBG_LEXER, ("->%u(%#x)['%c']", v, v, vc&0xFF));
1769
1770 /*
1771 * If we managed to create a \n from a \ sequence (no matter how)
1772 * then we replace it with the magic CRTCNL control char, which
1773 * will turn into a \n again later, but in the meantime, never
1774 * causes LINENO increments.
1775 */
1776 if (vc == '\n') {
1777 VTRACE(DBG_LEXER, ("CTLCNL."));
1778 USTPUTC(CTLCNL, out);
1779 return out;
1780 }
1781
1782 /*
1783 * We can't handle NUL bytes.
1784 * POSIX says we should skip till the closing quote.
1785 */
1786 if (vc == '\0') {
1787 CTRACE(DBG_LEXER, ("\\0: skip to '", v, v, vc&0xFF));
1788 while ((c = pgetc()) != '\'') {
1789 if (c == '\\')
1790 c = pgetc();
1791 if (c == PEOF)
1792 synerror("Unterminated quoted string");
1793 if (c == '\n') {
1794 plinno++;
1795 if (doprompt)
1796 setprompt(2);
1797 else
1798 setprompt(0);
1799 }
1800 }
1801 pungetc();
1802 return out;
1803 }
1804 CVTRACE(DBG_LEXER, NEEDESC(vc), ("CTLESC-"));
1805 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", vc&0xFF, vc&0x1FF));
1806 if (NEEDESC(vc))
1807 USTPUTC(CTLESC, out);
1808 USTPUTC(vc, out);
1809 return out;
1810 }
1811
1812 /*
1813 * The lowest level basic tokenizer.
1814 *
1815 * The next input byte (character) is in firstc, syn says which
1816 * syntax tables we are to use (basic, single or double quoted, or arith)
1817 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1818 * quote character itself is not special (used parsing here docs and similar)
1819 *
1820 * The result is the type of the next token (its value, when there is one,
1821 * is saved in the relevant global var - must fix that someday!) which is
1822 * also saved for re-reading ("lasttoken").
1823 *
1824 * Overall, this routine does far more parsing than it is supposed to.
1825 * That will also need fixing, someday...
1826 */
1827 STATIC int
1828 readtoken1(int firstc, char const *syn, int oneword)
1829 {
1830 int c;
1831 char * out;
1832 int len;
1833 struct nodelist *bqlist;
1834 int quotef;
1835 VSS static_stack;
1836 VSS *stack = &static_stack;
1837
1838 stack->prev = NULL;
1839 stack->cur = 0;
1840
1841 syntax = syn;
1842
1843 #ifdef DEBUG
1844 #define SYNTAX ( syntax == BASESYNTAX ? "BASE" : \
1845 syntax == DQSYNTAX ? "DQ" : \
1846 syntax == SQSYNTAX ? "SQ" : \
1847 syntax == ARISYNTAX ? "ARI" : \
1848 "???" )
1849 #endif
1850
1851 startlinno = plinno;
1852 varnest = 0;
1853 quoted = 0;
1854 if (syntax == DQSYNTAX)
1855 SETDBLQUOTE();
1856 quotef = 0;
1857 bqlist = NULL;
1858 arinest = 0;
1859 parenlevel = 0;
1860 elided_nl = 0;
1861 magicq = oneword;
1862
1863 CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n",
1864 firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "",
1865 ISDBLQUOTE()?" ISDBLQUOTE":"", quoted));
1866
1867 STARTSTACKSTR(out);
1868
1869 for (c = firstc ;; c = pgetc_macro()) { /* until of token */
1870 if (syntax == ARISYNTAX)
1871 out = insert_elided_nl(out);
1872 CHECKSTRSPACE(6, out); /* permit 6 calls to USTPUTC */
1873 switch (syntax[c]) {
1874 case CFAKE:
1875 VTRACE(DBG_LEXER, ("CFAKE"));
1876 if (syntax == BASESYNTAX && varnest == 0)
1877 break;
1878 VTRACE(DBG_LEXER, (","));
1879 continue;
1880 case CNL: /* '\n' */
1881 VTRACE(DBG_LEXER, ("CNL"));
1882 if (syntax == BASESYNTAX && varnest == 0)
1883 break; /* exit loop */
1884 USTPUTC(c, out);
1885 plinno++;
1886 VTRACE(DBG_LEXER, ("@%d,", plinno));
1887 if (doprompt)
1888 setprompt(2);
1889 else
1890 setprompt(0);
1891 continue;
1892
1893 case CSBACK: /* single quoted backslash */
1894 if ((quoted & QF) == CQ) {
1895 out = readcstyleesc(out);
1896 continue;
1897 }
1898 VTRACE(DBG_LEXER, ("ESC:"));
1899 USTPUTC(CTLESC, out);
1900 /* FALLTHROUGH */
1901 case CWORD:
1902 VTRACE(DBG_LEXER, ("'%c'", c));
1903 USTPUTC(c, out);
1904 continue;
1905
1906 case CCTL:
1907 CVTRACE(DBG_LEXER, !magicq || ISDBLQUOTE(),
1908 ("%s%sESC:",!magicq?"!m":"",ISDBLQUOTE()?"DQ":""));
1909 if (!magicq || ISDBLQUOTE())
1910 USTPUTC(CTLESC, out);
1911 VTRACE(DBG_LEXER, ("'%c'", c));
1912 USTPUTC(c, out);
1913 continue;
1914 case CBACK: /* backslash */
1915 c = pgetc();
1916 VTRACE(DBG_LEXER, ("\\'%c'(%#.2x)", c&0xFF, c&0x1FF));
1917 if (c == PEOF) {
1918 VTRACE(DBG_LEXER, ("EOF, keep \\ "));
1919 USTPUTC('\\', out);
1920 pungetc();
1921 continue;
1922 }
1923 if (c == '\n') {
1924 plinno++;
1925 elided_nl++;
1926 VTRACE(DBG_LEXER, ("eli \\n (%d) @%d ",
1927 elided_nl, plinno));
1928 if (doprompt)
1929 setprompt(2);
1930 else
1931 setprompt(0);
1932 continue;
1933 }
1934 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1935 quotef = 1; /* current token is quoted */
1936 if (quoted && c != '\\' && c != '`' &&
1937 c != '$' && (c != '"' || magicq)) {
1938 /*
1939 * retain the \ (which we *know* needs CTLESC)
1940 * when in "..." and the following char is
1941 * not one of the magic few.)
1942 * Otherwise the \ has done its work, and
1943 * is dropped.
1944 */
1945 VTRACE(DBG_LEXER, ("ESC:'\\'"));
1946 USTPUTC(CTLESC, out);
1947 USTPUTC('\\', out);
1948 }
1949 CVTRACE(DBG_LEXER, NEEDESC(c) || !magicq,
1950 ("%sESC:", NEEDESC(c) ? "+" : "m"));
1951 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", c&0xFF, c&0x1FF));
1952 if (NEEDESC(c))
1953 USTPUTC(CTLESC, out);
1954 else if (!magicq) {
1955 USTPUTC(CTLESC, out);
1956 USTPUTC(c, out);
1957 continue;
1958 }
1959 USTPUTC(c, out);
1960 continue;
1961 case CSQUOTE:
1962 if (syntax != SQSYNTAX) {
1963 CVTRACE(DBG_LEXER, !magicq, (" CQM "));
1964 if (!magicq)
1965 USTPUTC(CTLQUOTEMARK, out);
1966 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1967 quotef = 1;
1968 TS_PUSH();
1969 syntax = SQSYNTAX;
1970 quoted = SQ;
1971 VTRACE(DBG_LEXER, (" TS_PUSH(SQ)"));
1972 continue;
1973 }
1974 if (magicq && arinest == 0 && varnest == 0) {
1975 /* Ignore inside quoted here document */
1976 VTRACE(DBG_LEXER, ("<<'>>"));
1977 USTPUTC(c, out);
1978 continue;
1979 }
1980 /* End of single quotes... */
1981 TS_POP();
1982 VTRACE(DBG_LEXER, ("SQ TS_POP->%s ", SYNTAX));
1983 CVTRACE(DBG_LEXER, syntax == BASESYNTAX, (" CQE "));
1984 if (syntax == BASESYNTAX)
1985 USTPUTC(CTLQUOTEEND, out);
1986 continue;
1987 case CDQUOTE:
1988 if (magicq && arinest == 0 /* && varnest == 0 */) {
1989 VTRACE(DBG_LEXER, ("<<\">>"));
1990 /* Ignore inside here document */
1991 USTPUTC(c, out);
1992 continue;
1993 }
1994 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1995 quotef = 1;
1996 if (arinest) {
1997 if (ISDBLQUOTE()) {
1998 VTRACE(DBG_LEXER,
1999 (" CQE ari(%d", arinest));
2000 USTPUTC(CTLQUOTEEND, out);
2001 TS_POP();
2002 VTRACE(DBG_LEXER, ("%d)TS_POP->%s ",
2003 arinest, SYNTAX));
2004 } else {
2005 VTRACE(DBG_LEXER,
2006 (" ari(%d) %s TS_PUSH->DQ CQM ",
2007 arinest, SYNTAX));
2008 TS_PUSH();
2009 syntax = DQSYNTAX;
2010 SETDBLQUOTE();
2011 USTPUTC(CTLQUOTEMARK, out);
2012 }
2013 continue;
2014 }
2015 CVTRACE(DBG_LEXER, magicq, (" MQignDQ "));
2016 if (magicq)
2017 continue;
2018 if (ISDBLQUOTE()) {
2019 TS_POP();
2020 VTRACE(DBG_LEXER,
2021 (" DQ TS_POP->%s CQE ", SYNTAX));
2022 USTPUTC(CTLQUOTEEND, out);
2023 } else {
2024 VTRACE(DBG_LEXER,
2025 (" %s TS_POP->DQ CQM ", SYNTAX));
2026 TS_PUSH();
2027 syntax = DQSYNTAX;
2028 SETDBLQUOTE();
2029 USTPUTC(CTLQUOTEMARK, out);
2030 }
2031 continue;
2032 case CVAR: /* '$' */
2033 VTRACE(DBG_LEXER, ("'$'..."));
2034 out = insert_elided_nl(out);
2035 PARSESUB(); /* parse substitution */
2036 continue;
2037 case CENDVAR: /* CLOSEBRACE */
2038 if (varnest > 0 && !ISDBLQUOTE()) {
2039 VTRACE(DBG_LEXER, ("vn=%d !DQ", varnest));
2040 TS_POP();
2041 VTRACE(DBG_LEXER, (" TS_POP->%s CEV ", SYNTAX));
2042 USTPUTC(CTLENDVAR, out);
2043 } else {
2044 VTRACE(DBG_LEXER, ("'%c'", c));
2045 USTPUTC(c, out);
2046 }
2047 out = insert_elided_nl(out);
2048 continue;
2049 case CLP: /* '(' in arithmetic */
2050 parenlevel++;
2051 VTRACE(DBG_LEXER, ("'('(%d)", parenlevel));
2052 USTPUTC(c, out);
2053 continue;;
2054 case CRP: /* ')' in arithmetic */
2055 if (parenlevel > 0) {
2056 USTPUTC(c, out);
2057 --parenlevel;
2058 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2059 } else {
2060 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2061 if (pgetc_linecont() == /*(*/ ')') {
2062 out = insert_elided_nl(out);
2063 if (--arinest == 0) {
2064 TS_POP();
2065 USTPUTC(CTLENDARI, out);
2066 } else
2067 USTPUTC(/*(*/ ')', out);
2068 } else {
2069 break; /* to synerror() just below */
2070 #if 0 /* the old way, causes weird errors on bad input */
2071 /*
2072 * unbalanced parens
2073 * (don't 2nd guess - no error)
2074 */
2075 pungetc();
2076 USTPUTC(/*(*/ ')', out);
2077 #endif
2078 }
2079 }
2080 continue;
2081 case CBQUOTE: /* '`' */
2082 VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n"));
2083 out = parsebackq(stack, out, &bqlist, 1);
2084 VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: "));
2085 continue;
2086 case CEOF: /* --> c == PEOF */
2087 VTRACE(DBG_LEXER, ("EOF "));
2088 break; /* will exit loop */
2089 default:
2090 VTRACE(DBG_LEXER, ("['%c'(%#.2x)]", c&0xFF, c&0x1FF));
2091 if (varnest == 0 && !ISDBLQUOTE())
2092 break; /* exit loop */
2093 USTPUTC(c, out);
2094 VTRACE(DBG_LEXER, (","));
2095 continue;
2096 }
2097 VTRACE(DBG_LEXER, (" END TOKEN\n", c&0xFF, c&0x1FF));
2098 break; /* break from switch -> break from for loop too */
2099 }
2100
2101 if (syntax == ARISYNTAX) {
2102 cleanup_state_stack(stack);
2103 synerror(/*((*/ "Missing '))'");
2104 }
2105 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
2106 cleanup_state_stack(stack);
2107 synerror("Unterminated quoted string");
2108 }
2109 if (varnest != 0) {
2110 cleanup_state_stack(stack);
2111 startlinno = plinno;
2112 /* { */
2113 synerror("Missing '}'");
2114 }
2115
2116 STPUTC('\0', out);
2117 len = out - stackblock();
2118 out = stackblock();
2119
2120 if (!magicq) {
2121 if ((c == '<' || c == '>')
2122 && quotef == 0 && (*out == '\0' || is_number(out))) {
2123 parseredir(out, c);
2124 cleanup_state_stack(stack);
2125 return lasttoken = TREDIR;
2126 } else {
2127 pungetc();
2128 }
2129 }
2130
2131 VTRACE(DBG_PARSE|DBG_LEXER,
2132 ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
2133 (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
2134 len, elided_nl));
2135
2136 quoteflag = quotef;
2137 backquotelist = bqlist;
2138 grabstackblock(len);
2139 wordtext = out;
2140 cleanup_state_stack(stack);
2141 return lasttoken = TWORD;
2142 /* end of readtoken routine */
2143
2144
2145 /*
2146 * Parse a substitution. At this point, we have read the dollar sign
2147 * and nothing else.
2148 */
2149
2150 parsesub: {
2151 int subtype;
2152 int typeloc;
2153 int flags;
2154 char *p;
2155 static const char types[] = "}-+?=";
2156
2157 c = pgetc_linecont();
2158 VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF));
2159 if (c == '(' /*)*/) { /* $(command) or $((arith)) */
2160 if (pgetc_linecont() == '(' /*')'*/ ) {
2161 VTRACE(DBG_LEXER, ("\"$((\" ARITH "));
2162 out = insert_elided_nl(out);
2163 PARSEARITH();
2164 } else {
2165 VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n"));
2166 out = insert_elided_nl(out);
2167 pungetc();
2168 out = parsebackq(stack, out, &bqlist, 0);
2169 VTRACE(DBG_LEXER, ("parseback()->readtoken1(): "));
2170 }
2171 } else if (c == OPENBRACE || is_name(c) || is_special(c)) {
2172 VTRACE(DBG_LEXER, (" $EXP:CTLVAR "));
2173 USTPUTC(CTLVAR, out);
2174 typeloc = out - stackblock();
2175 USTPUTC(VSNORMAL, out);
2176 subtype = VSNORMAL;
2177 flags = 0;
2178 if (c == OPENBRACE) {
2179 c = pgetc_linecont();
2180 if (c == '#') {
2181 if ((c = pgetc_linecont()) == CLOSEBRACE)
2182 c = '#';
2183 else if (is_name(c) || isdigit(c))
2184 subtype = VSLENGTH;
2185 else if (is_special(c)) {
2186 /*
2187 * ${#} is $# - the number of sh params
2188 * ${##} is the length of ${#}
2189 * ${###} is ${#} with as much nothing
2190 * as possible removed from start
2191 * ${##1} is ${#} with leading 1 gone
2192 * ${##\#} is ${#} with leading # gone
2193 *
2194 * this stuff is UGLY!
2195 */
2196 if (pgetc_linecont() == CLOSEBRACE) {
2197 pungetc();
2198 subtype = VSLENGTH;
2199 } else {
2200 static char cbuf[2];
2201
2202 pungetc(); /* would like 2 */
2203 cbuf[0] = c; /* so ... */
2204 cbuf[1] = '\0';
2205 pushstring(cbuf, 1, NULL);
2206 c = '#'; /* ${#:...} */
2207 subtype = 0; /* .. or similar */
2208 }
2209 } else {
2210 pungetc();
2211 c = '#';
2212 subtype = 0;
2213 }
2214 }
2215 else
2216 subtype = 0;
2217 VTRACE(DBG_LEXER, ("${ st=%d ", subtype));
2218 }
2219 if (is_name(c)) {
2220 p = out;
2221 do {
2222 VTRACE(DBG_LEXER, ("%c", c));
2223 STPUTC(c, out);
2224 c = pgetc_linecont();
2225 } while (is_in_name(c));
2226
2227 #if 0
2228 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
2229 int i;
2230 int linno;
2231 char buf[10];
2232
2233 /*
2234 * The "LINENO hack"
2235 *
2236 * Replace the variable name with the
2237 * current line number.
2238 */
2239 linno = plinno;
2240 if (funclinno != 0)
2241 linno -= funclinno - 1;
2242 snprintf(buf, sizeof(buf), "%d", linno);
2243 STADJUST(-6, out);
2244 for (i = 0; buf[i] != '\0'; i++)
2245 STPUTC(buf[i], out);
2246 flags |= VSLINENO;
2247 }
2248 #endif
2249 } else if (is_digit(c)) {
2250 do {
2251 VTRACE(DBG_LEXER, ("%c", c));
2252 STPUTC(c, out);
2253 c = pgetc_linecont();
2254 } while (subtype != VSNORMAL && is_digit(c));
2255 }
2256 else if (is_special(c)) {
2257 VTRACE(DBG_LEXER, ("\"$%c", c));
2258 USTPUTC(c, out);
2259 c = pgetc_linecont();
2260 }
2261 else {
2262 VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF));
2263 badsub:
2264 cleanup_state_stack(stack);
2265 synerror("Bad substitution");
2266 }
2267
2268 STPUTC('=', out);
2269 if (subtype == 0) {
2270 switch (c) {
2271 case ':':
2272 flags |= VSNUL;
2273 c = pgetc_linecont();
2274 /*FALLTHROUGH*/
2275 default:
2276 p = strchr(types, c);
2277 if (p == NULL)
2278 goto badsub;
2279 subtype = p - types + VSNORMAL;
2280 break;
2281 case '%':
2282 case '#':
2283 {
2284 int cc = c;
2285 subtype = c == '#' ? VSTRIMLEFT :
2286 VSTRIMRIGHT;
2287 c = pgetc_linecont();
2288 if (c == cc)
2289 subtype++;
2290 else
2291 pungetc();
2292 break;
2293 }
2294 }
2295 } else {
2296 if (subtype == VSLENGTH && c != /*{*/ '}')
2297 synerror("no modifiers allowed with ${#var}");
2298 pungetc();
2299 }
2300 if (quoted || arinest)
2301 flags |= VSQUOTE;
2302 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
2303 flags |= VSPATQ;
2304 VTRACE(DBG_LEXER, (" st%d:%x", subtype, flags));
2305 *(stackblock() + typeloc) = subtype | flags;
2306 if (subtype != VSNORMAL) {
2307 TS_PUSH();
2308 varnest++;
2309 arinest = 0;
2310 if (subtype > VSASSIGN) { /* # ## % %% */
2311 syntax = BASESYNTAX;
2312 quoted = 0;
2313 magicq = 0;
2314 }
2315 VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ",
2316 SYNTAX, varnest, quoted ? " Q" : ""));
2317 }
2318 } else if (c == '\'' && syntax == BASESYNTAX) {
2319 USTPUTC(CTLQUOTEMARK, out);
2320 VTRACE(DBG_LEXER, (" CSTR \"$'\" CQM "));
2321 CVTRACE(DBG_LEXER, quotef==0, ("QF=1 "));
2322 quotef = 1;
2323 TS_PUSH();
2324 syntax = SQSYNTAX;
2325 quoted = CQ;
2326 VTRACE(DBG_LEXER, ("%s->TS_PUSH()->SQ ", SYNTAX));
2327 } else {
2328 VTRACE(DBG_LEXER, ("$unk -> '$' (pushback '%c'%#.2x)",
2329 c & 0xFF, c & 0x1FF));
2330 USTPUTC('$', out);
2331 pungetc();
2332 }
2333 goto parsesub_return;
2334 }
2335
2336
2337 /*
2338 * Parse an arithmetic expansion (indicate start of one and set state)
2339 */
2340 parsearith: {
2341
2342 #if 0
2343 if (syntax == ARISYNTAX) {
2344 /*
2345 * we collapse embedded arithmetic expansion to
2346 * parentheses, which should be equivalent
2347 *
2348 * XXX It isn't, must fix, soonish...
2349 */
2350 USTPUTC('(' /*)*/, out);
2351 USTPUTC('(' /*)*/, out);
2352 /*
2353 * Need 2 of them because there will (should be)
2354 * two closing ))'s to follow later.
2355 */
2356 parenlevel += 2;
2357 } else
2358 #endif
2359 {
2360 VTRACE(DBG_LEXER, (" CTLARI%c ", ISDBLQUOTE()?'"':'_'));
2361 USTPUTC(CTLARI, out);
2362 if (ISDBLQUOTE())
2363 USTPUTC('"',out);
2364 else
2365 USTPUTC(' ',out);
2366
2367 VTRACE(DBG_LEXER, ("%s->TS_PUSH->ARI(1)", SYNTAX));
2368 TS_PUSH();
2369 syntax = ARISYNTAX;
2370 arinest = 1;
2371 varnest = 0;
2372 magicq = 1;
2373 }
2374 goto parsearith_return;
2375 }
2376
2377 } /* end of readtoken */
2378
2379
2380
2381
/*
 * NOTE(review): the section below is not C; it is presumably read by
 * the mkinit build tool rather than compiled ("mkinit" is not expected
 * to be defined when compiling - TODO confirm).  The RESET body puts
 * the parser state back to its initial condition: the current parser
 * pointer, the token pushback and keyword check flags, and the list
 * of pending here documents.
 */
#ifdef mkinit
INCLUDE "parser.h"

RESET {
	psp.v_current_parser = &parse_state;

	parse_state.ps_tokpushback = 0;
	parse_state.ps_checkkwd = 0;
	parse_state.ps_heredoclist = NULL;
}
#endif
2393
2394 /*
2395 * Returns true if the text contains nothing to expand (no dollar signs
2396 * or backquotes).
2397 */
2398
2399 STATIC int
2400 noexpand(char *text)
2401 {
2402 char *p;
2403 char c;
2404
2405 p = text;
2406 while ((c = *p++) != '\0') {
2407 if (c == CTLQUOTEMARK || c == CTLQUOTEEND)
2408 continue;
2409 if (c == CTLESC)
2410 p++;
2411 else if (BASESYNTAX[(int)c] == CCTL)
2412 return 0;
2413 }
2414 return 1;
2415 }
2416
2417
/*
 * Check whether "name" is acceptable as a variable name: the first
 * character must satisfy is_name() and every following character, up
 * to the terminating NUL, must satisfy is_in_name().
 * Returns 1 if so, 0 otherwise.
 */

int
goodname(const char *name)
{
	const char *cp = name;

	if (!is_name(*cp))
		return 0;

	for (cp++; *cp != '\0'; cp++)
		if (!is_in_name(*cp))
			return 0;

	return 1;
}
2437
/*
 * Test whether the string at "p" starts like a variable assignment:
 * a character accepted by is_name(), then any number of characters
 * accepted by is_in_name(), then an '='.
 *
 * Returns 1 if the '=' is reached, 0 if the string ends or a character
 * not allowed in a name is seen first.
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;

	for (;;) {
		p++;
		if (*p == '=')
			return 1;
		if (*p == '\0' || !is_in_name(*p))
			return 0;
	}
}
2448
2449 /*
2450 * skip past any \n's, and leave lasttoken set to whatever follows
2451 */
2452 STATIC void
2453 linebreak(void)
2454 {
2455 while (readtoken() == TNL)
2456 ;
2457 }
2458
2459 /*
2460 * The next token must be "token" -- check, then move past it
2461 */
2462 STATIC void
2463 consumetoken(int token)
2464 {
2465 if (readtoken() != token) {
2466 VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s",
2467 token, tokname[token], tokname[lasttoken]));
2468 CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext));
2469 VTRACE(DBG_PARSE, ("\n"));
2470 synexpect(token, NULL);
2471 }
2472 }
2473
2474 /*
2475 * Called when an unexpected token is read during the parse. The argument
2476 * is the token that is expected, or -1 if more than one type of token can
2477 * occur at this point.
2478 */
2479
2480 STATIC void
2481 synexpect(int token, const char *text)
2482 {
2483 char msg[64];
2484 char *p;
2485
2486 if (lasttoken == TWORD) {
2487 size_t len = strlen(wordtext);
2488
2489 if (len <= 13)
2490 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2491 else
2492 fmtstr(msg, 34,
2493 "Word \"%.10s...\" unexpected", wordtext);
2494 } else
2495 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2496
2497 p = strchr(msg, '\0');
2498 if (text)
2499 fmtstr(p, 30, " (expecting \"%.10s\")", text);
2500 else if (token >= 0)
2501 fmtstr(p, 30, " (expecting %s)", tokname[token]);
2502
2503 synerror(msg);
2504 /* NOTREACHED */
2505 }
2506
2507
2508 STATIC void
2509 synerror(const char *msg)
2510 {
2511 error("%d: Syntax error: %s", startlinno, msg);
2512 /* NOTREACHED */
2513 }
2514
2515 STATIC void
2516 setprompt(int which)
2517 {
2518 whichprompt = which;
2519
2520 #ifndef SMALL
2521 if (!el)
2522 #endif
2523 out2str(getprompt(NULL));
2524 }
2525
2526 /*
2527 * handle getting the next character, while ignoring \ \n
2528 * (which is a little tricky as we only have one char of pushback
2529 * and we need that one elsewhere).
2530 */
2531 STATIC int
2532 pgetc_linecont(void)
2533 {
2534 int c;
2535
2536 while ((c = pgetc()) == '\\') {
2537 c = pgetc();
2538 if (c == '\n') {
2539 plinno++;
2540 elided_nl++;
2541 VTRACE(DBG_LEXER, ("\"\\n\"drop(el=%d@%d)",
2542 elided_nl, plinno));
2543 if (doprompt)
2544 setprompt(2);
2545 else
2546 setprompt(0);
2547 } else {
2548 pungetc();
2549 /* Allow the backslash to be pushed back. */
2550 pushstring("\\", 1, NULL);
2551 return (pgetc());
2552 }
2553 }
2554 return (c);
2555 }
2556
2557 /*
2558 * called by editline -- any expansions to the prompt
2559 * should be added here.
2560 */
2561 const char *
2562 getprompt(void *unused)
2563 {
2564 char *p;
2565 const char *cp;
2566 int wp;
2567
2568 if (!doprompt)
2569 return "";
2570
2571 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt));
2572
2573 switch (wp = whichprompt) {
2574 case 0:
2575 return "";
2576 case 1:
2577 p = ps1val();
2578 break;
2579 case 2:
2580 p = ps2val();
2581 break;
2582 default:
2583 return "<internal prompt error>";
2584 }
2585 if (p == NULL)
2586 return "";
2587
2588 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p));
2589
2590 cp = expandstr(p, plinno);
2591 whichprompt = wp; /* history depends on it not changing */
2592
2593 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp));
2594
2595 return cp;
2596 }
2597
2598 /*
2599 * Expand a string ... used for expanding prompts (PS1...)
2600 *
2601 * Never return NULL, always some string (return input string if invalid)
2602 *
2603 * The internal routine does the work, leaving the result on the
2604 * stack (or in a static string, or even the input string) and
2605 * handles parser recursion, and cleanup after an error while parsing.
2606 *
2607 * The visible interface copies the result off the stack (if it is there),
2608 * and handles stack management, leaving the stack in the exact same
2609 * state it was when expandstr() was called (so it can be used part way
2610 * through building a stack data structure - as in when PS2 is being
2611 * expanded half way through reading a "command line")
2612 *
2613 * on error, expandonstack() cleans up the parser state, but then
2614 * simply jumps out through expandstr() withut doing any stack cleanup,
2615 * which is OK, as the error handler must deal with that anyway.
2616 *
2617 * The split into two funcs is to avoid problems with setjmp/longjmp
2618 * and local variables which could otherwise be optimised into bizarre
2619 * behaviour.
2620 */
2621 static const char *
2622 expandonstack(char *ps, int cmdsub, int lineno)
2623 {
2624 union node n;
2625 struct jmploc jmploc;
2626 struct jmploc *const savehandler = handler;
2627 struct parsefile *const savetopfile = getcurrentfile();
2628 const int save_x = xflag;
2629 const int save_e_s = errors_suppressed;
2630 struct parse_state new_state = init_parse_state;
2631 struct parse_state *const saveparser = psp.v_current_parser;
2632 const char *result = NULL;
2633
2634 if (!setjmp(jmploc.loc)) {
2635 handler = &jmploc;
2636 errors_suppressed = 1;
2637
2638 psp.v_current_parser = &new_state;
2639 setinputstring(ps, 1, lineno);
2640
2641 readtoken1(pgetc(), DQSYNTAX, 1);
2642 if (backquotelist != NULL) {
2643 if (!cmdsub)
2644 result = ps;
2645 else if (!promptcmds)
2646 result = "-o promptcmds not set: ";
2647 }
2648 if (result == NULL) {
2649 n.narg.type = NARG;
2650 n.narg.next = NULL;
2651 n.narg.text = wordtext;
2652 n.narg.lineno = lineno;
2653 n.narg.backquote = backquotelist;
2654
2655 xflag = 0; /* we might be expanding PS4 ... */
2656 expandarg(&n, NULL, 0);
2657 result = stackblock();
2658 }
2659 } else {
2660 psp.v_current_parser = saveparser;
2661 xflag = save_x;
2662 popfilesupto(savetopfile);
2663 handler = savehandler;
2664 errors_suppressed = save_e_s;
2665
2666 if (exception == EXEXIT)
2667 longjmp(handler->loc, 1);
2668 if (exception == EXINT)
2669 exraise(SIGINT);
2670 return "";
2671 }
2672 psp.v_current_parser = saveparser;
2673 xflag = save_x;
2674 popfilesupto(savetopfile);
2675 handler = savehandler;
2676 errors_suppressed = save_e_s;
2677
2678 if (result == NULL)
2679 result = ps;
2680
2681 return result;
2682 }
2683
2684 const char *
2685 expandstr(char *ps, int lineno)
2686 {
2687 const char *result = NULL;
2688 struct stackmark smark;
2689 static char *buffer = NULL; /* storage for prompt, never freed */
2690 static size_t bufferlen = 0;
2691
2692 setstackmark(&smark);
2693 /*
2694 * At this point we anticipate that there may be a string
2695 * growing on the stack, but we have no idea how big it is.
2696 * However we know that it cannot be bigger than the current
2697 * allocated stack block, so simply reserve the whole thing,
2698 * then we can use the stack without barfing all over what
2699 * is there already... (the stack mark undoes this later.)
2700 */
2701 (void) stalloc(stackblocksize());
2702
2703 result = expandonstack(ps, 1, lineno);
2704
2705 if (__predict_true(result == stackblock())) {
2706 size_t len = strlen(result) + 1;
2707
2708 /*
2709 * the result (usual case) is on the stack, which we
2710 * are just about to discard (popstackmark()) so we
2711 * need to move it somewhere safe first.
2712 */
2713
2714 if (__predict_false(len > bufferlen)) {
2715 char *new;
2716 size_t newlen = bufferlen;
2717
2718 if (__predict_false(len > (SIZE_MAX >> 4))) {
2719 result = "huge prompt: ";
2720 goto getout;
2721 }
2722
2723 if (newlen == 0)
2724 newlen = 32;
2725 while (newlen <= len)
2726 newlen <<= 1;
2727
2728 new = (char *)realloc(buffer, newlen);
2729
2730 if (__predict_false(new == NULL)) {
2731 /*
2732 * this should rarely (if ever) happen
2733 * but we must do something when it does...
2734 */
2735 result = "No mem for prompt: ";
2736 goto getout;
2737 } else {
2738 buffer = new;
2739 bufferlen = newlen;
2740 }
2741 }
2742 (void)memcpy(buffer, result, len);
2743 result = buffer;
2744 }
2745
2746 getout:;
2747 popstackmark(&smark);
2748
2749 return result;
2750 }
2751
/*
 * A simpler variant of expandstr(), which does no $( ) expansions
 * (expandonstack() is called with cmdsub == 0, and line number 0).
 *
 * For use during shell startup when we know we are not parsing,
 * and so the stack is not in use - we can do what we like,
 * and do not need to clean up (that's handled externally).
 *
 * The result is simply returned, even if it is on the stack.
 */
const char *
expandenv(char *arg)
{
	const char *expanded = expandonstack(arg, 0, 0);

	return expanded;
}
2765