/*	$NetBSD: parser.c,v 1.175 2021/11/16 11:25:44 kre Exp $	*/
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.175 2021/11/16 11:25:44 kre Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <limits.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48
49 #include "shell.h"
50 #include "parser.h"
51 #include "nodes.h"
52 #include "expand.h" /* defines rmescapes() */
53 #include "eval.h" /* defines commandname */
54 #include "syntax.h"
55 #include "options.h"
56 #include "input.h"
57 #include "output.h"
58 #include "redir.h" /* defines max_user_fd */
59 #include "var.h"
60 #include "error.h"
61 #include "memalloc.h"
62 #include "mystring.h"
63 #include "alias.h"
64 #include "show.h"
65 #ifndef SMALL
66 #include "myhistedit.h"
67 #endif
68 #ifdef DEBUG
69 #include "nodenames.h"
70 #endif
71
72 /*
73 * Shell command parser.
74 */
75
76 /* values returned by readtoken */
77 #include "token.h"
78
/* Named constants for the two brace characters. */
#define OPENBRACE '{'
#define CLOSEBRACE '}'
81
/*
 * One pending here document (<<).  parsefname() appends entries to
 * heredoclist through `next'; readheredocs() later removes them, in
 * order, and reads the text of each document.
 */
struct HereDoc {
	struct HereDoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
	int startline;		/* line number where << seen */
};
89
/*
 * The parser keeps its state in a struct parse_state.  psp starts out
 * referring to the parse_state instance defined here.
 */
MKINIT struct parse_state parse_state;
union parse_state_p psp = { .c_current_parser = &parse_state };

/*
 * A constant parser state with every listed field explicitly cleared.
 */
static const struct parse_state init_parse_state = {	/* all 0's ... */
	.ps_heredoclist = NULL,
	.ps_parsebackquote = 0,
	.ps_doprompt = 0,
	.ps_needprompt = 0,
	.ps_lasttoken = 0,
	.ps_tokpushback = 0,
	.ps_wordtext = NULL,
	.ps_checkkwd = 0,
	.ps_redirnode = NULL,
	.ps_heredoc = NULL,
	.ps_quoteflag = 0,
	.ps_startlinno = 0,
	.ps_funclinno = 0,
	.ps_elided_nl = 0,
};
109
/* Grammar routines, from the outermost construct inwards. */
STATIC union node *list(int);
STATIC union node *andor(void);
STATIC union node *pipeline(void);
STATIC union node *command(void);
STATIC union node *simplecmd(union node **, union node *);
STATIC union node *makeword(int);
/* Redirection targets and here documents. */
STATIC void parsefname(void);
STATIC int slurp_heredoc(char *const, const int, const int);
STATIC void readheredocs(void);
/* Token level. */
STATIC int peektoken(void);
STATIC int readtoken(void);
STATIC int xxreadtoken(void);
STATIC int readtoken1(int, char const *, int);
STATIC int noexpand(char *);
STATIC void linebreak(void);
STATIC void consumetoken(int);
/* Error reporting (these do not return) and prompting. */
STATIC void synexpect(int, const char *) __dead;
STATIC void synerror(const char *) __dead;
STATIC void setprompt(int);
STATIC int pgetc_linecont(void);

/* Message used when input ends in the middle of a here document. */
static const char EOFhere[] = "EOF reading here (<<) document";

#ifdef DEBUG
/* Non-zero while parsecmd() is reading a token or parsing a list. */
int parsing = 0;
#endif
136
137 /*
138 * Read and parse a command. Returns NEOF on end of file. (NULL is a
139 * valid parse tree indicating a blank line.)
140 */
141
142 union node *
143 parsecmd(int interact)
144 {
145 int t;
146 union node *n;
147
148 #ifdef DEBUG
149 parsing++;
150 #endif
151 tokpushback = 0;
152 checkkwd = 0;
153 doprompt = interact;
154 if (doprompt)
155 setprompt(1);
156 else
157 setprompt(0);
158 needprompt = 0;
159 t = readtoken();
160 #ifdef DEBUG
161 parsing--;
162 #endif
163 if (t == TEOF)
164 return NEOF;
165 if (t == TNL)
166 return NULL;
167
168 #ifdef DEBUG
169 parsing++;
170 #endif
171 tokpushback++;
172 n = list(1);
173 #ifdef DEBUG
174 parsing--;
175 #endif
176 if (heredoclist)
177 error("%d: Here document (<<%s) expected but not present",
178 heredoclist->startline, heredoclist->eofmark);
179 return n;
180 }
181
182
183 STATIC union node *
184 list(int nlflag)
185 {
186 union node *ntop, *n1, *n2, *n3;
187 int tok;
188
189 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno));
190
191 checkkwd = CHKNL | CHKKWD | CHKALIAS;
192 if (nlflag == 0 && tokendlist[peektoken()])
193 return NULL;
194 ntop = n1 = NULL;
195 for (;;) {
196 n2 = andor();
197 tok = readtoken();
198 if (tok == TBACKGND) {
199 if (n2->type == NCMD || n2->type == NPIPE)
200 n2->ncmd.backgnd = 1;
201 else if (n2->type == NREDIR)
202 n2->type = NBACKGND;
203 else {
204 n3 = stalloc(sizeof(struct nredir));
205 n3->type = NBACKGND;
206 n3->nredir.n = n2;
207 n3->nredir.redirect = NULL;
208 n2 = n3;
209 }
210 }
211
212 if (ntop == NULL)
213 ntop = n2;
214 else if (n1 == NULL) {
215 n1 = stalloc(sizeof(struct nbinary));
216 n1->type = NSEMI;
217 n1->nbinary.ch1 = ntop;
218 n1->nbinary.ch2 = n2;
219 ntop = n1;
220 } else {
221 n3 = stalloc(sizeof(struct nbinary));
222 n3->type = NSEMI;
223 n3->nbinary.ch1 = n1->nbinary.ch2;
224 n3->nbinary.ch2 = n2;
225 n1->nbinary.ch2 = n3;
226 n1 = n3;
227 }
228
229 switch (tok) {
230 case TBACKGND:
231 case TSEMI:
232 tok = readtoken();
233 /* FALLTHROUGH */
234 case TNL:
235 if (tok == TNL) {
236 readheredocs();
237 if (nlflag)
238 return ntop;
239 } else if (tok == TEOF && nlflag)
240 return ntop;
241 else
242 tokpushback++;
243
244 checkkwd = CHKNL | CHKKWD | CHKALIAS;
245 if (!nlflag && tokendlist[peektoken()])
246 return ntop;
247 break;
248 case TEOF:
249 pungetc(); /* push back EOF on input */
250 return ntop;
251 default:
252 if (nlflag)
253 synexpect(-1, 0);
254 tokpushback++;
255 return ntop;
256 }
257 }
258 }
259
260 STATIC union node *
261 andor(void)
262 {
263 union node *n1, *n2, *n3;
264 int t;
265
266 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
267
268 n1 = pipeline();
269 for (;;) {
270 if ((t = readtoken()) == TAND) {
271 t = NAND;
272 } else if (t == TOR) {
273 t = NOR;
274 } else {
275 tokpushback++;
276 return n1;
277 }
278 n2 = pipeline();
279 n3 = stalloc(sizeof(struct nbinary));
280 n3->type = t;
281 n3->nbinary.ch1 = n1;
282 n3->nbinary.ch2 = n2;
283 n1 = n3;
284 }
285 }
286
287 STATIC union node *
288 pipeline(void)
289 {
290 union node *n1, *n2, *pipenode;
291 struct nodelist *lp, *prev;
292 int negate;
293
294 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
295
296 negate = 0;
297 checkkwd = CHKNL | CHKKWD | CHKALIAS;
298 while (readtoken() == TNOT) {
299 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
300 #ifndef BOGUS_NOT_COMMAND
301 if (posix && negate)
302 synerror("2nd \"!\" unexpected");
303 #endif
304 negate++;
305 }
306 tokpushback++;
307 n1 = command();
308 if (readtoken() == TPIPE) {
309 pipenode = stalloc(sizeof(struct npipe));
310 pipenode->type = NPIPE;
311 pipenode->npipe.backgnd = 0;
312 lp = stalloc(sizeof(struct nodelist));
313 pipenode->npipe.cmdlist = lp;
314 lp->n = n1;
315 do {
316 prev = lp;
317 lp = stalloc(sizeof(struct nodelist));
318 lp->n = command();
319 prev->next = lp;
320 } while (readtoken() == TPIPE);
321 lp->next = NULL;
322 n1 = pipenode;
323 }
324 tokpushback++;
325 if (negate) {
326 CTRACE(DBG_PARSE, ("%snegate pipeline\n",
327 (negate&1) ? "" : "double "));
328 n2 = stalloc(sizeof(struct nnot));
329 n2->type = (negate & 1) ? NNOT : NDNOT;
330 n2->nnot.com = n1;
331 return n2;
332 } else
333 return n1;
334 }
335
336
337
338 STATIC union node *
339 command(void)
340 {
341 union node *n1, *n2;
342 union node *ap, **app;
343 union node *cp, **cpp;
344 union node *redir, **rpp;
345 int t;
346 #ifdef BOGUS_NOT_COMMAND
347 int negate = 0;
348 #endif
349
350 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
351
352 checkkwd = CHKNL | CHKKWD | CHKALIAS;
353 redir = NULL;
354 n1 = NULL;
355 rpp = &redir;
356
357 /* Check for redirection which may precede command */
358 while (readtoken() == TREDIR) {
359 *rpp = n2 = redirnode;
360 rpp = &n2->nfile.next;
361 parsefname();
362 }
363 tokpushback++;
364
365 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */
366 while (readtoken() == TNOT) {
367 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
368 negate++;
369 }
370 tokpushback++;
371 #endif
372
373 switch (readtoken()) {
374 case TIF:
375 n1 = stalloc(sizeof(struct nif));
376 n1->type = NIF;
377 n1->nif.test = list(0);
378 consumetoken(TTHEN);
379 n1->nif.ifpart = list(0);
380 n2 = n1;
381 while (readtoken() == TELIF) {
382 n2->nif.elsepart = stalloc(sizeof(struct nif));
383 n2 = n2->nif.elsepart;
384 n2->type = NIF;
385 n2->nif.test = list(0);
386 consumetoken(TTHEN);
387 n2->nif.ifpart = list(0);
388 }
389 if (lasttoken == TELSE)
390 n2->nif.elsepart = list(0);
391 else {
392 n2->nif.elsepart = NULL;
393 tokpushback++;
394 }
395 consumetoken(TFI);
396 checkkwd = CHKKWD | CHKALIAS;
397 break;
398 case TWHILE:
399 case TUNTIL:
400 n1 = stalloc(sizeof(struct nbinary));
401 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
402 n1->nbinary.ch1 = list(0);
403 consumetoken(TDO);
404 n1->nbinary.ch2 = list(0);
405 consumetoken(TDONE);
406 checkkwd = CHKKWD | CHKALIAS;
407 break;
408 case TFOR:
409 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
410 synerror("Bad for loop variable");
411 n1 = stalloc(sizeof(struct nfor));
412 n1->type = NFOR;
413 n1->nfor.var = wordtext;
414 n1->nfor.lineno = startlinno;
415 linebreak();
416 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) {
417 app = ≈
418 while (readtoken() == TWORD) {
419 n2 = makeword(startlinno);
420 *app = n2;
421 app = &n2->narg.next;
422 }
423 *app = NULL;
424 n1->nfor.args = ap;
425 if (lasttoken != TNL && lasttoken != TSEMI)
426 synexpect(TSEMI, 0);
427 if (lasttoken == TNL)
428 readheredocs();
429 } else {
430 static char argvars[5] = {
431 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
432 };
433
434 n2 = stalloc(sizeof(struct narg));
435 n2->type = NARG;
436 n2->narg.text = argvars;
437 n2->narg.backquote = NULL;
438 n2->narg.next = NULL;
439 n2->narg.lineno = startlinno;
440 n1->nfor.args = n2;
441 /*
442 * Newline or semicolon here is optional (but note
443 * that the original Bourne shell only allowed NL).
444 */
445 if (lasttoken != TNL && lasttoken != TSEMI)
446 tokpushback++;
447 }
448 checkkwd = CHKNL | CHKKWD | CHKALIAS;
449 if ((t = readtoken()) == TDO)
450 t = TDONE;
451 else if (t == TBEGIN)
452 t = TEND;
453 else
454 synexpect(TDO, 0);
455 n1->nfor.body = list(0);
456 consumetoken(t);
457 checkkwd = CHKKWD | CHKALIAS;
458 break;
459 case TCASE:
460 n1 = stalloc(sizeof(struct ncase));
461 n1->type = NCASE;
462 n1->ncase.lineno = startlinno - elided_nl;
463 consumetoken(TWORD);
464 n1->ncase.expr = makeword(startlinno);
465 linebreak();
466 if (lasttoken != TWORD || !equal(wordtext, "in"))
467 synexpect(-1, "in");
468 cpp = &n1->ncase.cases;
469 checkkwd = CHKNL | CHKKWD;
470 readtoken();
471 /*
472 * Both ksh and bash accept 'case x in esac'
473 * so configure scripts started taking advantage of this.
474 * The page: http://pubs.opengroup.org/onlinepubs/\
475 * 009695399/utilities/xcu_chap02.html contradicts itself,
476 * as to if this is legal; the "Case Conditional Format"
477 * paragraph shows one case is required, but the "Grammar"
478 * section shows a grammar that explicitly allows the no
479 * case option.
480 *
481 * The standard also says (section 2.10):
482 * This formal syntax shall take precedence over the
483 * preceding text syntax description.
484 * ie: the "Grammar" section wins. The text is just
485 * a rough guide (introduction to the common case.)
486 */
487 while (lasttoken != TESAC) {
488 *cpp = cp = stalloc(sizeof(struct nclist));
489 cp->type = NCLIST;
490 app = &cp->nclist.pattern;
491 if (lasttoken == TLP)
492 readtoken();
493 for (;;) {
494 if (lasttoken < TWORD)
495 synexpect(TWORD, 0);
496 *app = ap = makeword(startlinno);
497 checkkwd = CHKNL | CHKKWD;
498 if (readtoken() != TPIPE)
499 break;
500 app = &ap->narg.next;
501 readtoken();
502 }
503 if (lasttoken != TRP)
504 synexpect(TRP, 0);
505 cp->nclist.lineno = startlinno;
506 cp->nclist.body = list(0);
507
508 checkkwd = CHKNL | CHKKWD | CHKALIAS;
509 if ((t = readtoken()) != TESAC) {
510 if (t != TENDCASE && t != TCASEFALL) {
511 synexpect(TENDCASE, 0);
512 } else {
513 if (t == TCASEFALL)
514 cp->type = NCLISTCONT;
515 checkkwd = CHKNL | CHKKWD;
516 readtoken();
517 }
518 }
519 cpp = &cp->nclist.next;
520 }
521 *cpp = NULL;
522 checkkwd = CHKKWD | CHKALIAS;
523 break;
524 case TLP:
525 n1 = stalloc(sizeof(struct nredir));
526 n1->type = NSUBSHELL;
527 n1->nredir.n = list(0);
528 n1->nredir.redirect = NULL;
529 if (n1->nredir.n == NULL)
530 synexpect(-1, 0);
531 consumetoken(TRP);
532 checkkwd = CHKKWD | CHKALIAS;
533 break;
534 case TBEGIN:
535 n1 = list(0);
536 if (posix && n1 == NULL)
537 synexpect(-1, 0);
538 consumetoken(TEND);
539 checkkwd = CHKKWD | CHKALIAS;
540 break;
541
542 case TBACKGND:
543 case TSEMI:
544 case TAND:
545 case TOR:
546 case TPIPE:
547 case TNL:
548 case TEOF:
549 case TRP:
550 case TENDCASE:
551 case TCASEFALL:
552 /*
553 * simple commands must have something in them,
554 * either a word (which at this point includes a=b)
555 * or a redirection. If we reached the end of the
556 * command (which one of these tokens indicates)
557 * when we are just starting, and have not had a
558 * redirect, then ...
559 *
560 * nb: it is still possible to end up with empty
561 * simple commands, if the "command" is a var
562 * expansion that produces nothing:
563 * X= ; $X && $X
564 * --> &&
565 * That is OK and is handled after word expansions.
566 */
567 if (!redir)
568 synexpect(-1, 0);
569 /*
570 * continue to build a node containing the redirect.
571 * the tokpushback means that our ending token will be
572 * read again in simplecmd, causing it to terminate,
573 * so only the redirect(s) will be contained in the
574 * returned n1
575 */
576 /* FALLTHROUGH */
577 case TWORD:
578 tokpushback++;
579 n1 = simplecmd(rpp, redir);
580 goto checkneg;
581 default:
582 synexpect(-1, 0);
583 /* NOTREACHED */
584 }
585
586 /* Now check for redirection which may follow command */
587 while (readtoken() == TREDIR) {
588 *rpp = n2 = redirnode;
589 rpp = &n2->nfile.next;
590 parsefname();
591 }
592 tokpushback++;
593 *rpp = NULL;
594 if (redir) {
595 if (n1 == NULL || n1->type != NSUBSHELL) {
596 n2 = stalloc(sizeof(struct nredir));
597 n2->type = NREDIR;
598 n2->nredir.n = n1;
599 n1 = n2;
600 }
601 n1->nredir.redirect = redir;
602 }
603
604 checkneg:
605 #ifdef BOGUS_NOT_COMMAND
606 if (negate) {
607 VTRACE(DBG_PARSE, ("bogus %snegate command\n",
608 (negate&1) ? "" : "double "));
609 n2 = stalloc(sizeof(struct nnot));
610 n2->type = (negate & 1) ? NNOT : NDNOT;
611 n2->nnot.com = n1;
612 return n2;
613 }
614 else
615 #endif
616 return n1;
617 }
618
619
/*
 * Parse a simple command (words and redirections), or a function
 * definition "name ( ) command".
 *
 * rpp, redir:	redirections command() has already collected; redir is
 *		the head of that list and rpp the place to attach the
 *		next one.  When redir is NULL, rpp is ignored and a fresh
 *		list is started here.
 *
 * Returns an NCMD node, or the name's node retyped as NDEFUN (with the
 * function body hung off narg.next) for a function definition.
 * A command with neither words nor redirections is a syntax error.
 */
STATIC union node *
simplecmd(union node **rpp, union node *redir)
{
	union node *args, **app;
	union node *n = NULL;
	int line = 0;
	int savecheckkwd;
#ifdef BOGUS_NOT_COMMAND
	union node *n2;
	int negate = 0;
#endif

	CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
	    redir ? "" : "out", plinno));

	/* If we don't have any redirections already, then we must reset */
	/* rpp to be the address of the local redir variable.  */
	if (redir == 0)
		rpp = &redir;

	args = NULL;
	app = &args;

#ifdef BOGUS_NOT_COMMAND	/* pipelines get negated, commands do not */
	while (readtoken() == TNOT) {
		VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
		negate++;
	}
	tokpushback++;
#endif

	/*
	 * Alias checking stays enabled only until the first word which
	 * is not an assignment has been seen.
	 */
	savecheckkwd = CHKALIAS;
	for (;;) {
		checkkwd = savecheckkwd;
		if (readtoken() == TWORD) {
			if (line == 0)
				line = startlinno;
			n = makeword(startlinno);
			*app = n;
			app = &n->narg.next;
			if (savecheckkwd != 0 && !isassignment(wordtext))
				savecheckkwd = 0;
		} else if (lasttoken == TREDIR) {
			if (line == 0)
				line = startlinno;
			*rpp = n = redirnode;
			rpp = &n->nfile.next;
			parsefname();	/* read name of redirection file */
		} else if (lasttoken == TLP && app == &args->narg.next
					    && redir == 0) {
			/*
			 * A '(' after exactly one word, with no
			 * redirections seen: n is that word's node.
			 */
			/* We have a function */
			consumetoken(TRP);
			funclinno = plinno;
			/*
			 * Make sure there are no unquoted $'s in the
			 * name (allowing those, not expanding them,
			 * simply treating '$' as a character, is desirable
			 * but the parser has converted them to CTLxxx
			 * chars, and that's not what we want
			 *
			 * Fortunately here the user can simply quote
			 * the name to avoid this restriction.
			 */
			if (!noexpand(n->narg.text))
				synerror("Bad function name (use quotes)");
			rmescapes(n->narg.text);
			if (strchr(n->narg.text, '/'))
				synerror("Bad function name");
			VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n",
			    n->narg.text, plinno));
			n->type = NDEFUN;
			n->narg.lineno = plinno - elided_nl;
			n->narg.next = command();
			funclinno = 0;
			goto checkneg;
		} else {
			/* anything else ends the simple command */
			tokpushback++;
			break;
		}
	}

	if (args == NULL && redir == NULL)
		synexpect(-1, 0);
	*app = NULL;
	*rpp = NULL;
	n = stalloc(sizeof(struct ncmd));
	n->type = NCMD;
	n->ncmd.lineno = line - elided_nl;
	n->ncmd.backgnd = 0;
	n->ncmd.args = args;
	n->ncmd.redirect = redir;
	/*
	 * NOTE(review): this second store overwrites the value assigned
	 * to ncmd.lineno a few lines above, making `line' effectively
	 * unused.  One of the two assignments is presumably stale;
	 * confirm which line number is intended.
	 */
	n->ncmd.lineno = startlinno;

 checkneg:
#ifdef BOGUS_NOT_COMMAND
	if (negate) {
		VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
		    (negate&1) ? "" : "double "));
		n2 = stalloc(sizeof(struct nnot));
		n2->type = (negate & 1) ? NNOT : NDNOT;
		n2->nnot.com = n;
		return n2;
	}
	else
#endif
		return n;
}
727
728 STATIC union node *
729 makeword(int lno)
730 {
731 union node *n;
732
733 n = stalloc(sizeof(struct narg));
734 n->type = NARG;
735 n->narg.next = NULL;
736 n->narg.text = wordtext;
737 n->narg.backquote = backquotelist;
738 n->narg.lineno = lno;
739 return n;
740 }
741
742 void
743 fixredir(union node *n, const char *text, int err)
744 {
745
746 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
747 if (!err)
748 n->ndup.vname = NULL;
749
750 if (is_number(text)) {
751 n->ndup.dupfd = number(text);
752 if (n->ndup.dupfd < user_fd_limit &&
753 n->ndup.dupfd > max_user_fd)
754 max_user_fd = n->ndup.dupfd;
755 } else if (text[0] == '-' && text[1] == '\0')
756 n->ndup.dupfd = -1;
757 else {
758
759 if (err)
760 synerror("Bad fd number");
761 else
762 n->ndup.vname = makeword(startlinno - elided_nl);
763 }
764 }
765
766
767 STATIC void
768 parsefname(void)
769 {
770 union node *n = redirnode;
771
772 if (readtoken() != TWORD)
773 synexpect(-1, 0);
774 if (n->type == NHERE) {
775 struct HereDoc *here = heredoc;
776 struct HereDoc *p;
777
778 if (quoteflag == 0)
779 n->type = NXHERE;
780 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
781 if (here->striptabs) {
782 while (*wordtext == '\t')
783 wordtext++;
784 }
785
786 /*
787 * this test is not really necessary, we are not
788 * required to expand wordtext, but there's no reason
789 * it cannot be $$ or something like that - that would
790 * not mean the pid, but literally two '$' characters.
791 * There is no need for limits on what the word can be.
792 * However, it needs to stay literal as entered, not
793 * have $ converted to CTLVAR or something, which as
794 * the parser is, at the minute, is impossible to prevent.
795 * So, leave it like this until the rest of the parser is fixed.
796 */
797 if (!noexpand(wordtext))
798 synerror("Illegal eof marker for << redirection");
799
800 rmescapes(wordtext);
801 here->eofmark = wordtext;
802 here->next = NULL;
803 if (heredoclist == NULL)
804 heredoclist = here;
805 else {
806 for (p = heredoclist ; p->next ; p = p->next)
807 continue;
808 p->next = here;
809 }
810 } else if (n->type == NTOFD || n->type == NFROMFD) {
811 fixredir(n, wordtext, 0);
812 } else {
813 n->nfile.fname = makeword(startlinno - elided_nl);
814 }
815 }
816
817 /*
818 * Check to see whether we are at the end of the here document. When this
819 * is called, c is set to the first character of the next input line. If
820 * we are at the end of the here document, this routine sets the c to PEOF.
821 * The new value of c is returned.
822 */
823
824 static int
825 checkend(int c, char * const eofmark, const int striptabs)
826 {
827
828 if (striptabs) {
829 while (c == '\t')
830 c = pgetc();
831 }
832 if (c == PEOF) {
833 if (*eofmark == '\0')
834 return (c);
835 synerror(EOFhere);
836 }
837 if (c == *eofmark) {
838 int c2;
839 char *q;
840
841 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
842 if (c2 == '\n') {
843 plinno++;
844 needprompt = doprompt;
845 }
846 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
847 c = PEOF;
848 if (c2 == '\n') {
849 plinno++;
850 needprompt = doprompt;
851 }
852 } else {
853 pungetc();
854 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
855 }
856 } else if (c == '\n' && *eofmark == '\0') {
857 c = PEOF;
858 plinno++;
859 needprompt = doprompt;
860 }
861 return (c);
862 }
863
864
/*
 * Read the text of one here document from the input, up to (and not
 * including) the line which checkend() recognises as the end marker.
 *
 * eofmark:	the end marker
 * striptabs:	passed to checkend() for each line
 * sq:		non-zero if the text is to be saved literally (the end
 *		marker was quoted); otherwise a backslash followed by a
 *		newline is kept together so the following line is not
 *		tested against the end marker.
 *
 * On return wordtext is the (nul terminated) text, allocated from the
 * stack.  Returns the number of lines read (the change in plinno).
 * Reaching EOF before the end marker is a syntax error, unless the end
 * marker is the empty string and EOF is the first thing read.
 */

STATIC int
slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
{
	int c;
	char *out;
	int lines = plinno;

	c = pgetc();

	/*
	 * If we hit EOF on the input, and the eofmark is a null string ('')
	 * we consider this empty line to be the eofmark, and exit without err.
	 */
	if (c == PEOF && *eofmark != '\0')
		synerror(EOFhere);

	STARTSTACKSTR(out);

	/* one iteration of the outer loop for each line of the document */
	while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
		do {
			if (sq) {
				/*
				 * in single quoted mode (eofmark quoted)
				 * all we look for is \n so we can check
				 * for the eofmark - everything saved literally.
				 */
				STPUTC(c, out);
				if (c == '\n') {
					plinno++;
					break;
				}
				continue;
			}
			/*
			 * In double quoted (non-quoted eofmark)
			 * we must handle \ followed by \n here
			 * otherwise we can mismatch the end mark.
			 * All other uses of \ will be handled later
			 * when the here doc is expanded.
			 *
			 * This also makes sure \\ followed by \n does
			 * not suppress the newline (the \ quotes itself)
			 */
			if (c == '\\') {		/* A backslash */
				STPUTC(c, out);
				c = pgetc();		/* followed by */
				if (c == '\n') {	/* a newline?  */
					STPUTC(c, out);
					plinno++;
					continue;	/* don't break */
				}
			}
			STPUTC(c, out);			/* keep the char */
			if (c == '\n') {		/* at end of line */
				plinno++;
				break;			/* look for eofmark */
			}
		} while ((c = pgetc()) != PEOF);

		/*
		 * If we have read a line, and reached EOF, without
		 * finding the eofmark, whether the EOF comes before
		 * or immediately after the \n, that is an error.
		 */
		if (c == PEOF || (c = pgetc()) == PEOF)
			synerror(EOFhere);
	}
	STPUTC('\0', out);

	/* c is reused as the length of what was collected (incl. the nul) */
	c = out - stackblock();
	out = stackblock();
	grabstackblock(c);
	wordtext = out;

	VTRACE(DBG_PARSE,
	   ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
		plinno - lines, sq ? "quoted " : "", eofmark,
		striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
		wordtext, (c > 16 ? "..." : ""), plinno));

	return (plinno - lines);
}
951
952 static char *
953 insert_elided_nl(char *str)
954 {
955 while (elided_nl > 0) {
956 STPUTC(CTLNONL, str);
957 elided_nl--;
958 }
959 return str;
960 }
961
962 STATIC void
963 readheredocs(void)
964 {
965 struct HereDoc *here;
966 union node *n;
967 int line, l;
968
969 line = 0; /*XXX - gcc! obviously unneeded */
970 if (heredoclist)
971 line = heredoclist->startline + 1;
972 l = 0;
973 while (heredoclist) {
974 line += l;
975 here = heredoclist;
976 heredoclist = here->next;
977 if (needprompt) {
978 setprompt(2);
979 needprompt = 0;
980 }
981
982 l = slurp_heredoc(here->eofmark, here->striptabs,
983 here->here->nhere.type == NHERE);
984
985 here->here->nhere.doc = n = makeword(line);
986
987 if (here->here->nhere.type == NHERE)
988 continue;
989
990 /*
991 * Now "parse" here docs that have unquoted eofmarkers.
992 */
993 setinputstring(wordtext, 1, line);
994 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
995 l, line));
996 readtoken1(pgetc(), DQSYNTAX, 1);
997 n->narg.text = wordtext;
998 n->narg.backquote = backquotelist;
999 popfile();
1000 }
1001 }
1002
1003 STATIC int
1004 peektoken(void)
1005 {
1006 int t;
1007
1008 t = readtoken();
1009 tokpushback++;
1010 return (t);
1011 }
1012
1013 STATIC int
1014 readtoken(void)
1015 {
1016 int t;
1017 #ifdef DEBUG
1018 int alreadyseen = tokpushback;
1019 int savecheckkwd = checkkwd;
1020 #endif
1021 struct alias *ap;
1022
1023 top:
1024 t = xxreadtoken();
1025
1026 if (checkkwd & CHKNL) {
1027 while (t == TNL) {
1028 readheredocs();
1029 t = xxreadtoken();
1030 }
1031 }
1032
1033 /*
1034 * check for keywords and aliases
1035 */
1036 if (t == TWORD && !quoteflag) {
1037 const char *const *pp;
1038
1039 if (checkkwd & CHKKWD)
1040 for (pp = parsekwd; *pp; pp++) {
1041 if (**pp == *wordtext && equal(*pp, wordtext)) {
1042 lasttoken = t = pp -
1043 parsekwd + KWDOFFSET;
1044 VTRACE(DBG_PARSE,
1045 ("keyword %s recognized @%d\n",
1046 tokname[t], plinno));
1047 goto out;
1048 }
1049 }
1050
1051 if (checkkwd & CHKALIAS &&
1052 (ap = lookupalias(wordtext, 1)) != NULL) {
1053 VTRACE(DBG_PARSE,
1054 ("alias '%s' recognized -> <:%s:>\n",
1055 wordtext, ap->val));
1056 pushstring(ap->val, strlen(ap->val), ap);
1057 goto top;
1058 }
1059 }
1060 out:
1061 if (t != TNOT)
1062 checkkwd = 0;
1063
1064 VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n",
1065 alreadyseen ? "reread " : "", tokname[t],
1066 t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd));
1067 return (t);
1068 }
1069
1070
/*
 * Read the next input token.
 * If the token is a word, we set backquotelist to the list of cmds in
 *	backquotes.  We set quoteflag to true if any part of the word was
 *	quoted.
 * If the token is TREDIR, then we set redirnode to a structure containing
 *	the redirection.
 * In all cases, the variable startlinno is set to the number of the line
 *	on which the token starts.
 *
 * If a token has been pushed back (tokpushback), that token (lasttoken)
 * is simply returned again, and nothing is read.
 *
 * Operators are recognised here; anything else is handed to readtoken1()
 * to be read as a word.  Blanks, comments, and backslash-newline pairs
 * between tokens are skipped.
 *
 * [Change comment:  here documents and internal procedures]
 * [Readtoken shouldn't have any arguments.  Perhaps we should make the
 *  word parsing code into a separate routine.  In this case, readtoken
 *  doesn't need to have any internal procedures, but parseword does.
 *  We could also make parseoperator in essence the main routine, and
 *  have parseword (readtoken1?) handle both words and redirection.]
 */

/* Return a token, remembering it in lasttoken. */
#define RETURN(token)	return lasttoken = (token)

STATIC int
xxreadtoken(void)
{
	int c;

	if (tokpushback) {
		tokpushback = 0;
		CTRACE(DBG_LEXER,
		    ("xxreadtoken() returns %s (%d) again\n",
			tokname[lasttoken], lasttoken));
		return lasttoken;
	}
	if (needprompt) {
		setprompt(2);
		needprompt = 0;
	}
	elided_nl = 0;
	startlinno = plinno;
	for (;;) {	/* until token or start of word found */
		c = pgetc_macro();
		CTRACE(DBG_LEXER, ("xxreadtoken() sees '%c' (%#.2x) ",
		    c&0xFF, c&0x1FF));
		switch (c) {
		case ' ': case '\t': case PFAKE:
			CTRACE(DBG_LEXER, (" ignored\n"));
			continue;
		case '#':
			/* comment: skip up to, not including, the newline */
			while ((c = pgetc()) != '\n' && c != PEOF)
				continue;
			CTRACE(DBG_LEXER,
			    ("skipped comment to (not incl) \\n\n"));
			pungetc();
			continue;

		case '\n':
			plinno++;
			CTRACE(DBG_LEXER, ("newline now @%d\n", plinno));
			needprompt = doprompt;
			RETURN(TNL);
		case PEOF:
			CTRACE(DBG_LEXER, ("EOF -> TEOF (return)\n"));
			RETURN(TEOF);

		case '&':
			if (pgetc_linecont() == '&') {
				CTRACE(DBG_LEXER,
				    ("and another -> TAND (return)\n"));
				RETURN(TAND);
			}
			pungetc();
			CTRACE(DBG_LEXER, (" -> TBACKGND (return)\n"));
			RETURN(TBACKGND);
		case '|':
			if (pgetc_linecont() == '|') {
				CTRACE(DBG_LEXER,
				    ("and another -> TOR (return)\n"));
				RETURN(TOR);
			}
			pungetc();
			CTRACE(DBG_LEXER, (" -> TPIPE (return)\n"));
			RETURN(TPIPE);
		case ';':
			switch (pgetc_linecont()) {
			case ';':
				CTRACE(DBG_LEXER,
				    ("and another -> TENDCASE (return)\n"));
				RETURN(TENDCASE);
			case '&':
				CTRACE(DBG_LEXER,
				    ("and '&' -> TCASEFALL (return)\n"));
				RETURN(TCASEFALL);
			default:
				pungetc();
				CTRACE(DBG_LEXER, (" -> TSEMI (return)\n"));
				RETURN(TSEMI);
			}
		case '(':
			CTRACE(DBG_LEXER, (" -> TLP (return)\n"));
			RETURN(TLP);
		case ')':
			CTRACE(DBG_LEXER, (" -> TRP (return)\n"));
			RETURN(TRP);

		case '\\':
			switch (pgetc()) {
			case '\n':
				/* line continuation: the token starts later */
				startlinno = ++plinno;
				CTRACE(DBG_LEXER, ("\\\n ignored, now @%d\n",
				    plinno));
				if (doprompt)
					setprompt(2);
				else
					setprompt(0);
				continue;
			case PEOF:
				CTRACE(DBG_LEXER,
				  ("then EOF -> TEOF (return) '\\' dropped\n"));
				RETURN(TEOF);
			default:
				/* the backslash begins a word */
				CTRACE(DBG_LEXER, ("not \\\n or EOF: "));
				pungetc();
				break;
			}
			/* FALLTHROUGH */
		default:
			CTRACE(DBG_LEXER, ("getting a word\n"));
			return readtoken1(c, BASESYNTAX, 0);
		}
	}
#undef RETURN
}
1202
1203
1204
1205 /*
1206 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1207 * is not NULL, read a here document. In the latter case, eofmark is the
1208 * word which marks the end of the document and striptabs is true if
1209 * leading tabs should be stripped from the document. The argument firstc
1210 * is the first character of the input token or document.
1211 *
1212 * Because C does not have internal subroutines, I have simulated them
1213 * using goto's to implement the subroutine linkage. The following macros
1214 * will run code that appears at the end of readtoken1.
1215 */
1216
1217 /*
1218 * We used to remember only the current syntax, variable nesting level,
1219 * double quote state for each var nesting level, and arith nesting
1220 * level (unrelated to var nesting) and one prev syntax when in arith
1221 * syntax. This worked for simple cases, but can't handle arith inside
1222 * var expansion inside arith inside var with some quoted and some not.
1223 *
1224 * Inspired by FreeBSD's implementation (though it was the obvious way)
1225 * though implemented differently, we now have a stack that keeps track
1226 * of what we are doing now, and what we were doing previously.
1227 * Every time something changes, which will eventually end and should
1228 * revert to the previous state, we push this stack, and then pop it
1229 * again later (that is every ${} with an operator (to parse the word
1230 * or pattern that follows) ${x} and $x are too simple to need it)
1231 * $(( )) $( ) and "...". Always. Really, always!
1232 *
1233 * The stack is implemented as one static (on the C stack) base block
1234 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1235 * enough for the vast majority of cases. For torture tests, we
1236 * malloc more blocks as needed. All accesses through the inline
1237 * functions below.
1238 */
1239
1240 /*
1241 * varnest & arinest will typically be 0 or 1
1242 * (varnest can increment in usages like ${x=${y}} but probably
1243 * does not really need to)
1244 * parenlevel allows balancing parens inside a $(( )), it is reset
1245 * at each new nesting level, so $(( ( x + 3 ${unset-)} )) does not work.
1246 * quoted is special - we need to know 2 things ... are we inside "..."
1247 * (even if inherited from some previous nesting level) and was there
1248 * an opening '"' at this level (so the next will be closing).
1249 * "..." can span nesting levels, but cannot be opened in one and
1250 * closed in a different one.
1251 * To handle this, "quoted" has two fields, the bottom 4 (really 2)
1252 * bits are 0, 1, or 2, for un, single, and double quoted (single quoted
1253 * is really so special that this setting is not very important)
1254 * and 0x10 that indicates that an opening quote has been seen.
1255 * The bottom 4 bits are inherited, the 0x10 bit is not.
1256 */
/*
 * One entry of the tokenizer state stack.  A new entry is started by
 * bump_state_level(), which copies everything except ts_parenlevel
 * (reset to 0) and the QS bit of ts_quoted from the entry below it.
 */
1257 struct tokenstate {
/* syntax table in use at this level (indexed by input char in readtoken1) */
1258 const char *ts_syntax;
1259 unsigned short ts_parenlevel; /* counters */
1260 unsigned short ts_varnest; /* 64000 levels should be enough! */
1261 unsigned short ts_arinest;
/* low bits (QF mask): NQ/SQ/DQ/CQ; QS bit: quote was opened at this level */
1262 unsigned short ts_quoted; /* 1 -> single, 2 -> double */
1263 unsigned short ts_magicq; /* heredoc or word expand */
1264 };
1265
/*
 * Values stored in tokenstate.ts_quoted.  NQ, SQ, DQ and CQ occupy the
 * bits selected by QF and are copied to a newly pushed level; QS is a
 * separate flag that is not copied (see bump_state_level()).
 */
1266 #define NQ 0x00 /* Unquoted */
1267 #define SQ 0x01 /* Single Quotes */
1268 #define DQ 0x02 /* Double Quotes (or equivalent) */
1269 #define CQ 0x03 /* C style Single Quotes */
1270 #define QF 0x0F /* Mask to extract previous values */
1271 #define QS 0x10 /* Quoting started at this level in stack */
1272
/* number of tokenstate entries held by one struct statestack block */
1273 #define LEVELS_PER_BLOCK 8
/* shorthand used for declarations of state stack blocks and pointers */
1274 #define VSS struct statestack
1275
/*
 * One block of the tokenizer state stack.  Blocks are chained through
 * "prev"; the base block has prev == NULL.  bump_state_level() adds a
 * block when the current one is full and drop_state_level() frees it
 * again when stepping back out of it.
 */
1276 struct statestack {
1277 VSS *prev; /* previous block in list */
1278 int cur; /* which of our tokenstates is current */
1279 struct tokenstate tokenstate[LEVELS_PER_BLOCK];
1280 };
1281
1282 static inline struct tokenstate *
1283 currentstate(VSS *stack)
1284 {
1285 return &stack->tokenstate[stack->cur];
1286 }
1287
#ifdef notdef
/*
 * Return the tokenstate entry one level below the current one.
 * When the current entry is the first slot of a block, that is the
 * last slot of the previous block; at the very bottom of the base
 * block there is nothing lower, so the base entry itself is returned.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	if (stack->cur == 0) {
		if (stack->prev == NULL)	/* cannot drop below base */
			return &stack->tokenstate[0];
		return &stack->prev->tokenstate[LEVELS_PER_BLOCK - 1];
	}
	return &stack->tokenstate[stack->cur - 1];
}
#endif
1299
1300 static inline VSS *
1301 bump_state_level(VSS *stack)
1302 {
1303 struct tokenstate *os, *ts;
1304
1305 os = currentstate(stack);
1306
1307 if (++stack->cur >= LEVELS_PER_BLOCK) {
1308 VSS *ss;
1309
1310 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1311 ss->cur = 0;
1312 ss->prev = stack;
1313 stack = ss;
1314 }
1315
1316 ts = currentstate(stack);
1317
1318 ts->ts_parenlevel = 0; /* parens inside never match outside */
1319
1320 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1321 ts->ts_varnest = os->ts_varnest;
1322 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1323 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1324 ts->ts_magicq = os->ts_magicq;
1325
1326 return stack;
1327 }
1328
1329 static inline VSS *
1330 drop_state_level(VSS *stack)
1331 {
1332 if (stack->cur == 0) {
1333 VSS *ss;
1334
1335 ss = stack;
1336 stack = ss->prev;
1337 if (stack == NULL)
1338 return ss;
1339 ckfree(ss);
1340 }
1341 --stack->cur;
1342 return stack;
1343 }
1344
1345 static inline void
1346 cleanup_state_stack(VSS *stack)
1347 {
1348 while (stack->prev != NULL) {
1349 stack->cur = 0;
1350 stack = drop_state_level(stack);
1351 }
1352 }
1353
/*
 * "Subroutine call" macros: each jumps to the named label and defines
 * the label that the code there jumps back to when it has finished.
 * Because each expansion defines a label, each can be used only once
 * in a function.
 */
1354 #define PARSESUB() {goto parsesub; parsesub_return:;}
1355 #define PARSEARITH() {goto parsearith; parsearith_return:;}
1356
1357 /*
1358 * The following macros all assume the existence of a local var "stack"
1359 * which contains a pointer to the current struct statestack
1360 */
1361
1362 /*
1363 * These are macros rather than inline funcs to avoid code churn as much
1364 * as possible - they replace macros of the same name used previously.
1365 */
/* ISDBLQUOTE: true when a quote was opened at the current level (QS set) */
1366 #define ISDBLQUOTE() (currentstate(stack)->ts_quoted & QS)
/* SETDBLQUOTE: mark the current level as double quoted, opened here */
1367 #define SETDBLQUOTE() (currentstate(stack)->ts_quoted = QS | DQ)
1368 #ifdef notdef
1369 #define CLRDBLQUOTE() (currentstate(stack)->ts_quoted = \
1370 stack->cur != 0 || stack->prev ? \
1371 prevstate(stack)->ts_quoted & QF : 0)
1372 #endif
1373
1374 /*
1375 * This set are just to avoid excess typing and line lengths...
1376 * The ones that "look like" var names must be implemented to be lvalues
1377 */
1378 #define syntax (currentstate(stack)->ts_syntax)
1379 #define parenlevel (currentstate(stack)->ts_parenlevel)
1380 #define varnest (currentstate(stack)->ts_varnest)
1381 #define arinest (currentstate(stack)->ts_arinest)
1382 #define quoted (currentstate(stack)->ts_quoted)
1383 #define magicq (currentstate(stack)->ts_magicq)
/* push / pop one level; "stack" is updated as the block may change */
1384 #define TS_PUSH() (stack = bump_state_level(stack))
1385 #define TS_POP() (stack = drop_state_level(stack))
1386
1387 /*
1388 * Called to parse command substitutions. oldstyle is true if the command
1389 * is enclosed inside `` (otherwise it was enclosed in "$( )")
1390 *
1391 * Internally nlpp is a pointer to the head of the linked
1392 * list of commands (passed by reference), and savelen is the number of
1393 * characters on the top of the stack which must be preserved.
 *
 * stack   - the caller's tokenizer state stack; consulted (through the
 *           ISDBLQUOTE() and arinest macros) for the current quoting and
 *           arithmetic state, and cleaned up if an error unwinds
 * in      - current output position in the stack string being built; the
 *           bytes between stackblock() and "in" are copied aside and
 *           put back once the embedded command has been parsed
 * pbqlist - list to which a new nodelist entry for the parsed command
 *           is appended
 *
 * Returns the output position following the restored text and the
 * CTLBACKQ marker appended to it.  Inside arithmetic or double quotes
 * the marker is CTLBACKQ | CTLQUOTE and is followed by one CTLNONL for
 * each line the embedded command spanned.
 *
 * Errors raised while parsing are caught here so that local state can
 * be undone, and are then passed on to the previously active handler.
1394 */
1395 static char *
1396 parsebackq(VSS *const stack, char * const in,
1397 struct nodelist **const pbqlist, const int oldstyle)
1398 {
1399 struct nodelist **nlpp;
1400 const int savepbq = parsebackquote;
1401 union node *n;
1402 char *out;
1403 char *str = NULL;
1404 char *volatile sstr = str;
1405 struct jmploc jmploc;
1406 struct jmploc *const savehandler = handler;
1407 struct parsefile *const savetopfile = getcurrentfile();
1408 const int savelen = in - stackblock();
1409 int saveprompt;
1410 int lno;
1411
/*
 * Error path (reached via longjmp to jmploc): drop any input files
 * pushed since entry, free the saved copy, release the state stack,
 * restore the previous handler and re-raise the error to it.
 */
1412 if (setjmp(jmploc.loc)) {
1413 popfilesupto(savetopfile);
1414 if (sstr)
1415 ckfree(__UNVOLATILE(sstr));
1416 cleanup_state_stack(stack);
1417 parsebackquote = 0;
1418 handler = savehandler;
1419 CTRACE(DBG_LEXER, ("parsebackq() err (%d), unwinding\n",
1420 exception));
1421 longjmp(handler->loc, 1);
1422 }
/* copy aside the partial word built so far, then install our handler */
1423 INTOFF;
1424 sstr = str = NULL;
1425 if (savelen > 0) {
1426 sstr = str = ckmalloc(savelen);
1427 memcpy(str, stackblock(), savelen);
1428 }
1429 handler = &jmploc;
1430 INTON;
1431 if (oldstyle) {
1432 /*
1433 * We must read until the closing backquote, giving special
1434 * treatment to some slashes, and then push the string and
1435 * reread it as input, interpreting it normally.
1436 */
1437 int pc;
1438 int psavelen;
1439 char *pstr;
1440 int line1 = plinno;
1441
1442 VTRACE(DBG_PARSE|DBG_LEXER,
1443 ("parsebackq: repackaging `` as $( )"));
1444 /*
1445 * Because the entire `...` is read here, we don't
1446 * need to bother the state stack. That will be used
1447 * (as appropriate) when the processed string is re-read.
1448 */
1449 STARTSTACKSTR(out);
1450 #ifdef DEBUG
1451 for (psavelen = 0;;psavelen++) { /* } */
1452 #else
1453 for (;;) {
1454 #endif
1455 if (needprompt) {
1456 setprompt(2);
1457 needprompt = 0;
1458 }
1459 pc = pgetc();
1460 VTRACE(DBG_LEXER,
1461 ("parsebackq() got '%c'(%#.2x) in `` %s", pc&0xFF,
1462 pc&0x1FF, pc == '`' ? "terminator\n" : ""));
1463 if (pc == '`')
1464 break;
1465 switch (pc) {
1466 case '\\':
1467 pc = pgetc();
1468 VTRACE(DBG_LEXER, ("then '%c'(%#.2x) ",
1469 pc&0xFF, pc&0x1FF));
1470 #ifdef DEBUG
1471 psavelen++;
1472 #endif
1473 if (pc == '\n') { /* keep \ \n for later */
1474 plinno++;
1475 VTRACE(DBG_LEXER, ("@%d ", plinno));
1476 needprompt = doprompt;
1477 }
/* the \ is dropped only before \ ` $ (and before " when in "...") */
1478 if (pc != '\\' && pc != '`' && pc != '$'
1479 && (!ISDBLQUOTE() || pc != '"')) {
1480 VTRACE(DBG_LEXER, ("keep '\\' "));
1481 STPUTC('\\', out);
1482 }
1483 break;
1484
1485 case '\n':
1486 plinno++;
1487 VTRACE(DBG_LEXER, ("@%d ", plinno));
1488 needprompt = doprompt;
1489 break;
1490
1491 case PEOF:
1492 startlinno = line1;
1493 VTRACE(DBG_LEXER, ("EOF\n", plinno));
1494 synerror("EOF in backquote substitution");
1495 break;
1496
1497 default:
1498 break;
1499 }
1500 VTRACE(DBG_LEXER, (".\n", plinno));
1501 STPUTC(pc, out);
1502 }
1503 STPUTC('\0', out);
1504 VTRACE(DBG_LEXER, ("parsebackq() ``:"));
1505 VTRACE(DBG_PARSE|DBG_LEXER, (" read %d", psavelen));
1506 psavelen = out - stackblock();
1507 VTRACE(DBG_PARSE|DBG_LEXER, (" produced %d\n", psavelen));
1508 if (psavelen > 0) {
1509 pstr = grabstackstr(out);
1510 CTRACE(DBG_LEXER,
1511 ("parsebackq() reprocessing as $(%s)\n", pstr));
1512 setinputstring(pstr, 1, line1);
1513 }
1514 }
/* append a new, empty entry to the end of the caller's command list */
1515 nlpp = pbqlist;
1516 while (*nlpp)
1517 nlpp = &(*nlpp)->next;
1518 *nlpp = stalloc(sizeof(struct nodelist));
1519 (*nlpp)->next = NULL;
1520 parsebackquote = oldstyle;
1521
1522 if (oldstyle) {
1523 saveprompt = doprompt;
1524 doprompt = 0;
1525 } else
1526 saveprompt = 0;
1527
/* lno becomes the number of lines consumed while parsing the command */
1528 lno = -plinno;
1529 CTRACE(DBG_LEXER, ("parsebackq() parsing embedded command list\n"));
1530 n = list(0);
1531 CTRACE(DBG_LEXER, ("parsebackq() parsed $() (%d -> %d)\n", -lno,
1532 lno + plinno));
1533 lno += plinno;
1534
1535 if (oldstyle) {
1536 if (peektoken() != TEOF)
1537 synexpect(-1, 0);
1538 doprompt = saveprompt;
1539 } else
1540 consumetoken(TRP);
1541
1542 (*nlpp)->n = n;
1543 if (oldstyle) {
1544 /*
1545 * Start reading from old file again, ignoring any pushed back
1546 * tokens left from the backquote parsing
1547 */
1548 CTRACE(DBG_LEXER, ("parsebackq() back to previous input\n"));
1549 popfile();
1550 tokpushback = 0;
1551 }
1552
/* make room for, then put back, the text that was saved on entry */
1553 while (stackblocksize() <= savelen)
1554 growstackblock();
1555 STARTSTACKSTR(out);
1556 if (str) {
1557 memcpy(out, str, savelen);
1558 STADJUST(savelen, out);
1559 INTOFF;
1560 ckfree(str);
1561 sstr = str = NULL;
1562 INTON;
1563 }
1564 parsebackquote = savepbq;
1565 handler = savehandler;
1566 if (arinest || ISDBLQUOTE()) {
1567 STPUTC(CTLBACKQ | CTLQUOTE, out);
1568 while (--lno >= 0)
1569 STPUTC(CTLNONL, out);
1570 } else
1571 STPUTC(CTLBACKQ, out);
1572
1573 return out;
1574 }
1575
1576 /*
1577 * Parse a redirection operator. The parameter "out" points to a string
1578 * specifying the fd to be redirected. It is guaranteed to be either ""
1579 * or a numeric string (for now anyway). The parameter "c" contains the
1580 * first character of the redirection operator.
1581 *
1582 * Note the string "out" is on the stack, which we are about to clobber,
1583 * so process it first...
1584 */
1585
1586 static void
1587 parseredir(const char *out, int c)
1588 {
1589 union node *np;
1590 int fd;
1591
1592 np = stalloc(sizeof(struct nfile));
1593
1594 fd = (*out == '\0') ? -1 : number(out); /* number(out) >= 0 */
1595 np->nfile.fd = fd; /* do this again later with updated fd */
1596 if (fd != np->nfile.fd)
1597 error("file descriptor (%d) out of range (max %ld)",
1598 fd, user_fd_limit - 1);
1599 if (fd < user_fd_limit && fd > max_user_fd)
1600 max_user_fd = fd;
1601
1602 VTRACE(DBG_LEXER, ("parseredir after '%s%c' ", out, c));
1603 if (c == '>') {
1604 if (fd < 0)
1605 fd = 1;
1606 c = pgetc_linecont();
1607 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1608 if (c == '>')
1609 np->type = NAPPEND;
1610 else if (c == '|')
1611 np->type = NCLOBBER;
1612 else if (c == '&')
1613 np->type = NTOFD;
1614 else {
1615 np->type = NTO;
1616 VTRACE(DBG_LEXER, ("unwanted ", c));
1617 pungetc();
1618 }
1619 } else { /* c == '<' */
1620 if (fd < 0)
1621 fd = 0;
1622 c = pgetc_linecont();
1623 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1624 switch (c) {
1625 case '<':
1626 /* if sizes differ, just discard the old one */
1627 if (sizeof (struct nfile) != sizeof (struct nhere))
1628 np = stalloc(sizeof(struct nhere));
1629 np->type = NHERE;
1630 np->nhere.fd = 0;
1631 heredoc = stalloc(sizeof(struct HereDoc));
1632 heredoc->here = np;
1633 heredoc->startline = plinno;
1634 if ((c = pgetc_linecont()) == '-') {
1635 CTRACE(DBG_LEXER, ("and '%c'(%#.2x) ",
1636 c & 0xFF, c & 0x1FF));
1637 heredoc->striptabs = 1;
1638 } else {
1639 heredoc->striptabs = 0;
1640 pungetc();
1641 }
1642 break;
1643
1644 case '&':
1645 np->type = NFROMFD;
1646 break;
1647
1648 case '>':
1649 np->type = NFROMTO;
1650 break;
1651
1652 default:
1653 np->type = NFROM;
1654 VTRACE(DBG_LEXER, ("unwanted('%c'0#.2x)", c&0xFF,
1655 c&0x1FF));
1656 pungetc();
1657 break;
1658 }
1659 }
1660 np->nfile.fd = fd;
1661
1662 VTRACE(DBG_LEXER, (" ->%"PRIdsNT" fd=%d\n", NODETYPENAME(np->type),fd));
1663
1664 redirnode = np; /* this is the "value" of TRENODE */
1665 }
1666
1667 /*
1668 * Called to parse a backslash escape sequence inside $'...'.
1669 * The backslash has already been read.
 *
 * out is the current output position in the string being built; the
 * position after whatever has been appended is returned (unchanged
 * when the escape produces nothing, as for \ followed by newline).
 *
 * Escapes handled: \\ \' \" \a \b \e \f \n \r \t \v, up to three octal
 * digits, \cX, and \x \u \U followed by at most 2, 4 or 8 hex digits.
 * Anything else, a bad \c, a value in 0xd800..0xdfff from \u / \U, or
 * end of input is reported through synerror().
1670 */
1671 static char *
1672 readcstyleesc(char *out)
1673 {
1674 int c, vc, i, n;
1675 unsigned int v;
1676
1677 c = pgetc();
1678 VTRACE(DBG_LEXER, ("CSTR(\\%c)(\\%#x)", c&0xFF, c&0x1FF));
1679 switch (c) {
1680 case '\0':
1681 case PEOF:
1682 synerror("Unterminated quoted string ($'...)");
1683 case '\n':
1684 plinno++;
1685 VTRACE(DBG_LEXER, ("@%d ", plinno));
1686 if (doprompt)
1687 setprompt(2);
1688 else
1689 setprompt(0);
1690 return out;
1691
1692 case '\\':
1693 case '\'':
1694 case '"':
1695 v = c;
1696 break;
1697
1698 case 'a': v = '\a'; break;
1699 case 'b': v = '\b'; break;
1700 case 'e': v = '\033'; break;
1701 case 'f': v = '\f'; break;
1702 case 'n': v = '\n'; break;
1703 case 'r': v = '\r'; break;
1704 case 't': v = '\t'; break;
1705 case 'v': v = '\v'; break;
1706
/* octal: the digit just read plus at most two more */
1707 case '0': case '1': case '2': case '3':
1708 case '4': case '5': case '6': case '7':
1709 v = c - '0';
1710 c = pgetc();
1711 if (c >= '0' && c <= '7') {
1712 v <<= 3;
1713 v += c - '0';
1714 c = pgetc();
1715 if (c >= '0' && c <= '7') {
1716 v <<= 3;
1717 v += c - '0';
1718 } else
1719 pungetc();
1720 } else
1721 pungetc();
1722 break;
1723
/* \cX: X must be in 0x3f..0x7a and not 0x60; a backslash must be doubled */
1724 case 'c':
1725 c = pgetc();
1726 if (c < 0x3f || c > 0x7a || c == 0x60)
1727 synerror("Bad \\c escape sequence");
1728 if (c == '\\' && pgetc() != '\\')
1729 synerror("Bad \\c\\ escape sequence");
1730 if (c == '?')
1731 v = 127;
1732 else
1733 v = c & 0x1f;
1734 break;
1735
/* n is the maximum number of hex digits accepted for each form */
1736 case 'x':
1737 n = 2;
1738 goto hexval;
1739 case 'u':
1740 n = 4;
1741 goto hexval;
1742 case 'U':
1743 n = 8;
1744 hexval:
1745 v = 0;
1746 for (i = 0; i < n; i++) {
1747 c = pgetc();
1748 if (c >= '0' && c <= '9')
1749 v = (v << 4) + c - '0';
1750 else if (c >= 'A' && c <= 'F')
1751 v = (v << 4) + c - 'A' + 10;
1752 else if (c >= 'a' && c <= 'f')
1753 v = (v << 4) + c - 'a' + 10;
1754 else {
1755 pungetc();
1756 break;
1757 }
1758 }
/* \u and \U values above 127 are written out as a multi-byte sequence */
1759 if (n > 2 && v > 127) {
1760 if (v >= 0xd800 && v <= 0xdfff)
1761 synerror("Invalid \\u escape sequence");
1762
1763 /* XXX should we use iconv here. What locale? */
/* up to 6 bytes may be written below, each possibly preceded by CTLESC */
1764 CHECKSTRSPACE(12, out);
1765
1766 /*
1767 * Add a byte to output string, while checking if it needs to
1768 * be escaped -- if its value happens to match the value of one
1769 * of our internal CTL* chars - which would (at a minimum) be
1770 * summarily removed later, if not escaped.
1771 *
1772 * The current definition of ISCTL() allows the compiler to
1773 * optimise away either half, or all, of the test in most of
1774 * the cases here (0xc0 | anything) cannot be between 0x80 and 0x9f
1775 * for example, so there a test is not needed).
1776 *
1777 * Which tests can be removed depends upon the actual values
1778 * selected for the CTL* chars.
1779 */
1780 #define ESC_USTPUTC(c, o) do { \
1781 char _ch = (c); \
1782 \
1783 if (ISCTL(_ch)) \
1784 USTPUTC(CTLESC, o); \
1785 USTPUTC(_ch, o); \
1786 } while (0)
1787
1788 VTRACE(DBG_LEXER, ("CSTR(\\%c%8.8x)", n==4?'u':'U', v));
1789 if (v <= 0x7ff) {
1790 ESC_USTPUTC(0xc0 | v >> 6, out);
1791 ESC_USTPUTC(0x80 | (v & 0x3f), out);
1792 return out;
1793 } else if (v <= 0xffff) {
1794 ESC_USTPUTC(0xe0 | v >> 12, out);
1795 ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1796 ESC_USTPUTC(0x80 | (v & 0x3f), out);
1797 return out;
1798 } else if (v <= 0x10ffff) {
1799 ESC_USTPUTC(0xf0 | v >> 18, out);
1800 ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1801 ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1802 ESC_USTPUTC(0x80 | (v & 0x3f), out);
1803 return out;
1804
1805 /* these next two are not very likely, but we may as well be complete */
1806 } else if (v <= 0x3FFFFFF) {
1807 ESC_USTPUTC(0xf8 | v >> 24, out);
1808 ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
1809 ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1810 ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1811 ESC_USTPUTC(0x80 | (v & 0x3f), out);
1812 return out;
1813 } else if (v <= 0x7FFFFFFF) {
1814 ESC_USTPUTC(0xfC | v >> 30, out);
1815 ESC_USTPUTC(0x80 | ((v >> 24) & 0x3f), out);
1816 ESC_USTPUTC(0x80 | ((v >> 18) & 0x3f), out);
1817 ESC_USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1818 ESC_USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1819 ESC_USTPUTC(0x80 | (v & 0x3f), out);
1820 return out;
1821 }
/* only reached for values above 0x7FFFFFFF */
1822 if (v > 127)
1823 v = '?';
1824 }
1825 break;
1826 default:
1827 synerror("Unknown $'' escape sequence");
1828 }
/* single byte results from all of the cases above end up here */
1829 vc = (char)v;
1830 VTRACE(DBG_LEXER, ("->%u(%#x)['%c']", v, v, vc&0xFF));
1831
1832 /*
1833 * If we managed to create a \n from a \ sequence (no matter how)
1834 * then we replace it with the magic CTLCNL control char, which
1835 * will turn into a \n again later, but in the meantime, never
1836 * causes LINENO increments.
1837 */
1838 if (vc == '\n') {
1839 VTRACE(DBG_LEXER, ("CTLCNL."));
1840 USTPUTC(CTLCNL, out);
1841 return out;
1842 }
1843
1844 /*
1845 * We can't handle NUL bytes.
1846 * POSIX says we should skip till the closing quote.
1847 */
1848 if (vc == '\0') {
1849 CTRACE(DBG_LEXER, ("\\0: skip to '", v, v, vc&0xFF));
1850 while ((c = pgetc()) != '\'') {
/* a backslash hides the next char, so \' does not end the skip */
1851 if (c == '\\')
1852 c = pgetc();
1853 if (c == PEOF)
1854 synerror("Unterminated quoted string ($'...)");
1855 if (c == '\n') {
1856 plinno++;
1857 if (doprompt)
1858 setprompt(2);
1859 else
1860 setprompt(0);
1861 }
1862 }
/* leave the closing quote to be read again by the caller */
1863 pungetc();
1864 return out;
1865 }
1866 CVTRACE(DBG_LEXER, NEEDESC(vc), ("CTLESC-"));
1867 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", vc&0xFF, vc&0x1FF));
1868 if (NEEDESC(vc))
1869 USTPUTC(CTLESC, out);
1870 USTPUTC(vc, out);
1871 return out;
1872 }
1873
1874 /*
1875 * The lowest level basic tokenizer.
1876 *
1877 * The next input byte (character) is in firstc, syn says which
1878 * syntax tables we are to use (basic, single or double quoted, or arith)
1879 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1880 * quote character itself is not special (used parsing here docs and similar)
1881 *
1882 * The result is the type of the next token (its value, when there is one,
1883 * is saved in the relevant global var - must fix that someday!) which is
1884 * also saved for re-reading ("lasttoken").
1885 *
1886 * Overall, this routine does far more parsing than it is supposed to.
1887 * That will also need fixing, someday...
 *
 * The third parameter is called "oneword" here; its value is what the
 * magicq state of the base level is initialised to.
 *
 * Two results are produced below: TREDIR, after parseredir() has been
 * called for an unquoted empty or numeric word followed by '<' or '>',
 * and TWORD, in which case wordtext, quoteflag and backquotelist are set.
 * Unterminated constructs (missing "))", closing quote or '}') and bad
 * substitutions are reported through synerror(), after the state stack
 * has been cleaned up.
 *
 * The code at the labels parsesub and parsearith, after the main return,
 * is entered through the PARSESUB() and PARSEARITH() macros and jumps
 * back to the label those macros define.
1888 */
1889 STATIC int
1890 readtoken1(int firstc, char const *syn, int oneword)
1891 {
1892 int c;
1893 char * out;
1894 int len;
1895 struct nodelist *bqlist;
1896 int quotef;
1897 VSS static_stack;
1898 VSS *stack = &static_stack;
1899
1900 stack->prev = NULL;
1901 stack->cur = 0;
1902
1903 syntax = syn;
1904
1905 #ifdef DEBUG
1906 #define SYNTAX ( syntax == BASESYNTAX ? "BASE" : \
1907 syntax == DQSYNTAX ? "DQ" : \
1908 syntax == SQSYNTAX ? "SQ" : \
1909 syntax == ARISYNTAX ? "ARI" : \
1910 "???" )
1911 #endif
1912
1913 startlinno = plinno;
1914 varnest = 0;
1915 quoted = 0;
1916 if (syntax == DQSYNTAX)
1917 SETDBLQUOTE();
1918 quotef = 0;
1919 bqlist = NULL;
1920 arinest = 0;
1921 parenlevel = 0;
1922 elided_nl = 0;
1923 magicq = oneword;
1924
1925 CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n",
1926 firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "",
1927 ISDBLQUOTE()?" ISDBLQUOTE":"", quoted));
1928
1929 STARTSTACKSTR(out);
1930
/*
 * Inside the switch, "continue" moves on to the next input character,
 * while "break" leaves the switch and then (via the break that follows
 * the switch) the loop as well, ending the token.
 */
1931 for (c = firstc ;; c = pgetc_macro()) { /* until end of token */
1932 if (syntax == ARISYNTAX)
1933 out = insert_elided_nl(out);
1934 CHECKSTRSPACE(6, out); /* permit 6 calls to USTPUTC */
1935 switch (syntax[c]) {
1936 case CFAKE:
1937 VTRACE(DBG_LEXER, ("CFAKE"));
1938 if (syntax == BASESYNTAX && varnest == 0)
1939 break;
1940 VTRACE(DBG_LEXER, (","));
1941 continue;
1942 case CNL: /* '\n' */
1943 VTRACE(DBG_LEXER, ("CNL"));
1944 if (syntax == BASESYNTAX && varnest == 0)
1945 break; /* exit loop */
1946 USTPUTC(c, out);
1947 plinno++;
1948 VTRACE(DBG_LEXER, ("@%d,", plinno));
1949 if (doprompt)
1950 setprompt(2);
1951 else
1952 setprompt(0);
1953 continue;
1954
1955 case CSBACK: /* single quoted backslash */
1956 if ((quoted & QF) == CQ) {
1957 out = readcstyleesc(out);
1958 continue;
1959 }
1960 VTRACE(DBG_LEXER, ("ESC:"));
1961 USTPUTC(CTLESC, out);
1962 /* FALLTHROUGH */
1963 case CWORD:
1964 VTRACE(DBG_LEXER, ("'%c'", c));
1965 USTPUTC(c, out);
1966 continue;
1967
1968 case CCTL:
1969 CVTRACE(DBG_LEXER, !magicq || ISDBLQUOTE(),
1970 ("%s%sESC:",!magicq?"!m":"",ISDBLQUOTE()?"DQ":""));
1971 if (!magicq || ISDBLQUOTE())
1972 USTPUTC(CTLESC, out);
1973 VTRACE(DBG_LEXER, ("'%c'", c));
1974 USTPUTC(c, out);
1975 continue;
1976 case CBACK: /* backslash */
1977 c = pgetc();
1978 VTRACE(DBG_LEXER, ("\\'%c'(%#.2x)", c&0xFF, c&0x1FF));
1979 if (c == PEOF) {
1980 VTRACE(DBG_LEXER, ("EOF, keep \\ "));
1981 USTPUTC('\\', out);
1982 pungetc();
1983 continue;
1984 }
1985 if (c == '\n') {
1986 plinno++;
1987 elided_nl++;
1988 VTRACE(DBG_LEXER, ("eli \\n (%d) @%d ",
1989 elided_nl, plinno));
1990 if (doprompt)
1991 setprompt(2);
1992 else
1993 setprompt(0);
1994 continue;
1995 }
1996 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1997 quotef = 1; /* current token is quoted */
1998 if (quoted && c != '\\' && c != '`' &&
1999 (c != '}' || varnest == 0) &&
2000 c != '$' && (c != '"' || magicq)) {
2001 /*
2002 * retain the \ (which we *know* needs CTLESC)
2003 * when in "..." and the following char is
2004 * not one of the magic few.)
2005 * Otherwise the \ has done its work, and
2006 * is dropped.
2007 */
2008 VTRACE(DBG_LEXER, ("ESC:'\\'"));
2009 USTPUTC(CTLESC, out);
2010 USTPUTC('\\', out);
2011 }
2012 CVTRACE(DBG_LEXER, NEEDESC(c) || !magicq,
2013 ("%sESC:", NEEDESC(c) ? "+" : "m"));
2014 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", c&0xFF, c&0x1FF));
2015 if (NEEDESC(c))
2016 USTPUTC(CTLESC, out);
2017 else if (!magicq) {
2018 USTPUTC(CTLESC, out);
2019 USTPUTC(c, out);
2020 continue;
2021 }
2022 USTPUTC(c, out);
2023 continue;
2024 case CSQUOTE:
2025 if (syntax != SQSYNTAX) {
2026 CVTRACE(DBG_LEXER, !magicq, (" CQM "));
2027 if (!magicq)
2028 USTPUTC(CTLQUOTEMARK, out);
2029 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
2030 quotef = 1;
2031 TS_PUSH();
2032 syntax = SQSYNTAX;
2033 quoted = SQ;
2034 VTRACE(DBG_LEXER, (" TS_PUSH(SQ)"));
2035 continue;
2036 }
2037 if (magicq && arinest == 0 && varnest == 0) {
2038 /* Ignore inside quoted here document */
2039 VTRACE(DBG_LEXER, ("<<'>>"));
2040 USTPUTC(c, out);
2041 continue;
2042 }
2043 /* End of single quotes... */
2044 TS_POP();
2045 VTRACE(DBG_LEXER, ("SQ TS_POP->%s ", SYNTAX));
2046 CVTRACE(DBG_LEXER, syntax == BASESYNTAX, (" CQE "));
2047 if (syntax == BASESYNTAX)
2048 USTPUTC(CTLQUOTEEND, out);
2049 continue;
2050 case CDQUOTE:
2051 if (magicq && arinest == 0 /* && varnest == 0 */) {
2052 VTRACE(DBG_LEXER, ("<<\">>"));
2053 /* Ignore inside here document */
2054 USTPUTC(c, out);
2055 continue;
2056 }
2057 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
2058 quotef = 1;
2059 if (arinest) {
2060 if (ISDBLQUOTE()) {
2061 VTRACE(DBG_LEXER,
2062 (" CQE ari(%d", arinest));
2063 USTPUTC(CTLQUOTEEND, out);
2064 TS_POP();
2065 VTRACE(DBG_LEXER, ("%d)TS_POP->%s ",
2066 arinest, SYNTAX));
2067 } else {
2068 VTRACE(DBG_LEXER,
2069 (" ari(%d) %s TS_PUSH->DQ CQM ",
2070 arinest, SYNTAX));
2071 TS_PUSH();
2072 syntax = DQSYNTAX;
2073 SETDBLQUOTE();
2074 USTPUTC(CTLQUOTEMARK, out);
2075 }
2076 continue;
2077 }
2078 CVTRACE(DBG_LEXER, magicq, (" MQignDQ "));
2079 if (magicq)
2080 continue;
/* a '"' closes the quote opened at this level, or else opens a new one */
2081 if (ISDBLQUOTE()) {
2082 TS_POP();
2083 VTRACE(DBG_LEXER,
2084 (" DQ TS_POP->%s CQE ", SYNTAX));
2085 USTPUTC(CTLQUOTEEND, out);
2086 } else {
2087 VTRACE(DBG_LEXER,
2088 (" %s TS_POP->DQ CQM ", SYNTAX));
2089 TS_PUSH();
2090 syntax = DQSYNTAX;
2091 SETDBLQUOTE();
2092 USTPUTC(CTLQUOTEMARK, out);
2093 }
2094 continue;
2095 case CVAR: /* '$' */
2096 VTRACE(DBG_LEXER, ("'$'..."));
2097 out = insert_elided_nl(out);
2098 PARSESUB(); /* parse substitution */
2099 continue;
2100 case CENDVAR: /* CLOSEBRACE */
2101 if (varnest > 0 && !ISDBLQUOTE()) {
2102 VTRACE(DBG_LEXER, ("vn=%d !DQ", varnest));
2103 TS_POP();
2104 VTRACE(DBG_LEXER, (" TS_POP->%s CEV ", SYNTAX));
2105 USTPUTC(CTLENDVAR, out);
2106 } else {
2107 VTRACE(DBG_LEXER, ("'%c'", c));
2108 USTPUTC(c, out);
2109 }
2110 out = insert_elided_nl(out);
2111 continue;
2112 case CLP: /* '(' in arithmetic */
2113 parenlevel++;
2114 VTRACE(DBG_LEXER, ("'('(%d)", parenlevel));
2115 USTPUTC(c, out);
2116 continue;
2117 case CRP: /* ')' in arithmetic */
2118 if (parenlevel > 0) {
2119 USTPUTC(c, out);
2120 --parenlevel;
2121 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2122 } else {
2123 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2124 if (pgetc_linecont() == /*(*/ ')') {
2125 out = insert_elided_nl(out);
2126 if (--arinest == 0) {
2127 TS_POP();
2128 USTPUTC(CTLENDARI, out);
2129 } else
2130 USTPUTC(/*(*/ ')', out);
2131 } else {
2132 break; /* to synerror() just below */
2133 #if 0 /* the old way, causes weird errors on bad input */
2134 /*
2135 * unbalanced parens
2136 * (don't 2nd guess - no error)
2137 */
2138 pungetc();
2139 USTPUTC(/*(*/ ')', out);
2140 #endif
2141 }
2142 }
2143 continue;
2144 case CBQUOTE: /* '`' */
2145 VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n"));
2146 out = parsebackq(stack, out, &bqlist, 1);
2147 VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: "));
2148 continue;
2149 case CEOF: /* --> c == PEOF */
2150 VTRACE(DBG_LEXER, ("EOF "));
2151 break; /* will exit loop */
2152 default:
2153 VTRACE(DBG_LEXER, ("['%c'(%#.2x)]", c&0xFF, c&0x1FF));
2154 if (varnest == 0 && !ISDBLQUOTE())
2155 break; /* exit loop */
2156 USTPUTC(c, out);
2157 VTRACE(DBG_LEXER, (","));
2158 continue;
2159 }
2160 VTRACE(DBG_LEXER, (" END TOKEN\n", c&0xFF, c&0x1FF));
2161 break; /* break from switch -> break from for loop too */
2162 }
2163
/* the token has ended: anything still open at this point is an error */
2164 if (syntax == ARISYNTAX) {
2165 cleanup_state_stack(stack);
2166 synerror(/*((*/ "Missing '))'");
2167 }
2168 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
2169 cleanup_state_stack(stack);
2170 synerror("Unterminated quoted string");
2171 }
2172 if (varnest != 0) {
2173 cleanup_state_stack(stack);
2174 startlinno = plinno;
2175 /* { */
2176 synerror("Missing '}'");
2177 }
2178
2179 STPUTC('\0', out);
2180 len = out - stackblock();
2181 out = stackblock();
2182
/*
 * An unquoted word that is empty or a number, ended by '<' or '>', is
 * the start of a redirection; otherwise the ending char is pushed back.
 */
2183 if (!magicq) {
2184 if ((c == '<' || c == '>')
2185 && quotef == 0 && (*out == '\0' || is_number(out))) {
2186 parseredir(out, c);
2187 cleanup_state_stack(stack);
2188 return lasttoken = TREDIR;
2189 } else {
2190 pungetc();
2191 }
2192 }
2193
2194 VTRACE(DBG_PARSE|DBG_LEXER,
2195 ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
2196 (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
2197 len, elided_nl));
2198
2199 quoteflag = quotef;
2200 backquotelist = bqlist;
2201 grabstackblock(len);
2202 wordtext = out;
2203 cleanup_state_stack(stack);
2204 return lasttoken = TWORD;
2205 /* end of readtoken routine */
2206
2207
2208 /*
2209 * Parse a substitution. At this point, we have read the dollar sign
2210 * and nothing else.
2211 */
2212
2213 parsesub: {
2214 int subtype;
2215 int typeloc;
2216 int flags;
2217 char *p;
2218 static const char types[] = "}-+?=";
2219
2220 c = pgetc_linecont();
2221 VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF));
2222 if (c == '(' /*)*/) { /* $(command) or $((arith)) */
2223 if (pgetc_linecont() == '(' /*')'*/ ) {
2224 VTRACE(DBG_LEXER, ("\"$((\" ARITH "));
2225 out = insert_elided_nl(out);
2226 PARSEARITH();
2227 } else {
2228 VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n"));
2229 out = insert_elided_nl(out);
2230 pungetc();
2231 out = parsebackq(stack, out, &bqlist, 0);
2232 VTRACE(DBG_LEXER, ("parseback()->readtoken1(): "));
2233 }
2234 } else if (c == OPENBRACE || is_name(c) || is_special(c)) {
2235 VTRACE(DBG_LEXER, (" $EXP:CTLVAR "));
2236 USTPUTC(CTLVAR, out);
/* placeholder byte; overwritten below once subtype and flags are known */
2237 typeloc = out - stackblock();
2238 USTPUTC(VSNORMAL, out);
2239 subtype = VSNORMAL;
2240 flags = 0;
2241 if (c == OPENBRACE) {
2242 c = pgetc_linecont();
2243 if (c == '#') {
2244 if ((c = pgetc_linecont()) == CLOSEBRACE)
2245 c = '#';
/*
 * NOTE(review): this is the only isdigit() in this function, the
 * other digit tests use is_digit(); confirm c is always a value
 * that is valid to pass to isdigit() here (it may be PEOF).
 */
2246 else if (is_name(c) || isdigit(c))
2247 subtype = VSLENGTH;
2248 else if (is_special(c)) {
2249 /*
2250 * ${#} is $# - the number of sh params
2251 * ${##} is the length of ${#}
2252 * ${###} is ${#} with as much nothing
2253 * as possible removed from start
2254 * ${##1} is ${#} with leading 1 gone
2255 * ${##\#} is ${#} with leading # gone
2256 *
2257 * this stuff is UGLY!
2258 */
2259 if (pgetc_linecont() == CLOSEBRACE) {
2260 pungetc();
2261 subtype = VSLENGTH;
2262 } else {
2263 static char cbuf[2];
2264
2265 pungetc(); /* would like 2 */
2266 cbuf[0] = c; /* so ... */
2267 cbuf[1] = '\0';
2268 pushstring(cbuf, 1, NULL);
2269 c = '#'; /* ${#:...} */
2270 subtype = 0; /* .. or similar */
2271 }
2272 } else {
2273 pungetc();
2274 c = '#';
2275 subtype = 0;
2276 }
2277 }
2278 else
2279 subtype = 0;
2280 VTRACE(DBG_LEXER, ("${ st=%d ", subtype));
2281 }
2282 if (is_name(c)) {
2283 p = out;
2284 do {
2285 VTRACE(DBG_LEXER, ("%c", c));
2286 STPUTC(c, out);
2287 c = pgetc_linecont();
2288 } while (is_in_name(c));
2289
2290 #if 0
2291 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
2292 int i;
2293 int linno;
2294 char buf[10];
2295
2296 /*
2297 * The "LINENO hack"
2298 *
2299 * Replace the variable name with the
2300 * current line number.
2301 */
2302 linno = plinno;
2303 if (funclinno != 0)
2304 linno -= funclinno - 1;
2305 snprintf(buf, sizeof(buf), "%d", linno);
2306 STADJUST(-6, out);
2307 for (i = 0; buf[i] != '\0'; i++)
2308 STPUTC(buf[i], out);
2309 flags |= VSLINENO;
2310 }
2311 #endif
2312 } else if (is_digit(c)) {
/* without braces (subtype still VSNORMAL) only one digit is taken */
2313 do {
2314 VTRACE(DBG_LEXER, ("%c", c));
2315 STPUTC(c, out);
2316 c = pgetc_linecont();
2317 } while (subtype != VSNORMAL && is_digit(c));
2318 }
2319 else if (is_special(c)) {
2320 VTRACE(DBG_LEXER, ("\"$%c", c));
2321 USTPUTC(c, out);
2322 c = pgetc_linecont();
2323 }
2324 else {
2325 VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF));
2326 badsub:
2327 cleanup_state_stack(stack);
2328 synerror("Bad substitution");
2329 }
2330
2331 STPUTC('=', out);
2332 if (subtype == 0) {
2333 switch (c) {
2334 case ':':
2335 flags |= VSNUL;
2336 c = pgetc_linecont();
2337 /*FALLTHROUGH*/
2338 default:
/* the position of c within types[] selects the subtype */
2339 p = strchr(types, c);
2340 if (p == NULL)
2341 goto badsub;
2342 subtype = p - types + VSNORMAL;
2343 break;
2344 case '%':
2345 case '#':
2346 {
2347 int cc = c;
2348 subtype = c == '#' ? VSTRIMLEFT :
2349 VSTRIMRIGHT;
2350 c = pgetc_linecont();
/* a doubled operator (## or %%) selects the next subtype value */
2351 if (c == cc)
2352 subtype++;
2353 else
2354 pungetc();
2355 break;
2356 }
2357 }
2358 } else {
2359 if (subtype == VSLENGTH && c != /*{*/ '}')
2360 synerror("no modifiers allowed with ${#var}");
2361 pungetc();
2362 }
2363 if (quoted || arinest)
2364 flags |= VSQUOTE;
2365 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
2366 flags |= VSPATQ;
2367 VTRACE(DBG_LEXER, (" st%d:%x", subtype, flags));
2368 *(stackblock() + typeloc) = subtype | flags;
2369 if (subtype != VSNORMAL) {
2370 TS_PUSH();
2371 varnest++;
2372 arinest = 0;
2373 if (subtype > VSASSIGN) { /* # ## % %% */
2374 syntax = BASESYNTAX;
2375 quoted = 0;
2376 magicq = 0;
2377 }
2378 VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ",
2379 SYNTAX, varnest, quoted ? " Q" : ""));
2380 }
2381 } else if (c == '\'' && syntax == BASESYNTAX) {
2382 USTPUTC(CTLQUOTEMARK, out);
2383 VTRACE(DBG_LEXER, (" CSTR \"$'\" CQM "));
2384 CVTRACE(DBG_LEXER, quotef==0, ("QF=1 "));
2385 quotef = 1;
2386 TS_PUSH();
2387 syntax = SQSYNTAX;
2388 quoted = CQ;
2389 VTRACE(DBG_LEXER, ("%s->TS_PUSH()->SQ ", SYNTAX));
2390 } else {
2391 VTRACE(DBG_LEXER, ("$unk -> '$' (pushback '%c'%#.2x)",
2392 c & 0xFF, c & 0x1FF));
2393 USTPUTC('$', out);
2394 pungetc();
2395 }
2396 goto parsesub_return;
2397 }
2398
2399
2400 /*
2401 * Parse an arithmetic expansion (indicate start of one and set state)
2402 */
2403 parsearith: {
2404
2405 #if 0
2406 if (syntax == ARISYNTAX) {
2407 /*
2408 * we collapse embedded arithmetic expansion to
2409 * parentheses, which should be equivalent
2410 *
2411 * XXX It isn't, must fix, soonish...
2412 */
2413 USTPUTC('(' /*)*/, out);
2414 USTPUTC('(' /*)*/, out);
2415 /*
2416 * Need 2 of them because there will (should be)
2417 * two closing ))'s to follow later.
2418 */
2419 parenlevel += 2;
2420 } else
2421 #endif
2422 {
2423 VTRACE(DBG_LEXER, (" CTLARI%c ", ISDBLQUOTE()?'"':'_'));
2424 USTPUTC(CTLARI, out);
/* the byte after CTLARI records whether we were inside "..." */
2425 if (ISDBLQUOTE())
2426 USTPUTC('"',out);
2427 else
2428 USTPUTC(' ',out);
2429
2430 VTRACE(DBG_LEXER, ("%s->TS_PUSH->ARI(1)", SYNTAX));
2431 TS_PUSH();
2432 syntax = ARISYNTAX;
2433 arinest = 1;
2434 varnest = 0;
2435 magicq = 1;
2436 }
2437 goto parsearith_return;
2438 }
2439
2440 } /* end of readtoken */
2441
2442
2443
2444
/*
 * Directives for mkinit (this section is only seen when "mkinit" is
 * defined): INCLUDE names parser.h, and the RESET body points
 * psp.v_current_parser back at parse_state and clears that state's
 * token-pushback flag, keyword-check flags and here-document list.
 *
 * NOTE(review): presumably mkinit copies the RESET body into the shell's
 * generated reset routine -- confirm against mkinit.
 */
2445 #ifdef mkinit
2446 INCLUDE "parser.h"
2447
2448 RESET {
2449 psp.v_current_parser = &parse_state;
2450
2451 parse_state.ps_tokpushback = 0;
2452 parse_state.ps_checkkwd = 0;
2453 parse_state.ps_heredoclist = NULL;
2454 }
2455 #endif
2456
2457 /*
2458 * Returns true if the text contains nothing to expand (no dollar signs
2459 * or backquotes).
2460 */
2461
2462 STATIC int
2463 noexpand(char *text)
2464 {
2465 char *p;
2466 char c;
2467
2468 p = text;
2469 while ((c = *p++) != '\0') {
2470 if (c == CTLQUOTEMARK || c == CTLQUOTEEND)
2471 continue;
2472 if (c == CTLESC)
2473 p++;
2474 else if (ISCTL(c))
2475 return 0;
2476 }
2477 return 1;
2478 }
2479
2480
/*
 * Check that "name" is a legal variable name: a letter or underscore,
 * followed by any number of letters, underscores and digits.
 *
 * Returns 1 if the whole string is a valid name, 0 otherwise (which
 * includes the empty string).
 */

int
goodname(const char *name)
{
	const char *cp = name;

	/* the first character has the stricter rule */
	if (!is_name(*cp))
		return 0;

	for (cp++; *cp != '\0'; cp++) {
		if (!is_in_name(*cp))
			return 0;
	}
	return 1;
}
2500
/*
 * Check whether the string at "p" starts with "name=", where name is
 * a valid variable name.
 *
 * Returns 1 if an '=' is reached after a non-empty run of name characters,
 * 0 if the string ends, or a non-name character appears, before any '='.
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;

	for (;;) {
		p++;
		if (*p == '=')
			return 1;
		if (*p == '\0' || !is_in_name(*p))
			return 0;
	}
}
2511
2512 /*
2513 * skip past any \n's, and leave lasttoken set to whatever follows
2514 */
2515 STATIC void
2516 linebreak(void)
2517 {
2518 while (readtoken() == TNL)
2519 readheredocs();
2520 }
2521
2522 /*
2523 * The next token must be "token" -- check, then move past it
2524 */
2525 STATIC void
2526 consumetoken(int token)
2527 {
2528 if (readtoken() != token) {
2529 VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s",
2530 token, tokname[token], tokname[lasttoken]));
2531 CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext));
2532 VTRACE(DBG_PARSE, ("\n"));
2533 synexpect(token, NULL);
2534 }
2535 }
2536
2537 /*
2538 * Called when an unexpected token is read during the parse. The argument
2539 * is the token that is expected, or -1 if more than one type of token can
2540 * occur at this point.
2541 */
2542
2543 STATIC void
2544 synexpect(int token, const char *text)
2545 {
2546 char msg[64];
2547 char *p;
2548
2549 if (lasttoken == TWORD) {
2550 size_t len = strlen(wordtext);
2551
2552 if (len <= 13)
2553 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2554 else
2555 fmtstr(msg, 34,
2556 "Word \"%.10s...\" unexpected", wordtext);
2557 } else
2558 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2559
2560 p = strchr(msg, '\0');
2561 if (text)
2562 fmtstr(p, 30, " (expecting \"%.10s\")", text);
2563 else if (token >= 0)
2564 fmtstr(p, 30, " (expecting %s)", tokname[token]);
2565
2566 synerror(msg);
2567 /* NOTREACHED */
2568 }
2569
2570
/*
 * Report a syntax error by calling error() with the message "msg",
 * prefixed by the line number held in startlinno.
 * The NOTREACHED marker records that error() is not expected to return.
 */
2571 STATIC void
2572 synerror(const char *msg)
2573 {
2574 error("%d: Syntax error: %s", startlinno, msg);
2575 /* NOTREACHED */
2576 }
2577
2578 STATIC void
2579 setprompt(int which)
2580 {
2581 whichprompt = which;
2582
2583 #ifndef SMALL
2584 if (!el)
2585 #endif
2586 out2str(getprompt(NULL));
2587 }
2588
2589 /*
2590 * handle getting the next character, while ignoring \ \n
2591 * (which is a little tricky as we only have one char of pushback
2592 * and we need that one elsewhere).
2593 */
2594 STATIC int
2595 pgetc_linecont(void)
2596 {
2597 int c;
2598
2599 while ((c = pgetc()) == '\\') {
2600 c = pgetc();
2601 if (c == '\n') {
2602 plinno++;
2603 elided_nl++;
2604 VTRACE(DBG_LEXER, ("\"\\n\"drop(el=%d@%d)",
2605 elided_nl, plinno));
2606 if (doprompt)
2607 setprompt(2);
2608 else
2609 setprompt(0);
2610 } else {
2611 pungetc();
2612 /* Allow the backslash to be pushed back. */
2613 pushstring("\\", 1, NULL);
2614 return (pgetc());
2615 }
2616 }
2617 return (c);
2618 }
2619
2620 /*
2621 * called by editline -- any expansions to the prompt
2622 * should be added here.
2623 */
2624 const char *
2625 getprompt(void *unused)
2626 {
2627 char *p;
2628 const char *cp;
2629 int wp;
2630
2631 if (!doprompt)
2632 return "";
2633
2634 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt));
2635
2636 switch (wp = whichprompt) {
2637 case 0:
2638 return "";
2639 case 1:
2640 p = ps1val();
2641 break;
2642 case 2:
2643 p = ps2val();
2644 break;
2645 default:
2646 return "<internal prompt error>";
2647 }
2648 if (p == NULL)
2649 return "";
2650
2651 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p));
2652
2653 cp = expandstr(p, plinno);
2654 whichprompt = wp; /* history depends on it not changing */
2655
2656 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp));
2657
2658 return cp;
2659 }
2660
2661 /*
2662 * Expand a string ... used for expanding prompts (PS1...)
2663 *
2664 * Never return NULL, always some string (return input string if invalid)
2665 *
2666 * The internal routine does the work, leaving the result on the
2667 * stack (or in a static string, or even the input string) and
2668 * handles parser recursion, and cleanup after an error while parsing.
2669 *
2670 * The visible interface copies the result off the stack (if it is there),
2671 * and handles stack management, leaving the stack in the exact same
2672 * state it was when expandstr() was called (so it can be used part way
2673 * through building a stack data structure - as in when PS2 is being
2674 * expanded half way through reading a "command line")
2675 *
2676 * on error, expandonstack() cleans up the parser state, but then
2677 * simply jumps out through expandstr() without doing any stack cleanup,
2678 * which is OK, as the error handler must deal with that anyway.
2679 *
2680 * The split into two funcs is to avoid problems with setjmp/longjmp
2681 * and local variables which could otherwise be optimised into bizarre
2682 * behaviour.
2683 */
2684 static const char *
2685 expandonstack(char *ps, int cmdsub, int lineno)
2686 {
2687 union node n;
2688 struct jmploc jmploc;
2689 struct jmploc *const savehandler = handler;
2690 struct parsefile *const savetopfile = getcurrentfile();
2691 const int save_x = xflag;
2692 const int save_e_s = errors_suppressed;
2693 struct parse_state new_state = init_parse_state;
2694 struct parse_state *const saveparser = psp.v_current_parser;
2695 const char *result = NULL;
2696
2697 if (!setjmp(jmploc.loc)) {
2698 handler = &jmploc;
2699 errors_suppressed = 1;
2700
2701 psp.v_current_parser = &new_state;
2702 setinputstring(ps, 1, lineno);
2703
2704 readtoken1(pgetc(), DQSYNTAX, 1);
2705 if (backquotelist != NULL) {
2706 if (!cmdsub)
2707 result = ps;
2708 else if (!promptcmds)
2709 result = "-o promptcmds not set: ";
2710 }
2711 if (result == NULL) {
2712 n.narg.type = NARG;
2713 n.narg.next = NULL;
2714 n.narg.text = wordtext;
2715 n.narg.lineno = lineno;
2716 n.narg.backquote = backquotelist;
2717
2718 xflag = 0; /* we might be expanding PS4 ... */
2719 expandarg(&n, NULL, 0);
2720 result = stackblock();
2721 }
2722 } else {
2723 psp.v_current_parser = saveparser;
2724 xflag = save_x;
2725 popfilesupto(savetopfile);
2726 handler = savehandler;
2727 errors_suppressed = save_e_s;
2728
2729 if (exception == EXEXIT)
2730 longjmp(handler->loc, 1);
2731 if (exception == EXINT)
2732 exraise(SIGINT);
2733 return "";
2734 }
2735 psp.v_current_parser = saveparser;
2736 xflag = save_x;
2737 popfilesupto(savetopfile);
2738 handler = savehandler;
2739 errors_suppressed = save_e_s;
2740
2741 if (result == NULL)
2742 result = ps;
2743
2744 return result;
2745 }
2746
2747 const char *
2748 expandstr(char *ps, int lineno)
2749 {
2750 const char *result = NULL;
2751 struct stackmark smark;
2752 static char *buffer = NULL; /* storage for prompt, never freed */
2753 static size_t bufferlen = 0;
2754
2755 setstackmark(&smark);
2756 /*
2757 * At this point we anticipate that there may be a string
2758 * growing on the stack, but we have no idea how big it is.
2759 * However we know that it cannot be bigger than the current
2760 * allocated stack block, so simply reserve the whole thing,
2761 * then we can use the stack without barfing all over what
2762 * is there already... (the stack mark undoes this later.)
2763 */
2764 (void) stalloc(stackblocksize());
2765
2766 result = expandonstack(ps, 1, lineno);
2767
2768 if (__predict_true(result == stackblock())) {
2769 size_t len = strlen(result) + 1;
2770
2771 /*
2772 * the result (usual case) is on the stack, which we
2773 * are just about to discard (popstackmark()) so we
2774 * need to move it somewhere safe first.
2775 */
2776
2777 if (__predict_false(len > bufferlen)) {
2778 char *new;
2779 size_t newlen = bufferlen;
2780
2781 if (__predict_false(len > (SIZE_MAX >> 4))) {
2782 result = "huge prompt: ";
2783 goto getout;
2784 }
2785
2786 if (newlen == 0)
2787 newlen = 32;
2788 while (newlen <= len)
2789 newlen <<= 1;
2790
2791 new = (char *)realloc(buffer, newlen);
2792
2793 if (__predict_false(new == NULL)) {
2794 /*
2795 * this should rarely (if ever) happen
2796 * but we must do something when it does...
2797 */
2798 result = "No mem for prompt: ";
2799 goto getout;
2800 } else {
2801 buffer = new;
2802 bufferlen = newlen;
2803 }
2804 }
2805 (void)memcpy(buffer, result, len);
2806 result = buffer;
2807 }
2808
2809 getout:;
2810 popstackmark(&smark);
2811
2812 return result;
2813 }
2814
/*
 * A simpler variant of expandstr() which does no $( ) expansions, for
 * use during shell startup, when we know we are not parsing and so the
 * stack is not in use: nothing needs protecting, and no cleanup is done
 * here (that is handled externally).
 *
 * The result is returned just as expandonstack() produced it, even if
 * it lives on the stack.
 */
const char *
expandenv(char *arg)
{
	const char *expanded;

	/* cmdsub = 0, lineno = 0 */
	expanded = expandonstack(arg, 0, 0);
	return expanded;
}
2828