parser.c revision 1.164 1 /* $NetBSD: parser.c,v 1.164 2019/01/22 14:32:17 kre Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
39 #else
40 __RCSID("$NetBSD: parser.c,v 1.164 2019/01/22 14:32:17 kre Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <limits.h>
47
48 #include "shell.h"
49 #include "parser.h"
50 #include "nodes.h"
51 #include "expand.h" /* defines rmescapes() */
52 #include "eval.h" /* defines commandname */
53 #include "syntax.h"
54 #include "options.h"
55 #include "input.h"
56 #include "output.h"
57 #include "var.h"
58 #include "error.h"
59 #include "memalloc.h"
60 #include "mystring.h"
61 #include "alias.h"
62 #include "show.h"
63 #ifndef SMALL
64 #include "myhistedit.h"
65 #endif
66 #ifdef DEBUG
67 #include "nodenames.h"
68 #endif
69
70 /*
71 * Shell command parser.
72 */
73
74 /* values returned by readtoken */
75 #include "token.h"
76
/* Named constants for the two brace characters. */
#define	OPENBRACE	'{'
#define	CLOSEBRACE	'}'
79
/*
 * One here document that has been announced by a << redirection but
 * whose body has not been read yet.  Entries are chained through
 * "next" in the order in which they were seen.
 */
struct HereDoc {
	struct HereDoc	*next;		/* following entry in the list */
	union node	*here;		/* the redirection node */
	char		*eofmark;	/* text that ends the document */
	int		 striptabs;	/* non-zero: strip leading tabs */
	int		 startline;	/* line number where << was seen */
};
87
88 MKINIT struct parse_state parse_state;
89 union parse_state_p psp = { .c_current_parser = &parse_state };
90
91 static const struct parse_state init_parse_state = { /* all 0's ... */
92 .ps_heredoclist = NULL,
93 .ps_parsebackquote = 0,
94 .ps_doprompt = 0,
95 .ps_needprompt = 0,
96 .ps_lasttoken = 0,
97 .ps_tokpushback = 0,
98 .ps_wordtext = NULL,
99 .ps_checkkwd = 0,
100 .ps_redirnode = NULL,
101 .ps_heredoc = NULL,
102 .ps_quoteflag = 0,
103 .ps_startlinno = 0,
104 .ps_funclinno = 0,
105 .ps_elided_nl = 0,
106 };
107
108 STATIC union node *list(int);
109 STATIC union node *andor(void);
110 STATIC union node *pipeline(void);
111 STATIC union node *command(void);
112 STATIC union node *simplecmd(union node **, union node *);
113 STATIC union node *makeword(int);
114 STATIC void parsefname(void);
115 STATIC int slurp_heredoc(char *const, const int, const int);
116 STATIC void readheredocs(void);
117 STATIC int peektoken(void);
118 STATIC int readtoken(void);
119 STATIC int xxreadtoken(void);
120 STATIC int readtoken1(int, char const *, int);
121 STATIC int noexpand(char *);
122 STATIC void linebreak(void);
123 STATIC void consumetoken(int);
124 STATIC void synexpect(int, const char *) __dead;
125 STATIC void synerror(const char *) __dead;
126 STATIC void setprompt(int);
127 STATIC int pgetc_linecont(void);
128
129 static const char EOFhere[] = "EOF reading here (<<) document";
130
131 #ifdef DEBUG
132 int parsing = 0;
133 #endif
134
135 /*
136 * Read and parse a command. Returns NEOF on end of file. (NULL is a
137 * valid parse tree indicating a blank line.)
138 */
139
140 union node *
141 parsecmd(int interact)
142 {
143 int t;
144 union node *n;
145
146 #ifdef DEBUG
147 parsing++;
148 #endif
149 tokpushback = 0;
150 checkkwd = 0;
151 doprompt = interact;
152 if (doprompt)
153 setprompt(1);
154 else
155 setprompt(0);
156 needprompt = 0;
157 t = readtoken();
158 #ifdef DEBUG
159 parsing--;
160 #endif
161 if (t == TEOF)
162 return NEOF;
163 if (t == TNL)
164 return NULL;
165
166 #ifdef DEBUG
167 parsing++;
168 #endif
169 tokpushback++;
170 n = list(1);
171 #ifdef DEBUG
172 parsing--;
173 #endif
174 if (heredoclist)
175 error("%d: Here document (<<%s) expected but not present",
176 heredoclist->startline, heredoclist->eofmark);
177 return n;
178 }
179
180
181 STATIC union node *
182 list(int nlflag)
183 {
184 union node *ntop, *n1, *n2, *n3;
185 int tok;
186
187 CTRACE(DBG_PARSE, ("list(%d): entered @%d\n",nlflag,plinno));
188
189 checkkwd = CHKNL | CHKKWD | CHKALIAS;
190 if (nlflag == 0 && tokendlist[peektoken()])
191 return NULL;
192 ntop = n1 = NULL;
193 for (;;) {
194 n2 = andor();
195 tok = readtoken();
196 if (tok == TBACKGND) {
197 if (n2->type == NCMD || n2->type == NPIPE)
198 n2->ncmd.backgnd = 1;
199 else if (n2->type == NREDIR)
200 n2->type = NBACKGND;
201 else {
202 n3 = stalloc(sizeof(struct nredir));
203 n3->type = NBACKGND;
204 n3->nredir.n = n2;
205 n3->nredir.redirect = NULL;
206 n2 = n3;
207 }
208 }
209
210 if (ntop == NULL)
211 ntop = n2;
212 else if (n1 == NULL) {
213 n1 = stalloc(sizeof(struct nbinary));
214 n1->type = NSEMI;
215 n1->nbinary.ch1 = ntop;
216 n1->nbinary.ch2 = n2;
217 ntop = n1;
218 } else {
219 n3 = stalloc(sizeof(struct nbinary));
220 n3->type = NSEMI;
221 n3->nbinary.ch1 = n1->nbinary.ch2;
222 n3->nbinary.ch2 = n2;
223 n1->nbinary.ch2 = n3;
224 n1 = n3;
225 }
226
227 switch (tok) {
228 case TBACKGND:
229 case TSEMI:
230 tok = readtoken();
231 /* FALLTHROUGH */
232 case TNL:
233 if (tok == TNL) {
234 readheredocs();
235 if (nlflag)
236 return ntop;
237 } else if (tok == TEOF && nlflag)
238 return ntop;
239 else
240 tokpushback++;
241
242 checkkwd = CHKNL | CHKKWD | CHKALIAS;
243 if (!nlflag && tokendlist[peektoken()])
244 return ntop;
245 break;
246 case TEOF:
247 pungetc(); /* push back EOF on input */
248 return ntop;
249 default:
250 if (nlflag)
251 synexpect(-1, 0);
252 tokpushback++;
253 return ntop;
254 }
255 }
256 }
257
258 STATIC union node *
259 andor(void)
260 {
261 union node *n1, *n2, *n3;
262 int t;
263
264 CTRACE(DBG_PARSE, ("andor: entered @%d\n", plinno));
265
266 n1 = pipeline();
267 for (;;) {
268 if ((t = readtoken()) == TAND) {
269 t = NAND;
270 } else if (t == TOR) {
271 t = NOR;
272 } else {
273 tokpushback++;
274 return n1;
275 }
276 n2 = pipeline();
277 n3 = stalloc(sizeof(struct nbinary));
278 n3->type = t;
279 n3->nbinary.ch1 = n1;
280 n3->nbinary.ch2 = n2;
281 n1 = n3;
282 }
283 }
284
285 STATIC union node *
286 pipeline(void)
287 {
288 union node *n1, *n2, *pipenode;
289 struct nodelist *lp, *prev;
290 int negate;
291
292 CTRACE(DBG_PARSE, ("pipeline: entered @%d\n", plinno));
293
294 negate = 0;
295 checkkwd = CHKNL | CHKKWD | CHKALIAS;
296 while (readtoken() == TNOT) {
297 CTRACE(DBG_PARSE, ("pipeline: TNOT recognized\n"));
298 #ifndef BOGUS_NOT_COMMAND
299 if (posix && negate)
300 synerror("2nd \"!\" unexpected");
301 #endif
302 negate++;
303 }
304 tokpushback++;
305 n1 = command();
306 if (readtoken() == TPIPE) {
307 pipenode = stalloc(sizeof(struct npipe));
308 pipenode->type = NPIPE;
309 pipenode->npipe.backgnd = 0;
310 lp = stalloc(sizeof(struct nodelist));
311 pipenode->npipe.cmdlist = lp;
312 lp->n = n1;
313 do {
314 prev = lp;
315 lp = stalloc(sizeof(struct nodelist));
316 lp->n = command();
317 prev->next = lp;
318 } while (readtoken() == TPIPE);
319 lp->next = NULL;
320 n1 = pipenode;
321 }
322 tokpushback++;
323 if (negate) {
324 CTRACE(DBG_PARSE, ("%snegate pipeline\n",
325 (negate&1) ? "" : "double "));
326 n2 = stalloc(sizeof(struct nnot));
327 n2->type = (negate & 1) ? NNOT : NDNOT;
328 n2->nnot.com = n1;
329 return n2;
330 } else
331 return n1;
332 }
333
334
335
336 STATIC union node *
337 command(void)
338 {
339 union node *n1, *n2;
340 union node *ap, **app;
341 union node *cp, **cpp;
342 union node *redir, **rpp;
343 int t;
344 #ifdef BOGUS_NOT_COMMAND
345 int negate = 0;
346 #endif
347
348 CTRACE(DBG_PARSE, ("command: entered @%d\n", plinno));
349
350 checkkwd = CHKNL | CHKKWD | CHKALIAS;
351 redir = NULL;
352 n1 = NULL;
353 rpp = &redir;
354
355 /* Check for redirection which may precede command */
356 while (readtoken() == TREDIR) {
357 *rpp = n2 = redirnode;
358 rpp = &n2->nfile.next;
359 parsefname();
360 }
361 tokpushback++;
362
363 #ifdef BOGUS_NOT_COMMAND /* only in pileline() */
364 while (readtoken() == TNOT) {
365 CTRACE(DBG_PARSE, ("command: TNOT (bogus) recognized\n"));
366 negate++;
367 }
368 tokpushback++;
369 #endif
370
371 switch (readtoken()) {
372 case TIF:
373 n1 = stalloc(sizeof(struct nif));
374 n1->type = NIF;
375 n1->nif.test = list(0);
376 consumetoken(TTHEN);
377 n1->nif.ifpart = list(0);
378 n2 = n1;
379 while (readtoken() == TELIF) {
380 n2->nif.elsepart = stalloc(sizeof(struct nif));
381 n2 = n2->nif.elsepart;
382 n2->type = NIF;
383 n2->nif.test = list(0);
384 consumetoken(TTHEN);
385 n2->nif.ifpart = list(0);
386 }
387 if (lasttoken == TELSE)
388 n2->nif.elsepart = list(0);
389 else {
390 n2->nif.elsepart = NULL;
391 tokpushback++;
392 }
393 consumetoken(TFI);
394 checkkwd = CHKKWD | CHKALIAS;
395 break;
396 case TWHILE:
397 case TUNTIL:
398 n1 = stalloc(sizeof(struct nbinary));
399 n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
400 n1->nbinary.ch1 = list(0);
401 consumetoken(TDO);
402 n1->nbinary.ch2 = list(0);
403 consumetoken(TDONE);
404 checkkwd = CHKKWD | CHKALIAS;
405 break;
406 case TFOR:
407 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
408 synerror("Bad for loop variable");
409 n1 = stalloc(sizeof(struct nfor));
410 n1->type = NFOR;
411 n1->nfor.var = wordtext;
412 linebreak();
413 if (lasttoken==TWORD && !quoteflag && equal(wordtext,"in")) {
414 app = ≈
415 while (readtoken() == TWORD) {
416 n2 = makeword(startlinno);
417 *app = n2;
418 app = &n2->narg.next;
419 }
420 *app = NULL;
421 n1->nfor.args = ap;
422 if (lasttoken != TNL && lasttoken != TSEMI)
423 synexpect(TSEMI, 0);
424 } else {
425 static char argvars[5] = {
426 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
427 };
428
429 n2 = stalloc(sizeof(struct narg));
430 n2->type = NARG;
431 n2->narg.text = argvars;
432 n2->narg.backquote = NULL;
433 n2->narg.next = NULL;
434 n2->narg.lineno = startlinno;
435 n1->nfor.args = n2;
436 /*
437 * Newline or semicolon here is optional (but note
438 * that the original Bourne shell only allowed NL).
439 */
440 if (lasttoken != TNL && lasttoken != TSEMI)
441 tokpushback++;
442 }
443 checkkwd = CHKNL | CHKKWD | CHKALIAS;
444 if ((t = readtoken()) == TDO)
445 t = TDONE;
446 else if (t == TBEGIN)
447 t = TEND;
448 else
449 synexpect(TDO, 0);
450 n1->nfor.body = list(0);
451 consumetoken(t);
452 checkkwd = CHKKWD | CHKALIAS;
453 break;
454 case TCASE:
455 n1 = stalloc(sizeof(struct ncase));
456 n1->type = NCASE;
457 n1->ncase.lineno = startlinno - elided_nl;
458 consumetoken(TWORD);
459 n1->ncase.expr = makeword(startlinno);
460 linebreak();
461 if (lasttoken != TWORD || !equal(wordtext, "in"))
462 synexpect(-1, "in");
463 cpp = &n1->ncase.cases;
464 checkkwd = CHKNL | CHKKWD;
465 readtoken();
466 /*
467 * Both ksh and bash accept 'case x in esac'
468 * so configure scripts started taking advantage of this.
469 * The page: http://pubs.opengroup.org/onlinepubs/\
470 * 009695399/utilities/xcu_chap02.html contradicts itself,
471 * as to if this is legal; the "Case Conditional Format"
472 * paragraph shows one case is required, but the "Grammar"
473 * section shows a grammar that explicitly allows the no
474 * case option.
475 *
476 * The standard also says (section 2.10):
477 * This formal syntax shall take precedence over the
478 * preceding text syntax description.
479 * ie: the "Grammar" section wins. The text is just
480 * a rough guide (introduction to the common case.)
481 */
482 while (lasttoken != TESAC) {
483 *cpp = cp = stalloc(sizeof(struct nclist));
484 cp->type = NCLIST;
485 app = &cp->nclist.pattern;
486 if (lasttoken == TLP)
487 readtoken();
488 for (;;) {
489 if (lasttoken < TWORD)
490 synexpect(TWORD, 0);
491 *app = ap = makeword(startlinno);
492 checkkwd = CHKNL | CHKKWD;
493 if (readtoken() != TPIPE)
494 break;
495 app = &ap->narg.next;
496 readtoken();
497 }
498 if (lasttoken != TRP)
499 synexpect(TRP, 0);
500 cp->nclist.lineno = startlinno;
501 cp->nclist.body = list(0);
502
503 checkkwd = CHKNL | CHKKWD | CHKALIAS;
504 if ((t = readtoken()) != TESAC) {
505 if (t != TENDCASE && t != TCASEFALL) {
506 synexpect(TENDCASE, 0);
507 } else {
508 if (t == TCASEFALL)
509 cp->type = NCLISTCONT;
510 checkkwd = CHKNL | CHKKWD;
511 readtoken();
512 }
513 }
514 cpp = &cp->nclist.next;
515 }
516 *cpp = NULL;
517 checkkwd = CHKKWD | CHKALIAS;
518 break;
519 case TLP:
520 n1 = stalloc(sizeof(struct nredir));
521 n1->type = NSUBSHELL;
522 n1->nredir.n = list(0);
523 n1->nredir.redirect = NULL;
524 if (n1->nredir.n == NULL)
525 synexpect(-1, 0);
526 consumetoken(TRP);
527 checkkwd = CHKKWD | CHKALIAS;
528 break;
529 case TBEGIN:
530 n1 = list(0);
531 if (posix && n1 == NULL)
532 synexpect(-1, 0);
533 consumetoken(TEND);
534 checkkwd = CHKKWD | CHKALIAS;
535 break;
536
537 case TBACKGND:
538 case TSEMI:
539 case TAND:
540 case TOR:
541 case TPIPE:
542 case TNL:
543 case TEOF:
544 case TRP:
545 case TENDCASE:
546 case TCASEFALL:
547 /*
548 * simple commands must have something in them,
549 * either a word (which at this point includes a=b)
550 * or a redirection. If we reached the end of the
551 * command (which one of these tokens indicates)
552 * when we are just starting, and have not had a
553 * redirect, then ...
554 *
555 * nb: it is still possible to end up with empty
556 * simple commands, if the "command" is a var
557 * expansion that produces nothing:
558 * X= ; $X && $X
559 * --> &&
560 * That is OK and is handled after word expansions.
561 */
562 if (!redir)
563 synexpect(-1, 0);
564 /*
565 * continue to build a node containing the redirect.
566 * the tokpushback means that our ending token will be
567 * read again in simplecmd, causing it to terminate,
568 * so only the redirect(s) will be contained in the
569 * returned n1
570 */
571 /* FALLTHROUGH */
572 case TWORD:
573 tokpushback++;
574 n1 = simplecmd(rpp, redir);
575 goto checkneg;
576 default:
577 synexpect(-1, 0);
578 /* NOTREACHED */
579 }
580
581 /* Now check for redirection which may follow command */
582 while (readtoken() == TREDIR) {
583 *rpp = n2 = redirnode;
584 rpp = &n2->nfile.next;
585 parsefname();
586 }
587 tokpushback++;
588 *rpp = NULL;
589 if (redir) {
590 if (n1 == NULL || n1->type != NSUBSHELL) {
591 n2 = stalloc(sizeof(struct nredir));
592 n2->type = NREDIR;
593 n2->nredir.n = n1;
594 n1 = n2;
595 }
596 n1->nredir.redirect = redir;
597 }
598
599 checkneg:
600 #ifdef BOGUS_NOT_COMMAND
601 if (negate) {
602 VTRACE(DBG_PARSE, ("bogus %snegate command\n",
603 (negate&1) ? "" : "double "));
604 n2 = stalloc(sizeof(struct nnot));
605 n2->type = (negate & 1) ? NNOT : NDNOT;
606 n2->nnot.com = n1;
607 return n2;
608 }
609 else
610 #endif
611 return n1;
612 }
613
614
615 STATIC union node *
616 simplecmd(union node **rpp, union node *redir)
617 {
618 union node *args, **app;
619 union node *n = NULL;
620 int line = 0;
621 int savecheckkwd;
622 #ifdef BOGUS_NOT_COMMAND
623 union node *n2;
624 int negate = 0;
625 #endif
626
627 CTRACE(DBG_PARSE, ("simple command with%s redir already @%d\n",
628 redir ? "" : "out", plinno));
629
630 /* If we don't have any redirections already, then we must reset */
631 /* rpp to be the address of the local redir variable. */
632 if (redir == 0)
633 rpp = &redir;
634
635 args = NULL;
636 app = &args;
637
638 #ifdef BOGUS_NOT_COMMAND /* pipelines get negated, commands do not */
639 while (readtoken() == TNOT) {
640 VTRACE(DBG_PARSE, ("simplcmd: bogus TNOT recognized\n"));
641 negate++;
642 }
643 tokpushback++;
644 #endif
645
646 savecheckkwd = CHKALIAS;
647 for (;;) {
648 checkkwd = savecheckkwd;
649 if (readtoken() == TWORD) {
650 if (line == 0)
651 line = startlinno;
652 n = makeword(startlinno);
653 *app = n;
654 app = &n->narg.next;
655 if (savecheckkwd != 0 && !isassignment(wordtext))
656 savecheckkwd = 0;
657 } else if (lasttoken == TREDIR) {
658 if (line == 0)
659 line = startlinno;
660 *rpp = n = redirnode;
661 rpp = &n->nfile.next;
662 parsefname(); /* read name of redirection file */
663 } else if (lasttoken == TLP && app == &args->narg.next
664 && redir == 0) {
665 /* We have a function */
666 consumetoken(TRP);
667 funclinno = plinno;
668 rmescapes(n->narg.text);
669 if (strchr(n->narg.text, '/'))
670 synerror("Bad function name");
671 VTRACE(DBG_PARSE, ("Function '%s' seen @%d\n",
672 n->narg.text, plinno));
673 n->type = NDEFUN;
674 n->narg.lineno = plinno - elided_nl;
675 n->narg.next = command();
676 funclinno = 0;
677 goto checkneg;
678 } else {
679 tokpushback++;
680 break;
681 }
682 }
683
684 if (args == NULL && redir == NULL)
685 synexpect(-1, 0);
686 *app = NULL;
687 *rpp = NULL;
688 n = stalloc(sizeof(struct ncmd));
689 n->type = NCMD;
690 n->ncmd.lineno = line - elided_nl;
691 n->ncmd.backgnd = 0;
692 n->ncmd.args = args;
693 n->ncmd.redirect = redir;
694 n->ncmd.lineno = startlinno;
695
696 checkneg:
697 #ifdef BOGUS_NOT_COMMAND
698 if (negate) {
699 VTRACE(DBG_PARSE, ("bogus %snegate simplecmd\n",
700 (negate&1) ? "" : "double "));
701 n2 = stalloc(sizeof(struct nnot));
702 n2->type = (negate & 1) ? NNOT : NDNOT;
703 n2->nnot.com = n;
704 return n2;
705 }
706 else
707 #endif
708 return n;
709 }
710
711 STATIC union node *
712 makeword(int lno)
713 {
714 union node *n;
715
716 n = stalloc(sizeof(struct narg));
717 n->type = NARG;
718 n->narg.next = NULL;
719 n->narg.text = wordtext;
720 n->narg.backquote = backquotelist;
721 n->narg.lineno = lno;
722 return n;
723 }
724
725 void
726 fixredir(union node *n, const char *text, int err)
727 {
728
729 VTRACE(DBG_PARSE, ("Fix redir %s %d\n", text, err));
730 if (!err)
731 n->ndup.vname = NULL;
732
733 if (is_number(text))
734 n->ndup.dupfd = number(text);
735 else if (text[0] == '-' && text[1] == '\0')
736 n->ndup.dupfd = -1;
737 else {
738
739 if (err)
740 synerror("Bad fd number");
741 else
742 n->ndup.vname = makeword(startlinno - elided_nl);
743 }
744 }
745
746
747 STATIC void
748 parsefname(void)
749 {
750 union node *n = redirnode;
751
752 if (readtoken() != TWORD)
753 synexpect(-1, 0);
754 if (n->type == NHERE) {
755 struct HereDoc *here = heredoc;
756 struct HereDoc *p;
757
758 if (quoteflag == 0)
759 n->type = NXHERE;
760 VTRACE(DBG_PARSE, ("Here document %d @%d\n", n->type, plinno));
761 if (here->striptabs) {
762 while (*wordtext == '\t')
763 wordtext++;
764 }
765
766 /*
767 * this test is not really necessary, we are not
768 * required to expand wordtext, but there's no reason
769 * it cannot be $$ or something like that - that would
770 * not mean the pid, but literally two '$' characters.
771 * There is no need for limits on what the word can be.
772 * However, it needs to stay literal as entered, not
773 * have $ converted to CTLVAR or something, which as
774 * the parser is, at the minute, is impossible to prevent.
775 * So, leave it like this until the rest of the parser is fixed.
776 */
777 if (!noexpand(wordtext))
778 synerror("Illegal eof marker for << redirection");
779
780 rmescapes(wordtext);
781 here->eofmark = wordtext;
782 here->next = NULL;
783 if (heredoclist == NULL)
784 heredoclist = here;
785 else {
786 for (p = heredoclist ; p->next ; p = p->next)
787 continue;
788 p->next = here;
789 }
790 } else if (n->type == NTOFD || n->type == NFROMFD) {
791 fixredir(n, wordtext, 0);
792 } else {
793 n->nfile.fname = makeword(startlinno - elided_nl);
794 }
795 }
796
797 /*
798 * Check to see whether we are at the end of the here document. When this
799 * is called, c is set to the first character of the next input line. If
800 * we are at the end of the here document, this routine sets the c to PEOF.
801 * The new value of c is returned.
802 */
803
804 static int
805 checkend(int c, char * const eofmark, const int striptabs)
806 {
807
808 if (striptabs) {
809 while (c == '\t')
810 c = pgetc();
811 }
812 if (c == PEOF) {
813 if (*eofmark == '\0')
814 return (c);
815 synerror(EOFhere);
816 }
817 if (c == *eofmark) {
818 int c2;
819 char *q;
820
821 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
822 if (c2 == '\n') {
823 plinno++;
824 needprompt = doprompt;
825 }
826 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
827 c = PEOF;
828 if (c2 == '\n') {
829 plinno++;
830 needprompt = doprompt;
831 }
832 } else {
833 pungetc();
834 pushstring(eofmark + 1, q - (eofmark + 1), NULL);
835 }
836 } else if (c == '\n' && *eofmark == '\0') {
837 c = PEOF;
838 plinno++;
839 needprompt = doprompt;
840 }
841 return (c);
842 }
843
844
845 /*
846 * Input any here documents.
847 */
848
849 STATIC int
850 slurp_heredoc(char *const eofmark, const int striptabs, const int sq)
851 {
852 int c;
853 char *out;
854 int lines = plinno;
855
856 c = pgetc();
857
858 /*
859 * If we hit EOF on the input, and the eofmark is a null string ('')
860 * we consider this empty line to be the eofmark, and exit without err.
861 */
862 if (c == PEOF && *eofmark != '\0')
863 synerror(EOFhere);
864
865 STARTSTACKSTR(out);
866
867 while ((c = checkend(c, eofmark, striptabs)) != PEOF) {
868 do {
869 if (sq) {
870 /*
871 * in single quoted mode (eofmark quoted)
872 * all we look for is \n so we can check
873 * for the epfmark - everything saved literally.
874 */
875 STPUTC(c, out);
876 if (c == '\n') {
877 plinno++;
878 break;
879 }
880 continue;
881 }
882 /*
883 * In double quoted (non-quoted eofmark)
884 * we must handle \ followed by \n here
885 * otherwise we can mismatch the end mark.
886 * All other uses of \ will be handled later
887 * when the here doc is expanded.
888 *
889 * This also makes sure \\ followed by \n does
890 * not suppress the newline (the \ quotes itself)
891 */
892 if (c == '\\') { /* A backslash */
893 STPUTC(c, out);
894 c = pgetc(); /* followed by */
895 if (c == '\n') { /* a newline? */
896 STPUTC(c, out);
897 plinno++;
898 continue; /* don't break */
899 }
900 }
901 STPUTC(c, out); /* keep the char */
902 if (c == '\n') { /* at end of line */
903 plinno++;
904 break; /* look for eofmark */
905 }
906 } while ((c = pgetc()) != PEOF);
907
908 /*
909 * If we have read a line, and reached EOF, without
910 * finding the eofmark, whether the EOF comes before
911 * or immediately after the \n, that is an error.
912 */
913 if (c == PEOF || (c = pgetc()) == PEOF)
914 synerror(EOFhere);
915 }
916 STPUTC('\0', out);
917
918 c = out - stackblock();
919 out = stackblock();
920 grabstackblock(c);
921 wordtext = out;
922
923 VTRACE(DBG_PARSE,
924 ("Slurped a %d line %sheredoc (to '%s')%s: len %d, \"%.*s%s\" @%d\n",
925 plinno - lines, sq ? "quoted " : "", eofmark,
926 striptabs ? " tab stripped" : "", c, (c > 16 ? 16 : c),
927 wordtext, (c > 16 ? "..." : ""), plinno));
928
929 return (plinno - lines);
930 }
931
932 static char *
933 insert_elided_nl(char *str)
934 {
935 while (elided_nl > 0) {
936 STPUTC(CTLNONL, str);
937 elided_nl--;
938 }
939 return str;
940 }
941
942 STATIC void
943 readheredocs(void)
944 {
945 struct HereDoc *here;
946 union node *n;
947 int line, l;
948
949 line = 0; /*XXX - gcc! obviously unneeded */
950 if (heredoclist)
951 line = heredoclist->startline + 1;
952 l = 0;
953 while (heredoclist) {
954 line += l;
955 here = heredoclist;
956 heredoclist = here->next;
957 if (needprompt) {
958 setprompt(2);
959 needprompt = 0;
960 }
961
962 l = slurp_heredoc(here->eofmark, here->striptabs,
963 here->here->nhere.type == NHERE);
964
965 here->here->nhere.doc = n = makeword(line);
966
967 if (here->here->nhere.type == NHERE)
968 continue;
969
970 /*
971 * Now "parse" here docs that have unquoted eofmarkers.
972 */
973 setinputstring(wordtext, 1, line);
974 VTRACE(DBG_PARSE, ("Reprocessing %d line here doc from %d\n",
975 l, line));
976 readtoken1(pgetc(), DQSYNTAX, 1);
977 n->narg.text = wordtext;
978 n->narg.backquote = backquotelist;
979 popfile();
980 }
981 }
982
983 STATIC int
984 peektoken(void)
985 {
986 int t;
987
988 t = readtoken();
989 tokpushback++;
990 return (t);
991 }
992
993 STATIC int
994 readtoken(void)
995 {
996 int t;
997 #ifdef DEBUG
998 int alreadyseen = tokpushback;
999 int savecheckkwd = checkkwd;
1000 #endif
1001 struct alias *ap;
1002
1003 top:
1004 t = xxreadtoken();
1005
1006 if (checkkwd & CHKNL) {
1007 while (t == TNL) {
1008 readheredocs();
1009 t = xxreadtoken();
1010 }
1011 }
1012
1013 /*
1014 * check for keywords and aliases
1015 */
1016 if (t == TWORD && !quoteflag) {
1017 const char *const *pp;
1018
1019 if (checkkwd & CHKKWD)
1020 for (pp = parsekwd; *pp; pp++) {
1021 if (**pp == *wordtext && equal(*pp, wordtext)) {
1022 lasttoken = t = pp -
1023 parsekwd + KWDOFFSET;
1024 VTRACE(DBG_PARSE,
1025 ("keyword %s recognized @%d\n",
1026 tokname[t], plinno));
1027 goto out;
1028 }
1029 }
1030
1031 if (checkkwd & CHKALIAS &&
1032 (ap = lookupalias(wordtext, 1)) != NULL) {
1033 VTRACE(DBG_PARSE,
1034 ("alias '%s' recognized -> <:%s:>\n",
1035 wordtext, ap->val));
1036 pushstring(ap->val, strlen(ap->val), ap);
1037 goto top;
1038 }
1039 }
1040 out:
1041 if (t != TNOT)
1042 checkkwd = 0;
1043
1044 VTRACE(DBG_PARSE, ("%stoken %s %s @%d (chkkwd %x->%x)\n",
1045 alreadyseen ? "reread " : "", tokname[t],
1046 t == TWORD ? wordtext : "", plinno, savecheckkwd, checkkwd));
1047 return (t);
1048 }
1049
1050
1051 /*
1052 * Read the next input token.
1053 * If the token is a word, we set backquotelist to the list of cmds in
1054 * backquotes. We set quoteflag to true if any part of the word was
1055 * quoted.
1056 * If the token is TREDIR, then we set redirnode to a structure containing
1057 * the redirection.
1058 * In all cases, the variable startlinno is set to the number of the line
1059 * on which the token starts.
1060 *
1061 * [Change comment: here documents and internal procedures]
1062 * [Readtoken shouldn't have any arguments. Perhaps we should make the
1063 * word parsing code into a separate routine. In this case, readtoken
1064 * doesn't need to have any internal procedures, but parseword does.
1065 * We could also make parseoperator in essence the main routine, and
1066 * have parseword (readtoken1?) handle both words and redirection.]
1067 */
1068
1069 #define RETURN(token) return lasttoken = (token)
1070
1071 STATIC int
1072 xxreadtoken(void)
1073 {
1074 int c;
1075
1076 if (tokpushback) {
1077 tokpushback = 0;
1078 CTRACE(DBG_LEXER,
1079 ("xxreadtoken() returns %s (%d) again\n",
1080 tokname[lasttoken], lasttoken));
1081 return lasttoken;
1082 }
1083 if (needprompt) {
1084 setprompt(2);
1085 needprompt = 0;
1086 }
1087 elided_nl = 0;
1088 startlinno = plinno;
1089 for (;;) { /* until token or start of word found */
1090 c = pgetc_macro();
1091 CTRACE(DBG_LEXER, ("xxreadtoken() sees '%c' (%#.2x) ",
1092 c&0xFF, c&0x1FF));
1093 switch (c) {
1094 case ' ': case '\t': case PFAKE:
1095 CTRACE(DBG_LEXER, (" ignored\n"));
1096 continue;
1097 case '#':
1098 while ((c = pgetc()) != '\n' && c != PEOF)
1099 continue;
1100 CTRACE(DBG_LEXER,
1101 ("skipped comment to (not incl) \\n\n"));
1102 pungetc();
1103 continue;
1104
1105 case '\n':
1106 plinno++;
1107 CTRACE(DBG_LEXER, ("newline now @%d\n", plinno));
1108 needprompt = doprompt;
1109 RETURN(TNL);
1110 case PEOF:
1111 CTRACE(DBG_LEXER, ("EOF -> TEOF (return)\n"));
1112 RETURN(TEOF);
1113
1114 case '&':
1115 if (pgetc_linecont() == '&') {
1116 CTRACE(DBG_LEXER,
1117 ("and another -> TAND (return)\n"));
1118 RETURN(TAND);
1119 }
1120 pungetc();
1121 CTRACE(DBG_LEXER, (" -> TBACKGND (return)\n"));
1122 RETURN(TBACKGND);
1123 case '|':
1124 if (pgetc_linecont() == '|') {
1125 CTRACE(DBG_LEXER,
1126 ("and another -> TOR (return)\n"));
1127 RETURN(TOR);
1128 }
1129 pungetc();
1130 CTRACE(DBG_LEXER, (" -> TPIPE (return)\n"));
1131 RETURN(TPIPE);
1132 case ';':
1133 switch (pgetc_linecont()) {
1134 case ';':
1135 CTRACE(DBG_LEXER,
1136 ("and another -> TENDCASE (return)\n"));
1137 RETURN(TENDCASE);
1138 case '&':
1139 CTRACE(DBG_LEXER,
1140 ("and '&' -> TCASEFALL (return)\n"));
1141 RETURN(TCASEFALL);
1142 default:
1143 pungetc();
1144 CTRACE(DBG_LEXER, (" -> TSEMI (return)\n"));
1145 RETURN(TSEMI);
1146 }
1147 case '(':
1148 CTRACE(DBG_LEXER, (" -> TLP (return)\n"));
1149 RETURN(TLP);
1150 case ')':
1151 CTRACE(DBG_LEXER, (" -> TRP (return)\n"));
1152 RETURN(TRP);
1153
1154 case '\\':
1155 switch (pgetc()) {
1156 case '\n':
1157 startlinno = ++plinno;
1158 CTRACE(DBG_LEXER, ("\\\n ignored, now @%d\n",
1159 plinno));
1160 if (doprompt)
1161 setprompt(2);
1162 else
1163 setprompt(0);
1164 continue;
1165 case PEOF:
1166 CTRACE(DBG_LEXER,
1167 ("then EOF -> TEOF (return) '\\' dropped\n"));
1168 RETURN(TEOF);
1169 default:
1170 CTRACE(DBG_LEXER, ("not \\\n or EOF: "));
1171 pungetc();
1172 break;
1173 }
1174 /* FALLTHROUGH */
1175 default:
1176 CTRACE(DBG_LEXER, ("getting a word\n"));
1177 return readtoken1(c, BASESYNTAX, 0);
1178 }
1179 }
1180 #undef RETURN
1181 }
1182
1183
1184
1185 /*
1186 * If eofmark is NULL, read a word or a redirection symbol. If eofmark
1187 * is not NULL, read a here document. In the latter case, eofmark is the
1188 * word which marks the end of the document and striptabs is true if
1189 * leading tabs should be stripped from the document. The argument firstc
1190 * is the first character of the input token or document.
1191 *
1192 * Because C does not have internal subroutines, I have simulated them
1193 * using goto's to implement the subroutine linkage. The following macros
1194 * will run code that appears at the end of readtoken1.
1195 */
1196
1197 /*
1198 * We used to remember only the current syntax, variable nesting level,
1199 * double quote state for each var nesting level, and arith nesting
1200 * level (unrelated to var nesting) and one prev syntax when in arith
1201 * syntax. This worked for simple cases, but can't handle arith inside
1202 * var expansion inside arith inside var with some quoted and some not.
1203 *
1204 * Inspired by FreeBSD's implementation (though it was the obvious way)
1205 * though implemented differently, we now have a stack that keeps track
1206 * of what we are doing now, and what we were doing previously.
1207 * Every time something changes, which will eventually end and should
1208 * revert to the previous state, we push this stack, and then pop it
1209 * again later (that is every ${} with an operator (to parse the word
1210 * or pattern that follows) ${x} and $x are too simple to need it)
1211 * $(( )) $( ) and "...". Always. Really, always!
1212 *
1213 * The stack is implemented as one static (on the C stack) base block
1214 * containing LEVELS_PER_BLOCK (8) stack entries, which should be
1215 * enough for the vast majority of cases. For torture tests, we
1216 * malloc more blocks as needed. All accesses through the inline
1217 * functions below.
1218 */
1219
1220 /*
1221 * varnest & arinest will typically be 0 or 1
1222 * (varnest can increment in usages like ${x=${y}} but probably
1223 * does not really need to)
1224 * parenlevel allows balancing parens inside a $(( )); it is reset
1225 * at each new nesting level, so $(( ( x + 3 ${unset-)} )) does not work.
1226 * quoted is special - we need to know 2 things ... are we inside "..."
1227 * (even if inherited from some previous nesting level) and was there
1228 * an opening '"' at this level (so the next will be closing).
1229 * "..." can span nesting levels, but cannot be opened in one and
1230 * closed in a different one.
1231 * To handle this, "quoted" has two fields: the bottom 4 (really 2)
1232 * bits are 0, 1, 2, or 3, for un, single, double, and C-style ($'...')
1233 * quoted (single quoted is really so special that this setting is not
1234 * very important), and 0x10, which indicates that an opening quote has
1235 * been seen. The bottom 4 bits are inherited, the 0x10 bit is not.
1236 */
/*
 * One entry of the lexer state stack: the syntax table in use, plus the
 * nesting counters and quoting flags that apply at one nesting level.
 */
1237 struct tokenstate {
1238 const char *ts_syntax; /* syntax table in effect at this level */
1239 unsigned short ts_parenlevel; /* counters */
1240 unsigned short ts_varnest; /* 64000 levels should be enough! */
1241 unsigned short ts_arinest;
1242 unsigned short ts_quoted; /* NQ, SQ, DQ or CQ; | QS if opened at this level */
1243 unsigned short ts_magicq; /* heredoc or word expand */
1244 };
1245
/*
 * Values stored in ts_quoted.  The low bits (mask QF) say which kind of
 * quoting is in effect; QS is a separate flag bit.
 */
1246 #define NQ 0x00 /* Unquoted */
1247 #define SQ 0x01 /* Single Quotes */
1248 #define DQ 0x02 /* Double Quotes (or equivalent) */
1249 #define CQ 0x03 /* C style Single Quotes */
1250 #define QF 0x0F /* Mask to extract previous values */
1251 #define QS 0x10 /* Quoting started at this level in stack */
1252
1253 #define LEVELS_PER_BLOCK 8 /* tokenstate entries held by one statestack block */
1254 #define VSS struct statestack /* shorthand for the stack block type */
1255
/*
 * One block of the state stack, holding LEVELS_PER_BLOCK entries.
 * Blocks are chained through "prev"; the base block has prev == NULL
 * (readtoken1 sets it up that way, and drop_state_level relies on it).
 */
1256 struct statestack {
1257 VSS *prev; /* previous block in list */
1258 int cur; /* which of our tokenstates is current */
1259 struct tokenstate tokenstate[LEVELS_PER_BLOCK];
1260 };
1261
1262 static inline struct tokenstate *
1263 currentstate(VSS *stack)
1264 {
1265 return &stack->tokenstate[stack->cur];
1266 }
1267
#ifdef notdef
/*
 * Return the entry one level below the current one.  When the current
 * entry is the first of its block, that is the last entry of the previous
 * block; at the very bottom of the base block the base entry itself is
 * returned.
 */
static inline struct tokenstate *
prevstate(VSS *stack)
{
	const int idx = stack->cur;

	if (idx == 0) {
		VSS *below = stack->prev;

		/* cannot drop below base */
		return below == NULL ? &stack->tokenstate[0] :
		    &below->tokenstate[LEVELS_PER_BLOCK - 1];
	}
	return &stack->tokenstate[idx - 1];
}
#endif
1279
1280 static inline VSS *
1281 bump_state_level(VSS *stack)
1282 {
1283 struct tokenstate *os, *ts;
1284
1285 os = currentstate(stack);
1286
1287 if (++stack->cur >= LEVELS_PER_BLOCK) {
1288 VSS *ss;
1289
1290 ss = (VSS *)ckmalloc(sizeof (struct statestack));
1291 ss->cur = 0;
1292 ss->prev = stack;
1293 stack = ss;
1294 }
1295
1296 ts = currentstate(stack);
1297
1298 ts->ts_parenlevel = 0; /* parens inside never match outside */
1299
1300 ts->ts_quoted = os->ts_quoted & QF; /* these are default settings */
1301 ts->ts_varnest = os->ts_varnest;
1302 ts->ts_arinest = os->ts_arinest; /* when appropriate */
1303 ts->ts_syntax = os->ts_syntax; /* they will be altered */
1304 ts->ts_magicq = os->ts_magicq;
1305
1306 return stack;
1307 }
1308
1309 static inline VSS *
1310 drop_state_level(VSS *stack)
1311 {
1312 if (stack->cur == 0) {
1313 VSS *ss;
1314
1315 ss = stack;
1316 stack = ss->prev;
1317 if (stack == NULL)
1318 return ss;
1319 ckfree(ss);
1320 }
1321 --stack->cur;
1322 return stack;
1323 }
1324
1325 static inline void
1326 cleanup_state_stack(VSS *stack)
1327 {
1328 while (stack->prev != NULL) {
1329 stack->cur = 0;
1330 stack = drop_state_level(stack);
1331 }
1332 }
1333
/*
 * "Subroutine call" macros for readtoken1: jump to the parsesub /
 * parsearith code at the end of that function, which finishes with a
 * goto back to the *_return label planted here.  Because each expansion
 * defines a label, each macro can appear only once per function.
 */
1334 #define PARSESUB() {goto parsesub; parsesub_return:;}
1335 #define PARSEARITH() {goto parsearith; parsearith_return:;}
1336
1337 /*
1338 * The following macros all assume the existence of a local var "stack"
1339 * which contains a pointer to the current struct statestack
1340 */
1341
1342 /*
1343 * These are macros rather than inline funcs to avoid code churn as much
1344 * as possible - they replace macros of the same name used previously.
1345 */
/*
 * ISDBLQUOTE() tests only the QS bit of the current level, i.e. whether
 * a quote was opened at this level; SETDBLQUOTE() records an opening
 * double quote by storing QS | DQ.
 */
1346 #define ISDBLQUOTE() (currentstate(stack)->ts_quoted & QS)
1347 #define SETDBLQUOTE() (currentstate(stack)->ts_quoted = QS | DQ)
/* Disabled: would reset "quoted" to the value inherited from the level below */
1348 #ifdef notdef
1349 #define CLRDBLQUOTE() (currentstate(stack)->ts_quoted = \
1350 stack->cur != 0 || stack->prev ? \
1351 prevstate(stack)->ts_quoted & QF : 0)
1352 #endif
1353
1354 /*
1355 * This set are just to avoid excess typing and line lengths...
1356 * The ones that "look like" var names must be implemented to be lvalues
1357 */
/* Accessors for the fields of the current level; each expands to an lvalue */
1358 #define syntax (currentstate(stack)->ts_syntax)
1359 #define parenlevel (currentstate(stack)->ts_parenlevel)
1360 #define varnest (currentstate(stack)->ts_varnest)
1361 #define arinest (currentstate(stack)->ts_arinest)
1362 #define quoted (currentstate(stack)->ts_quoted)
1363 #define magicq (currentstate(stack)->ts_magicq)
/* Push / pop one level; both reassign the local "stack" variable */
1364 #define TS_PUSH() (stack = bump_state_level(stack))
1365 #define TS_POP() (stack = drop_state_level(stack))
1366
1367 /*
1368 * Called to parse command substitutions. oldstyle is true if the command
1369 * is enclosed inside `` (otherwise it was enclosed in "$( )")
1370 *
1371 * Internally nlpp is a pointer to the head of the linked
1372 * list of commands (passed by reference), and savelen is the number of
1373 * characters on the top of the stack which must be preserved.
1374 */
/*
 * parsebackq: parse one command substitution and append it to *pbqlist.
 *
 *	stack	 - the caller's lexer state stack; read here (through the
 *		   ISDBLQUOTE() and arinest macros) and unwound with
 *		   cleanup_state_stack() if an error is caught
 *	in	 - current end of the partial word in the stack block
 *	pbqlist	 - list of command substitutions; a new nodelist entry
 *		   holding the parsed command is linked onto its tail
 *	oldstyle - non-zero for `...`, zero for $( ... )
 *
 * The partial word (savelen bytes) is copied aside before the embedded
 * command is parsed and copied back afterwards.  Returns the new end of
 * the word, after a CTLBACKQ (or CTLBACKQ | CTLQUOTE) marker has been
 * appended.
 */
1375 static char *
1376 parsebackq(VSS *const stack, char * const in,
1377 struct nodelist **const pbqlist, const int oldstyle)
1378 {
1379 struct nodelist **nlpp;
1380 const int savepbq = parsebackquote;
1381 union node *n;
1382 char *out;
1383 char *str = NULL;
/* volatile copy of str, because it is examined after the longjmp below */
1384 char *volatile sstr = str;
1385 struct jmploc jmploc;
1386 struct jmploc *const savehandler = handler;
1387 struct parsefile *const savetopfile = getcurrentfile();
1388 const int savelen = in - stackblock();
1389 int saveprompt;
1390 int lno;
1391
/*
 * Error path, entered by longjmp while handler == &jmploc: pop input
 * back to the file that was current on entry, free the saved copy of
 * the partial word, release extra state stack blocks, restore the
 * caller's handler and pass the error on to it.
 */
1392 if (setjmp(jmploc.loc)) {
1393 popfilesupto(savetopfile);
1394 if (sstr)
1395 ckfree(__UNVOLATILE(sstr));
1396 cleanup_state_stack(stack);
1397 parsebackquote = 0;
1398 handler = savehandler;
1399 CTRACE(DBG_LEXER, ("parsebackq() err (%d), unwinding\n",
1400 exception));
1401 longjmp(handler->loc, 1);
1402 }
1403 INTOFF;
1404 sstr = str = NULL;
/* save the partial word: the stack block is reused while parsing */
1405 if (savelen > 0) {
1406 sstr = str = ckmalloc(savelen);
1407 memcpy(str, stackblock(), savelen);
1408 }
1409 handler = &jmploc;
1410 INTON;
1411 if (oldstyle) {
1412 /*
1413 * We must read until the closing backquote, giving special
1414 * treatment to some slashes, and then push the string and
1415 * reread it as input, interpreting it normally.
1416 */
1417 int pc;
1418 int psavelen;
1419 char *pstr;
1420 int line1 = plinno;
1421
1422 VTRACE(DBG_PARSE|DBG_LEXER,
1423 ("parsebackq: repackaging `` as $( )"));
1424 /*
1425 * Because the entire `...` is read here, we don't
1426 * need to bother the state stack. That will be used
1427 * (as appropriate) when the processed string is re-read.
1428 */
1429 STARTSTACKSTR(out);
1430 #ifdef DEBUG
1431 for (psavelen = 0;;psavelen++) { /* } */
1432 #else
1433 for (;;) {
1434 #endif
1435 if (needprompt) {
1436 setprompt(2);
1437 needprompt = 0;
1438 }
1439 pc = pgetc();
1440 VTRACE(DBG_LEXER,
1441 ("parsebackq() got '%c'(%#.2x) in `` %s", pc&0xFF,
1442 pc&0x1FF, pc == '`' ? "terminator\n" : ""));
1443 if (pc == '`')
1444 break;
1445 switch (pc) {
1446 case '\\':
1447 pc = pgetc();
1448 VTRACE(DBG_LEXER, ("then '%c'(%#.2x) ",
1449 pc&0xFF, pc&0x1FF));
1450 #ifdef DEBUG
1451 psavelen++;
1452 #endif
1453 if (pc == '\n') { /* keep \ \n for later */
1454 plinno++;
1455 VTRACE(DBG_LEXER, ("@%d ", plinno));
1456 needprompt = doprompt;
1457 }
/*
 * The backslash is dropped only before \ ` $ (and before " when a
 * double quote is open at this level); otherwise it is kept.
 */
1458 if (pc != '\\' && pc != '`' && pc != '$'
1459 && (!ISDBLQUOTE() || pc != '"')) {
1460 VTRACE(DBG_LEXER, ("keep '\\' "));
1461 STPUTC('\\', out);
1462 }
1463 break;
1464
1465 case '\n':
1466 plinno++;
1467 VTRACE(DBG_LEXER, ("@%d ", plinno));
1468 needprompt = doprompt;
1469 break;
1470
1471 case PEOF:
1472 startlinno = line1;
1473 VTRACE(DBG_LEXER, ("EOF\n", plinno));
1474 synerror("EOF in backquote substitution");
1475 break;
1476
1477 default:
1478 break;
1479 }
1480 VTRACE(DBG_LEXER, (".\n", plinno));
1481 STPUTC(pc, out);
1482 }
1483 STPUTC('\0', out);
1484 VTRACE(DBG_LEXER, ("parsebackq() ``:"));
1485 VTRACE(DBG_PARSE|DBG_LEXER, (" read %d", psavelen));
1486 psavelen = out - stackblock();
1487 VTRACE(DBG_PARSE|DBG_LEXER, (" produced %d\n", psavelen));
/* make the collected text the current input so list() below parses it */
1488 if (psavelen > 0) {
1489 pstr = grabstackstr(out);
1490 CTRACE(DBG_LEXER,
1491 ("parsebackq() reprocessing as $(%s)\n", pstr));
1492 setinputstring(pstr, 1, line1);
1493 }
1494 }
/* append a new, empty entry at the tail of the caller's list */
1495 nlpp = pbqlist;
1496 while (*nlpp)
1497 nlpp = &(*nlpp)->next;
1498 *nlpp = stalloc(sizeof(struct nodelist));
1499 (*nlpp)->next = NULL;
1500 parsebackquote = oldstyle;
1501
1502 if (oldstyle) {
1503 saveprompt = doprompt;
1504 doprompt = 0;
1505 } else
1506 saveprompt = 0;
1507
/* lno becomes the number of lines consumed while parsing the command */
1508 lno = -plinno;
1509 CTRACE(DBG_LEXER, ("parsebackq() parsing embedded command list\n"));
1510 n = list(0);
1511 CTRACE(DBG_LEXER, ("parsebackq() parsed $() (%d -> %d)\n", -lno,
1512 lno + plinno));
1513 lno += plinno;
1514
1515 if (oldstyle) {
1516 if (peektoken() != TEOF)
1517 synexpect(-1, 0);
1518 doprompt = saveprompt;
1519 } else
1520 consumetoken(TRP);
1521
1522 (*nlpp)->n = n;
1523 if (oldstyle) {
1524 /*
1525 * Start reading from old file again, ignoring any pushed back
1526 * tokens left from the backquote parsing
1527 */
1528 CTRACE(DBG_LEXER, ("parsebackq() back to previous input\n"));
1529 popfile();
1530 tokpushback = 0;
1531 }
1532
/* restore the partial word at the start of a stack string */
1533 while (stackblocksize() <= savelen)
1534 growstackblock();
1535 STARTSTACKSTR(out);
1536 if (str) {
1537 memcpy(out, str, savelen);
1538 STADJUST(savelen, out);
1539 INTOFF;
1540 ckfree(str);
1541 sstr = str = NULL;
1542 INTON;
1543 }
1544 parsebackquote = savepbq;
1545 handler = savehandler;
/*
 * In arithmetic or with a double quote open at this level, mark the
 * substitution as quoted and emit one CTLNONL per line consumed
 * (presumably to keep later line counting right -- TODO confirm).
 */
1546 if (arinest || ISDBLQUOTE()) {
1547 STPUTC(CTLBACKQ | CTLQUOTE, out);
1548 while (--lno >= 0)
1549 STPUTC(CTLNONL, out);
1550 } else
1551 STPUTC(CTLBACKQ, out);
1552
1553 return out;
1554 }
1555
1556 /*
1557 * Parse a redirection operator. The parameter "out" points to a string
1558 * specifying the fd to be redirected. It is guaranteed to be either ""
1559 * or a numeric string (for now anyway). The parameter "c" contains the
1560 * first character of the redirection operator.
1561 *
1562 * Note the string "out" is on the stack, which we are about to clobber,
1563 * so process it first...
1564 */
1565
1566 static void
1567 parseredir(const char *out, int c)
1568 {
1569 union node *np;
1570 int fd;
1571
1572 fd = (*out == '\0') ? -1 : number(out);
1573
1574 np = stalloc(sizeof(struct nfile));
1575 VTRACE(DBG_LEXER, ("parseredir after '%s%c' ", out, c));
1576 if (c == '>') {
1577 if (fd < 0)
1578 fd = 1;
1579 c = pgetc_linecont();
1580 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1581 if (c == '>')
1582 np->type = NAPPEND;
1583 else if (c == '|')
1584 np->type = NCLOBBER;
1585 else if (c == '&')
1586 np->type = NTOFD;
1587 else {
1588 np->type = NTO;
1589 VTRACE(DBG_LEXER, ("unwanted ", c));
1590 pungetc();
1591 }
1592 } else { /* c == '<' */
1593 if (fd < 0)
1594 fd = 0;
1595 c = pgetc_linecont();
1596 VTRACE(DBG_LEXER, ("is '%c'(%#.2x) ", c&0xFF, c&0x1FF));
1597 switch (c) {
1598 case '<':
1599 /* if sizes differ, just discard the old one */
1600 if (sizeof (struct nfile) != sizeof (struct nhere))
1601 np = stalloc(sizeof(struct nhere));
1602 np->type = NHERE;
1603 np->nhere.fd = 0;
1604 heredoc = stalloc(sizeof(struct HereDoc));
1605 heredoc->here = np;
1606 heredoc->startline = plinno;
1607 if ((c = pgetc_linecont()) == '-') {
1608 CTRACE(DBG_LEXER, ("and '%c'(%#.2x) ",
1609 c & 0xFF, c & 0x1FF));
1610 heredoc->striptabs = 1;
1611 } else {
1612 heredoc->striptabs = 0;
1613 pungetc();
1614 }
1615 break;
1616
1617 case '&':
1618 np->type = NFROMFD;
1619 break;
1620
1621 case '>':
1622 np->type = NFROMTO;
1623 break;
1624
1625 default:
1626 np->type = NFROM;
1627 VTRACE(DBG_LEXER, ("unwanted('%c'0#.2x)", c&0xFF,
1628 c&0x1FF));
1629 pungetc();
1630 break;
1631 }
1632 }
1633 np->nfile.fd = fd;
1634
1635 VTRACE(DBG_LEXER, (" ->%"PRIdsNT" fd=%d\n", NODETYPENAME(np->type),fd));
1636
1637 redirnode = np; /* this is the "value" of TRENODE */
1638 }
1639
1640 /*
1641 * Called to parse a backslash escape sequence inside $'...'.
1642 * The backslash has already been read.
1643 */
/*
 * readcstyleesc: decode one escape and append the result at "out".
 *
 * Returns the updated output pointer.  Forms handled by the switch:
 * \\ \' \" \a \b \e \f \n \r \t \v, one to three octal digits, \cX,
 * \x (up to 2 hex digits), \u (up to 4) and \U (up to 8).
 * For \u and \U, values above 127 are written as UTF-8 when they are
 * at most 0x10ffff (surrogates are a syntax error); larger values
 * become '?'.  A backslash-newline produces no output.  An escape whose
 * value is NUL discards the rest of the quoted string.  Anything else
 * is a syntax error.
 */
1644 static char *
1645 readcstyleesc(char *out)
1646 {
1647 int c, vc, i, n;
1648 unsigned int v;
1649
1650 c = pgetc();
1651 VTRACE(DBG_LEXER, ("CSTR(\\%c)(\\%#x)", c&0xFF, c&0x1FF));
1652 switch (c) {
1653 case '\0':
1654 case PEOF:
1655 synerror("Unterminated quoted string");
/* backslash-newline: count the line and re-prompt, emit nothing */
1656 case '\n':
1657 plinno++;
1658 VTRACE(DBG_LEXER, ("@%d ", plinno));
1659 if (doprompt)
1660 setprompt(2);
1661 else
1662 setprompt(0);
1663 return out;
1664
1665 case '\\':
1666 case '\'':
1667 case '"':
1668 v = c;
1669 break;
1670
1671 case 'a': v = '\a'; break;
1672 case 'b': v = '\b'; break;
1673 case 'e': v = '\033'; break;
1674 case 'f': v = '\f'; break;
1675 case 'n': v = '\n'; break;
1676 case 'r': v = '\r'; break;
1677 case 't': v = '\t'; break;
1678 case 'v': v = '\v'; break;
1679
/* octal: up to two more digits after the first one */
1680 case '0': case '1': case '2': case '3':
1681 case '4': case '5': case '6': case '7':
1682 v = c - '0';
1683 c = pgetc();
1684 if (c >= '0' && c <= '7') {
1685 v <<= 3;
1686 v += c - '0';
1687 c = pgetc();
1688 if (c >= '0' && c <= '7') {
1689 v <<= 3;
1690 v += c - '0';
1691 } else
1692 pungetc();
1693 } else
1694 pungetc();
1695 break;
1696
/*
 * control char: next char must be in 0x3f..0x7a and not 0x60;
 * a backslash there must itself be followed by a second backslash.
 * \c? gives 127, anything else its low 5 bits.
 */
1697 case 'c':
1698 c = pgetc();
1699 if (c < 0x3f || c > 0x7a || c == 0x60)
1700 synerror("Bad \\c escape sequence");
1701 if (c == '\\' && pgetc() != '\\')
1702 synerror("Bad \\c\\ escape sequence");
1703 if (c == '?')
1704 v = 127;
1705 else
1706 v = c & 0x1f;
1707 break;
1708
1709 case 'x':
1710 n = 2;
1711 goto hexval;
1712 case 'u':
1713 n = 4;
1714 goto hexval;
1715 case 'U':
1716 n = 8;
/* read at most n hex digits, pushing back the first non-hex char */
1717 hexval:
1718 v = 0;
1719 for (i = 0; i < n; i++) {
1720 c = pgetc();
1721 if (c >= '0' && c <= '9')
1722 v = (v << 4) + c - '0';
1723 else if (c >= 'A' && c <= 'F')
1724 v = (v << 4) + c - 'A' + 10;
1725 else if (c >= 'a' && c <= 'f')
1726 v = (v << 4) + c - 'a' + 10;
1727 else {
1728 pungetc();
1729 break;
1730 }
1731 }
1732 if (n > 2 && v > 127) {
1733 if (v >= 0xd800 && v <= 0xdfff)
1734 synerror("Invalid \\u escape sequence");
1735
1736 /* XXX should we use iconv here. What locale? */
1737 CHECKSTRSPACE(4, out);
1738
/* 2, 3 or 4 byte UTF-8 encoding, chosen by magnitude */
1739 if (v <= 0x7ff) {
1740 USTPUTC(0xc0 | v >> 6, out);
1741 USTPUTC(0x80 | (v & 0x3f), out);
1742 return out;
1743 } else if (v <= 0xffff) {
1744 USTPUTC(0xe0 | v >> 12, out);
1745 USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1746 USTPUTC(0x80 | (v & 0x3f), out);
1747 return out;
1748 } else if (v <= 0x10ffff) {
1749 USTPUTC(0xf0 | v >> 18, out);
1750 USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1751 USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1752 USTPUTC(0x80 | (v & 0x3f), out);
1753 return out;
1754 }
1755 if (v > 127)
1756 v = '?';
1757 }
1758 break;
1759 default:
1760 synerror("Unknown $'' escape sequence");
1761 }
/* from here on only the value truncated to a char is used */
1762 vc = (char)v;
1763 VTRACE(DBG_LEXER, ("->%u(%#x)['%c']", v, v, vc&0xFF));
1764
1765 /*
1766 * If we managed to create a \n from a \ sequence (no matter how)
1767 * then we replace it with the magic CTLCNL control char, which
1768 * will turn into a \n again later, but in the meantime, never
1769 * causes LINENO increments.
1770 */
1771 if (vc == '\n') {
1772 VTRACE(DBG_LEXER, ("CTLCNL."));
1773 USTPUTC(CTLCNL, out);
1774 return out;
1775 }
1776
1777 /*
1778 * We can't handle NUL bytes.
1779 * POSIX says we should skip till the closing quote.
1780 */
1781 if (vc == '\0') {
1782 CTRACE(DBG_LEXER, ("\\0: skip to '", v, v, vc&0xFF));
1783 while ((c = pgetc()) != '\'') {
1784 if (c == '\\')
1785 c = pgetc();
1786 if (c == PEOF)
1787 synerror("Unterminated quoted string");
1788 if (c == '\n') {
1789 plinno++;
1790 if (doprompt)
1791 setprompt(2);
1792 else
1793 setprompt(0);
1794 }
1795 }
/* leave the closing quote in the input for the caller to see */
1796 pungetc();
1797 return out;
1798 }
1799 CVTRACE(DBG_LEXER, NEEDESC(vc), ("CTLESC-"));
1800 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", vc&0xFF, vc&0x1FF));
1801 if (NEEDESC(vc))
1802 USTPUTC(CTLESC, out);
1803 USTPUTC(vc, out);
1804 return out;
1805 }
1806
1807 /*
1808 * The lowest level basic tokenizer.
1809 *
1810 * The next input byte (character) is in firstc, syn says which
1811 * syntax tables we are to use (basic, single or double quoted, or arith)
1812 * and magicq (used with sqsyntax and dqsyntax only) indicates that the
1813 * quote character itself is not special (used parsing here docs and similar)
1814 *
1815 * The result is the type of the next token (its value, when there is one,
1816 * is saved in the relevant global var - must fix that someday!) which is
1817 * also saved for re-reading ("lasttoken").
1818 *
1819 * Overall, this routine does far more parsing than it is supposed to.
1820 * That will also need fixing, someday...
1821 */
/*
 * readtoken1: read one word (or redirection operator) from the input.
 *
 *	firstc	- first character of the token
 *	syn	- syntax table to start with
 *	oneword	- initial value of magicq for the base state
 *
 * Returns TREDIR (after parseredir() has set redirnode) or TWORD (with
 * wordtext, quoteflag and backquotelist set); the same value is stored
 * in lasttoken.  Syntax errors go through synerror(), which is marked
 * NOTREACHED after its error() call.  Extra state stack blocks are
 * released with cleanup_state_stack() on every exit path visible here.
 */
1822 STATIC int
1823 readtoken1(int firstc, char const *syn, int oneword)
1824 {
1825 int c;
1826 char * out;
1827 int len;
1828 struct nodelist *bqlist;
1829 int quotef;
/* base block of the state stack; TS_PUSH() chains malloc'd blocks onto it */
1830 VSS static_stack;
1831 VSS *stack = &static_stack;
1832
1833 stack->prev = NULL;
1834 stack->cur = 0;
1835
1836 syntax = syn;
1837
1838 #ifdef DEBUG
1839 #define SYNTAX ( syntax == BASESYNTAX ? "BASE" : \
1840 syntax == DQSYNTAX ? "DQ" : \
1841 syntax == SQSYNTAX ? "SQ" : \
1842 syntax == ARISYNTAX ? "ARI" : \
1843 "???" )
1844 #endif
1845
1846 startlinno = plinno;
1847 varnest = 0;
1848 quoted = 0;
1849 if (syntax == DQSYNTAX)
1850 SETDBLQUOTE();
1851 quotef = 0;
1852 bqlist = NULL;
1853 arinest = 0;
1854 parenlevel = 0;
1855 elided_nl = 0;
1856 magicq = oneword;
1857
1858 CTRACE(DBG_LEXER, ("readtoken1(%c) syntax=%s %s%s(quoted=%x)\n",
1859 firstc&0xFF, SYNTAX, magicq ? "magic quotes" : "",
1860 ISDBLQUOTE()?" ISDBLQUOTE":"", quoted));
1861
1862 STARTSTACKSTR(out);
1863
/*
 * Main loop: "continue" consumes the character and carries on with the
 * word; "break" out of the switch ends the token (see the break that
 * follows the switch).
 */
1864 for (c = firstc ;; c = pgetc_macro()) { /* until end of token */
1865 if (syntax == ARISYNTAX)
1866 out = insert_elided_nl(out);
1867 CHECKSTRSPACE(6, out); /* permit 6 calls to USTPUTC */
1868 switch (syntax[c]) {
1869 case CFAKE:
1870 VTRACE(DBG_LEXER, ("CFAKE"));
1871 if (syntax == BASESYNTAX && varnest == 0)
1872 break;
1873 VTRACE(DBG_LEXER, (","));
1874 continue;
1875 case CNL: /* '\n' */
1876 VTRACE(DBG_LEXER, ("CNL"));
1877 if (syntax == BASESYNTAX && varnest == 0)
1878 break; /* exit loop */
1879 USTPUTC(c, out);
1880 plinno++;
1881 VTRACE(DBG_LEXER, ("@%d,", plinno));
1882 if (doprompt)
1883 setprompt(2);
1884 else
1885 setprompt(0);
1886 continue;
1887
1888 case CSBACK: /* single quoted backslash */
1889 if ((quoted & QF) == CQ) {
1890 out = readcstyleesc(out);
1891 continue;
1892 }
1893 VTRACE(DBG_LEXER, ("ESC:"));
1894 USTPUTC(CTLESC, out);
1895 /* FALLTHROUGH */
1896 case CWORD:
1897 VTRACE(DBG_LEXER, ("'%c'", c));
1898 USTPUTC(c, out);
1899 continue;
1900
1901 case CCTL:
1902 CVTRACE(DBG_LEXER, !magicq || ISDBLQUOTE(),
1903 ("%s%sESC:",!magicq?"!m":"",ISDBLQUOTE()?"DQ":""));
1904 if (!magicq || ISDBLQUOTE())
1905 USTPUTC(CTLESC, out);
1906 VTRACE(DBG_LEXER, ("'%c'", c));
1907 USTPUTC(c, out);
1908 continue;
1909 case CBACK: /* backslash */
1910 c = pgetc();
1911 VTRACE(DBG_LEXER, ("\\'%c'(%#.2x)", c&0xFF, c&0x1FF));
1912 if (c == PEOF) {
1913 VTRACE(DBG_LEXER, ("EOF, keep \\ "));
1914 USTPUTC('\\', out);
1915 pungetc();
1916 continue;
1917 }
/* backslash-newline is dropped, but counted in elided_nl */
1918 if (c == '\n') {
1919 plinno++;
1920 elided_nl++;
1921 VTRACE(DBG_LEXER, ("eli \\n (%d) @%d ",
1922 elided_nl, plinno));
1923 if (doprompt)
1924 setprompt(2);
1925 else
1926 setprompt(0);
1927 continue;
1928 }
1929 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1930 quotef = 1; /* current token is quoted */
1931 if (quoted && c != '\\' && c != '`' &&
1932 c != '$' && (c != '"' || magicq)) {
1933 /*
1934 * retain the \ (which we *know* needs CTLESC)
1935 * when in "..." and the following char is
1936 * not one of the magic few.)
1937 * Otherwise the \ has done its work, and
1938 * is dropped.
1939 */
1940 VTRACE(DBG_LEXER, ("ESC:'\\'"));
1941 USTPUTC(CTLESC, out);
1942 USTPUTC('\\', out);
1943 }
1944 CVTRACE(DBG_LEXER, NEEDESC(c) || !magicq,
1945 ("%sESC:", NEEDESC(c) ? "+" : "m"));
1946 VTRACE(DBG_LEXER, ("'%c'(%#.2x)", c&0xFF, c&0x1FF));
1947 if (NEEDESC(c))
1948 USTPUTC(CTLESC, out);
1949 else if (!magicq) {
1950 USTPUTC(CTLESC, out);
1951 USTPUTC(c, out);
1952 continue;
1953 }
1954 USTPUTC(c, out);
1955 continue;
1956 case CSQUOTE:
/* opening quote: push a level and switch to the single quote syntax */
1957 if (syntax != SQSYNTAX) {
1958 CVTRACE(DBG_LEXER, !magicq, (" CQM "));
1959 if (!magicq)
1960 USTPUTC(CTLQUOTEMARK, out);
1961 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1962 quotef = 1;
1963 TS_PUSH();
1964 syntax = SQSYNTAX;
1965 quoted = SQ;
1966 VTRACE(DBG_LEXER, (" TS_PUSH(SQ)"));
1967 continue;
1968 }
1969 if (magicq && arinest == 0 && varnest == 0) {
1970 /* Ignore inside quoted here document */
1971 VTRACE(DBG_LEXER, ("<<'>>"));
1972 USTPUTC(c, out);
1973 continue;
1974 }
1975 /* End of single quotes... */
1976 TS_POP();
1977 VTRACE(DBG_LEXER, ("SQ TS_POP->%s ", SYNTAX));
1978 CVTRACE(DBG_LEXER, syntax == BASESYNTAX, (" CQE "));
1979 if (syntax == BASESYNTAX)
1980 USTPUTC(CTLQUOTEEND, out);
1981 continue;
1982 case CDQUOTE:
1983 if (magicq && arinest == 0 /* && varnest == 0 */) {
1984 VTRACE(DBG_LEXER, ("<<\">>"));
1985 /* Ignore inside here document */
1986 USTPUTC(c, out);
1987 continue;
1988 }
1989 CVTRACE(DBG_LEXER, quotef==0, (" QF=1 "));
1990 quotef = 1;
1991 if (arinest) {
1992 if (ISDBLQUOTE()) {
1993 VTRACE(DBG_LEXER,
1994 (" CQE ari(%d", arinest));
1995 USTPUTC(CTLQUOTEEND, out);
1996 TS_POP();
1997 VTRACE(DBG_LEXER, ("%d)TS_POP->%s ",
1998 arinest, SYNTAX));
1999 } else {
2000 VTRACE(DBG_LEXER,
2001 (" ari(%d) %s TS_PUSH->DQ CQM ",
2002 arinest, SYNTAX));
2003 TS_PUSH();
2004 syntax = DQSYNTAX;
2005 SETDBLQUOTE();
2006 USTPUTC(CTLQUOTEMARK, out);
2007 }
2008 continue;
2009 }
2010 CVTRACE(DBG_LEXER, magicq, (" MQignDQ "));
2011 if (magicq)
2012 continue;
2013 if (ISDBLQUOTE()) {
2014 TS_POP();
2015 VTRACE(DBG_LEXER,
2016 (" DQ TS_POP->%s CQE ", SYNTAX));
2017 USTPUTC(CTLQUOTEEND, out);
2018 } else {
/* NOTE(review): the trace text says TS_POP, but this branch pushes */
2019 VTRACE(DBG_LEXER,
2020 (" %s TS_POP->DQ CQM ", SYNTAX));
2021 TS_PUSH();
2022 syntax = DQSYNTAX;
2023 SETDBLQUOTE();
2024 USTPUTC(CTLQUOTEMARK, out);
2025 }
2026 continue;
2027 case CVAR: /* '$' */
2028 VTRACE(DBG_LEXER, ("'$'..."));
2029 out = insert_elided_nl(out);
2030 PARSESUB(); /* parse substitution */
2031 continue;
2032 case CENDVAR: /* CLOSEBRACE */
/* closes a ${...} only if no double quote is open at this level */
2033 if (varnest > 0 && !ISDBLQUOTE()) {
2034 VTRACE(DBG_LEXER, ("vn=%d !DQ", varnest));
2035 TS_POP();
2036 VTRACE(DBG_LEXER, (" TS_POP->%s CEV ", SYNTAX));
2037 USTPUTC(CTLENDVAR, out);
2038 } else {
2039 VTRACE(DBG_LEXER, ("'%c'", c));
2040 USTPUTC(c, out);
2041 }
2042 out = insert_elided_nl(out);
2043 continue;
2044 case CLP: /* '(' in arithmetic */
2045 parenlevel++;
2046 VTRACE(DBG_LEXER, ("'('(%d)", parenlevel));
2047 USTPUTC(c, out);
2048 continue;;
2049 case CRP: /* ')' in arithmetic */
2050 if (parenlevel > 0) {
2051 USTPUTC(c, out);
2052 --parenlevel;
2053 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
2054 } else {
2055 VTRACE(DBG_LEXER, ("')'(%d)", parenlevel));
/* an unmatched ')' must be the first half of the closing "))" */
2056 if (pgetc_linecont() == /*(*/ ')') {
2057 out = insert_elided_nl(out);
2058 if (--arinest == 0) {
2059 TS_POP();
2060 USTPUTC(CTLENDARI, out);
2061 } else
2062 USTPUTC(/*(*/ ')', out);
2063 } else {
2064 break; /* to synerror() just below */
2065 #if 0 /* the old way, causes weird errors on bad input */
2066 /*
2067 * unbalanced parens
2068 * (don't 2nd guess - no error)
2069 */
2070 pungetc();
2071 USTPUTC(/*(*/ ')', out);
2072 #endif
2073 }
2074 }
2075 continue;
2076 case CBQUOTE: /* '`' */
2077 VTRACE(DBG_LEXER, ("'`' -> parsebackq()\n"));
2078 out = parsebackq(stack, out, &bqlist, 1);
2079 VTRACE(DBG_LEXER, ("parsebackq() -> readtoken1: "));
2080 continue;
2081 case CEOF: /* --> c == PEOF */
2082 VTRACE(DBG_LEXER, ("EOF "));
2083 break; /* will exit loop */
2084 default:
2085 VTRACE(DBG_LEXER, ("['%c'(%#.2x)]", c&0xFF, c&0x1FF));
2086 if (varnest == 0 && !ISDBLQUOTE())
2087 break; /* exit loop */
2088 USTPUTC(c, out);
2089 VTRACE(DBG_LEXER, (","));
2090 continue;
2091 }
2092 VTRACE(DBG_LEXER, (" END TOKEN\n", c&0xFF, c&0x1FF));
2093 break; /* break from switch -> break from for loop too */
2094 }
2095
/* the token has ended: anything still open at this point is an error */
2096 if (syntax == ARISYNTAX) {
2097 cleanup_state_stack(stack);
2098 synerror(/*((*/ "Missing '))'");
2099 }
2100 if (syntax != BASESYNTAX && /* ! parsebackquote && */ !magicq) {
2101 cleanup_state_stack(stack);
2102 synerror("Unterminated quoted string");
2103 }
2104 if (varnest != 0) {
2105 cleanup_state_stack(stack);
2106 startlinno = plinno;
2107 /* { */
2108 synerror("Missing '}'");
2109 }
2110
2111 STPUTC('\0', out);
2112 len = out - stackblock();
2113 out = stackblock();
2114
/*
 * An unquoted word that is empty or all digits, ended by '<' or '>',
 * is the fd prefix of a redirection; otherwise the terminating
 * character is pushed back for the next token.
 */
2115 if (!magicq) {
2116 if ((c == '<' || c == '>')
2117 && quotef == 0 && (*out == '\0' || is_number(out))) {
2118 parseredir(out, c);
2119 cleanup_state_stack(stack);
2120 return lasttoken = TREDIR;
2121 } else {
2122 pungetc();
2123 }
2124 }
2125
2126 VTRACE(DBG_PARSE|DBG_LEXER,
2127 ("readtoken1 %sword \"%s\", completed%s (%d) left %d enl\n",
2128 (quotef ? "quoted " : ""), out, (bqlist ? " with cmdsubs" : ""),
2129 len, elided_nl));
2130
2131 quoteflag = quotef;
2132 backquotelist = bqlist;
2133 grabstackblock(len);
2134 wordtext = out;
2135 cleanup_state_stack(stack);
2136 return lasttoken = TWORD;
2137 /* end of readtoken routine */
2138
2139
2140 /*
2141 * Parse a substitution. At this point, we have read the dollar sign
2142 * and nothing else.
2143 */
2144
2145 parsesub: {
2146 int subtype;
2147 int typeloc;
2148 int flags;
2149 char *p;
/* operator chars; the index in this string is added to VSNORMAL below */
2150 static const char types[] = "}-+?=";
2151
2152 c = pgetc_linecont();
2153 VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF));
2154 if (c == '(' /*)*/) { /* $(command) or $((arith)) */
2155 if (pgetc_linecont() == '(' /*')'*/ ) {
2156 VTRACE(DBG_LEXER, ("\"$((\" ARITH "));
2157 out = insert_elided_nl(out);
2158 PARSEARITH();
2159 } else {
2160 VTRACE(DBG_LEXER, ("\"$(\" CSUB->parsebackq()\n"));
2161 out = insert_elided_nl(out);
2162 pungetc();
2163 out = parsebackq(stack, out, &bqlist, 0);
2164 VTRACE(DBG_LEXER, ("parseback()->readtoken1(): "));
2165 }
2166 } else if (c == OPENBRACE || is_name(c) || is_special(c)) {
2167 VTRACE(DBG_LEXER, (" $EXP:CTLVAR "));
2168 USTPUTC(CTLVAR, out);
/* remember where the type byte goes; it is patched once the type is known */
2169 typeloc = out - stackblock();
2170 USTPUTC(VSNORMAL, out);
2171 subtype = VSNORMAL;
2172 flags = 0;
2173 if (c == OPENBRACE) {
2174 c = pgetc_linecont();
2175 if (c == '#') {
2176 if ((c = pgetc_linecont()) == CLOSEBRACE)
2177 c = '#';
2178 else if (is_name(c) || isdigit(c))
2179 subtype = VSLENGTH;
2180 else if (is_special(c)) {
2181 /*
2182 * ${#} is $# - the number of sh params
2183 * ${##} is the length of ${#}
2184 * ${###} is ${#} with as much nothing
2185 * as possible removed from start
2186 * ${##1} is ${#} with leading 1 gone
2187 * ${##\#} is ${#} with leading # gone
2188 *
2189 * this stuff is UGLY!
2190 */
2191 if (pgetc_linecont() == CLOSEBRACE) {
2192 pungetc();
2193 subtype = VSLENGTH;
2194 } else {
2195 static char cbuf[2];
2196
2197 pungetc(); /* would like 2 */
2198 cbuf[0] = c; /* so ... */
2199 cbuf[1] = '\0';
2200 pushstring(cbuf, 1, NULL);
2201 c = '#'; /* ${#:...} */
2202 subtype = 0; /* .. or similar */
2203 }
2204 } else {
2205 pungetc();
2206 c = '#';
2207 subtype = 0;
2208 }
2209 }
2210 else
2211 subtype = 0;
2212 VTRACE(DBG_LEXER, ("${ st=%d ", subtype));
2213 }
/* copy the parameter name: a name, digits, or one special character */
2214 if (is_name(c)) {
2215 p = out;
2216 do {
2217 VTRACE(DBG_LEXER, ("%c", c));
2218 STPUTC(c, out);
2219 c = pgetc_linecont();
2220 } while (is_in_name(c));
2221
2222 #if 0
2223 if (out - p == 6 && strncmp(p, "LINENO", 6) == 0) {
2224 int i;
2225 int linno;
2226 char buf[10];
2227
2228 /*
2229 * The "LINENO hack"
2230 *
2231 * Replace the variable name with the
2232 * current line number.
2233 */
2234 linno = plinno;
2235 if (funclinno != 0)
2236 linno -= funclinno - 1;
2237 snprintf(buf, sizeof(buf), "%d", linno);
2238 STADJUST(-6, out);
2239 for (i = 0; buf[i] != '\0'; i++)
2240 STPUTC(buf[i], out);
2241 flags |= VSLINENO;
2242 }
2243 #endif
2244 } else if (is_digit(c)) {
/* without braces (subtype still VSNORMAL) only one digit is taken */
2245 do {
2246 VTRACE(DBG_LEXER, ("%c", c));
2247 STPUTC(c, out);
2248 c = pgetc_linecont();
2249 } while (subtype != VSNORMAL && is_digit(c));
2250 }
2251 else if (is_special(c)) {
2252 VTRACE(DBG_LEXER, ("\"$%c", c));
2253 USTPUTC(c, out);
2254 c = pgetc_linecont();
2255 }
2256 else {
2257 VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF));
2258 badsub:
2259 cleanup_state_stack(stack);
2260 synerror("Bad substitution");
2261 }
2262
2263 STPUTC('=', out);
/* subtype == 0 means "inside braces, operator not yet determined" */
2264 if (subtype == 0) {
2265 switch (c) {
2266 case ':':
2267 flags |= VSNUL;
2268 c = pgetc_linecont();
2269 /*FALLTHROUGH*/
2270 default:
2271 p = strchr(types, c);
2272 if (p == NULL)
2273 goto badsub;
2274 subtype = p - types + VSNORMAL;
2275 break;
2276 case '%':
2277 case '#':
2278 {
2279 int cc = c;
2280 subtype = c == '#' ? VSTRIMLEFT :
2281 VSTRIMRIGHT;
2282 c = pgetc_linecont();
/* a doubled operator char selects the next subtype value */
2283 if (c == cc)
2284 subtype++;
2285 else
2286 pungetc();
2287 break;
2288 }
2289 }
2290 } else {
2291 if (subtype == VSLENGTH && c != /*{*/ '}')
2292 synerror("no modifiers allowed with ${#var}");
2293 pungetc();
2294 }
2295 if (quoted || arinest)
2296 flags |= VSQUOTE;
2297 if (subtype >= VSTRIMLEFT && subtype <= VSTRIMRIGHTMAX)
2298 flags |= VSPATQ;
2299 VTRACE(DBG_LEXER, (" st%d:%x", subtype, flags));
/* patch the placeholder byte that was written right after CTLVAR */
2300 *(stackblock() + typeloc) = subtype | flags;
2301 if (subtype != VSNORMAL) {
2302 TS_PUSH();
2303 varnest++;
2304 arinest = 0;
2305 if (subtype > VSASSIGN) { /* # ## % %% */
2306 syntax = BASESYNTAX;
2307 quoted = 0;
2308 magicq = 0;
2309 }
2310 VTRACE(DBG_LEXER, (" TS_PUSH->%s vn=%d%s ",
2311 SYNTAX, varnest, quoted ? " Q" : ""));
2312 }
2313 } else if (c == '\'' && syntax == BASESYNTAX) {
/* $'...': single quote syntax, with CQ so that CSBACK decodes escapes */
2314 USTPUTC(CTLQUOTEMARK, out);
2315 VTRACE(DBG_LEXER, (" CSTR \"$'\" CQM "));
2316 CVTRACE(DBG_LEXER, quotef==0, ("QF=1 "));
2317 quotef = 1;
2318 TS_PUSH();
2319 syntax = SQSYNTAX;
2320 quoted = CQ;
2321 VTRACE(DBG_LEXER, ("%s->TS_PUSH()->SQ ", SYNTAX));
2322 } else {
2323 VTRACE(DBG_LEXER, ("$unk -> '$' (pushback '%c'%#.2x)",
2324 c & 0xFF, c & 0x1FF));
2325 USTPUTC('$', out);
2326 pungetc();
2327 }
2328 goto parsesub_return;
2329 }
2330
2331
2332 /*
2333 * Parse an arithmetic expansion (indicate start of one and set state)
2334 */
2335 parsearith: {
2336
2337 #if 0
2338 if (syntax == ARISYNTAX) {
2339 /*
2340 * we collapse embedded arithmetic expansion to
2341 * parentheses, which should be equivalent
2342 *
2343 * XXX It isn't, must fix, soonish...
2344 */
2345 USTPUTC('(' /*)*/, out);
2346 USTPUTC('(' /*)*/, out);
2347 /*
2348 * Need 2 of them because there will (should be)
2349 * two closing ))'s to follow later.
2350 */
2351 parenlevel += 2;
2352 } else
2353 #endif
2354 {
2355 VTRACE(DBG_LEXER, (" CTLARI%c ", ISDBLQUOTE()?'"':'_'));
2356 USTPUTC(CTLARI, out);
/* the byte after CTLARI records whether a double quote was open */
2357 if (ISDBLQUOTE())
2358 USTPUTC('"',out);
2359 else
2360 USTPUTC(' ',out);
2361
2362 VTRACE(DBG_LEXER, ("%s->TS_PUSH->ARI(1)", SYNTAX));
2363 TS_PUSH();
2364 syntax = ARISYNTAX;
2365 arinest = 1;
2366 varnest = 0;
2367 magicq = 1;
2368 }
2369 goto parsearith_return;
2370 }
2371
2372 } /* end of readtoken */
2373
2374
2375
2376
/*
 * NOTE(review): INCLUDE and RESET are not C, so this section is
 * presumably read by the mkinit tool rather than by the compiler --
 * confirm.  The RESET body points psp.v_current_parser at parse_state
 * and clears that state's token pushback, keyword checking and pending
 * here document list.
 */
2377 #ifdef mkinit
2378 INCLUDE "parser.h"
2379
2380 RESET {
2381 psp.v_current_parser = &parse_state;
2382
2383 parse_state.ps_tokpushback = 0;
2384 parse_state.ps_checkkwd = 0;
2385 parse_state.ps_heredoclist = NULL;
2386 }
2387 #endif
2388
2389 /*
2390 * Returns true if the text contains nothing to expand (no dollar signs
2391 * or backquotes).
2392 */
2393
2394 STATIC int
2395 noexpand(char *text)
2396 {
2397 char *p;
2398 char c;
2399
2400 p = text;
2401 while ((c = *p++) != '\0') {
2402 if (c == CTLQUOTEMARK || c == CTLQUOTEEND)
2403 continue;
2404 if (c == CTLESC)
2405 p++;
2406 else if (BASESYNTAX[(int)c] == CCTL)
2407 return 0;
2408 }
2409 return 1;
2410 }
2411
2412
/*
 * Check that "name" is a legal variable name: one character accepted by
 * is_name() (a letter or underscore) followed by any number of characters
 * accepted by is_in_name() (letters, underscores and digits).
 * Returns 1 when it is, 0 otherwise.
 */

int
goodname(const char *name)
{
	const char *s = name;

	if (!is_name(*s))
		return 0;
	for (s++; *s != '\0'; s++)
		if (!is_in_name(*s))
			return 0;
	return 1;
}
2432
/*
 * Return 1 if the string starts with a variable name that is directly
 * followed by '=', and 0 otherwise (including when the string ends, or
 * a non-name character appears, before any '=').
 */
int
isassignment(const char *p)
{
	if (!is_name(*p))
		return 0;

	for (;;) {
		p++;
		if (*p == '=')
			return 1;
		if (*p == '\0' || !is_in_name(*p))
			return 0;
	}
}
2443
2444 /*
2445 * skip past any \n's, and leave lasttoken set to whatever follows
2446 */
2447 STATIC void
2448 linebreak(void)
2449 {
2450 while (readtoken() == TNL)
2451 ;
2452 }
2453
2454 /*
2455 * The next token must be "token" -- check, then move past it
2456 */
2457 STATIC void
2458 consumetoken(int token)
2459 {
2460 if (readtoken() != token) {
2461 VTRACE(DBG_PARSE, ("consumetoken(%d): expecting %s got %s",
2462 token, tokname[token], tokname[lasttoken]));
2463 CVTRACE(DBG_PARSE, (lasttoken==TWORD), (" \"%s\"", wordtext));
2464 VTRACE(DBG_PARSE, ("\n"));
2465 synexpect(token, NULL);
2466 }
2467 }
2468
2469 /*
2470 * Called when an unexpected token is read during the parse. The argument
2471 * is the token that is expected, or -1 if more than one type of token can
2472 * occur at this point.
2473 */
2474
2475 STATIC void
2476 synexpect(int token, const char *text)
2477 {
2478 char msg[64];
2479 char *p;
2480
2481 if (lasttoken == TWORD) {
2482 size_t len = strlen(wordtext);
2483
2484 if (len <= 13)
2485 fmtstr(msg, 34, "Word \"%.13s\" unexpected", wordtext);
2486 else
2487 fmtstr(msg, 34,
2488 "Word \"%.10s...\" unexpected", wordtext);
2489 } else
2490 fmtstr(msg, 34, "%s unexpected", tokname[lasttoken]);
2491
2492 p = strchr(msg, '\0');
2493 if (text)
2494 fmtstr(p, 30, " (expecting \"%.10s\")", text);
2495 else if (token >= 0)
2496 fmtstr(p, 30, " (expecting %s)", tokname[token]);
2497
2498 synerror(msg);
2499 /* NOTREACHED */
2500 }
2501
2502
2503 STATIC void
2504 synerror(const char *msg)
2505 {
2506 error("%d: Syntax error: %s", startlinno, msg);
2507 /* NOTREACHED */
2508 }
2509
2510 STATIC void
2511 setprompt(int which)
2512 {
2513 whichprompt = which;
2514
2515 #ifndef SMALL
2516 if (!el)
2517 #endif
2518 out2str(getprompt(NULL));
2519 }
2520
2521 /*
2522 * handle getting the next character, while ignoring \ \n
2523 * (which is a little tricky as we only have one char of pushback
2524 * and we need that one elsewhere).
2525 */
2526 STATIC int
2527 pgetc_linecont(void)
2528 {
2529 int c;
2530
2531 while ((c = pgetc()) == '\\') {
2532 c = pgetc();
2533 if (c == '\n') {
2534 plinno++;
2535 elided_nl++;
2536 VTRACE(DBG_LEXER, ("\"\\n\"drop(el=%d@%d)",
2537 elided_nl, plinno));
2538 if (doprompt)
2539 setprompt(2);
2540 else
2541 setprompt(0);
2542 } else {
2543 pungetc();
2544 /* Allow the backslash to be pushed back. */
2545 pushstring("\\", 1, NULL);
2546 return (pgetc());
2547 }
2548 }
2549 return (c);
2550 }
2551
2552 /*
2553 * called by editline -- any expansions to the prompt
2554 * should be added here.
2555 */
2556 const char *
2557 getprompt(void *unused)
2558 {
2559 char *p;
2560 const char *cp;
2561 int wp;
2562
2563 if (!doprompt)
2564 return "";
2565
2566 VTRACE(DBG_PARSE|DBG_EXPAND, ("getprompt %d\n", whichprompt));
2567
2568 switch (wp = whichprompt) {
2569 case 0:
2570 return "";
2571 case 1:
2572 p = ps1val();
2573 break;
2574 case 2:
2575 p = ps2val();
2576 break;
2577 default:
2578 return "<internal prompt error>";
2579 }
2580 if (p == NULL)
2581 return "";
2582
2583 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt <<%s>>\n", p));
2584
2585 cp = expandstr(p, plinno);
2586 whichprompt = wp; /* history depends on it not changing */
2587
2588 VTRACE(DBG_PARSE|DBG_EXPAND, ("prompt -> <<%s>>\n", cp));
2589
2590 return cp;
2591 }
2592
2593 /*
2594 * Expand a string ... used for expanding prompts (PS1...)
2595 *
2596 * Never return NULL, always some string (return input string if invalid)
2597 *
2598 * The internal routine does the work, leaving the result on the
2599 * stack (or in a static string, or even the input string) and
2600 * handles parser recursion, and cleanup after an error while parsing.
2601 *
2602 * The visible interface copies the result off the stack (if it is there),
2603 * and handles stack management, leaving the stack in the exact same
2604 * state it was when expandstr() was called (so it can be used part way
2605 * through building a stack data structure - as in when PS2 is being
2606 * expanded half way through reading a "command line")
2607 *
2608 * on error, expandonstack() cleans up the parser state, but then
2609 * simply jumps out through expandstr() withut doing any stack cleanup,
2610 * which is OK, as the error handler must deal with that anyway.
2611 *
2612 * The split into two funcs is to avoid problems with setjmp/longjmp
2613 * and local variables which could otherwise be optimised into bizarre
2614 * behaviour.
2615 */
2616 static const char *
2617 expandonstack(char *ps, int cmdsub, int lineno)
2618 {
2619 union node n;
2620 struct jmploc jmploc;
2621 struct jmploc *const savehandler = handler;
2622 struct parsefile *const savetopfile = getcurrentfile();
2623 const int save_x = xflag;
2624 struct parse_state new_state = init_parse_state;
2625 struct parse_state *const saveparser = psp.v_current_parser;
2626 const char *result = NULL;
2627
2628 if (!setjmp(jmploc.loc)) {
2629 handler = &jmploc;
2630
2631 psp.v_current_parser = &new_state;
2632 setinputstring(ps, 1, lineno);
2633
2634 readtoken1(pgetc(), DQSYNTAX, 1);
2635 if (backquotelist != NULL) {
2636 if (!cmdsub)
2637 result = ps;
2638 else if (!promptcmds)
2639 result = "-o promptcmds not set: ";
2640 }
2641 if (result == NULL) {
2642 n.narg.type = NARG;
2643 n.narg.next = NULL;
2644 n.narg.text = wordtext;
2645 n.narg.lineno = lineno;
2646 n.narg.backquote = backquotelist;
2647
2648 xflag = 0; /* we might be expanding PS4 ... */
2649 expandarg(&n, NULL, 0);
2650 result = stackblock();
2651 }
2652 } else {
2653 psp.v_current_parser = saveparser;
2654 xflag = save_x;
2655 popfilesupto(savetopfile);
2656 handler = savehandler;
2657
2658 if (exception == EXEXIT)
2659 longjmp(handler->loc, 1);
2660 if (exception == EXINT)
2661 exraise(SIGINT);
2662 return ps;
2663 }
2664 psp.v_current_parser = saveparser;
2665 xflag = save_x;
2666 popfilesupto(savetopfile);
2667 handler = savehandler;
2668
2669
2670 if (result == NULL)
2671 result = ps;
2672
2673 return result;
2674 }
2675
2676 const char *
2677 expandstr(char *ps, int lineno)
2678 {
2679 const char *result = NULL;
2680 struct stackmark smark;
2681 static char *buffer = NULL; /* storage for prompt, never freed */
2682 static size_t bufferlen = 0;
2683
2684 setstackmark(&smark);
2685 /*
2686 * At this point we anticipate that there may be a string
2687 * growing on the stack, but we have no idea how big it is.
2688 * However we know that it cannot be bigger than the current
2689 * allocated stack block, so simply reserve the whole thing,
2690 * then we can use the stack without barfing all over what
2691 * is there already... (the stack mark undoes this later.)
2692 */
2693 (void) stalloc(stackblocksize());
2694
2695 result = expandonstack(ps, 1, lineno);
2696
2697 if (__predict_true(result == stackblock())) {
2698 size_t len = strlen(result) + 1;
2699
2700 /*
2701 * the result (usual case) is on the stack, which we
2702 * are just about to discard (popstackmark()) so we
2703 * need to move it somewhere safe first.
2704 */
2705
2706 if (__predict_false(len > bufferlen)) {
2707 char *new;
2708 size_t newlen = bufferlen;
2709
2710 if (__predict_false(len > (SIZE_MAX >> 4))) {
2711 result = "huge prompt: ";
2712 goto getout;
2713 }
2714
2715 if (newlen == 0)
2716 newlen = 32;
2717 while (newlen <= len)
2718 newlen <<= 1;
2719
2720 new = (char *)realloc(buffer, newlen);
2721
2722 if (__predict_false(new == NULL)) {
2723 /*
2724 * this should rarely (if ever) happen
2725 * but we must do something when it does...
2726 */
2727 result = "No mem for prompt: ";
2728 goto getout;
2729 } else {
2730 buffer = new;
2731 bufferlen = newlen;
2732 }
2733 }
2734 (void)memcpy(buffer, result, len);
2735 result = buffer;
2736 }
2737
2738 getout:;
2739 popstackmark(&smark);
2740
2741 return result;
2742 }
2743
/*
 * A simpler version of string expansion, which does no $( ) expansions
 * (expandonstack() is called with cmdsub == 0, and line number 0).
 *
 * For use during shell startup when we know we are not parsing, and so
 * the stack is not in use - we can do what we like, and do not need to
 * clean up (that's handled externally).
 *
 * The result is simply returned, even if it is on the stack.
 */
const char *
expandenv(char *arg)
{
	const char *expanded;

	expanded = expandonstack(arg, 0, 0);
	return expanded;
}
2757