lex.c revision 1.7 1 /* $NetBSD: lex.c,v 1.7 1999/10/20 15:49:15 hubertf Exp $ */
2
3 /*
4 * lexical analysis and source input
5 */
6
7 #include "sh.h"
8 #include <ctype.h>
9
10
/*
 * One entry of the lexer's state stack: the state code plus whatever
 * bookkeeping that particular state needs.  In element 0 of a state block
 * only ls_info.base is meaningful; it links the block to the one that was
 * in use before it (null for the first block).
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;				/* one of the S* lexer states */
	union {
		struct scsparen_info {		/* $(...) */
			int nparen;		/* count open parenthesis */
			int csstate;		/* quoting sub-state, XXX remove */
		} u_scsparen;

		struct sasparen_info {		/* $((...)) */
			int nparen;		/* count open parenthesis */
			int start;		/* marks start of $(( in output str */
		} u_sasparen;

		struct sletparen_info {		/* ((...)) */
			int nparen;		/* count open parenthesis */
		} u_sletparen;

		struct sbquote_info {		/* `...` */
			int indquotes;		/* true if in double quotes: "`...`" */
		} u_sbquote;

		Lex_state *base;		/* link between state blocks */
	} ls_info;
};

/* Shorthand for reaching the per-state members of ls_info */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
47
48 typedef struct State_info State_info;
49 struct State_info {
50 Lex_state *base;
51 Lex_state *end;
52 };
53
54
/* Forward declarations of the helpers private to this file */
55 static void readhere ARGS((struct ioword *iop));
56 static int getsc__ ARGS((void));
57 static void getsc_line ARGS((Source *s));
58 static int getsc_bn ARGS((void));
59 static char *get_brace_var ARGS((XString *wsp, char *wp));
60 static int arraysub ARGS((char **strp));
61 static const char *ungetsc ARGS((int c));
62 static void gethere ARGS((void));
63 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
64 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
65
/* Set to 1 by getsc_bn() when it hands back a backslash that does not start
 * a \newline sequence; the following getsc_bn() call then bumps it to 2 and
 * returns the next character without a \newline check.  ungetsc() decrements
 * it when non-zero, and yylex() clears it at the start of every token.
 */
66 static int backslash_skip;
/* While non-zero, getsc_bn() returns characters as-is, without removing
 * \newline sequences.  Used as a counter: incremented/decremented around
 * comments, single-quoted text and here documents read without IOEVAL.
 */
67 static int ignore_backslash_newline;
68
/* optimized getsc_bn(): takes the next character straight from the source
 * buffer unless the buffer is exhausted, the character is a backslash, or a
 * backslash skip is pending -- those cases go through getsc_bn().
 */
69 /* optimized getsc_bn() */
70 #define getsc() (*source->str != '\0' && *source->str != '\\' \
71 && !backslash_skip ? *source->str++ : getsc_bn())
/* Same fast path for raw input: only an exhausted buffer calls getsc__() */
72 /* optimized getsc__() */
73 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
74
/* Number of Lex_state elements per state block (including the link slot) */
75 #define STATE_BSIZE 32
76
/* Enter lexer state (s).  Relies on the yylex() locals statep, state_info
 * and state; a new block is chained on via push_state_() when the current
 * one is full.
 */
77 #define PUSH_STATE(s) do { \
78 if (++statep == state_info.end) \
79 statep = push_state_(&state_info, statep); \
80 state = statep->ls_state = (s); \
81 } while (0)
82
/* Leave the current lexer state and resume the enclosing one.  Stepping back
 * onto a block's link slot releases that block through pop_state_().
 */
83 #define POP_STATE() do { \
84 if (--statep == state_info.base) \
85 statep = pop_state_(&state_info, statep); \
86 state = statep->ls_state; \
87 } while (0)
88
89
90
91 /*
92 * Lexical analyzer
93 *
94 * tokens are not regular expressions, they are LL(1).
95 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
96 * hence the state stack.
97 */
98
/*
 * yylex: read the next token from the current input source.
 *
 * cf is a bit mask of lexer flags.  The ones examined here are ONEWORD,
 * LETEXPR (KSH only), HEREDELIM, VARASN, ARRAYVAR, CMDWORD, CONTIN, KEYWORD,
 * ALIAS and ESACONLY.
 *
 * Return value:
 *	REDIR	- a redirection operator; yylval.iop describes it
 *	LWORD	- a word; yylval.cp is the encoded word (CHAR/QCHAR/OQUOTE/...
 *		  markers followed by their data, terminated by EOS)
 *	a keyword's token value (p->val.i) when KEYWORD is set and the word
 *		  is found in the keywords table
 *	BREAK, LOGOR, LOGAND, COPROC (KSH), MDPAREN (KSH) for ;; || && |& ((
 *	otherwise the terminating character itself
 *
 * When an alias is expanded, its text is pushed as a new source and lexing
 * restarts at Again.
 */
99 int
100 yylex(cf)
101 int cf;
102 {
103 Lex_state states[STATE_BSIZE], *statep;
104 State_info state_info;
105 register int c, state;
106 XString ws; /* expandable output word */
107 register char *wp; /* output word pointer */
108 char *sp, *dp;
109 int c2;
110
111
/* (Re)start point: also reached after a newline when CONTIN is set and after
 * an alias expansion has been pushed onto the source stack.
 */
112 Again:
/* Element 0 of the on-stack block is only a link slot (no previous block) */
113 states[0].ls_state = -1;
114 states[0].ls_info.base = (Lex_state *) 0;
115 statep = &states[1];
116 state_info.base = states;
117 state_info.end = &states[STATE_BSIZE];
118
119 Xinit(ws, wp, 64, ATEMP);
120
121 backslash_skip = 0;
122 ignore_backslash_newline = 0;
123
124 if (cf&ONEWORD)
125 state = SWORD;
126 #ifdef KSH
127 else if (cf&LETEXPR) {
128 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
129 state = SLETPAREN;
130 statep->ls_sletparen.nparen = 0;
131 }
132 #endif /* KSH */
133 else { /* normal lexing */
134 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
/* skip leading blanks */
135 while ((c = getsc()) == ' ' || c == '\t')
136 ;
/* skip a comment up to (not including) the newline; \newline is not
 * special inside it
 */
137 if (c == '#') {
138 ignore_backslash_newline++;
139 while ((c = getsc()) != '\0' && c != '\n')
140 ;
141 ignore_backslash_newline--;
142 }
143 ungetsc(c);
144 }
145 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
146 source->flags &= ~SF_ALIAS;
147 /* In POSIX mode, a trailing space only counts if we are
148 * parsing a simple command
149 */
150 if (!Flag(FPOSIX) || (cf & CMDWORD))
151 cf |= ALIAS;
152 }
153
154 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
155 statep->ls_state = state;
156
157 /* collect non-special or quoted characters to form word */
/* The loop ends at end of input, or at a C_LEX1 character while in the
 * SBASE/SHEREDELIM state; c then holds the terminator.
 */
158 while (!((c = getsc()) == 0
159 || ((state == SBASE || state == SHEREDELIM)
160 && ctype(c, C_LEX1))))
161 {
162 Xcheck(ws, wp);
163 switch (state) {
164 case SBASE:
/* possible array subscript: name[...] in an assignment/array context */
165 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
166 *wp = EOS; /* temporary */
167 if (is_wdvarname(Xstring(ws, wp), FALSE))
168 {
169 char *p, *tmp;
170
171 if (arraysub(&tmp)) {
172 *wp++ = CHAR;
173 *wp++ = c;
174 for (p = tmp; *p; ) {
175 Xcheck(ws, wp);
176 *wp++ = CHAR;
177 *wp++ = *p++;
178 }
179 afree(tmp, ATEMP);
180 break;
181 } else {
/* no matching ]: push the text read by arraysub() back as a
 * source so it gets lexed normally
 */
182 Source *s;
183
184 s = pushs(SREREAD,
185 source->areap);
186 s->start = s->str
187 = s->u.freeme = tmp;
188 s->next = source;
189 source = s;
190 }
191 }
192 *wp++ = CHAR;
193 *wp++ = c;
194 break;
195 }
196 /* fall through.. */
197 Sbase1: /* includes *(...|...) pattern (*+?@!) */
198 #ifdef KSH
199 if (c == '*' || c == '@' || c == '+' || c == '?'
200 || c == '!')
201 {
202 c2 = getsc();
203 if (c2 == '(' /*)*/ ) {
204 *wp++ = OPAT;
205 *wp++ = c;
206 PUSH_STATE(SPATTERN);
207 break;
208 }
209 ungetsc(c2);
210 }
211 #endif /* KSH */
212 /* fall through.. */
213 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
214 switch (c) {
215 case '\\':
216 c = getsc();
217 #ifdef OS2
218 if (isalnum(c)) {
219 *wp++ = CHAR, *wp++ = '\\';
220 *wp++ = CHAR, *wp++ = c;
221 } else
222 #endif
223 if (c) /* trailing \ is lost */
224 *wp++ = QCHAR, *wp++ = c;
225 break;
226 case '\'':
227 *wp++ = OQUOTE;
228 ignore_backslash_newline++;
229 PUSH_STATE(SSQUOTE);
230 break;
231 case '"':
232 *wp++ = OQUOTE;
233 PUSH_STATE(SDQUOTE);
234 break;
235 default:
236 goto Subst;
237 }
238 break;
239
/* Substitution handling; also the target for SDQUOTE and SWORD text */
240 Subst:
241 switch (c) {
242 case '\\':
243 c = getsc();
244 switch (c) {
245 case '"': case '\\':
246 case '$': case '`':
247 *wp++ = QCHAR, *wp++ = c;
248 break;
249 default:
250 Xcheck(ws, wp);
251 if (c) { /* trailing \ is lost */
252 *wp++ = CHAR, *wp++ = '\\';
253 *wp++ = CHAR, *wp++ = c;
254 }
255 break;
256 }
257 break;
258 case '$':
259 c = getsc();
260 if (c == '(') /*)*/ {
261 c = getsc();
262 if (c == '(') /*)*/ {
/* $(( : arithmetic substitution; remember where it starts in the
 * output in case it turns out to be a $( ( ...) ) command
 */
263 PUSH_STATE(SASPAREN);
264 statep->ls_sasparen.nparen = 2;
265 statep->ls_sasparen.start =
266 Xsavepos(ws, wp);
267 *wp++ = EXPRSUB;
268 } else {
269 ungetsc(c);
270 PUSH_STATE(SCSPAREN);
271 statep->ls_scsparen.nparen = 1;
272 statep->ls_scsparen.csstate = 0;
273 *wp++ = COMSUB;
274 }
275 } else if (c == '{') /*}*/ {
276 *wp++ = OSUBST;
277 *wp++ = '{'; /*}*/
278 wp = get_brace_var(&ws, wp);
279 c = getsc();
280 /* allow :# and :% (ksh88 compat) */
281 if (c == ':') {
282 *wp++ = CHAR, *wp++ = c;
283 c = getsc();
284 }
285 /* If this is a trim operation,
286 * treat (,|,) specially in STBRACE.
287 */
288 if (c == '#' || c == '%') {
289 ungetsc(c);
290 PUSH_STATE(STBRACE);
291 } else {
292 ungetsc(c);
293 PUSH_STATE(SBRACE);
294 }
295 } else if (ctype(c, C_ALPHA)) {
/* $name: emitted as OSUBST 'X' name NUL CSUBST 'X' */
296 *wp++ = OSUBST;
297 *wp++ = 'X';
298 do {
299 Xcheck(ws, wp);
300 *wp++ = c;
301 c = getsc();
302 } while (ctype(c, C_ALPHA|C_DIGIT));
303 *wp++ = '\0';
304 *wp++ = CSUBST;
305 *wp++ = 'X';
306 ungetsc(c);
307 } else if (ctype(c, C_DIGIT|C_VAR1)) {
/* single-character parameter (digit or C_VAR1 character) */
308 Xcheck(ws, wp);
309 *wp++ = OSUBST;
310 *wp++ = 'X';
311 *wp++ = c;
312 *wp++ = '\0';
313 *wp++ = CSUBST;
314 *wp++ = 'X';
315 } else {
/* a plain '$' */
316 *wp++ = CHAR, *wp++ = '$';
317 ungetsc(c);
318 }
319 break;
320 case '`':
321 PUSH_STATE(SBQUOTE);
322 *wp++ = COMSUB;
323 /* Need to know if we are inside double quotes
324 * since sh/at&t-ksh translate the \" to " in
325 * "`..\"..`".
326 * This is not done in posix mode (section
327 * 3.2.3, Double Quotes: "The backquote shall
328 * retain its special meaning introducing the
329 * other form of command substitution (see
330 * 3.6.3). The portion of the quoted string
331 * from the initial backquote and the
332 * characters up to the next backquote that
333 * is not preceded by a backslash (having
334 * escape characters removed) defines that
335 * command whose output replaces `...` when
336 * the word is expanded."
337 * Section 3.6.3, Command Substitution:
338 * "Within the backquoted style of command
339 * substitution, backslash shall retain its
340 * literal meaning, except when followed by
341 * $ ` \.").
342 */
343 statep->ls_sbquote.indquotes = 0;
344 if (!Flag(FPOSIX)) {
/* walk the state stack, block by block, looking for an enclosing
 * SDQUOTE state
 */
345 Lex_state *s = statep;
346 Lex_state *base = state_info.base;
347 while (1) {
348 for (; s != base; s--) {
349 if (s->ls_state == SDQUOTE) {
350 statep->ls_sbquote.indquotes = 1;
351 break;
352 }
353 }
354 if (s != base)
355 break;
/* reached this block's link slot: follow it to the previous block,
 * stopping when there is none
 */
356 if (!(s = s->ls_info.base))
357 break;
358 base = s-- - STATE_BSIZE;
359 }
360 }
361 break;
362 default:
363 *wp++ = CHAR, *wp++ = c;
364 }
365 break;
366
367 case SSQUOTE:
368 if (c == '\'') {
369 POP_STATE();
370 *wp++ = CQUOTE;
371 ignore_backslash_newline--;
372 } else
373 *wp++ = QCHAR, *wp++ = c;
374 break;
375
376 case SDQUOTE:
377 if (c == '"') {
378 POP_STATE();
379 *wp++ = CQUOTE;
380 } else
381 goto Subst;
382 break;
383
384 case SCSPAREN: /* $( .. ) */
385 /* todo: deal with $(...) quoting properly
386 * kludge to partly fake quoting inside $(..): doesn't
387 * really work because nested $(..) or ${..} inside
388 * double quotes aren't dealt with.
389 */
390 switch (statep->ls_scsparen.csstate) {
391 case 0: /* normal */
392 switch (c) {
393 case '(':
394 statep->ls_scsparen.nparen++;
395 break;
396 case ')':
397 statep->ls_scsparen.nparen--;
398 break;
399 case '\\':
400 statep->ls_scsparen.csstate = 1;
401 break;
402 case '"':
403 statep->ls_scsparen.csstate = 2;
404 break;
405 case '\'':
406 statep->ls_scsparen.csstate = 4;
407 ignore_backslash_newline++;
408 break;
409 }
410 break;
411
412 case 1: /* backslash in normal mode */
413 case 3: /* backslash in double quotes */
/* the escaped character has been consumed: 1 -> 0, 3 -> 2 */
414 --statep->ls_scsparen.csstate;
415 break;
416
417 case 2: /* double quotes */
418 if (c == '"')
419 statep->ls_scsparen.csstate = 0;
420 else if (c == '\\')
421 statep->ls_scsparen.csstate = 3;
422 break;
423
424 case 4: /* single quotes */
425 if (c == '\'') {
426 statep->ls_scsparen.csstate = 0;
427 ignore_backslash_newline--;
428 }
429 break;
430 }
431 if (statep->ls_scsparen.nparen == 0) {
432 POP_STATE();
433 *wp++ = 0; /* end of COMSUB */
434 } else
435 *wp++ = c;
436 break;
437
438 case SASPAREN: /* $(( .. )) */
439 /* todo: deal with $((...); (...)) properly */
440 /* XXX should nest using existing state machine
441 * (embed "..", $(...), etc.) */
442 if (c == '(')
443 statep->ls_sasparen.nparen++;
444 else if (c == ')') {
445 statep->ls_sasparen.nparen--;
446 if (statep->ls_sasparen.nparen == 1) {
447 /*(*/
448 if ((c2 = getsc()) == ')') {
449 POP_STATE();
450 *wp++ = 0; /* end of EXPRSUB */
451 break;
452 } else {
453 char *s;
454
455 ungetsc(c2);
456 /* mismatched parenthesis -
457 * assume we were really
458 * parsing a $(..) expression
459 */
/* rewrite the output: shift the collected text up one byte and
 * replace the EXPRSUB marker with COMSUB followed by '('
 */
460 s = Xrestpos(ws, wp,
461 statep->ls_sasparen.start);
462 memmove(s + 1, s, wp - s);
463 *s++ = COMSUB;
464 *s = '('; /*)*/
465 wp++;
466 statep->ls_scsparen.nparen = 1;
467 statep->ls_scsparen.csstate = 0;
468 state = statep->ls_state
469 = SCSPAREN;
470
471 }
472 }
473 }
474 *wp++ = c;
475 break;
476
477 case SBRACE:
478 /*{*/
479 if (c == '}') {
480 POP_STATE();
481 *wp++ = CSUBST;
482 *wp++ = /*{*/ '}';
483 } else
484 goto Sbase1;
485 break;
486
487 case STBRACE:
488 /* Same as SBRACE, except (,|,) treated specially */
489 /*{*/
490 if (c == '}') {
491 POP_STATE();
492 *wp++ = CSUBST;
493 *wp++ = /*{*/ '}';
494 } else if (c == '|') {
495 *wp++ = SPAT;
496 } else if (c == '(') {
497 *wp++ = OPAT;
498 *wp++ = ' '; /* simile for @ */
499 PUSH_STATE(SPATTERN);
500 } else
501 goto Sbase1;
502 break;
503
504 case SBQUOTE:
505 if (c == '`') {
506 *wp++ = 0;
507 POP_STATE();
508 } else if (c == '\\') {
509 switch (c = getsc()) {
510 case '\\':
511 case '$': case '`':
512 *wp++ = c;
513 break;
514 case '"':
515 if (statep->ls_sbquote.indquotes) {
516 *wp++ = c;
517 break;
518 }
519 /* fall through.. */
520 default:
521 if (c) { /* trailing \ is lost */
522 *wp++ = '\\';
523 *wp++ = c;
524 }
525 break;
526 }
527 } else
528 *wp++ = c;
529 break;
530
531 case SWORD: /* ONEWORD */
532 goto Subst;
533
534 #ifdef KSH
535 case SLETPAREN: /* LETEXPR: (( ... )) */
536 /*(*/
537 if (c == ')') {
538 if (statep->ls_sletparen.nparen > 0)
539 --statep->ls_sletparen.nparen;
540 /*(*/
541 else if ((c2 = getsc()) == ')') {
/* closing )) of the let expression: finish the word */
542 c = 0;
543 *wp++ = CQUOTE;
544 goto Done;
545 } else
546 ungetsc(c2);
547 } else if (c == '(')
548 /* parenthesis inside quotes and backslashes
549 * are lost, but at&t ksh doesn't count them
550 * either
551 */
552 ++statep->ls_sletparen.nparen;
553 goto Sbase2;
554 #endif /* KSH */
555
556 case SHEREDELIM: /* <<,<<- delimiter */
557 /* XXX chuck this state (and the next) - use
558 * the existing states ($ and \`..` should be
559 * stripped of their specialness after the
560 * fact).
561 */
562 /* here delimiters need a special case since
563 * $ and `..` are not to be treated specially
564 */
565 if (c == '\\') {
566 c = getsc();
567 if (c) { /* trailing \ is lost */
568 *wp++ = QCHAR;
569 *wp++ = c;
570 }
571 } else if (c == '\'') {
572 PUSH_STATE(SSQUOTE);
573 *wp++ = OQUOTE;
574 ignore_backslash_newline++;
575 } else if (c == '"') {
/* note: switches state in place rather than pushing a new one */
576 state = statep->ls_state = SHEREDQUOTE;
577 *wp++ = OQUOTE;
578 } else {
579 *wp++ = CHAR;
580 *wp++ = c;
581 }
582 break;
583
584 case SHEREDQUOTE: /* " in <<,<<- delimiter */
585 if (c == '"') {
586 *wp++ = CQUOTE;
587 state = statep->ls_state = SHEREDELIM;
588 } else {
589 if (c == '\\') {
590 switch (c = getsc()) {
591 case '\\': case '"':
592 case '$': case '`':
593 break;
594 default:
595 if (c) { /* trailing \ lost */
596 *wp++ = CHAR;
597 *wp++ = '\\';
598 }
599 break;
600 }
601 }
602 *wp++ = CHAR;
603 *wp++ = c;
604 }
605 break;
606
607 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
608 if ( /*(*/ c == ')') {
609 *wp++ = CPAT;
610 POP_STATE();
611 } else if (c == '|') {
612 *wp++ = SPAT;
613 } else if (c == '(') {
614 *wp++ = OPAT;
615 *wp++ = ' '; /* simile for @ */
616 PUSH_STATE(SPATTERN);
617 } else
618 goto Sbase1;
619 break;
620 }
621 }
622 Done:
623 Xcheck(ws, wp);
/* any state still stacked above the initial one means an unterminated
 * quote/substitution
 */
624 if (statep != &states[1])
625 /* XXX figure out what is missing */
626 yyerror("no closing quote\n");
627
628 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
629 if (state == SHEREDELIM)
630 state = SBASE;
631
/* Redirection: '<' or '>' terminating either an empty word or a word that
 * is a single unquoted digit (the file descriptor number)
 */
632 dp = Xstring(ws, wp);
633 if ((c == '<' || c == '>') && state == SBASE
634 && ((c2 = Xlength(ws, wp)) == 0
635 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
636 {
637 struct ioword *iop =
638 (struct ioword *) alloc(sizeof(*iop), ATEMP);
639
640 if (c2 == 2)
641 iop->unit = dp[1] - '0';
642 else
643 iop->unit = c == '>'; /* 0 for <, 1 for > */
644
645 c2 = getsc();
646 /* <<, >>, <> are ok, >< is not */
647 if (c == c2 || (c == '<' && c2 == '>')) {
648 iop->flag = c == c2 ?
649 (c == '>' ? IOCAT : IOHERE) : IORDWR;
650 if (iop->flag == IOHERE) {
/* <<- sets IOSKIP */
651 if ((c2 = getsc()) == '-') {
652 iop->flag |= IOSKIP;
653 } else {
654 ungetsc(c2);
655 }
656 }
657 } else if (c2 == '&')
658 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
659 else {
660 iop->flag = c == '>' ? IOWRITE : IOREAD;
661 if (c == '>' && c2 == '|')
662 iop->flag |= IOCLOB;
663 else
664 ungetsc(c2);
665 }
666
667 iop->name = (char *) 0;
668 iop->delim = (char *) 0;
669 iop->heredoc = (char *) 0;
670 Xfree(ws, wp); /* free word */
671 yylval.iop = iop;
672 return REDIR;
673 }
674
675 if (wp == dp && state == SBASE) {
676 Xfree(ws, wp); /* free word */
677 /* no word, process LEX1 character */
678 switch (c) {
679 default:
680 return c;
681
682 case '|':
683 case '&':
684 case ';':
/* doubled operator: ;; || && */
685 if ((c2 = getsc()) == c)
686 c = (c == ';') ? BREAK :
687 (c == '|') ? LOGOR :
688 (c == '&') ? LOGAND :
689 YYERRCODE;
690 #ifdef KSH
691 else if (c == '|' && c2 == '&')
692 c = COPROC;
693 #endif /* KSH */
694 else
695 ungetsc(c2);
696 return c;
697
698 case '\n':
/* read the bodies of any here documents pending on this line */
699 gethere();
700 if (cf & CONTIN)
701 goto Again;
702 return c;
703
704 case '(': /*)*/
705 #ifdef KSH
706 if ((c2 = getsc()) == '(') /*)*/
707 /* XXX need to handle ((...); (...)) */
708 c = MDPAREN;
709 else
710 ungetsc(c2);
711 #endif /* KSH */
712 return c;
713 /*(*/
714 case ')':
715 return c;
716 }
717 }
718
719 *wp++ = EOS; /* terminate word */
720 yylval.cp = Xclose(ws, wp);
721 if (state == SWORD
722 #ifdef KSH
723 || state == SLETPAREN
724 #endif /* KSH */
725 ) /* ONEWORD? */
726 return LWORD;
727 ungetsc(c); /* unget terminator */
728
729 /* copy word to unprefixed string ident */
/* Only CHAR-prefixed (plain, unquoted) characters are copied; the loop stops
 * at the first other marker or once IDENT characters have been copied.
 */
730 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
731 *dp++ = *sp++;
732 /* Make sure the ident array stays '\0' paded */
733 memset(dp, 0, (ident+IDENT) - dp + 1);
/* the copy did not stop at the end of the word: do not treat it as an
 * identifier
 */
734 if (c != EOS)
735 *ident = '\0'; /* word is not unquoted */
736
737 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
738 struct tbl *p;
739 int h = hash(ident);
740
741 /* { */
742 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
743 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
744 {
745 afree(yylval.cp, ATEMP);
746 return p->val.i;
747 }
748 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
749 && (p->flag & ISSET))
750 {
751 register Source *s;
752
/* this alias is already being expanded further down the source stack:
 * return the word as-is instead of expanding it again
 */
753 for (s = source; s->type == SALIAS; s = s->next)
754 if (s->u.tblp == p)
755 return LWORD;
756 /* push alias expansion */
757 s = pushs(SALIAS, source->areap);
758 s->start = s->str = p->val.s;
759 s->u.tblp = p;
760 s->next = source;
761 source = s;
762 afree(yylval.cp, ATEMP);
763 goto Again;
764 }
765 }
766
767 return LWORD;
768 }
769
770 static void
771 gethere()
772 {
773 register struct ioword **p;
774
775 for (p = heres; p < herep; p++)
776 readhere(*p);
777 herep = heres;
778 }
779
780 /*
781 * read "<<word" text into temp file
782 */
783
784 static void
785 readhere(iop)
786 struct ioword *iop;
787 {
788 register int c;
789 char *volatile eof;
790 char *eofp;
791 int skiptabs;
792 XString xs;
793 char *xp;
794 int xpos;
795
796 eof = evalstr(iop->delim, 0);
797
798 if (!(iop->flag & IOEVAL))
799 ignore_backslash_newline++;
800
801 Xinit(xs, xp, 256, ATEMP);
802
803 for (;;) {
804 eofp = eof;
805 skiptabs = iop->flag & IOSKIP;
806 xpos = Xsavepos(xs, xp);
807 while ((c = getsc()) != 0) {
808 if (skiptabs) {
809 if (c == '\t')
810 continue;
811 skiptabs = 0;
812 }
813 if (c != *eofp)
814 break;
815 Xcheck(xs, xp);
816 Xput(xs, xp, c);
817 eofp++;
818 }
819 /* Allow EOF here so commands with out trailing newlines
820 * will work (eg, ksh -c '...', $(...), etc).
821 */
822 if (*eofp == '\0' && (c == 0 || c == '\n')) {
823 xp = Xrestpos(xs, xp, xpos);
824 break;
825 }
826 ungetsc(c);
827 while ((c = getsc()) != '\n') {
828 if (c == 0)
829 yyerror("here document `%s' unclosed\n", eof);
830 Xcheck(xs, xp);
831 Xput(xs, xp, c);
832 }
833 Xcheck(xs, xp);
834 Xput(xs, xp, c);
835 }
836 Xput(xs, xp, '\0');
837 iop->heredoc = Xclose(xs, xp);
838
839 if (!(iop->flag & IOEVAL))
840 ignore_backslash_newline--;
841 }
842
843 void
844 #ifdef HAVE_PROTOTYPES
845 yyerror(const char *fmt, ...)
846 #else
847 yyerror(fmt, va_alist)
848 const char *fmt;
849 va_dcl
850 #endif
851 {
852 va_list va;
853
854 /* pop aliases and re-reads */
855 while (source->type == SALIAS || source->type == SREREAD)
856 source = source->next;
857 source->str = null; /* zap pending input */
858
859 error_prefix(TRUE);
860 SH_VA_START(va, fmt);
861 shf_vfprintf(shl_out, fmt, va);
862 va_end(va);
863 errorf(null);
864 }
865
866 /*
867 * input for yylex with alias expansion
868 */
869
870 Source *
871 pushs(type, areap)
872 int type;
873 Area *areap;
874 {
875 register Source *s;
876
877 s = (Source *) alloc(sizeof(Source), areap);
878 s->type = type;
879 s->str = null;
880 s->start = NULL;
881 s->line = 0;
882 s->errline = 0;
883 s->file = NULL;
884 s->flags = 0;
885 s->next = NULL;
886 s->areap = areap;
887 if (type == SFILE || type == SSTDIN) {
888 char *dummy;
889 Xinit(s->xs, dummy, 256, s->areap);
890 } else
891 memset(&s->xs, 0, sizeof(s->xs));
892 return s;
893 }
894
/*
 * Return the next raw character of input (no \newline processing), called
 * when the current source's buffer is exhausted.  Depending on the source
 * type the buffer is refilled, the source is advanced to its next piece, or
 * the source is popped off the stack.  Returns 0 at end of input.
 */
895 static int
896 getsc__()
897 {
898 register Source *s = source;
899 register int c;
900
901 while ((c = *s->str++) == 0) {
902 s->str = NULL; /* return 0 for EOF by default */
903 switch (s->type) {
904 case SEOF:
905 s->str = null;
906 return 0;
907
908 case SSTDIN:
909 case SFILE:
/* read another line; leaves s->str NULL on EOF */
910 getsc_line(s);
911 break;
912
913 case SWSTR:
914 break;
915
916 case SSTRING:
917 break;
918
/* SWORDS/SWORDSEP alternate: deliver the next string of strv, then a
 * separator -- a space if more strings follow, otherwise a final newline
 * after which the source becomes SEOF
 */
919 case SWORDS:
920 s->start = s->str = *s->u.strv++;
921 s->type = SWORDSEP;
922 break;
923
924 case SWORDSEP:
925 if (*s->u.strv == NULL) {
926 s->start = s->str = newline;
927 s->type = SEOF;
928 } else {
929 s->start = s->str = space;
930 s->type = SWORDS;
931 }
932 break;
933
934 case SALIAS:
935 if (s->flags & SF_ALIASEND) {
936 /* pass on an unused SF_ALIAS flag */
937 source = s->next;
938 source->flags |= s->flags & SF_ALIAS;
939 s = source;
940 } else if (*s->u.tblp->val.s
941 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
942 {
943 source = s = s->next; /* pop source stack */
944 /* Note that this alias ended with a space,
945 * enabling alias expansion on the following
946 * word.
947 */
948 s->flags |= SF_ALIAS;
949 } else {
950 /* At this point, we need to keep the current
951 * alias in the source list so recursive
952 * aliases can be detected and we also need
953 * to return the next character. Do this
954 * by temporarily popping the alias to get
955 * the next character and then put it back
956 * in the source list with the SF_ALIASEND
957 * flag set.
958 */
959 source = s->next; /* pop source stack */
960 source->flags |= s->flags & SF_ALIAS;
961 c = getsc__();
962 if (c) {
/* park the look-ahead character in the alias source's own unget
 * buffer and re-push the alias on top of the stack
 */
963 s->flags |= SF_ALIASEND;
964 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
965 s->start = s->str = s->ugbuf;
966 s->next = source;
967 source = s;
968 } else {
969 s = source;
970 /* avoid reading eof twice */
971 s->str = NULL;
972 break;
973 }
974 }
975 continue;
976
977 case SREREAD:
/* free the pushed-back text unless it lives in the source's own ugbuf,
 * then pop the source
 */
978 if (s->start != s->ugbuf) /* yuck */
979 afree(s->u.freeme, ATEMP);
980 source = s = s->next;
981 continue;
982 }
/* nothing supplied more input: turn the source into a permanent EOF */
983 if (s->str == NULL) {
984 s->type = SEOF;
985 s->start = s->str = null;
986 return '\0';
987 }
/* echo the freshly obtained input when SF_ECHO is set on the source */
988 if (s->flags & SF_ECHO) {
989 shf_puts(s->str, shl_out);
990 shf_flush(shl_out);
991 }
992 }
993 return c;
994 }
995
/*
 * Read the next line of input for source s into its buffer s->xs and point
 * s->start and s->str at it.  s->str is set to NULL when nothing was read
 * (EOF).  For an interactive source (FTALKING set and type SSTDIN) this also
 * prints the prompt or runs the command line editor, saves the line in the
 * history, and switches the prompt to PS2 afterwards.
 */
996 static void
997 getsc_line(s)
998 Source *s;
999 {
1000 char *xp = Xstring(s->xs, xp);
1001 int interactive = Flag(FTALKING) && s->type == SSTDIN;
1002 int have_tty = interactive && (s->flags & SF_TTY);
1003
1004 /* Done here to ensure nothing odd happens when a timeout occurs */
1005 XcheckN(s->xs, xp, LINE);
1006 *xp = '\0';
1007 s->start = s->str = xp;
1008
1009 #ifdef KSH
/* arm the ksh_tmout alarm while waiting for terminal input */
1010 if (have_tty && ksh_tmout) {
1011 ksh_tmout_state = TMOUT_READING;
1012 alarm(ksh_tmout);
1013 }
1014 #endif /* KSH */
1015 #ifdef EDIT
/* an editing mode is enabled: let x_read() fetch the line */
1016 if (have_tty && (0
1017 # ifdef VI
1018 || Flag(FVI)
1019 # endif /* VI */
1020 # ifdef EMACS
1021 || Flag(FEMACS) || Flag(FGMACS)
1022 # endif /* EMACS */
1023 ))
1024 {
1025 int nread;
1026
1027 nread = x_read(xp, LINE);
1028 if (nread < 0) /* read error */
1029 nread = 0;
1030 xp[nread] = '\0';
1031 xp += nread;
1032 }
1033 else
1034 #endif /* EDIT */
1035 {
1036 if (interactive) {
1037 pprompt(prompt, 0);
1038 } else
1039 s->line++;
1040
/* read until a newline has been stored or input ends, growing the buffer
 * as needed
 */
1041 while (1) {
1042 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1043
/* interrupted read: run pending traps and try again */
1044 if (!p && shf_error(s->u.shf)
1045 && shf_errno(s->u.shf) == EINTR)
1046 {
1047 shf_clearerr(s->u.shf);
1048 if (trap)
1049 runtraps(0);
1050 continue;
1051 }
1052 if (!p || (xp = p, xp[-1] == '\n'))
1053 break;
1054 /* double buffer size */
1055 xp++; /* move past null so doubling works... */
1056 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1057 xp--; /* ...and move back again */
1058 }
1059 /* flush any unwanted input so other programs/builtins
1060 * can read it. Not very optimal, but less error prone
1061 * than flushing else where, dealing with redirections,
1062 * etc..
1063 * todo: reduce size of shf buffer (~128?) if SSTDIN
1064 */
1065 if (s->type == SSTDIN)
1066 shf_flush(s->u.shf);
1067 }
1068 /* XXX: temporary kludge to restore source after a
1069 * trap may have been executed.
1070 */
1071 source = s;
1072 #ifdef KSH
1073 if (have_tty && ksh_tmout)
1074 {
1075 ksh_tmout_state = TMOUT_EXECUTING;
1076 alarm(0);
1077 }
1078 #endif /* KSH */
/* the buffer may have been reallocated while reading: re-point the source
 * at its start
 */
1079 s->start = s->str = Xstring(s->xs, xp);
1080 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1081 /* Note: if input is all nulls, this is not eof */
1082 if (Xlength(s->xs, xp) == 0) { /* EOF */
1083 if (s->type == SFILE)
1084 shf_fdclose(s->u.shf);
1085 s->str = NULL;
1086 } else if (interactive) {
1087 #ifdef HISTORY
/* at the PS1 prompt, a line holding only IFS white space is not saved */
1088 char *p = Xstring(s->xs, xp);
1089 if (cur_prompt == PS1)
1090 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1091 p++;
1092 if (*p) {
1093 # ifdef EASY_HISTORY
1094 if (cur_prompt == PS2)
1095 histappend(Xstring(s->xs, xp), 1);
1096 else
1097 # endif /* EASY_HISTORY */
1098 {
1099 s->line++;
1100 histsave(s->line, s->str, 1);
1101 }
1102 }
1103 #endif /* HISTORY */
1104 }
/* whatever is read next is a continuation of this command */
1105 if (interactive)
1106 set_prompt(PS2, (Source *) 0);
1107 }
1108
/*
 * Select which prompt is current (cur_prompt = to) and compute its text into
 * the global `prompt'.
 *
 * to: PS1 (command prompt) or PS2 (continuation prompt); any other value
 *     only updates cur_prompt.
 * s:  source whose line number (s->line + 1) replaces a single `!' in PS1
 *     under KSH; 0 is used when s is null.
 */
1109 void
1110 set_prompt(to, s)
1111 int to;
1112 Source *s;
1113 {
1114 cur_prompt = to;
1115
1116 switch (to) {
1117 case PS1: /* command */
1118 #ifdef KSH
1119 /* Substitute ! and !! here, before substitutions are done
1120 * so ! in expanded variables are not expanded.
1121 * NOTE: this is not what at&t ksh does (it does it after
1122 * substitutions, POSIX doesn't say which is to be done.
1123 */
1124 {
1125 struct shf *shf;
1126 char *ps1;
1127 Area *saved_atemp;
1128 #ifdef __GNUC__
/* NOTE(review): taking the address presumably keeps gcc from complaining
 * about ps1 around the sigsetjmp below -- confirm
 */
1129 (void) &ps1;
1130 #endif
1131
1132 ps1 = str_val(global("PS1"));
1133 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1134 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
/* copy PS1 into the string shf: "!!" becomes "!", a lone "!" becomes the
 * line number
 */
1135 while (*ps1) {
1136 if (*ps1 != '!' || *++ps1 == '!')
1137 shf_putchar(*ps1++, shf);
1138 else
1139 shf_fprintf(shf, "%d",
1140 s ? s->line + 1 : 0);
1141 }
1142 ps1 = shf_sclose(shf);
/* run substitute() inside its own error-handling environment; if it
 * fails, control returns through the sigsetjmp and safe_prompt is used
 */
1143 saved_atemp = ATEMP;
1144 newenv(E_ERRH);
1145 if (ksh_sigsetjmp(e->jbuf, 0)) {
1146 prompt = safe_prompt;
1147 /* Don't print an error - assume it has already
1148 * been printed. Reason is we may have forked
1149 * to run a command and the child may be
1150 * unwinding its stack through this code as it
1151 * exits.
1152 */
1153 } else
/* the result is saved in the area that was ATEMP before newenv() */
1154 prompt = str_save(substitute(ps1, 0),
1155 saved_atemp);
1156 quitenv();
1157 }
1158 #else /* KSH */
1159 prompt = str_val(global("PS1"));
1160 #endif /* KSH */
1161 break;
1162
1163 case PS2: /* command continuation */
1164 prompt = str_val(global("PS2"));
1165 break;
1166 }
1167 }
1168
1169 /* See also related routine, promptlen() in edit.c */
1170 void
1171 pprompt(cp, ntruncate)
1172 const char *cp;
1173 int ntruncate;
1174 {
1175 #if 0
1176 char nbuf[32];
1177 int c;
1178
1179 while (*cp != 0) {
1180 if (*cp != '!')
1181 c = *cp++;
1182 else if (*++cp == '!')
1183 c = *cp++;
1184 else {
1185 int len;
1186 char *p;
1187
1188 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1189 source->line + 1);
1190 len = strlen(nbuf);
1191 if (ntruncate) {
1192 if (ntruncate >= len) {
1193 ntruncate -= len;
1194 continue;
1195 }
1196 p += ntruncate;
1197 len -= ntruncate;
1198 ntruncate = 0;
1199 }
1200 shf_write(p, len, shl_out);
1201 continue;
1202 }
1203 if (ntruncate)
1204 --ntruncate;
1205 else
1206 shf_putc(c, shl_out);
1207 }
1208 #endif /* 0 */
1209 shf_puts(cp + ntruncate, shl_out);
1210 shf_flush(shl_out);
1211 }
1212
1213 /* Read the variable part of a ${...} expression (ie, up to but not including
1214 * the :[-+?=#%] or close-brace.
1215 */
1216 static char *
1217 get_brace_var(wsp, wp)
1218 XString *wsp;
1219 char *wp;
1220 {
1221 enum parse_state {
1222 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1223 PS_NUMBER, PS_VAR1, PS_END
1224 }
1225 state;
1226 char c;
1227
1228 state = PS_INITIAL;
1229 while (1) {
1230 c = getsc();
1231 /* State machine to figure out where the variable part ends. */
1232 switch (state) {
1233 case PS_INITIAL:
1234 if (c == '#') {
1235 state = PS_SAW_HASH;
1236 break;
1237 }
1238 /* fall through.. */
1239 case PS_SAW_HASH:
1240 if (letter(c))
1241 state = PS_IDENT;
1242 else if (digit(c))
1243 state = PS_NUMBER;
1244 else if (ctype(c, C_VAR1))
1245 state = PS_VAR1;
1246 else
1247 state = PS_END;
1248 break;
1249 case PS_IDENT:
1250 if (!letnum(c)) {
1251 state = PS_END;
1252 if (c == '[') {
1253 char *tmp, *p;
1254
1255 if (!arraysub(&tmp))
1256 yyerror("missing ]\n");
1257 *wp++ = c;
1258 for (p = tmp; *p; ) {
1259 Xcheck(*wsp, wp);
1260 *wp++ = *p++;
1261 }
1262 afree(tmp, ATEMP);
1263 c = getsc(); /* the ] */
1264 }
1265 }
1266 break;
1267 case PS_NUMBER:
1268 if (!digit(c))
1269 state = PS_END;
1270 break;
1271 case PS_VAR1:
1272 state = PS_END;
1273 break;
1274 case PS_END: /* keep gcc happy */
1275 break;
1276 }
1277 if (state == PS_END) {
1278 *wp++ = '\0'; /* end of variable part */
1279 ungetsc(c);
1280 break;
1281 }
1282 Xcheck(*wsp, wp);
1283 *wp++ = c;
1284 }
1285 return wp;
1286 }
1287
1288 /*
1289 * Save an array subscript - returns true if matching bracket found, false
1290 * if eof or newline was found.
1291 * (Returned string double null terminated)
1292 */
1293 static int
1294 arraysub(strp)
1295 char **strp;
1296 {
1297 XString ws;
1298 char *wp;
1299 char c;
1300 int depth = 1; /* we are just past the initial [ */
1301
1302 Xinit(ws, wp, 32, ATEMP);
1303
1304 do {
1305 c = getsc();
1306 Xcheck(ws, wp);
1307 *wp++ = c;
1308 if (c == '[')
1309 depth++;
1310 else if (c == ']')
1311 depth--;
1312 } while (depth > 0 && c && c != '\n');
1313
1314 *wp++ = '\0';
1315 *strp = Xclose(ws, wp);
1316
1317 return depth == 0 ? 1 : 0;
1318 }
1319
1320 /* Unget a char: handles case when we are already at the start of the buffer */
1321 static const char *
1322 ungetsc(c)
1323 int c;
1324 {
1325 if (backslash_skip)
1326 backslash_skip--;
1327 /* Don't unget eof... */
1328 if (source->str == null && c == '\0')
1329 return source->str;
1330 if (source->str > source->start)
1331 source->str--;
1332 else {
1333 Source *s;
1334
1335 s = pushs(SREREAD, source->areap);
1336 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1337 s->start = s->str = s->ugbuf;
1338 s->next = source;
1339 source = s;
1340 }
1341 return source->str;
1342 }
1343
1344
1345 /* Called to get a char that isn't a \newline sequence. */
1346 static int
1347 getsc_bn ARGS((void))
1348 {
1349 int c, c2;
1350
1351 if (ignore_backslash_newline)
1352 return getsc_();
1353
1354 if (backslash_skip == 1) {
1355 backslash_skip = 2;
1356 return getsc_();
1357 }
1358
1359 backslash_skip = 0;
1360
1361 while (1) {
1362 c = getsc_();
1363 if (c == '\\') {
1364 if ((c2 = getsc_()) == '\n')
1365 /* ignore the \newline; get the next char... */
1366 continue;
1367 ungetsc(c2);
1368 backslash_skip = 1;
1369 }
1370 return c;
1371 }
1372 }
1373
1374 static Lex_state *
1375 push_state_(si, old_end)
1376 State_info *si;
1377 Lex_state *old_end;
1378 {
1379 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1380
1381 new[0].ls_info.base = old_end;
1382 si->base = &new[0];
1383 si->end = &new[STATE_BSIZE];
1384 return &new[1];
1385 }
1386
1387 static Lex_state *
1388 pop_state_(si, old_end)
1389 State_info *si;
1390 Lex_state *old_end;
1391 {
1392 Lex_state *old_base = si->base;
1393
1394 si->base = old_end->ls_info.base - STATE_BSIZE;
1395 si->end = old_end->ls_info.base;
1396
1397 afree(old_base, ATEMP);
1398
1399 return si->base + STATE_BSIZE - 1;;
1400 }
1401