lex.c revision 1.9 1 /* $NetBSD: lex.c,v 1.9 2003/06/23 11:38:59 agc Exp $ */
2
3 /*
4 * lexical analysis and source input
5 */
6 #include <sys/cdefs.h>
7
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.9 2003/06/23 11:38:59 agc Exp $");
10 #endif
11
12
13 #include "sh.h"
14 #include <ctype.h>
15
16
/*
 * One slot of the lexer's state stack: the state code itself plus the
 * extra bookkeeping that a few of the states need.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* one of the S* lexing states */
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count of open parentheses */
			int csstate;	/* quoting sub-state (XXX remove) */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count of open parentheses */
			int start;	/* position of the $(( in the output word */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count of open parentheses */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if inside double quotes: "`...`" */
		} u_sbquote;

		/* only used in slot 0 of a block: link to another state block */
		Lex_state *base;
	} ls_info;
};

/* Short-hands for reaching the per-state members of the union */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
53
54 typedef struct State_info State_info;
55 struct State_info {
56 Lex_state *base;
57 Lex_state *end;
58 };
59
60
61 static void readhere ARGS((struct ioword *iop));
62 static int getsc__ ARGS((void));
63 static void getsc_line ARGS((Source *s));
64 static int getsc_bn ARGS((void));
65 static char *get_brace_var ARGS((XString *wsp, char *wp));
66 static int arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
71
72 static int backslash_skip;
73 static int ignore_backslash_newline;
74
75 /* optimized getsc_bn() */
76 #define getsc() (*source->str != '\0' && *source->str != '\\' \
77 && !backslash_skip ? *source->str++ : getsc_bn())
78 /* optimized getsc__() */
79 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
80
81 #define STATE_BSIZE 32
82
83 #define PUSH_STATE(s) do { \
84 if (++statep == state_info.end) \
85 statep = push_state_(&state_info, statep); \
86 state = statep->ls_state = (s); \
87 } while (0)
88
89 #define POP_STATE() do { \
90 if (--statep == state_info.base) \
91 statep = pop_state_(&state_info, statep); \
92 state = statep->ls_state; \
93 } while (0)
94
95
96
97 /*
98 * Lexical analyzer
99 *
100 * tokens are not regular expressions, they are LL(1).
101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102 * hence the state stack.
103 */
104
105 int
106 yylex(cf)
107 int cf;
108 {
109 Lex_state states[STATE_BSIZE], *statep;
110 State_info state_info;
111 register int c, state;
112 XString ws; /* expandable output word */
113 register char *wp; /* output word pointer */
114 char *sp, *dp;
115 int c2;
116
117
118 Again:
119 states[0].ls_state = -1;
120 states[0].ls_info.base = (Lex_state *) 0;
121 statep = &states[1];
122 state_info.base = states;
123 state_info.end = &states[STATE_BSIZE];
124
125 Xinit(ws, wp, 64, ATEMP);
126
127 backslash_skip = 0;
128 ignore_backslash_newline = 0;
129
130 if (cf&ONEWORD)
131 state = SWORD;
132 #ifdef KSH
133 else if (cf&LETEXPR) {
134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
135 state = SLETPAREN;
136 statep->ls_sletparen.nparen = 0;
137 }
138 #endif /* KSH */
139 else { /* normal lexing */
140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
141 while ((c = getsc()) == ' ' || c == '\t')
142 ;
143 if (c == '#') {
144 ignore_backslash_newline++;
145 while ((c = getsc()) != '\0' && c != '\n')
146 ;
147 ignore_backslash_newline--;
148 }
149 ungetsc(c);
150 }
151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
152 source->flags &= ~SF_ALIAS;
153 /* In POSIX mode, a trailing space only counts if we are
154 * parsing a simple command
155 */
156 if (!Flag(FPOSIX) || (cf & CMDWORD))
157 cf |= ALIAS;
158 }
159
160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 statep->ls_state = state;
162
163 /* collect non-special or quoted characters to form word */
164 while (!((c = getsc()) == 0
165 || ((state == SBASE || state == SHEREDELIM)
166 && ctype(c, C_LEX1))))
167 {
168 Xcheck(ws, wp);
169 switch (state) {
170 case SBASE:
171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 *wp = EOS; /* temporary */
173 if (is_wdvarname(Xstring(ws, wp), FALSE))
174 {
175 char *p, *tmp;
176
177 if (arraysub(&tmp)) {
178 *wp++ = CHAR;
179 *wp++ = c;
180 for (p = tmp; *p; ) {
181 Xcheck(ws, wp);
182 *wp++ = CHAR;
183 *wp++ = *p++;
184 }
185 afree(tmp, ATEMP);
186 break;
187 } else {
188 Source *s;
189
190 s = pushs(SREREAD,
191 source->areap);
192 s->start = s->str
193 = s->u.freeme = tmp;
194 s->next = source;
195 source = s;
196 }
197 }
198 *wp++ = CHAR;
199 *wp++ = c;
200 break;
201 }
202 /* fall through.. */
203 Sbase1: /* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 if (c == '*' || c == '@' || c == '+' || c == '?'
206 || c == '!')
207 {
208 c2 = getsc();
209 if (c2 == '(' /*)*/ ) {
210 *wp++ = OPAT;
211 *wp++ = c;
212 PUSH_STATE(SPATTERN);
213 break;
214 }
215 ungetsc(c2);
216 }
217 #endif /* KSH */
218 /* fall through.. */
219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
220 switch (c) {
221 case '\\':
222 c = getsc();
223 #ifdef OS2
224 if (isalnum(c)) {
225 *wp++ = CHAR, *wp++ = '\\';
226 *wp++ = CHAR, *wp++ = c;
227 } else
228 #endif
229 if (c) /* trailing \ is lost */
230 *wp++ = QCHAR, *wp++ = c;
231 break;
232 case '\'':
233 *wp++ = OQUOTE;
234 ignore_backslash_newline++;
235 PUSH_STATE(SSQUOTE);
236 break;
237 case '"':
238 *wp++ = OQUOTE;
239 PUSH_STATE(SDQUOTE);
240 break;
241 default:
242 goto Subst;
243 }
244 break;
245
246 Subst:
247 switch (c) {
248 case '\\':
249 c = getsc();
250 switch (c) {
251 case '"': case '\\':
252 case '$': case '`':
253 *wp++ = QCHAR, *wp++ = c;
254 break;
255 default:
256 Xcheck(ws, wp);
257 if (c) { /* trailing \ is lost */
258 *wp++ = CHAR, *wp++ = '\\';
259 *wp++ = CHAR, *wp++ = c;
260 }
261 break;
262 }
263 break;
264 case '$':
265 c = getsc();
266 if (c == '(') /*)*/ {
267 c = getsc();
268 if (c == '(') /*)*/ {
269 PUSH_STATE(SASPAREN);
270 statep->ls_sasparen.nparen = 2;
271 statep->ls_sasparen.start =
272 Xsavepos(ws, wp);
273 *wp++ = EXPRSUB;
274 } else {
275 ungetsc(c);
276 PUSH_STATE(SCSPAREN);
277 statep->ls_scsparen.nparen = 1;
278 statep->ls_scsparen.csstate = 0;
279 *wp++ = COMSUB;
280 }
281 } else if (c == '{') /*}*/ {
282 *wp++ = OSUBST;
283 *wp++ = '{'; /*}*/
284 wp = get_brace_var(&ws, wp);
285 c = getsc();
286 /* allow :# and :% (ksh88 compat) */
287 if (c == ':') {
288 *wp++ = CHAR, *wp++ = c;
289 c = getsc();
290 }
291 /* If this is a trim operation,
292 * treat (,|,) specially in STBRACE.
293 */
294 if (c == '#' || c == '%') {
295 ungetsc(c);
296 PUSH_STATE(STBRACE);
297 } else {
298 ungetsc(c);
299 PUSH_STATE(SBRACE);
300 }
301 } else if (ctype(c, C_ALPHA)) {
302 *wp++ = OSUBST;
303 *wp++ = 'X';
304 do {
305 Xcheck(ws, wp);
306 *wp++ = c;
307 c = getsc();
308 } while (ctype(c, C_ALPHA|C_DIGIT));
309 *wp++ = '\0';
310 *wp++ = CSUBST;
311 *wp++ = 'X';
312 ungetsc(c);
313 } else if (ctype(c, C_DIGIT|C_VAR1)) {
314 Xcheck(ws, wp);
315 *wp++ = OSUBST;
316 *wp++ = 'X';
317 *wp++ = c;
318 *wp++ = '\0';
319 *wp++ = CSUBST;
320 *wp++ = 'X';
321 } else {
322 *wp++ = CHAR, *wp++ = '$';
323 ungetsc(c);
324 }
325 break;
326 case '`':
327 PUSH_STATE(SBQUOTE);
328 *wp++ = COMSUB;
329 /* Need to know if we are inside double quotes
330 * since sh/at&t-ksh translate the \" to " in
331 * "`..\"..`".
332 * This is not done in posix mode (section
333 * 3.2.3, Double Quotes: "The backquote shall
334 * retain its special meaning introducing the
335 * other form of command substitution (see
336 * 3.6.3). The portion of the quoted string
337 * from the initial backquote and the
338 * characters up to the next backquote that
339 * is not preceded by a backslash (having
340 * escape characters removed) defines that
341 * command whose output replaces `...` when
342 * the word is expanded."
343 * Section 3.6.3, Command Substitution:
344 * "Within the backquoted style of command
345 * substitution, backslash shall retain its
346 * literal meaning, except when followed by
347 * $ ` \.").
348 */
349 statep->ls_sbquote.indquotes = 0;
350 if (!Flag(FPOSIX)) {
351 Lex_state *s = statep;
352 Lex_state *base = state_info.base;
353 while (1) {
354 for (; s != base; s--) {
355 if (s->ls_state == SDQUOTE) {
356 statep->ls_sbquote.indquotes = 1;
357 break;
358 }
359 }
360 if (s != base)
361 break;
362 if (!(s = s->ls_info.base))
363 break;
364 base = s-- - STATE_BSIZE;
365 }
366 }
367 break;
368 default:
369 *wp++ = CHAR, *wp++ = c;
370 }
371 break;
372
373 case SSQUOTE:
374 if (c == '\'') {
375 POP_STATE();
376 *wp++ = CQUOTE;
377 ignore_backslash_newline--;
378 } else
379 *wp++ = QCHAR, *wp++ = c;
380 break;
381
382 case SDQUOTE:
383 if (c == '"') {
384 POP_STATE();
385 *wp++ = CQUOTE;
386 } else
387 goto Subst;
388 break;
389
390 case SCSPAREN: /* $( .. ) */
391 /* todo: deal with $(...) quoting properly
392 * kludge to partly fake quoting inside $(..): doesn't
393 * really work because nested $(..) or ${..} inside
394 * double quotes aren't dealt with.
395 */
396 switch (statep->ls_scsparen.csstate) {
397 case 0: /* normal */
398 switch (c) {
399 case '(':
400 statep->ls_scsparen.nparen++;
401 break;
402 case ')':
403 statep->ls_scsparen.nparen--;
404 break;
405 case '\\':
406 statep->ls_scsparen.csstate = 1;
407 break;
408 case '"':
409 statep->ls_scsparen.csstate = 2;
410 break;
411 case '\'':
412 statep->ls_scsparen.csstate = 4;
413 ignore_backslash_newline++;
414 break;
415 }
416 break;
417
418 case 1: /* backslash in normal mode */
419 case 3: /* backslash in double quotes */
420 --statep->ls_scsparen.csstate;
421 break;
422
423 case 2: /* double quotes */
424 if (c == '"')
425 statep->ls_scsparen.csstate = 0;
426 else if (c == '\\')
427 statep->ls_scsparen.csstate = 3;
428 break;
429
430 case 4: /* single quotes */
431 if (c == '\'') {
432 statep->ls_scsparen.csstate = 0;
433 ignore_backslash_newline--;
434 }
435 break;
436 }
437 if (statep->ls_scsparen.nparen == 0) {
438 POP_STATE();
439 *wp++ = 0; /* end of COMSUB */
440 } else
441 *wp++ = c;
442 break;
443
444 case SASPAREN: /* $(( .. )) */
445 /* todo: deal with $((...); (...)) properly */
446 /* XXX should nest using existing state machine
447 * (embed "..", $(...), etc.) */
448 if (c == '(')
449 statep->ls_sasparen.nparen++;
450 else if (c == ')') {
451 statep->ls_sasparen.nparen--;
452 if (statep->ls_sasparen.nparen == 1) {
453 /*(*/
454 if ((c2 = getsc()) == ')') {
455 POP_STATE();
456 *wp++ = 0; /* end of EXPRSUB */
457 break;
458 } else {
459 char *s;
460
461 ungetsc(c2);
462 /* mismatched parenthesis -
463 * assume we were really
464 * parsing a $(..) expression
465 */
466 s = Xrestpos(ws, wp,
467 statep->ls_sasparen.start);
468 memmove(s + 1, s, wp - s);
469 *s++ = COMSUB;
470 *s = '('; /*)*/
471 wp++;
472 statep->ls_scsparen.nparen = 1;
473 statep->ls_scsparen.csstate = 0;
474 state = statep->ls_state
475 = SCSPAREN;
476
477 }
478 }
479 }
480 *wp++ = c;
481 break;
482
483 case SBRACE:
484 /*{*/
485 if (c == '}') {
486 POP_STATE();
487 *wp++ = CSUBST;
488 *wp++ = /*{*/ '}';
489 } else
490 goto Sbase1;
491 break;
492
493 case STBRACE:
494 /* Same as SBRACE, except (,|,) treated specially */
495 /*{*/
496 if (c == '}') {
497 POP_STATE();
498 *wp++ = CSUBST;
499 *wp++ = /*{*/ '}';
500 } else if (c == '|') {
501 *wp++ = SPAT;
502 } else if (c == '(') {
503 *wp++ = OPAT;
504 *wp++ = ' '; /* simile for @ */
505 PUSH_STATE(SPATTERN);
506 } else
507 goto Sbase1;
508 break;
509
510 case SBQUOTE:
511 if (c == '`') {
512 *wp++ = 0;
513 POP_STATE();
514 } else if (c == '\\') {
515 switch (c = getsc()) {
516 case '\\':
517 case '$': case '`':
518 *wp++ = c;
519 break;
520 case '"':
521 if (statep->ls_sbquote.indquotes) {
522 *wp++ = c;
523 break;
524 }
525 /* fall through.. */
526 default:
527 if (c) { /* trailing \ is lost */
528 *wp++ = '\\';
529 *wp++ = c;
530 }
531 break;
532 }
533 } else
534 *wp++ = c;
535 break;
536
537 case SWORD: /* ONEWORD */
538 goto Subst;
539
540 #ifdef KSH
541 case SLETPAREN: /* LETEXPR: (( ... )) */
542 /*(*/
543 if (c == ')') {
544 if (statep->ls_sletparen.nparen > 0)
545 --statep->ls_sletparen.nparen;
546 /*(*/
547 else if ((c2 = getsc()) == ')') {
548 c = 0;
549 *wp++ = CQUOTE;
550 goto Done;
551 } else
552 ungetsc(c2);
553 } else if (c == '(')
554 /* parenthesis inside quotes and backslashes
555 * are lost, but at&t ksh doesn't count them
556 * either
557 */
558 ++statep->ls_sletparen.nparen;
559 goto Sbase2;
560 #endif /* KSH */
561
562 case SHEREDELIM: /* <<,<<- delimiter */
563 /* XXX chuck this state (and the next) - use
564 * the existing states ($ and \`..` should be
565 * stripped of their specialness after the
566 * fact).
567 */
568 /* here delimiters need a special case since
569 * $ and `..` are not to be treated specially
570 */
571 if (c == '\\') {
572 c = getsc();
573 if (c) { /* trailing \ is lost */
574 *wp++ = QCHAR;
575 *wp++ = c;
576 }
577 } else if (c == '\'') {
578 PUSH_STATE(SSQUOTE);
579 *wp++ = OQUOTE;
580 ignore_backslash_newline++;
581 } else if (c == '"') {
582 state = statep->ls_state = SHEREDQUOTE;
583 *wp++ = OQUOTE;
584 } else {
585 *wp++ = CHAR;
586 *wp++ = c;
587 }
588 break;
589
590 case SHEREDQUOTE: /* " in <<,<<- delimiter */
591 if (c == '"') {
592 *wp++ = CQUOTE;
593 state = statep->ls_state = SHEREDELIM;
594 } else {
595 if (c == '\\') {
596 switch (c = getsc()) {
597 case '\\': case '"':
598 case '$': case '`':
599 break;
600 default:
601 if (c) { /* trailing \ lost */
602 *wp++ = CHAR;
603 *wp++ = '\\';
604 }
605 break;
606 }
607 }
608 *wp++ = CHAR;
609 *wp++ = c;
610 }
611 break;
612
613 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
614 if ( /*(*/ c == ')') {
615 *wp++ = CPAT;
616 POP_STATE();
617 } else if (c == '|') {
618 *wp++ = SPAT;
619 } else if (c == '(') {
620 *wp++ = OPAT;
621 *wp++ = ' '; /* simile for @ */
622 PUSH_STATE(SPATTERN);
623 } else
624 goto Sbase1;
625 break;
626 }
627 }
628 Done:
629 Xcheck(ws, wp);
630 if (statep != &states[1])
631 /* XXX figure out what is missing */
632 yyerror("no closing quote\n");
633
634 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
635 if (state == SHEREDELIM)
636 state = SBASE;
637
638 dp = Xstring(ws, wp);
639 if ((c == '<' || c == '>') && state == SBASE
640 && ((c2 = Xlength(ws, wp)) == 0
641 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
642 {
643 struct ioword *iop =
644 (struct ioword *) alloc(sizeof(*iop), ATEMP);
645
646 if (c2 == 2)
647 iop->unit = dp[1] - '0';
648 else
649 iop->unit = c == '>'; /* 0 for <, 1 for > */
650
651 c2 = getsc();
652 /* <<, >>, <> are ok, >< is not */
653 if (c == c2 || (c == '<' && c2 == '>')) {
654 iop->flag = c == c2 ?
655 (c == '>' ? IOCAT : IOHERE) : IORDWR;
656 if (iop->flag == IOHERE) {
657 if ((c2 = getsc()) == '-') {
658 iop->flag |= IOSKIP;
659 } else {
660 ungetsc(c2);
661 }
662 }
663 } else if (c2 == '&')
664 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
665 else {
666 iop->flag = c == '>' ? IOWRITE : IOREAD;
667 if (c == '>' && c2 == '|')
668 iop->flag |= IOCLOB;
669 else
670 ungetsc(c2);
671 }
672
673 iop->name = (char *) 0;
674 iop->delim = (char *) 0;
675 iop->heredoc = (char *) 0;
676 Xfree(ws, wp); /* free word */
677 yylval.iop = iop;
678 return REDIR;
679 }
680
681 if (wp == dp && state == SBASE) {
682 Xfree(ws, wp); /* free word */
683 /* no word, process LEX1 character */
684 switch (c) {
685 default:
686 return c;
687
688 case '|':
689 case '&':
690 case ';':
691 if ((c2 = getsc()) == c)
692 c = (c == ';') ? BREAK :
693 (c == '|') ? LOGOR :
694 (c == '&') ? LOGAND :
695 YYERRCODE;
696 #ifdef KSH
697 else if (c == '|' && c2 == '&')
698 c = COPROC;
699 #endif /* KSH */
700 else
701 ungetsc(c2);
702 return c;
703
704 case '\n':
705 gethere();
706 if (cf & CONTIN)
707 goto Again;
708 return c;
709
710 case '(': /*)*/
711 #ifdef KSH
712 if ((c2 = getsc()) == '(') /*)*/
713 /* XXX need to handle ((...); (...)) */
714 c = MDPAREN;
715 else
716 ungetsc(c2);
717 #endif /* KSH */
718 return c;
719 /*(*/
720 case ')':
721 return c;
722 }
723 }
724
725 *wp++ = EOS; /* terminate word */
726 yylval.cp = Xclose(ws, wp);
727 if (state == SWORD
728 #ifdef KSH
729 || state == SLETPAREN
730 #endif /* KSH */
731 ) /* ONEWORD? */
732 return LWORD;
733 ungetsc(c); /* unget terminator */
734
735 /* copy word to unprefixed string ident */
736 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
737 *dp++ = *sp++;
738 /* Make sure the ident array stays '\0' paded */
739 memset(dp, 0, (ident+IDENT) - dp + 1);
740 if (c != EOS)
741 *ident = '\0'; /* word is not unquoted */
742
743 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
744 struct tbl *p;
745 int h = hash(ident);
746
747 /* { */
748 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
749 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
750 {
751 afree(yylval.cp, ATEMP);
752 return p->val.i;
753 }
754 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
755 && (p->flag & ISSET))
756 {
757 register Source *s;
758
759 for (s = source; s->type == SALIAS; s = s->next)
760 if (s->u.tblp == p)
761 return LWORD;
762 /* push alias expansion */
763 s = pushs(SALIAS, source->areap);
764 s->start = s->str = p->val.s;
765 s->u.tblp = p;
766 s->next = source;
767 source = s;
768 afree(yylval.cp, ATEMP);
769 goto Again;
770 }
771 }
772
773 return LWORD;
774 }
775
776 static void
777 gethere()
778 {
779 register struct ioword **p;
780
781 for (p = heres; p < herep; p++)
782 readhere(*p);
783 herep = heres;
784 }
785
786 /*
787 * read "<<word" text into temp file
788 */
789
790 static void
791 readhere(iop)
792 struct ioword *iop;
793 {
794 register int c;
795 char *volatile eof;
796 char *eofp;
797 int skiptabs;
798 XString xs;
799 char *xp;
800 int xpos;
801
802 eof = evalstr(iop->delim, 0);
803
804 if (!(iop->flag & IOEVAL))
805 ignore_backslash_newline++;
806
807 Xinit(xs, xp, 256, ATEMP);
808
809 for (;;) {
810 eofp = eof;
811 skiptabs = iop->flag & IOSKIP;
812 xpos = Xsavepos(xs, xp);
813 while ((c = getsc()) != 0) {
814 if (skiptabs) {
815 if (c == '\t')
816 continue;
817 skiptabs = 0;
818 }
819 if (c != *eofp)
820 break;
821 Xcheck(xs, xp);
822 Xput(xs, xp, c);
823 eofp++;
824 }
825 /* Allow EOF here so commands with out trailing newlines
826 * will work (eg, ksh -c '...', $(...), etc).
827 */
828 if (*eofp == '\0' && (c == 0 || c == '\n')) {
829 xp = Xrestpos(xs, xp, xpos);
830 break;
831 }
832 ungetsc(c);
833 while ((c = getsc()) != '\n') {
834 if (c == 0)
835 yyerror("here document `%s' unclosed\n", eof);
836 Xcheck(xs, xp);
837 Xput(xs, xp, c);
838 }
839 Xcheck(xs, xp);
840 Xput(xs, xp, c);
841 }
842 Xput(xs, xp, '\0');
843 iop->heredoc = Xclose(xs, xp);
844
845 if (!(iop->flag & IOEVAL))
846 ignore_backslash_newline--;
847 }
848
849 void
850 #ifdef HAVE_PROTOTYPES
851 yyerror(const char *fmt, ...)
852 #else
853 yyerror(fmt, va_alist)
854 const char *fmt;
855 va_dcl
856 #endif
857 {
858 va_list va;
859
860 /* pop aliases and re-reads */
861 while (source->type == SALIAS || source->type == SREREAD)
862 source = source->next;
863 source->str = null; /* zap pending input */
864
865 error_prefix(TRUE);
866 SH_VA_START(va, fmt);
867 shf_vfprintf(shl_out, fmt, va);
868 va_end(va);
869 errorf(null);
870 }
871
872 /*
873 * input for yylex with alias expansion
874 */
875
876 Source *
877 pushs(type, areap)
878 int type;
879 Area *areap;
880 {
881 register Source *s;
882
883 s = (Source *) alloc(sizeof(Source), areap);
884 s->type = type;
885 s->str = null;
886 s->start = NULL;
887 s->line = 0;
888 s->errline = 0;
889 s->file = NULL;
890 s->flags = 0;
891 s->next = NULL;
892 s->areap = areap;
893 if (type == SFILE || type == SSTDIN) {
894 char *dummy;
895 Xinit(s->xs, dummy, 256, s->areap);
896 } else
897 memset(&s->xs, 0, sizeof(s->xs));
898 return s;
899 }
900
901 static int
902 getsc__()
903 {
904 register Source *s = source;
905 register int c;
906
907 while ((c = *s->str++) == 0) {
908 s->str = NULL; /* return 0 for EOF by default */
909 switch (s->type) {
910 case SEOF:
911 s->str = null;
912 return 0;
913
914 case SSTDIN:
915 case SFILE:
916 getsc_line(s);
917 break;
918
919 case SWSTR:
920 break;
921
922 case SSTRING:
923 break;
924
925 case SWORDS:
926 s->start = s->str = *s->u.strv++;
927 s->type = SWORDSEP;
928 break;
929
930 case SWORDSEP:
931 if (*s->u.strv == NULL) {
932 s->start = s->str = newline;
933 s->type = SEOF;
934 } else {
935 s->start = s->str = space;
936 s->type = SWORDS;
937 }
938 break;
939
940 case SALIAS:
941 if (s->flags & SF_ALIASEND) {
942 /* pass on an unused SF_ALIAS flag */
943 source = s->next;
944 source->flags |= s->flags & SF_ALIAS;
945 s = source;
946 } else if (*s->u.tblp->val.s
947 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
948 {
949 source = s = s->next; /* pop source stack */
950 /* Note that this alias ended with a space,
951 * enabling alias expansion on the following
952 * word.
953 */
954 s->flags |= SF_ALIAS;
955 } else {
956 /* At this point, we need to keep the current
957 * alias in the source list so recursive
958 * aliases can be detected and we also need
959 * to return the next character. Do this
960 * by temporarily popping the alias to get
961 * the next character and then put it back
962 * in the source list with the SF_ALIASEND
963 * flag set.
964 */
965 source = s->next; /* pop source stack */
966 source->flags |= s->flags & SF_ALIAS;
967 c = getsc__();
968 if (c) {
969 s->flags |= SF_ALIASEND;
970 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
971 s->start = s->str = s->ugbuf;
972 s->next = source;
973 source = s;
974 } else {
975 s = source;
976 /* avoid reading eof twice */
977 s->str = NULL;
978 break;
979 }
980 }
981 continue;
982
983 case SREREAD:
984 if (s->start != s->ugbuf) /* yuck */
985 afree(s->u.freeme, ATEMP);
986 source = s = s->next;
987 continue;
988 }
989 if (s->str == NULL) {
990 s->type = SEOF;
991 s->start = s->str = null;
992 return '\0';
993 }
994 if (s->flags & SF_ECHO) {
995 shf_puts(s->str, shl_out);
996 shf_flush(shl_out);
997 }
998 }
999 return c;
1000 }
1001
1002 static void
1003 getsc_line(s)
1004 Source *s;
1005 {
1006 char *xp = Xstring(s->xs, xp);
1007 int interactive = Flag(FTALKING) && s->type == SSTDIN;
1008 int have_tty = interactive && (s->flags & SF_TTY);
1009
1010 /* Done here to ensure nothing odd happens when a timeout occurs */
1011 XcheckN(s->xs, xp, LINE);
1012 *xp = '\0';
1013 s->start = s->str = xp;
1014
1015 #ifdef KSH
1016 if (have_tty && ksh_tmout) {
1017 ksh_tmout_state = TMOUT_READING;
1018 alarm(ksh_tmout);
1019 }
1020 #endif /* KSH */
1021 #ifdef EDIT
1022 if (have_tty && (0
1023 # ifdef VI
1024 || Flag(FVI)
1025 # endif /* VI */
1026 # ifdef EMACS
1027 || Flag(FEMACS) || Flag(FGMACS)
1028 # endif /* EMACS */
1029 ))
1030 {
1031 int nread;
1032
1033 nread = x_read(xp, LINE);
1034 if (nread < 0) /* read error */
1035 nread = 0;
1036 xp[nread] = '\0';
1037 xp += nread;
1038 }
1039 else
1040 #endif /* EDIT */
1041 {
1042 if (interactive) {
1043 pprompt(prompt, 0);
1044 } else
1045 s->line++;
1046
1047 while (1) {
1048 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1049
1050 if (!p && shf_error(s->u.shf)
1051 && shf_errno(s->u.shf) == EINTR)
1052 {
1053 shf_clearerr(s->u.shf);
1054 if (trap)
1055 runtraps(0);
1056 continue;
1057 }
1058 if (!p || (xp = p, xp[-1] == '\n'))
1059 break;
1060 /* double buffer size */
1061 xp++; /* move past null so doubling works... */
1062 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1063 xp--; /* ...and move back again */
1064 }
1065 /* flush any unwanted input so other programs/builtins
1066 * can read it. Not very optimal, but less error prone
1067 * than flushing else where, dealing with redirections,
1068 * etc..
1069 * todo: reduce size of shf buffer (~128?) if SSTDIN
1070 */
1071 if (s->type == SSTDIN)
1072 shf_flush(s->u.shf);
1073 }
1074 /* XXX: temporary kludge to restore source after a
1075 * trap may have been executed.
1076 */
1077 source = s;
1078 #ifdef KSH
1079 if (have_tty && ksh_tmout)
1080 {
1081 ksh_tmout_state = TMOUT_EXECUTING;
1082 alarm(0);
1083 }
1084 #endif /* KSH */
1085 s->start = s->str = Xstring(s->xs, xp);
1086 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1087 /* Note: if input is all nulls, this is not eof */
1088 if (Xlength(s->xs, xp) == 0) { /* EOF */
1089 if (s->type == SFILE)
1090 shf_fdclose(s->u.shf);
1091 s->str = NULL;
1092 } else if (interactive) {
1093 #ifdef HISTORY
1094 char *p = Xstring(s->xs, xp);
1095 if (cur_prompt == PS1)
1096 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1097 p++;
1098 if (*p) {
1099 # ifdef EASY_HISTORY
1100 if (cur_prompt == PS2)
1101 histappend(Xstring(s->xs, xp), 1);
1102 else
1103 # endif /* EASY_HISTORY */
1104 {
1105 s->line++;
1106 histsave(s->line, s->str, 1);
1107 }
1108 }
1109 #endif /* HISTORY */
1110 }
1111 if (interactive)
1112 set_prompt(PS2, (Source *) 0);
1113 }
1114
1115 void
1116 set_prompt(to, s)
1117 int to;
1118 Source *s;
1119 {
1120 cur_prompt = to;
1121
1122 switch (to) {
1123 case PS1: /* command */
1124 #ifdef KSH
1125 /* Substitute ! and !! here, before substitutions are done
1126 * so ! in expanded variables are not expanded.
1127 * NOTE: this is not what at&t ksh does (it does it after
1128 * substitutions, POSIX doesn't say which is to be done.
1129 */
1130 {
1131 struct shf *shf;
1132 char *ps1;
1133 Area *saved_atemp;
1134 #ifdef __GNUC__
1135 (void) &ps1;
1136 #endif
1137
1138 ps1 = str_val(global("PS1"));
1139 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1140 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1141 while (*ps1) {
1142 if (*ps1 != '!' || *++ps1 == '!')
1143 shf_putchar(*ps1++, shf);
1144 else
1145 shf_fprintf(shf, "%d",
1146 s ? s->line + 1 : 0);
1147 }
1148 ps1 = shf_sclose(shf);
1149 saved_atemp = ATEMP;
1150 newenv(E_ERRH);
1151 if (ksh_sigsetjmp(e->jbuf, 0)) {
1152 prompt = safe_prompt;
1153 /* Don't print an error - assume it has already
1154 * been printed. Reason is we may have forked
1155 * to run a command and the child may be
1156 * unwinding its stack through this code as it
1157 * exits.
1158 */
1159 } else
1160 prompt = str_save(substitute(ps1, 0),
1161 saved_atemp);
1162 quitenv();
1163 }
1164 #else /* KSH */
1165 prompt = str_val(global("PS1"));
1166 #endif /* KSH */
1167 break;
1168
1169 case PS2: /* command continuation */
1170 prompt = str_val(global("PS2"));
1171 break;
1172 }
1173 }
1174
1175 /* See also related routine, promptlen() in edit.c */
1176 void
1177 pprompt(cp, ntruncate)
1178 const char *cp;
1179 int ntruncate;
1180 {
1181 #if 0
1182 char nbuf[32];
1183 int c;
1184
1185 while (*cp != 0) {
1186 if (*cp != '!')
1187 c = *cp++;
1188 else if (*++cp == '!')
1189 c = *cp++;
1190 else {
1191 int len;
1192 char *p;
1193
1194 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1195 source->line + 1);
1196 len = strlen(nbuf);
1197 if (ntruncate) {
1198 if (ntruncate >= len) {
1199 ntruncate -= len;
1200 continue;
1201 }
1202 p += ntruncate;
1203 len -= ntruncate;
1204 ntruncate = 0;
1205 }
1206 shf_write(p, len, shl_out);
1207 continue;
1208 }
1209 if (ntruncate)
1210 --ntruncate;
1211 else
1212 shf_putc(c, shl_out);
1213 }
1214 #endif /* 0 */
1215 shf_puts(cp + ntruncate, shl_out);
1216 shf_flush(shl_out);
1217 }
1218
1219 /* Read the variable part of a ${...} expression (ie, up to but not including
1220 * the :[-+?=#%] or close-brace.
1221 */
1222 static char *
1223 get_brace_var(wsp, wp)
1224 XString *wsp;
1225 char *wp;
1226 {
1227 enum parse_state {
1228 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1229 PS_NUMBER, PS_VAR1, PS_END
1230 }
1231 state;
1232 char c;
1233
1234 state = PS_INITIAL;
1235 while (1) {
1236 c = getsc();
1237 /* State machine to figure out where the variable part ends. */
1238 switch (state) {
1239 case PS_INITIAL:
1240 if (c == '#') {
1241 state = PS_SAW_HASH;
1242 break;
1243 }
1244 /* fall through.. */
1245 case PS_SAW_HASH:
1246 if (letter(c))
1247 state = PS_IDENT;
1248 else if (digit(c))
1249 state = PS_NUMBER;
1250 else if (ctype(c, C_VAR1))
1251 state = PS_VAR1;
1252 else
1253 state = PS_END;
1254 break;
1255 case PS_IDENT:
1256 if (!letnum(c)) {
1257 state = PS_END;
1258 if (c == '[') {
1259 char *tmp, *p;
1260
1261 if (!arraysub(&tmp))
1262 yyerror("missing ]\n");
1263 *wp++ = c;
1264 for (p = tmp; *p; ) {
1265 Xcheck(*wsp, wp);
1266 *wp++ = *p++;
1267 }
1268 afree(tmp, ATEMP);
1269 c = getsc(); /* the ] */
1270 }
1271 }
1272 break;
1273 case PS_NUMBER:
1274 if (!digit(c))
1275 state = PS_END;
1276 break;
1277 case PS_VAR1:
1278 state = PS_END;
1279 break;
1280 case PS_END: /* keep gcc happy */
1281 break;
1282 }
1283 if (state == PS_END) {
1284 *wp++ = '\0'; /* end of variable part */
1285 ungetsc(c);
1286 break;
1287 }
1288 Xcheck(*wsp, wp);
1289 *wp++ = c;
1290 }
1291 return wp;
1292 }
1293
1294 /*
1295 * Save an array subscript - returns true if matching bracket found, false
1296 * if eof or newline was found.
1297 * (Returned string double null terminated)
1298 */
1299 static int
1300 arraysub(strp)
1301 char **strp;
1302 {
1303 XString ws;
1304 char *wp;
1305 char c;
1306 int depth = 1; /* we are just past the initial [ */
1307
1308 Xinit(ws, wp, 32, ATEMP);
1309
1310 do {
1311 c = getsc();
1312 Xcheck(ws, wp);
1313 *wp++ = c;
1314 if (c == '[')
1315 depth++;
1316 else if (c == ']')
1317 depth--;
1318 } while (depth > 0 && c && c != '\n');
1319
1320 *wp++ = '\0';
1321 *strp = Xclose(ws, wp);
1322
1323 return depth == 0 ? 1 : 0;
1324 }
1325
1326 /* Unget a char: handles case when we are already at the start of the buffer */
1327 static const char *
1328 ungetsc(c)
1329 int c;
1330 {
1331 if (backslash_skip)
1332 backslash_skip--;
1333 /* Don't unget eof... */
1334 if (source->str == null && c == '\0')
1335 return source->str;
1336 if (source->str > source->start)
1337 source->str--;
1338 else {
1339 Source *s;
1340
1341 s = pushs(SREREAD, source->areap);
1342 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1343 s->start = s->str = s->ugbuf;
1344 s->next = source;
1345 source = s;
1346 }
1347 return source->str;
1348 }
1349
1350
1351 /* Called to get a char that isn't a \newline sequence. */
1352 static int
1353 getsc_bn ARGS((void))
1354 {
1355 int c, c2;
1356
1357 if (ignore_backslash_newline)
1358 return getsc_();
1359
1360 if (backslash_skip == 1) {
1361 backslash_skip = 2;
1362 return getsc_();
1363 }
1364
1365 backslash_skip = 0;
1366
1367 while (1) {
1368 c = getsc_();
1369 if (c == '\\') {
1370 if ((c2 = getsc_()) == '\n')
1371 /* ignore the \newline; get the next char... */
1372 continue;
1373 ungetsc(c2);
1374 backslash_skip = 1;
1375 }
1376 return c;
1377 }
1378 }
1379
1380 static Lex_state *
1381 push_state_(si, old_end)
1382 State_info *si;
1383 Lex_state *old_end;
1384 {
1385 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1386
1387 new[0].ls_info.base = old_end;
1388 si->base = &new[0];
1389 si->end = &new[STATE_BSIZE];
1390 return &new[1];
1391 }
1392
1393 static Lex_state *
1394 pop_state_(si, old_end)
1395 State_info *si;
1396 Lex_state *old_end;
1397 {
1398 Lex_state *old_base = si->base;
1399
1400 si->base = old_end->ls_info.base - STATE_BSIZE;
1401 si->end = old_end->ls_info.base;
1402
1403 afree(old_base, ATEMP);
1404
1405 return si->base + STATE_BSIZE - 1;
1406 }
1407