lex.c revision 1.12 1 /* $NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $ */
2
3 /*
4 * lexical analysis and source input
5 */
6 #include <sys/cdefs.h>
7
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $");
10 #endif
11
12
13 #include "sh.h"
14 #include <ctype.h>
15
16
/*
 * One entry of the lexer's state stack: the state itself (ls_state) plus
 * whatever bookkeeping that particular state needs (ls_info).  Only the
 * union member that corresponds to ls_state is meaningful at any time.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		/* element 0 of a state block: links to the other block */
		Lex_state *base;
	} ls_info;
};

/* Shorthands for reaching the per-state information of a Lex_state. */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
53
54 typedef struct State_info State_info;
55 struct State_info {
56 Lex_state *base;
57 Lex_state *end;
58 };
59
60
61 static void readhere ARGS((struct ioword *iop));
62 static int getsc__ ARGS((void));
63 static void getsc_line ARGS((Source *s));
64 static int getsc_bn ARGS((void));
65 static char *get_brace_var ARGS((XString *wsp, char *wp));
66 static int arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
71
72 static int backslash_skip;
73 static int ignore_backslash_newline;
74
/*
 * Fast path for getsc_bn(): the next character is taken straight from the
 * source buffer unless the buffer is exhausted, the character is a
 * backslash, or a backslash is currently being tracked.
 */
#define getsc()		((*source->str == '\0' || *source->str == '\\' \
			  || backslash_skip) \
			 ? getsc_bn() : *source->str++)
/* Fast path for getsc__(): only an exhausted buffer needs the function. */
#define getsc_()	((*source->str == '\0') ? getsc__() : *source->str++)

/* number of Lex_state entries per state block (entry 0 is the header) */
#define STATE_BSIZE	32

/*
 * Enter state (s): advance statep, chaining a new block when the current
 * one is full.  Uses the locals statep, state_info and state of yylex().
 */
#define PUSH_STATE(s)	do { \
			    ++statep; \
			    if (statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    statep->ls_state = (s); \
			    state = statep->ls_state; \
			} while (0)

/*
 * Leave the current state: step statep back, releasing the current block
 * when its header entry is reached, and reload state.
 */
#define POP_STATE()	do { \
			    --statep; \
			    if (statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
94
95
96
97 /*
98 * Lexical analyzer
99 *
100 * tokens are not regular expressions, they are LL(1).
101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102 * hence the state stack.
103 */
104
105 int
106 yylex(cf)
107 int cf;
108 {
109 Lex_state states[STATE_BSIZE], *statep;
110 State_info state_info;
111 register int c, state;
112 XString ws; /* expandable output word */
113 register char *wp; /* output word pointer */
114 char *sp, *dp;
115 int c2;
116
117
118 Again:
119 states[0].ls_state = -1;
120 states[0].ls_info.base = (Lex_state *) 0;
121 statep = &states[1];
122 state_info.base = states;
123 state_info.end = &states[STATE_BSIZE];
124
125 Xinit(ws, wp, 64, ATEMP);
126
127 backslash_skip = 0;
128 ignore_backslash_newline = 0;
129
130 if (cf&ONEWORD)
131 state = SWORD;
132 #ifdef KSH
133 else if (cf&LETEXPR) {
134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
135 state = SLETPAREN;
136 statep->ls_sletparen.nparen = 0;
137 }
138 #endif /* KSH */
139 else { /* normal lexing */
140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
141 while ((c = getsc()) == ' ' || c == '\t')
142 ;
143 if (c == '#') {
144 ignore_backslash_newline++;
145 while ((c = getsc()) != '\0' && c != '\n')
146 ;
147 ignore_backslash_newline--;
148 }
149 ungetsc(c);
150 }
151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
152 source->flags &= ~SF_ALIAS;
153 /* In POSIX mode, a trailing space only counts if we are
154 * parsing a simple command
155 */
156 if (!Flag(FPOSIX) || (cf & CMDWORD))
157 cf |= ALIAS;
158 }
159
160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 statep->ls_state = state;
162
163 /* collect non-special or quoted characters to form word */
164 while (!((c = getsc()) == 0
165 || ((state == SBASE || state == SHEREDELIM)
166 && ctype(c, C_LEX1))))
167 {
168 Xcheck(ws, wp);
169 switch (state) {
170 case SBASE:
171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 *wp = EOS; /* temporary */
173 if (is_wdvarname(Xstring(ws, wp), FALSE))
174 {
175 char *p, *tmp;
176
177 if (arraysub(&tmp)) {
178 *wp++ = CHAR;
179 *wp++ = c;
180 for (p = tmp; *p; ) {
181 Xcheck(ws, wp);
182 *wp++ = CHAR;
183 *wp++ = *p++;
184 }
185 afree(tmp, ATEMP);
186 break;
187 } else {
188 Source *s;
189
190 s = pushs(SREREAD,
191 source->areap);
192 s->start = s->str
193 = s->u.freeme = tmp;
194 s->next = source;
195 source = s;
196 }
197 }
198 *wp++ = CHAR;
199 *wp++ = c;
200 break;
201 }
202 /* fall through.. */
203 Sbase1: /* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 if (c == '*' || c == '@' || c == '+' || c == '?'
206 || c == '!')
207 {
208 c2 = getsc();
209 if (c2 == '(' /*)*/ ) {
210 *wp++ = OPAT;
211 *wp++ = c;
212 PUSH_STATE(SPATTERN);
213 break;
214 }
215 ungetsc(c2);
216 }
217 #endif /* KSH */
218 /* fall through.. */
219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
220 switch (c) {
221 case '\\':
222 c = getsc();
223 #ifdef OS2
224 if (isalnum((unsigned char)c)) {
225 *wp++ = CHAR, *wp++ = '\\';
226 *wp++ = CHAR, *wp++ = c;
227 } else
228 #endif
229 if (c) /* trailing \ is lost */
230 *wp++ = QCHAR, *wp++ = c;
231 break;
232 case '\'':
233 *wp++ = OQUOTE;
234 ignore_backslash_newline++;
235 PUSH_STATE(SSQUOTE);
236 break;
237 case '"':
238 *wp++ = OQUOTE;
239 PUSH_STATE(SDQUOTE);
240 break;
241 default:
242 goto Subst;
243 }
244 break;
245
246 Subst:
247 switch (c) {
248 case '\\':
249 c = getsc();
250 switch (c) {
251 case '\\':
252 case '$': case '`':
253 *wp++ = QCHAR, *wp++ = c;
254 break;
255 case '"':
256 if ((cf & HEREDOC) == 0) {
257 *wp++ = QCHAR, *wp++ = c;
258 break;
259 }
260 /* FALLTROUGH */
261 default:
262 Xcheck(ws, wp);
263 if (c) { /* trailing \ is lost */
264 *wp++ = CHAR, *wp++ = '\\';
265 *wp++ = CHAR, *wp++ = c;
266 }
267 break;
268 }
269 break;
270 case '$':
271 c = getsc();
272 if (c == '(') /*)*/ {
273 c = getsc();
274 if (c == '(') /*)*/ {
275 PUSH_STATE(SASPAREN);
276 statep->ls_sasparen.nparen = 2;
277 statep->ls_sasparen.start =
278 Xsavepos(ws, wp);
279 *wp++ = EXPRSUB;
280 } else {
281 ungetsc(c);
282 PUSH_STATE(SCSPAREN);
283 statep->ls_scsparen.nparen = 1;
284 statep->ls_scsparen.csstate = 0;
285 *wp++ = COMSUB;
286 }
287 } else if (c == '{') /*}*/ {
288 *wp++ = OSUBST;
289 *wp++ = '{'; /*}*/
290 wp = get_brace_var(&ws, wp);
291 c = getsc();
292 /* allow :# and :% (ksh88 compat) */
293 if (c == ':') {
294 *wp++ = CHAR, *wp++ = c;
295 c = getsc();
296 }
297 /* If this is a trim operation,
298 * treat (,|,) specially in STBRACE.
299 */
300 if (c == '#' || c == '%') {
301 ungetsc(c);
302 PUSH_STATE(STBRACE);
303 } else {
304 ungetsc(c);
305 PUSH_STATE(SBRACE);
306 }
307 } else if (ctype(c, C_ALPHA)) {
308 *wp++ = OSUBST;
309 *wp++ = 'X';
310 do {
311 Xcheck(ws, wp);
312 *wp++ = c;
313 c = getsc();
314 } while (ctype(c, C_ALPHA|C_DIGIT));
315 *wp++ = '\0';
316 *wp++ = CSUBST;
317 *wp++ = 'X';
318 ungetsc(c);
319 } else if (ctype(c, C_DIGIT|C_VAR1)) {
320 Xcheck(ws, wp);
321 *wp++ = OSUBST;
322 *wp++ = 'X';
323 *wp++ = c;
324 *wp++ = '\0';
325 *wp++ = CSUBST;
326 *wp++ = 'X';
327 } else {
328 *wp++ = CHAR, *wp++ = '$';
329 ungetsc(c);
330 }
331 break;
332 case '`':
333 PUSH_STATE(SBQUOTE);
334 *wp++ = COMSUB;
335 /* Need to know if we are inside double quotes
336 * since sh/at&t-ksh translate the \" to " in
337 * "`..\"..`".
338 * This is not done in posix mode (section
339 * 3.2.3, Double Quotes: "The backquote shall
340 * retain its special meaning introducing the
341 * other form of command substitution (see
342 * 3.6.3). The portion of the quoted string
343 * from the initial backquote and the
344 * characters up to the next backquote that
345 * is not preceded by a backslash (having
346 * escape characters removed) defines that
347 * command whose output replaces `...` when
348 * the word is expanded."
349 * Section 3.6.3, Command Substitution:
350 * "Within the backquoted style of command
351 * substitution, backslash shall retain its
352 * literal meaning, except when followed by
353 * $ ` \.").
354 */
355 statep->ls_sbquote.indquotes = 0;
356 if (!Flag(FPOSIX)) {
357 Lex_state *s = statep;
358 Lex_state *base = state_info.base;
359 while (1) {
360 for (; s != base; s--) {
361 if (s->ls_state == SDQUOTE) {
362 statep->ls_sbquote.indquotes = 1;
363 break;
364 }
365 }
366 if (s != base)
367 break;
368 if (!(s = s->ls_info.base))
369 break;
370 base = s-- - STATE_BSIZE;
371 }
372 }
373 break;
374 default:
375 *wp++ = CHAR, *wp++ = c;
376 }
377 break;
378
379 case SSQUOTE:
380 if (c == '\'') {
381 POP_STATE();
382 *wp++ = CQUOTE;
383 ignore_backslash_newline--;
384 } else
385 *wp++ = QCHAR, *wp++ = c;
386 break;
387
388 case SDQUOTE:
389 if (c == '"') {
390 POP_STATE();
391 *wp++ = CQUOTE;
392 } else
393 goto Subst;
394 break;
395
396 case SCSPAREN: /* $( .. ) */
397 /* todo: deal with $(...) quoting properly
398 * kludge to partly fake quoting inside $(..): doesn't
399 * really work because nested $(..) or ${..} inside
400 * double quotes aren't dealt with.
401 */
402 switch (statep->ls_scsparen.csstate) {
403 case 0: /* normal */
404 switch (c) {
405 case '(':
406 statep->ls_scsparen.nparen++;
407 break;
408 case ')':
409 statep->ls_scsparen.nparen--;
410 break;
411 case '\\':
412 statep->ls_scsparen.csstate = 1;
413 break;
414 case '"':
415 statep->ls_scsparen.csstate = 2;
416 break;
417 case '\'':
418 statep->ls_scsparen.csstate = 4;
419 ignore_backslash_newline++;
420 break;
421 }
422 break;
423
424 case 1: /* backslash in normal mode */
425 case 3: /* backslash in double quotes */
426 --statep->ls_scsparen.csstate;
427 break;
428
429 case 2: /* double quotes */
430 if (c == '"')
431 statep->ls_scsparen.csstate = 0;
432 else if (c == '\\')
433 statep->ls_scsparen.csstate = 3;
434 break;
435
436 case 4: /* single quotes */
437 if (c == '\'') {
438 statep->ls_scsparen.csstate = 0;
439 ignore_backslash_newline--;
440 }
441 break;
442 }
443 if (statep->ls_scsparen.nparen == 0) {
444 POP_STATE();
445 *wp++ = 0; /* end of COMSUB */
446 } else
447 *wp++ = c;
448 break;
449
450 case SASPAREN: /* $(( .. )) */
451 /* todo: deal with $((...); (...)) properly */
452 /* XXX should nest using existing state machine
453 * (embed "..", $(...), etc.) */
454 if (c == '(')
455 statep->ls_sasparen.nparen++;
456 else if (c == ')') {
457 statep->ls_sasparen.nparen--;
458 if (statep->ls_sasparen.nparen == 1) {
459 /*(*/
460 if ((c2 = getsc()) == ')') {
461 POP_STATE();
462 *wp++ = 0; /* end of EXPRSUB */
463 break;
464 } else {
465 char *s;
466
467 ungetsc(c2);
468 /* mismatched parenthesis -
469 * assume we were really
470 * parsing a $(..) expression
471 */
472 s = Xrestpos(ws, wp,
473 statep->ls_sasparen.start);
474 memmove(s + 1, s, wp - s);
475 *s++ = COMSUB;
476 *s = '('; /*)*/
477 wp++;
478 statep->ls_scsparen.nparen = 1;
479 statep->ls_scsparen.csstate = 0;
480 state = statep->ls_state
481 = SCSPAREN;
482
483 }
484 }
485 }
486 *wp++ = c;
487 break;
488
489 case SBRACE:
490 /*{*/
491 if (c == '}') {
492 POP_STATE();
493 *wp++ = CSUBST;
494 *wp++ = /*{*/ '}';
495 } else
496 goto Sbase1;
497 break;
498
499 case STBRACE:
500 /* Same as SBRACE, except (,|,) treated specially */
501 /*{*/
502 if (c == '}') {
503 POP_STATE();
504 *wp++ = CSUBST;
505 *wp++ = /*{*/ '}';
506 } else if (c == '|') {
507 *wp++ = SPAT;
508 } else if (c == '(') {
509 *wp++ = OPAT;
510 *wp++ = ' '; /* simile for @ */
511 PUSH_STATE(SPATTERN);
512 } else
513 goto Sbase1;
514 break;
515
516 case SBQUOTE:
517 if (c == '`') {
518 *wp++ = 0;
519 POP_STATE();
520 } else if (c == '\\') {
521 switch (c = getsc()) {
522 case '\\':
523 case '$': case '`':
524 *wp++ = c;
525 break;
526 case '"':
527 if (statep->ls_sbquote.indquotes) {
528 *wp++ = c;
529 break;
530 }
531 /* fall through.. */
532 default:
533 if (c) { /* trailing \ is lost */
534 *wp++ = '\\';
535 *wp++ = c;
536 }
537 break;
538 }
539 } else
540 *wp++ = c;
541 break;
542
543 case SWORD: /* ONEWORD */
544 goto Subst;
545
546 #ifdef KSH
547 case SLETPAREN: /* LETEXPR: (( ... )) */
548 /*(*/
549 if (c == ')') {
550 if (statep->ls_sletparen.nparen > 0)
551 --statep->ls_sletparen.nparen;
552 /*(*/
553 else if ((c2 = getsc()) == ')') {
554 c = 0;
555 *wp++ = CQUOTE;
556 goto Done;
557 } else
558 ungetsc(c2);
559 } else if (c == '(')
560 /* parenthesis inside quotes and backslashes
561 * are lost, but at&t ksh doesn't count them
562 * either
563 */
564 ++statep->ls_sletparen.nparen;
565 goto Sbase2;
566 #endif /* KSH */
567
568 case SHEREDELIM: /* <<,<<- delimiter */
569 /* XXX chuck this state (and the next) - use
570 * the existing states ($ and \`..` should be
571 * stripped of their specialness after the
572 * fact).
573 */
574 /* here delimiters need a special case since
575 * $ and `..` are not to be treated specially
576 */
577 if (c == '\\') {
578 c = getsc();
579 if (c) { /* trailing \ is lost */
580 *wp++ = QCHAR;
581 *wp++ = c;
582 }
583 } else if (c == '\'') {
584 PUSH_STATE(SSQUOTE);
585 *wp++ = OQUOTE;
586 ignore_backslash_newline++;
587 } else if (c == '"') {
588 state = statep->ls_state = SHEREDQUOTE;
589 *wp++ = OQUOTE;
590 } else {
591 *wp++ = CHAR;
592 *wp++ = c;
593 }
594 break;
595
596 case SHEREDQUOTE: /* " in <<,<<- delimiter */
597 if (c == '"') {
598 *wp++ = CQUOTE;
599 state = statep->ls_state = SHEREDELIM;
600 } else {
601 if (c == '\\') {
602 switch (c = getsc()) {
603 case '\\': case '"':
604 case '$': case '`':
605 break;
606 default:
607 if (c) { /* trailing \ lost */
608 *wp++ = CHAR;
609 *wp++ = '\\';
610 }
611 break;
612 }
613 }
614 *wp++ = CHAR;
615 *wp++ = c;
616 }
617 break;
618
619 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
620 if ( /*(*/ c == ')') {
621 *wp++ = CPAT;
622 POP_STATE();
623 } else if (c == '|') {
624 *wp++ = SPAT;
625 } else if (c == '(') {
626 *wp++ = OPAT;
627 *wp++ = ' '; /* simile for @ */
628 PUSH_STATE(SPATTERN);
629 } else
630 goto Sbase1;
631 break;
632 }
633 }
634 Done:
635 Xcheck(ws, wp);
636 if (statep != &states[1])
637 /* XXX figure out what is missing */
638 yyerror("no closing quote\n");
639
640 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
641 if (state == SHEREDELIM)
642 state = SBASE;
643
644 dp = Xstring(ws, wp);
645 if ((c == '<' || c == '>') && state == SBASE
646 && ((c2 = Xlength(ws, wp)) == 0
647 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
648 {
649 struct ioword *iop =
650 (struct ioword *) alloc(sizeof(*iop), ATEMP);
651
652 if (c2 == 2)
653 iop->unit = dp[1] - '0';
654 else
655 iop->unit = c == '>'; /* 0 for <, 1 for > */
656
657 c2 = getsc();
658 /* <<, >>, <> are ok, >< is not */
659 if (c == c2 || (c == '<' && c2 == '>')) {
660 iop->flag = c == c2 ?
661 (c == '>' ? IOCAT : IOHERE) : IORDWR;
662 if (iop->flag == IOHERE) {
663 if ((c2 = getsc()) == '-') {
664 iop->flag |= IOSKIP;
665 } else {
666 ungetsc(c2);
667 }
668 }
669 } else if (c2 == '&')
670 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
671 else {
672 iop->flag = c == '>' ? IOWRITE : IOREAD;
673 if (c == '>' && c2 == '|')
674 iop->flag |= IOCLOB;
675 else
676 ungetsc(c2);
677 }
678
679 iop->name = (char *) 0;
680 iop->delim = (char *) 0;
681 iop->heredoc = (char *) 0;
682 Xfree(ws, wp); /* free word */
683 yylval.iop = iop;
684 return REDIR;
685 }
686
687 if (wp == dp && state == SBASE) {
688 Xfree(ws, wp); /* free word */
689 /* no word, process LEX1 character */
690 switch (c) {
691 default:
692 return c;
693
694 case '|':
695 case '&':
696 case ';':
697 if ((c2 = getsc()) == c)
698 c = (c == ';') ? BREAK :
699 (c == '|') ? LOGOR :
700 (c == '&') ? LOGAND :
701 YYERRCODE;
702 #ifdef KSH
703 else if (c == '|' && c2 == '&')
704 c = COPROC;
705 #endif /* KSH */
706 else
707 ungetsc(c2);
708 return c;
709
710 case '\n':
711 gethere();
712 if (cf & CONTIN)
713 goto Again;
714 return c;
715
716 case '(': /*)*/
717 #ifdef KSH
718 if ((c2 = getsc()) == '(') /*)*/
719 /* XXX need to handle ((...); (...)) */
720 c = MDPAREN;
721 else
722 ungetsc(c2);
723 #endif /* KSH */
724 return c;
725 /*(*/
726 case ')':
727 return c;
728 }
729 }
730
731 *wp++ = EOS; /* terminate word */
732 yylval.cp = Xclose(ws, wp);
733 if (state == SWORD
734 #ifdef KSH
735 || state == SLETPAREN
736 #endif /* KSH */
737 ) /* ONEWORD? */
738 return LWORD;
739 ungetsc(c); /* unget terminator */
740
741 /* copy word to unprefixed string ident */
742 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
743 *dp++ = *sp++;
744 /* Make sure the ident array stays '\0' padded */
745 memset(dp, 0, (ident+IDENT) - dp + 1);
746 if (c != EOS)
747 *ident = '\0'; /* word is not unquoted */
748
749 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
750 struct tbl *p;
751 int h = hash(ident);
752
753 /* { */
754 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
755 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
756 {
757 afree(yylval.cp, ATEMP);
758 return p->val.i;
759 }
760 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
761 && (p->flag & ISSET))
762 {
763 register Source *s;
764
765 for (s = source; s->type == SALIAS; s = s->next)
766 if (s->u.tblp == p)
767 return LWORD;
768 /* push alias expansion */
769 s = pushs(SALIAS, source->areap);
770 s->start = s->str = p->val.s;
771 s->u.tblp = p;
772 s->next = source;
773 source = s;
774 afree(yylval.cp, ATEMP);
775 goto Again;
776 }
777 }
778
779 return LWORD;
780 }
781
782 static void
783 gethere()
784 {
785 register struct ioword **p;
786
787 for (p = heres; p < herep; p++)
788 readhere(*p);
789 herep = heres;
790 }
791
792 /*
793 * read "<<word" text into temp file
794 */
795
796 static void
797 readhere(iop)
798 struct ioword *iop;
799 {
800 register int c;
801 char *volatile eof;
802 char *eofp;
803 int skiptabs;
804 XString xs;
805 char *xp;
806 int xpos;
807
808 eof = evalstr(iop->delim, 0);
809
810 if (!(iop->flag & IOEVAL))
811 ignore_backslash_newline++;
812
813 Xinit(xs, xp, 256, ATEMP);
814
815 for (;;) {
816 eofp = eof;
817 skiptabs = iop->flag & IOSKIP;
818 xpos = Xsavepos(xs, xp);
819 while ((c = getsc()) != 0) {
820 if (skiptabs) {
821 if (c == '\t')
822 continue;
823 skiptabs = 0;
824 }
825 if (c != *eofp)
826 break;
827 Xcheck(xs, xp);
828 Xput(xs, xp, c);
829 eofp++;
830 }
831 /* Allow EOF here so commands with out trailing newlines
832 * will work (eg, ksh -c '...', $(...), etc).
833 */
834 if (*eofp == '\0' && (c == 0 || c == '\n')) {
835 xp = Xrestpos(xs, xp, xpos);
836 break;
837 }
838 ungetsc(c);
839 while ((c = getsc()) != '\n') {
840 if (c == 0)
841 yyerror("here document `%s' unclosed\n", eof);
842 Xcheck(xs, xp);
843 Xput(xs, xp, c);
844 }
845 Xcheck(xs, xp);
846 Xput(xs, xp, c);
847 }
848 Xput(xs, xp, '\0');
849 iop->heredoc = Xclose(xs, xp);
850
851 if (!(iop->flag & IOEVAL))
852 ignore_backslash_newline--;
853 }
854
855 void
856 #ifdef HAVE_PROTOTYPES
857 yyerror(const char *fmt, ...)
858 #else
859 yyerror(fmt, va_alist)
860 const char *fmt;
861 va_dcl
862 #endif
863 {
864 va_list va;
865
866 /* pop aliases and re-reads */
867 while (source->type == SALIAS || source->type == SREREAD)
868 source = source->next;
869 source->str = null; /* zap pending input */
870
871 error_prefix(TRUE);
872 SH_VA_START(va, fmt);
873 shf_vfprintf(shl_out, fmt, va);
874 va_end(va);
875 errorf(null);
876 }
877
878 /*
879 * input for yylex with alias expansion
880 */
881
882 Source *
883 pushs(type, areap)
884 int type;
885 Area *areap;
886 {
887 register Source *s;
888
889 s = (Source *) alloc(sizeof(Source), areap);
890 s->type = type;
891 s->str = null;
892 s->start = NULL;
893 s->line = 0;
894 s->errline = 0;
895 s->file = NULL;
896 s->flags = 0;
897 s->next = NULL;
898 s->areap = areap;
899 if (type == SFILE || type == SSTDIN) {
900 char *dummy;
901 Xinit(s->xs, dummy, 256, s->areap);
902 } else
903 memset(&s->xs, 0, sizeof(s->xs));
904 return s;
905 }
906
907 static int
908 getsc__()
909 {
910 register Source *s = source;
911 register int c;
912
913 while ((c = *s->str++) == 0) {
914 s->str = NULL; /* return 0 for EOF by default */
915 switch (s->type) {
916 case SEOF:
917 s->str = null;
918 return 0;
919
920 case SSTDIN:
921 case SFILE:
922 getsc_line(s);
923 break;
924
925 case SWSTR:
926 break;
927
928 case SSTRING:
929 break;
930
931 case SWORDS:
932 s->start = s->str = *s->u.strv++;
933 s->type = SWORDSEP;
934 break;
935
936 case SWORDSEP:
937 if (*s->u.strv == NULL) {
938 s->start = s->str = newline;
939 s->type = SEOF;
940 } else {
941 s->start = s->str = space;
942 s->type = SWORDS;
943 }
944 break;
945
946 case SALIAS:
947 if (s->flags & SF_ALIASEND) {
948 /* pass on an unused SF_ALIAS flag */
949 source = s->next;
950 source->flags |= s->flags & SF_ALIAS;
951 s = source;
952 } else if (*s->u.tblp->val.s
953 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
954 {
955 source = s = s->next; /* pop source stack */
956 /* Note that this alias ended with a space,
957 * enabling alias expansion on the following
958 * word.
959 */
960 s->flags |= SF_ALIAS;
961 } else {
962 /* At this point, we need to keep the current
963 * alias in the source list so recursive
964 * aliases can be detected and we also need
965 * to return the next character. Do this
966 * by temporarily popping the alias to get
967 * the next character and then put it back
968 * in the source list with the SF_ALIASEND
969 * flag set.
970 */
971 source = s->next; /* pop source stack */
972 source->flags |= s->flags & SF_ALIAS;
973 c = getsc__();
974 if (c) {
975 s->flags |= SF_ALIASEND;
976 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
977 s->start = s->str = s->ugbuf;
978 s->next = source;
979 source = s;
980 } else {
981 s = source;
982 /* avoid reading eof twice */
983 s->str = NULL;
984 break;
985 }
986 }
987 continue;
988
989 case SREREAD:
990 if (s->start != s->ugbuf) /* yuck */
991 afree(s->u.freeme, ATEMP);
992 source = s = s->next;
993 continue;
994 }
995 if (s->str == NULL) {
996 s->type = SEOF;
997 s->start = s->str = null;
998 return '\0';
999 }
1000 if (s->flags & SF_ECHO) {
1001 shf_puts(s->str, shl_out);
1002 shf_flush(shl_out);
1003 }
1004 }
1005 return c;
1006 }
1007
1008 static void
1009 getsc_line(s)
1010 Source *s;
1011 {
1012 char *xp = Xstring(s->xs, xp);
1013 int interactive = Flag(FTALKING) && s->type == SSTDIN;
1014 int have_tty = interactive && (s->flags & SF_TTY);
1015
1016 /* Done here to ensure nothing odd happens when a timeout occurs */
1017 XcheckN(s->xs, xp, LINE);
1018 *xp = '\0';
1019 s->start = s->str = xp;
1020
1021 #ifdef KSH
1022 if (have_tty && ksh_tmout) {
1023 ksh_tmout_state = TMOUT_READING;
1024 alarm(ksh_tmout);
1025 }
1026 #endif /* KSH */
1027 #ifdef EDIT
1028 if (have_tty && (0
1029 # ifdef VI
1030 || Flag(FVI)
1031 # endif /* VI */
1032 # ifdef EMACS
1033 || Flag(FEMACS) || Flag(FGMACS)
1034 # endif /* EMACS */
1035 ))
1036 {
1037 int nread;
1038
1039 nread = x_read(xp, LINE);
1040 if (nread < 0) /* read error */
1041 nread = 0;
1042 xp[nread] = '\0';
1043 xp += nread;
1044 }
1045 else
1046 #endif /* EDIT */
1047 {
1048 if (interactive) {
1049 pprompt(prompt, 0);
1050 } else
1051 s->line++;
1052
1053 while (1) {
1054 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1055
1056 if (!p && shf_error(s->u.shf)
1057 && shf_errno(s->u.shf) == EINTR)
1058 {
1059 shf_clearerr(s->u.shf);
1060 if (trap)
1061 runtraps(0);
1062 continue;
1063 }
1064 if (!p || (xp = p, xp[-1] == '\n'))
1065 break;
1066 /* double buffer size */
1067 xp++; /* move past null so doubling works... */
1068 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1069 xp--; /* ...and move back again */
1070 }
1071 /* flush any unwanted input so other programs/builtins
1072 * can read it. Not very optimal, but less error prone
1073 * than flushing else where, dealing with redirections,
1074 * etc..
1075 * todo: reduce size of shf buffer (~128?) if SSTDIN
1076 */
1077 if (s->type == SSTDIN)
1078 shf_flush(s->u.shf);
1079 }
1080 /* XXX: temporary kludge to restore source after a
1081 * trap may have been executed.
1082 */
1083 source = s;
1084 #ifdef KSH
1085 if (have_tty && ksh_tmout)
1086 {
1087 ksh_tmout_state = TMOUT_EXECUTING;
1088 alarm(0);
1089 }
1090 #endif /* KSH */
1091 s->start = s->str = Xstring(s->xs, xp);
1092 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1093 /* Note: if input is all nulls, this is not eof */
1094 if (Xlength(s->xs, xp) == 0) { /* EOF */
1095 if (s->type == SFILE)
1096 shf_fdclose(s->u.shf);
1097 s->str = NULL;
1098 } else if (interactive) {
1099 #ifdef HISTORY
1100 char *p = Xstring(s->xs, xp);
1101 if (cur_prompt == PS1)
1102 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1103 p++;
1104 if (*p) {
1105 # ifdef EASY_HISTORY
1106 if (cur_prompt == PS2)
1107 histappend(Xstring(s->xs, xp), 1);
1108 else
1109 # endif /* EASY_HISTORY */
1110 {
1111 s->line++;
1112 histsave(s->line, s->str, 1);
1113 }
1114 }
1115 #endif /* HISTORY */
1116 }
1117 if (interactive)
1118 set_prompt(PS2, (Source *) 0);
1119 }
1120
1121 void
1122 set_prompt(to, s)
1123 int to;
1124 Source *s;
1125 {
1126 cur_prompt = to;
1127
1128 switch (to) {
1129 case PS1: /* command */
1130 #ifdef KSH
1131 /* Substitute ! and !! here, before substitutions are done
1132 * so ! in expanded variables are not expanded.
1133 * NOTE: this is not what at&t ksh does (it does it after
1134 * substitutions, POSIX doesn't say which is to be done.
1135 */
1136 {
1137 struct shf *shf;
1138 char * volatile ps1;
1139 Area *saved_atemp;
1140
1141 ps1 = str_val(global("PS1"));
1142 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1143 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1144 while (*ps1) {
1145 if (*ps1 != '!' || *++ps1 == '!')
1146 shf_putchar(*ps1++, shf);
1147 else
1148 shf_fprintf(shf, "%d",
1149 s ? s->line + 1 : 0);
1150 }
1151 ps1 = shf_sclose(shf);
1152 saved_atemp = ATEMP;
1153 newenv(E_ERRH);
1154 if (ksh_sigsetjmp(e->jbuf, 0)) {
1155 prompt = safe_prompt;
1156 /* Don't print an error - assume it has already
1157 * been printed. Reason is we may have forked
1158 * to run a command and the child may be
1159 * unwinding its stack through this code as it
1160 * exits.
1161 */
1162 } else
1163 prompt = str_save(substitute(ps1, 0),
1164 saved_atemp);
1165 quitenv();
1166 }
1167 #else /* KSH */
1168 prompt = str_val(global("PS1"));
1169 #endif /* KSH */
1170 break;
1171
1172 case PS2: /* command continuation */
1173 prompt = str_val(global("PS2"));
1174 break;
1175 }
1176 }
1177
1178 /* See also related routine, promptlen() in edit.c */
1179 void
1180 pprompt(cp, ntruncate)
1181 const char *cp;
1182 int ntruncate;
1183 {
1184 #if 0
1185 char nbuf[32];
1186 int c;
1187
1188 while (*cp != 0) {
1189 if (*cp != '!')
1190 c = *cp++;
1191 else if (*++cp == '!')
1192 c = *cp++;
1193 else {
1194 int len;
1195 char *p;
1196
1197 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1198 source->line + 1);
1199 len = strlen(nbuf);
1200 if (ntruncate) {
1201 if (ntruncate >= len) {
1202 ntruncate -= len;
1203 continue;
1204 }
1205 p += ntruncate;
1206 len -= ntruncate;
1207 ntruncate = 0;
1208 }
1209 shf_write(p, len, shl_out);
1210 continue;
1211 }
1212 if (ntruncate)
1213 --ntruncate;
1214 else
1215 shf_putc(c, shl_out);
1216 }
1217 #endif /* 0 */
1218 shf_puts(cp + ntruncate, shl_out);
1219 shf_flush(shl_out);
1220 }
1221
1222 /* Read the variable part of a ${...} expression (ie, up to but not including
1223 * the :[-+?=#%] or close-brace.
1224 */
1225 static char *
1226 get_brace_var(wsp, wp)
1227 XString *wsp;
1228 char *wp;
1229 {
1230 enum parse_state {
1231 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1232 PS_NUMBER, PS_VAR1, PS_END
1233 }
1234 state;
1235 char c;
1236
1237 state = PS_INITIAL;
1238 while (1) {
1239 c = getsc();
1240 /* State machine to figure out where the variable part ends. */
1241 switch (state) {
1242 case PS_INITIAL:
1243 if (c == '#') {
1244 state = PS_SAW_HASH;
1245 break;
1246 }
1247 /* fall through.. */
1248 case PS_SAW_HASH:
1249 if (letter(c))
1250 state = PS_IDENT;
1251 else if (digit(c))
1252 state = PS_NUMBER;
1253 else if (ctype(c, C_VAR1))
1254 state = PS_VAR1;
1255 else
1256 state = PS_END;
1257 break;
1258 case PS_IDENT:
1259 if (!letnum(c)) {
1260 state = PS_END;
1261 if (c == '[') {
1262 char *tmp, *p;
1263
1264 if (!arraysub(&tmp))
1265 yyerror("missing ]\n");
1266 *wp++ = c;
1267 for (p = tmp; *p; ) {
1268 Xcheck(*wsp, wp);
1269 *wp++ = *p++;
1270 }
1271 afree(tmp, ATEMP);
1272 c = getsc(); /* the ] */
1273 }
1274 }
1275 break;
1276 case PS_NUMBER:
1277 if (!digit(c))
1278 state = PS_END;
1279 break;
1280 case PS_VAR1:
1281 state = PS_END;
1282 break;
1283 case PS_END: /* keep gcc happy */
1284 break;
1285 }
1286 if (state == PS_END) {
1287 *wp++ = '\0'; /* end of variable part */
1288 ungetsc(c);
1289 break;
1290 }
1291 Xcheck(*wsp, wp);
1292 *wp++ = c;
1293 }
1294 return wp;
1295 }
1296
1297 /*
1298 * Save an array subscript - returns true if matching bracket found, false
1299 * if eof or newline was found.
1300 * (Returned string double null terminated)
1301 */
1302 static int
1303 arraysub(strp)
1304 char **strp;
1305 {
1306 XString ws;
1307 char *wp;
1308 char c;
1309 int depth = 1; /* we are just past the initial [ */
1310
1311 Xinit(ws, wp, 32, ATEMP);
1312
1313 do {
1314 c = getsc();
1315 Xcheck(ws, wp);
1316 *wp++ = c;
1317 if (c == '[')
1318 depth++;
1319 else if (c == ']')
1320 depth--;
1321 } while (depth > 0 && c && c != '\n');
1322
1323 *wp++ = '\0';
1324 *strp = Xclose(ws, wp);
1325
1326 return depth == 0 ? 1 : 0;
1327 }
1328
1329 /* Unget a char: handles case when we are already at the start of the buffer */
1330 static const char *
1331 ungetsc(c)
1332 int c;
1333 {
1334 if (backslash_skip)
1335 backslash_skip--;
1336 /* Don't unget eof... */
1337 if (source->str == null && c == '\0')
1338 return source->str;
1339 if (source->str > source->start)
1340 source->str--;
1341 else {
1342 Source *s;
1343
1344 s = pushs(SREREAD, source->areap);
1345 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1346 s->start = s->str = s->ugbuf;
1347 s->next = source;
1348 source = s;
1349 }
1350 return source->str;
1351 }
1352
1353
1354 /* Called to get a char that isn't a \newline sequence. */
1355 static int
1356 getsc_bn ARGS((void))
1357 {
1358 int c, c2;
1359
1360 if (ignore_backslash_newline)
1361 return getsc_();
1362
1363 if (backslash_skip == 1) {
1364 backslash_skip = 2;
1365 return getsc_();
1366 }
1367
1368 backslash_skip = 0;
1369
1370 while (1) {
1371 c = getsc_();
1372 if (c == '\\') {
1373 if ((c2 = getsc_()) == '\n')
1374 /* ignore the \newline; get the next char... */
1375 continue;
1376 ungetsc(c2);
1377 backslash_skip = 1;
1378 }
1379 return c;
1380 }
1381 }
1382
1383 static Lex_state *
1384 push_state_(si, old_end)
1385 State_info *si;
1386 Lex_state *old_end;
1387 {
1388 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1389
1390 new[0].ls_info.base = old_end;
1391 si->base = &new[0];
1392 si->end = &new[STATE_BSIZE];
1393 return &new[1];
1394 }
1395
1396 static Lex_state *
1397 pop_state_(si, old_end)
1398 State_info *si;
1399 Lex_state *old_end;
1400 {
1401 Lex_state *old_base = si->base;
1402
1403 si->base = old_end->ls_info.base - STATE_BSIZE;
1404 si->end = old_end->ls_info.base;
1405
1406 afree(old_base, ATEMP);
1407
1408 return si->base + STATE_BSIZE - 1;
1409 }
1410