lex.c revision 1.5 1 /* $NetBSD: lex.c,v 1.5 1998/11/04 18:27:21 christos Exp $ */
2
3 /*
4 * lexical analysis and source input
5 */
6
7 #include "sh.h"
8 #include <ctype.h>
9
10 static void readhere ARGS((struct ioword *iop));
11 static int getsc__ ARGS((void));
12 static void getsc_line ARGS((Source *s));
13 static char *get_brace_var ARGS((XString *wsp, char *wp));
14 static int arraysub ARGS((char **strp));
15 static const char *ungetsc ARGS((int c));
16 static int getsc_bn ARGS((void));
17 static void gethere ARGS((void));
18
/*
 * State shared between getsc_bn() and ungetsc():
 *  0 - nothing pending
 *  1 - getsc_bn() has just returned a backslash that was not followed by
 *	a newline; the following character must be delivered untouched
 *  2 - that following character has been delivered
 * ungetsc() decrements it when non-zero; yylex() resets it for each token.
 */
static int backslash_skip;

/*
 * When non-zero, getsc_bn() hands characters back unprocessed (no
 * \newline removal).  Used as a counter: incremented on entry to and
 * decremented on exit from comments, single quotes and literal
 * here documents.
 */
static int ignore_backslash_newline;

/*
 * Fast path for getsc_bn(): fall back to the function when the buffer is
 * exhausted, when the next character is a backslash, or when a
 * backslash_skip is pending; otherwise consume the buffered character.
 */
#define getsc()		((*source->str == '\0' || *source->str == '\\' \
			  || backslash_skip) ? getsc_bn() : *source->str++)

/* Fast path for getsc__(): only call it when the buffer is exhausted. */
#define getsc_()	((*source->str == '\0') ? getsc__() : *source->str++)
27
28
29 /*
30 * Lexical analyzer
31 *
32 * tokens are not regular expressions, they are LL(1).
33 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
34 * hence the state stack.
35 */
36
37 int
38 yylex(cf)
39 int cf;
40 {
41 register int c, state;
42 char states [64], *statep = states; /* XXX overflow check */
43 XString ws; /* expandable output word */
44 register char *wp; /* output word pointer */
45 register char *sp, *dp;
46 char UNINITIALIZED(*ddparen_start);
47 int istate;
48 int UNINITIALIZED(c2);
49 int UNINITIALIZED(nparen), UNINITIALIZED(csstate);
50 int UNINITIALIZED(ndparen);
51 int UNINITIALIZED(indquotes);
52
53
54 Again:
55 Xinit(ws, wp, 64, ATEMP);
56
57 backslash_skip = 0;
58 ignore_backslash_newline = 0;
59
60 if (cf&ONEWORD)
61 istate = SWORD;
62 #ifdef KSH
63 else if (cf&LETEXPR) {
64 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
65 istate = SDPAREN;
66 ndparen = 0;
67 }
68 #endif /* KSH */
69 else { /* normal lexing */
70 istate = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
71 while ((c = getsc()) == ' ' || c == '\t')
72 ;
73 if (c == '#') {
74 ignore_backslash_newline++;
75 while ((c = getsc()) != '\0' && c != '\n')
76 ;
77 ignore_backslash_newline--;
78 }
79 ungetsc(c);
80 }
81 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
82 source->flags &= ~SF_ALIAS;
83 /* In POSIX mode, a trailing space only counts if we are
84 * parsing a simple command
85 */
86 if (!Flag(FPOSIX) || (cf & CMDWORD))
87 cf |= ALIAS;
88 }
89
90 /* collect non-special or quoted characters to form word */
91 for (*statep = state = istate;
92 !((c = getsc()) == 0 || ((state == SBASE || state == SHEREDELIM)
93 && ctype(c, C_LEX1))); )
94 {
95 Xcheck(ws, wp);
96 switch (state) {
97 case SBASE:
98 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
99 *wp = EOS; /* temporary */
100 if (is_wdvarname(Xstring(ws, wp), FALSE))
101 {
102 char *p, *tmp;
103
104 if (arraysub(&tmp)) {
105 *wp++ = CHAR;
106 *wp++ = c;
107 for (p = tmp; *p; ) {
108 Xcheck(ws, wp);
109 *wp++ = CHAR;
110 *wp++ = *p++;
111 }
112 afree(tmp, ATEMP);
113 break;
114 } else {
115 Source *s;
116
117 s = pushs(SREREAD,
118 source->areap);
119 s->start = s->str
120 = s->u.freeme = tmp;
121 s->next = source;
122 source = s;
123 }
124 }
125 *wp++ = CHAR;
126 *wp++ = c;
127 break;
128 }
129 /* fall through.. */
130 Sbase1: /* includes *(...|...) pattern (*+?@!) */
131 #ifdef KSH
132 if (c == '*' || c == '@' || c == '+' || c == '?'
133 || c == '!')
134 {
135 c2 = getsc();
136 if (c2 == '(' /*)*/ ) {
137 *wp++ = OPAT;
138 *wp++ = c;
139 *++statep = state = SPATTERN;
140 break;
141 }
142 ungetsc(c2);
143 }
144 #endif /* KSH */
145 /* fall through.. */
146 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
147 switch (c) {
148 case '\\':
149 c = getsc();
150 #ifdef OS2
151 if (isalnum(c)) {
152 *wp++ = CHAR, *wp++ = '\\';
153 *wp++ = CHAR, *wp++ = c;
154 } else
155 #endif
156 if (c) /* trailing \ is lost */
157 *wp++ = QCHAR, *wp++ = c;
158 break;
159 case '\'':
160 *++statep = state = SSQUOTE;
161 *wp++ = OQUOTE;
162 ignore_backslash_newline++;
163 break;
164 case '"':
165 *++statep = state = SDQUOTE;
166 *wp++ = OQUOTE;
167 break;
168 default:
169 goto Subst;
170 }
171 break;
172
173 Subst:
174 switch (c) {
175 case '\\':
176 c = getsc();
177 switch (c) {
178 case '"': case '\\':
179 case '$': case '`':
180 *wp++ = QCHAR, *wp++ = c;
181 break;
182 default:
183 Xcheck(ws, wp);
184 if (c) { /* trailing \ is lost */
185 *wp++ = CHAR, *wp++ = '\\';
186 *wp++ = CHAR, *wp++ = c;
187 }
188 break;
189 }
190 break;
191 case '$':
192 c = getsc();
193 if (c == '(') /*)*/ {
194 c = getsc();
195 if (c == '(') /*)*/ {
196 *++statep = state = SDDPAREN;
197 nparen = 2;
198 ddparen_start = wp;
199 *wp++ = EXPRSUB;
200 } else {
201 ungetsc(c);
202 *++statep = state = SPAREN;
203 nparen = 1;
204 csstate = 0;
205 *wp++ = COMSUB;
206 }
207 } else if (c == '{') /*}*/ {
208 *wp++ = OSUBST;
209 wp = get_brace_var(&ws, wp);
210 /* If this is a trim operation,
211 * wrap @(...) around the pattern
212 * (allows easy handling of ${a#b|c})
213 */
214 c = getsc();
215 if (c == '#' || c == '%') {
216 *wp++ = CHAR, *wp++ = c;
217 if ((c2 = getsc()) == c)
218 *wp++ = CHAR, *wp++ = c;
219 else
220 ungetsc(c2);
221 *wp++ = OPAT, *wp++ = '@';
222 *++statep = state = STBRACE;
223 } else {
224 ungetsc(c);
225 *++statep = state = SBRACE;
226 }
227 } else if (ctype(c, C_ALPHA)) {
228 *wp++ = OSUBST;
229 do {
230 Xcheck(ws, wp);
231 *wp++ = c;
232 c = getsc();
233 } while (ctype(c, C_ALPHA|C_DIGIT));
234 *wp++ = '\0';
235 *wp++ = CSUBST;
236 ungetsc(c);
237 } else if (ctype(c, C_DIGIT|C_VAR1)) {
238 Xcheck(ws, wp);
239 *wp++ = OSUBST;
240 *wp++ = c;
241 *wp++ = '\0';
242 *wp++ = CSUBST;
243 } else {
244 *wp++ = CHAR, *wp++ = '$';
245 ungetsc(c);
246 }
247 break;
248 case '`':
249 *++statep = state = SBQUOTE;
250 *wp++ = COMSUB;
251 /* Need to know if we are inside double quotes
252 * since sh/at&t-ksh translate the \" to " in
253 * "`..\"..`".
254 */
255 indquotes = 0;
256 if (!Flag(FPOSIX))
257 for (sp = statep; sp > states; --sp)
258 if (*sp == SDQUOTE)
259 indquotes = 1;
260 break;
261 default:
262 *wp++ = CHAR, *wp++ = c;
263 }
264 break;
265
266 case SSQUOTE:
267 if (c == '\'') {
268 state = *--statep;
269 *wp++ = CQUOTE;
270 ignore_backslash_newline--;
271 } else
272 *wp++ = QCHAR, *wp++ = c;
273 break;
274
275 case SDQUOTE:
276 if (c == '"') {
277 state = *--statep;
278 *wp++ = CQUOTE;
279 } else
280 goto Subst;
281 break;
282
283 case SPAREN: /* $( .. ) */
284 /* todo: deal with $(...) quoting properly
285 * kludge to partly fake quoting inside $(..): doesn't
286 * really work because nested $(..) or ${..} inside
287 * double quotes aren't dealt with.
288 */
289 switch (csstate) {
290 case 0: /* normal */
291 switch (c) {
292 case '(':
293 nparen++;
294 break;
295 case ')':
296 nparen--;
297 break;
298 case '\\':
299 csstate = 1;
300 break;
301 case '"':
302 csstate = 2;
303 break;
304 case '\'':
305 csstate = 4;
306 ignore_backslash_newline++;
307 break;
308 }
309 break;
310
311 case 1: /* backslash in normal mode */
312 case 3: /* backslash in double quotes */
313 --csstate;
314 break;
315
316 case 2: /* double quotes */
317 if (c == '"')
318 csstate = 0;
319 else if (c == '\\')
320 csstate = 3;
321 break;
322
323 case 4: /* single quotes */
324 if (c == '\'') {
325 csstate = 0;
326 ignore_backslash_newline--;
327 }
328 break;
329 }
330 if (nparen == 0) {
331 state = *--statep;
332 *wp++ = 0; /* end of COMSUB */
333 } else
334 *wp++ = c;
335 break;
336
337 case SDDPAREN: /* $(( .. )) */
338 /* todo: deal with $((...); (...)) properly */
339 /* XXX should nest using existing state machine
340 * (embed "..", $(...), etc.) */
341 if (c == '(')
342 nparen++;
343 else if (c == ')') {
344 nparen--;
345 if (nparen == 1) {
346 /*(*/
347 if ((c2 = getsc()) == ')') {
348 state = *--statep;
349 *wp++ = 0; /* end of EXPRSUB */
350 break;
351 } else {
352 ungetsc(c2);
353 /* mismatched parenthesis -
354 * assume we were really
355 * parsing a $(..) expression
356 */
357 memmove(ddparen_start + 1,
358 ddparen_start,
359 wp - ddparen_start);
360 *ddparen_start++ = COMSUB;
361 *ddparen_start = '('; /*)*/
362 wp++;
363 csstate = 0;
364 *statep = state = SPAREN;
365 }
366 }
367 }
368 *wp++ = c;
369 break;
370
371 case SBRACE:
372 /*{*/
373 if (c == '}') {
374 state = *--statep;
375 *wp++ = CSUBST;
376 } else
377 goto Sbase1;
378 break;
379
380 case STBRACE:
381 /* same as SBRACE, except | is saved as SPAT and
382 * CPAT is added at the end.
383 */
384 /*{*/
385 if (c == '}') {
386 state = *--statep;
387 *wp++ = CPAT;
388 *wp++ = CSUBST;
389 } else if (c == '|') {
390 *wp++ = SPAT;
391 } else
392 goto Sbase1;
393 break;
394
395 case SBQUOTE:
396 if (c == '`') {
397 *wp++ = 0;
398 state = *--statep;
399 } else if (c == '\\') {
400 switch (c = getsc()) {
401 case '\\':
402 case '$': case '`':
403 *wp++ = c;
404 break;
405 case '"':
406 if (indquotes) {
407 *wp++ = c;
408 break;
409 }
410 /* fall through.. */
411 default:
412 if (c) { /* trailing \ is lost */
413 *wp++ = '\\';
414 *wp++ = c;
415 }
416 break;
417 }
418 } else
419 *wp++ = c;
420 break;
421
422 case SWORD: /* ONEWORD */
423 goto Subst;
424
425 #ifdef KSH
426 case SDPAREN: /* LETEXPR: (( ... )) */
427 /*(*/
428 if (c == ')') {
429 if (ndparen > 0)
430 --ndparen;
431 /*(*/
432 else if ((c2 = getsc()) == ')') {
433 c = 0;
434 *wp++ = CQUOTE;
435 goto Done;
436 } else
437 ungetsc(c2);
438 } else if (c == '(')
439 /* parenthesis inside quotes and backslashes
440 * are lost, but at&t ksh doesn't count them
441 * either
442 */
443 ++ndparen;
444 goto Sbase2;
445 #endif /* KSH */
446
447 case SHEREDELIM: /* <<,<<- delimiter */
448 /* XXX chuck this state (and the next) - use
449 * the existing states ($ and \`..` should be
450 * stripped of their specialness after the
451 * fact).
452 */
453 /* here delimiters need a special case since
454 * $ and `..` are not to be treated specially
455 */
456 if (c == '\\') {
457 c = getsc();
458 if (c) { /* trailing \ is lost */
459 *wp++ = QCHAR;
460 *wp++ = c;
461 }
462 } else if (c == '\'') {
463 *++statep = state = SSQUOTE;
464 *wp++ = OQUOTE;
465 ignore_backslash_newline++;
466 } else if (c == '"') {
467 state = SHEREDQUOTE;
468 *wp++ = OQUOTE;
469 } else {
470 *wp++ = CHAR;
471 *wp++ = c;
472 }
473 break;
474
475 case SHEREDQUOTE: /* " in <<,<<- delimiter */
476 if (c == '"') {
477 *wp++ = CQUOTE;
478 state = SHEREDELIM;
479 } else {
480 if (c == '\\') {
481 switch (c = getsc()) {
482 case '\\': case '"':
483 case '$': case '`':
484 break;
485 default:
486 if (c) { /* trailing \ lost */
487 *wp++ = CHAR;
488 *wp++ = '\\';
489 }
490 break;
491 }
492 }
493 *wp++ = CHAR;
494 *wp++ = c;
495 }
496 break;
497
498 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
499 if ( /*(*/ c == ')') {
500 *wp++ = CPAT;
501 state = *--statep;
502 } else if (c == '|')
503 *wp++ = SPAT;
504 else
505 goto Sbase1;
506 break;
507 }
508 }
509 Done:
510 Xcheck(ws, wp);
511 if (state != istate)
512 yyerror("no closing quote\n");
513
514 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
515 if (state == SHEREDELIM)
516 state = SBASE;
517
518 if ((c == '<' || c == '>') && state == SBASE) {
519 char *cp = Xstring(ws, wp);
520 if (Xlength(ws, wp) == 2 && cp[0] == CHAR && digit(cp[1])) {
521 wp = cp; /* throw away word */
522 c2/*unit*/ = cp[1] - '0';
523 } else
524 c2/*unit*/ = c == '>'; /* 0 for <, 1 for > */
525 }
526
527 if (wp == Xstring(ws, wp) && state == SBASE) {
528 Xfree(ws, wp); /* free word */
529 /* no word, process LEX1 character */
530 switch (c) {
531 default:
532 return c;
533
534 case '|':
535 case '&':
536 case ';':
537 if ((c2 = getsc()) == c)
538 c = (c == ';') ? BREAK :
539 (c == '|') ? LOGOR :
540 (c == '&') ? LOGAND :
541 YYERRCODE;
542 #ifdef KSH
543 else if (c == '|' && c2 == '&')
544 c = COPROC;
545 #endif /* KSH */
546 else
547 ungetsc(c2);
548 return c;
549
550 case '>':
551 case '<': {
552 register struct ioword *iop;
553
554 iop = (struct ioword *) alloc(sizeof(*iop), ATEMP);
555 iop->unit = c2/*unit*/;
556
557 c2 = getsc();
558 /* <<, >>, <> are ok, >< is not */
559 if (c == c2 || (c == '<' && c2 == '>')) {
560 iop->flag = c == c2 ?
561 (c == '>' ? IOCAT : IOHERE) : IORDWR;
562 if (iop->flag == IOHERE) {
563 if ((c2 = getsc()) == '-')
564 iop->flag |= IOSKIP;
565 else
566 ungetsc(c2);
567 }
568 } else if (c2 == '&')
569 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
570 else {
571 iop->flag = c == '>' ? IOWRITE : IOREAD;
572 if (c == '>' && c2 == '|')
573 iop->flag |= IOCLOB;
574 else
575 ungetsc(c2);
576 }
577
578 iop->name = (char *) 0;
579 iop->delim = (char *) 0;
580 yylval.iop = iop;
581 return REDIR;
582 }
583 case '\n':
584 gethere();
585 if (cf & CONTIN)
586 goto Again;
587 return c;
588
589 case '(': /*)*/
590 #ifdef KSH
591 if ((c2 = getsc()) == '(') /*)*/
592 c = MDPAREN;
593 else
594 ungetsc(c2);
595 #endif /* KSH */
596 return c;
597 /*(*/
598 case ')':
599 return c;
600 }
601 }
602
603 *wp++ = EOS; /* terminate word */
604 yylval.cp = Xclose(ws, wp);
605 if (state == SWORD
606 #ifdef KSH
607 || state == SDPAREN
608 #endif /* KSH */
609 ) /* ONEWORD? */
610 return LWORD;
611 ungetsc(c); /* unget terminator */
612
613 /* copy word to unprefixed string ident */
614 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
615 *dp++ = *sp++;
616 /* Make sure the ident array stays '\0' paded */
617 memset(dp, 0, (ident+IDENT) - dp + 1);
618 if (c != EOS)
619 *ident = '\0'; /* word is not unquoted */
620
621 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
622 struct tbl *p;
623 int h = hash(ident);
624
625 /* { */
626 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
627 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
628 {
629 afree(yylval.cp, ATEMP);
630 return p->val.i;
631 }
632 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
633 && (p->flag & ISSET))
634 {
635 register Source *s;
636
637 for (s = source; s->type == SALIAS; s = s->next)
638 if (s->u.tblp == p)
639 return LWORD;
640 /* push alias expansion */
641 s = pushs(SALIAS, source->areap);
642 s->start = s->str = p->val.s;
643 s->u.tblp = p;
644 s->next = source;
645 source = s;
646 afree(yylval.cp, ATEMP);
647 goto Again;
648 }
649 }
650
651 return LWORD;
652 }
653
654 static void
655 gethere()
656 {
657 register struct ioword **p;
658
659 for (p = heres; p < herep; p++)
660 readhere(*p);
661 herep = heres;
662 }
663
664 /*
665 * read "<<word" text into temp file
666 */
667
668 static void
669 readhere(iop)
670 register struct ioword *iop;
671 {
672 struct shf *volatile shf;
673 struct temp *h;
674 register int c;
675 char *volatile eof;
676 char *eofp;
677 int skiptabs;
678 int i;
679
680 eof = evalstr(iop->delim, 0);
681
682 if (e->flags & EF_FUNC_PARSE) {
683 h = maketemp(APERM);
684 h->next = func_heredocs;
685 func_heredocs = h;
686 } else {
687 h = maketemp(ATEMP);
688 h->next = e->temps;
689 e->temps = h;
690 }
691 iop->name = h->name;
692 if (!(shf = h->shf))
693 yyerror("cannot create temporary file %s - %s\n",
694 h->name, strerror(errno));
695
696 newenv(E_ERRH);
697 i = ksh_sigsetjmp(e->jbuf, 0);
698 if (i) {
699 quitenv();
700 shf_close(shf);
701 unwind(i);
702 }
703
704 if (!(iop->flag & IOEVAL))
705 ignore_backslash_newline++;
706
707 for (;;) {
708 eofp = eof;
709 skiptabs = iop->flag & IOSKIP;
710 while ((c = getsc()) != 0) {
711 if (skiptabs) {
712 if (c == '\t')
713 continue;
714 skiptabs = 0;
715 }
716 if (c != *eofp)
717 break;
718 eofp++;
719 }
720 /* Allow EOF here so commands with out trailing newlines
721 * will work (eg, ksh -c '...', $(...), etc).
722 */
723 if (*eofp == '\0' && (c == 0 || c == '\n'))
724 break;
725 ungetsc(c);
726 shf_write(eof, eofp - eof, shf);
727 while ((c = getsc()) != '\n') {
728 if (c == 0)
729 yyerror("here document `%s' unclosed\n", eof);
730 shf_putc(c, shf);
731 }
732 shf_putc(c, shf);
733 }
734 shf_flush(shf);
735 if (shf_error(shf))
736 yyerror("error saving here document `%s': %s\n",
737 eof, strerror(shf_errno(shf)));
738 /*XXX add similar checks for write errors everywhere */
739 quitenv();
740 shf_close(shf);
741 if (!(iop->flag & IOEVAL))
742 ignore_backslash_newline--;
743 }
744
745 void
746 #ifdef HAVE_PROTOTYPES
747 yyerror(const char *fmt, ...)
748 #else
749 yyerror(fmt, va_alist)
750 const char *fmt;
751 va_dcl
752 #endif
753 {
754 va_list va;
755
756 yynerrs++;
757 /* pop aliases and re-reads */
758 while (source->type == SALIAS || source->type == SREREAD)
759 source = source->next;
760 source->str = null; /* zap pending input */
761
762 error_prefix(TRUE);
763 SH_VA_START(va, fmt);
764 shf_vfprintf(shl_out, fmt, va);
765 va_end(va);
766 errorf(null);
767 }
768
769 /*
770 * input for yylex with alias expansion
771 */
772
773 Source *
774 pushs(type, areap)
775 int type;
776 Area *areap;
777 {
778 register Source *s;
779
780 s = (Source *) alloc(sizeof(Source), areap);
781 s->type = type;
782 s->str = null;
783 s->start = NULL;
784 s->line = 0;
785 s->errline = 0;
786 s->file = NULL;
787 s->flags = 0;
788 s->next = NULL;
789 s->areap = areap;
790 if (type == SFILE || type == SSTDIN) {
791 char *dummy;
792 Xinit(s->xs, dummy, 256, s->areap);
793 } else
794 memset(&s->xs, 0, sizeof(s->xs));
795 return s;
796 }
797
798 static int
799 getsc__()
800 {
801 register Source *s = source;
802 register int c;
803
804 while ((c = *s->str++) == 0) {
805 s->str = NULL; /* return 0 for EOF by default */
806 switch (s->type) {
807 case SEOF:
808 s->str = null;
809 return 0;
810
811 case SSTDIN:
812 case SFILE:
813 getsc_line(s);
814 break;
815
816 case SWSTR:
817 break;
818
819 case SSTRING:
820 break;
821
822 case SWORDS:
823 s->start = s->str = *s->u.strv++;
824 s->type = SWORDSEP;
825 break;
826
827 case SWORDSEP:
828 if (*s->u.strv == NULL) {
829 s->start = s->str = newline;
830 s->type = SEOF;
831 } else {
832 s->start = s->str = space;
833 s->type = SWORDS;
834 }
835 break;
836
837 case SALIAS:
838 if (s->flags & SF_ALIASEND) {
839 /* pass on an unused SF_ALIAS flag */
840 source = s->next;
841 source->flags |= s->flags & SF_ALIAS;
842 s = source;
843 } else if (*s->u.tblp->val.s
844 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
845 {
846 source = s = s->next; /* pop source stack */
847 /* Note that this alias ended with a space,
848 * enabling alias expansion on the following
849 * word.
850 */
851 s->flags |= SF_ALIAS;
852 } else {
853 /* At this point, we need to keep the current
854 * alias in the source list so recursive
855 * aliases can be detected and we also need
856 * to return the next character. Do this
857 * by temporarily popping the alias to get
858 * the next character and then put it back
859 * in the source list with the SF_ALIASEND
860 * flag set.
861 */
862 source = s->next; /* pop source stack */
863 source->flags |= s->flags & SF_ALIAS;
864 c = getsc__();
865 if (c) {
866 s->flags |= SF_ALIASEND;
867 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
868 s->start = s->str = s->ugbuf;
869 s->next = source;
870 source = s;
871 } else {
872 s = source;
873 /* avoid reading eof twice */
874 s->str = NULL;
875 break;
876 }
877 }
878 continue;
879
880 case SREREAD:
881 if (s->start != s->ugbuf) /* yuck */
882 afree(s->u.freeme, ATEMP);
883 source = s = s->next;
884 continue;
885 }
886 if (s->str == NULL) {
887 s->type = SEOF;
888 s->start = s->str = null;
889 return '\0';
890 }
891 if (s->flags & SF_ECHO) {
892 shf_puts(s->str, shl_out);
893 shf_flush(shl_out);
894 }
895 }
896 return c;
897 }
898
899 static void
900 getsc_line(s)
901 Source *s;
902 {
903 char *xp = Xstring(s->xs, xp);
904 int interactive = Flag(FTALKING) && s->type == SSTDIN;
905 int have_tty = interactive && (s->flags & SF_TTY);
906
907 /* Done here to ensure nothing odd happens when a timeout occurs */
908 XcheckN(s->xs, xp, LINE);
909 *xp = '\0';
910 s->start = s->str = xp;
911
912 #ifdef KSH
913 if (have_tty && ksh_tmout) {
914 ksh_tmout_state = TMOUT_READING;
915 alarm(ksh_tmout);
916 }
917 #endif /* KSH */
918 #ifdef EDIT
919 if (have_tty && (0
920 # ifdef VI
921 || Flag(FVI)
922 # endif /* VI */
923 # ifdef EMACS
924 || Flag(FEMACS) || Flag(FGMACS)
925 # endif /* EMACS */
926 ))
927 {
928 int nread;
929
930 nread = x_read(xp, LINE);
931 if (nread < 0) /* read error */
932 nread = 0;
933 xp[nread] = '\0';
934 xp += nread;
935 }
936 else
937 #endif /* EDIT */
938 {
939 if (interactive) {
940 pprompt(prompt, 0);
941 #ifdef OS2
942 setmode (0, O_TEXT);
943 #endif /* OS2 */
944 } else
945 s->line++;
946
947 while (1) {
948 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
949
950 if (!p && shf_error(s->u.shf)
951 && shf_errno(s->u.shf) == EINTR)
952 {
953 shf_clearerr(s->u.shf);
954 if (trap)
955 runtraps(0);
956 continue;
957 }
958 if (!p || (xp = p, xp[-1] == '\n'))
959 break;
960 /* double buffer size */
961 xp++; /* move past null so doubling works... */
962 XcheckN(s->xs, xp, Xlength(s->xs, xp));
963 xp--; /* ...and move back again */
964 }
965 #ifdef OS2
966 setmode(0, O_BINARY);
967 #endif /* OS2 */
968 /* flush any unwanted input so other programs/builtins
969 * can read it. Not very optimal, but less error prone
970 * than flushing else where, dealing with redirections,
971 * etc..
972 * todo: reduce size of shf buffer (~128?) if SSTDIN
973 */
974 if (s->type == SSTDIN)
975 shf_flush(s->u.shf);
976 }
977 /* XXX: temporary kludge to restore source after a
978 * trap may have been executed.
979 */
980 source = s;
981 #ifdef KSH
982 if (have_tty && ksh_tmout)
983 {
984 ksh_tmout_state = TMOUT_EXECUTING;
985 alarm(0);
986 }
987 #endif /* KSH */
988 s->start = s->str = Xstring(s->xs, xp);
989 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
990 /* Note: if input is all nulls, this is not eof */
991 if (Xlength(s->xs, xp) == 0) { /* EOF */
992 if (s->type == SFILE)
993 shf_fdclose(s->u.shf);
994 s->str = NULL;
995 } else if (interactive) {
996 #ifdef HISTORY
997 char *p = Xstring(s->xs, xp);
998 if (cur_prompt == PS1)
999 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1000 p++;
1001 if (*p) {
1002 # ifdef EASY_HISTORY
1003 if (cur_prompt == PS2)
1004 histappend(Xstring(s->xs, xp), 1);
1005 else
1006 # endif /* EASY_HISTORY */
1007 {
1008 s->line++;
1009 histsave(s->line, s->str, 1);
1010 }
1011 }
1012 #endif /* HISTORY */
1013 }
1014 if (interactive)
1015 set_prompt(PS2, (Source *) 0);
1016 }
1017
1018 void
1019 set_prompt(to, s)
1020 int to;
1021 Source *s;
1022 {
1023 cur_prompt = to;
1024
1025 switch (to) {
1026 case PS1: /* command */
1027 #ifdef KSH
1028 /* Substitute ! and !! here, before substitutions are done
1029 * so ! in expanded variables are not expanded.
1030 * NOTE: this is not what at&t ksh does (it does it after
1031 * substitutions, POSIX doesn't say which is to be done.
1032 */
1033 {
1034 struct shf *shf;
1035 char *ps1;
1036 Area *saved_atemp;
1037 #ifdef __GNUC__
1038 (void) &ps1;
1039 #endif
1040
1041 ps1 = str_val(global("PS1"));
1042 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1043 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1044 while (*ps1) {
1045 if (*ps1 != '!' || *++ps1 == '!')
1046 shf_putchar(*ps1++, shf);
1047 else
1048 shf_fprintf(shf, "%d",
1049 s ? s->line + 1 : 0);
1050 }
1051 ps1 = shf_sclose(shf);
1052 saved_atemp = ATEMP;
1053 newenv(E_ERRH);
1054 if (ksh_sigsetjmp(e->jbuf, 0)) {
1055 prompt = safe_prompt;
1056 /* Don't print an error - assume it has already
1057 * been printed. Reason is we may have forked
1058 * to run a command and the child may be
1059 * unwinding its stack through this code as it
1060 * exits.
1061 */
1062 } else
1063 prompt = str_save(substitute(ps1, 0),
1064 saved_atemp);
1065 quitenv();
1066 }
1067 #else /* KSH */
1068 prompt = str_val(global("PS1"));
1069 #endif /* KSH */
1070 break;
1071
1072 case PS2: /* command continuation */
1073 prompt = str_val(global("PS2"));
1074 break;
1075 }
1076 }
1077
1078 /* See also related routine, promptlen() in edit.c */
1079 void
1080 pprompt(cp, ntruncate)
1081 const char *cp;
1082 int ntruncate;
1083 {
1084 #if 0
1085 char nbuf[32];
1086 int c;
1087
1088 while (*cp != 0) {
1089 if (*cp != '!')
1090 c = *cp++;
1091 else if (*++cp == '!')
1092 c = *cp++;
1093 else {
1094 int len;
1095 char *p;
1096
1097 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1098 source->line + 1);
1099 len = strlen(nbuf);
1100 if (ntruncate) {
1101 if (ntruncate >= len) {
1102 ntruncate -= len;
1103 continue;
1104 }
1105 p += ntruncate;
1106 len -= ntruncate;
1107 ntruncate = 0;
1108 }
1109 shf_write(p, len, shl_out);
1110 continue;
1111 }
1112 if (ntruncate)
1113 --ntruncate;
1114 else
1115 shf_putc(c, shl_out);
1116 }
1117 #endif /* 0 */
1118 shf_puts(cp + ntruncate, shl_out);
1119 shf_flush(shl_out);
1120 }
1121
1122 /* Read the variable part of a ${...} expression (ie, up to but not including
1123 * the :[-+?=#%] or close-brace.
1124 */
1125 static char *
1126 get_brace_var(wsp, wp)
1127 XString *wsp;
1128 char *wp;
1129 {
1130 enum parse_state {
1131 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1132 PS_NUMBER, PS_VAR1, PS_END
1133 }
1134 state;
1135 char c;
1136
1137 state = PS_INITIAL;
1138 while (1) {
1139 c = getsc();
1140 /* State machine to figure out where the variable part ends. */
1141 switch (state) {
1142 case PS_INITIAL:
1143 if (c == '#') {
1144 state = PS_SAW_HASH;
1145 break;
1146 }
1147 /* fall through.. */
1148 case PS_SAW_HASH:
1149 if (letter(c))
1150 state = PS_IDENT;
1151 else if (digit(c))
1152 state = PS_NUMBER;
1153 else if (ctype(c, C_VAR1))
1154 state = PS_VAR1;
1155 else
1156 state = PS_END;
1157 break;
1158 case PS_IDENT:
1159 if (!letnum(c)) {
1160 state = PS_END;
1161 if (c == '[') {
1162 char *tmp, *p;
1163
1164 if (!arraysub(&tmp))
1165 yyerror("missing ]\n");
1166 *wp++ = c;
1167 for (p = tmp; *p; ) {
1168 Xcheck(*wsp, wp);
1169 *wp++ = *p++;
1170 }
1171 afree(tmp, ATEMP);
1172 c = getsc(); /* the ] */
1173 }
1174 }
1175 break;
1176 case PS_NUMBER:
1177 if (!digit(c))
1178 state = PS_END;
1179 break;
1180 case PS_VAR1:
1181 state = PS_END;
1182 break;
1183 case PS_END: /* keep gcc happy */
1184 break;
1185 }
1186 if (state == PS_END) {
1187 *wp++ = '\0'; /* end of variable part */
1188 ungetsc(c);
1189 break;
1190 }
1191 Xcheck(*wsp, wp);
1192 *wp++ = c;
1193 }
1194 return wp;
1195 }
1196
1197 /*
1198 * Save an array subscript - returns true if matching bracket found, false
1199 * if eof or newline was found.
1200 * (Returned string double null terminated)
1201 */
1202 static int
1203 arraysub(strp)
1204 char **strp;
1205 {
1206 XString ws;
1207 char *wp;
1208 char c;
1209 int depth = 1; /* we are just past the initial [ */
1210
1211 Xinit(ws, wp, 32, ATEMP);
1212
1213 do {
1214 c = getsc();
1215 Xcheck(ws, wp);
1216 *wp++ = c;
1217 if (c == '[')
1218 depth++;
1219 else if (c == ']')
1220 depth--;
1221 } while (depth > 0 && c && c != '\n');
1222
1223 *wp++ = '\0';
1224 *strp = Xclose(ws, wp);
1225
1226 return depth == 0 ? 1 : 0;
1227 }
1228
1229 /* Unget a char: handles case when we are already at the start of the buffer */
1230 static const char *
1231 ungetsc(c)
1232 int c;
1233 {
1234 if (backslash_skip)
1235 backslash_skip--;
1236 /* Don't unget eof... */
1237 if (source->str == null && c == '\0')
1238 return source->str;
1239 if (source->str > source->start)
1240 source->str--;
1241 else {
1242 Source *s;
1243
1244 s = pushs(SREREAD, source->areap);
1245 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1246 s->start = s->str = s->ugbuf;
1247 s->next = source;
1248 source = s;
1249 }
1250 return source->str;
1251 }
1252
1253
1254 /* Called to get a char that isn't a \newline sequence. */
1255 static int
1256 getsc_bn ARGS((void))
1257 {
1258 int c, c2;
1259
1260 if (ignore_backslash_newline)
1261 return getsc_();
1262
1263 if (backslash_skip == 1) {
1264 backslash_skip = 2;
1265 return getsc_();
1266 }
1267
1268 backslash_skip = 0;
1269
1270 while (1) {
1271 c = getsc_();
1272 if (c == '\\') {
1273 if ((c2 = getsc_()) == '\n')
1274 /* ignore the \newline; get the next char... */
1275 continue;
1276 ungetsc(c2);
1277 backslash_skip = 1;
1278 }
1279 return c;
1280 }
1281 }
1282