1 /* $NetBSD: lex.c,v 1.24 2019/09/26 11:01:09 mlelstv Exp $ */ 2 3 /* 4 * lexical analysis and source input 5 */ 6 #include <sys/cdefs.h> 7 8 #ifndef lint 9 __RCSID("$NetBSD: lex.c,v 1.24 2019/09/26 11:01:09 mlelstv Exp $"); 10 #endif 11 12 13 #include "sh.h" 14 #include <ctype.h> 15 16 17 /* Structure to keep track of the lexing state and the various pieces of info 18 * needed for each particular state. 19 */ 20 typedef struct lex_state Lex_state; 21 struct lex_state { 22 int ls_state; 23 union { 24 /* $(...) */ 25 struct scsparen_info { 26 int nparen; /* count open parenthesis */ 27 int csstate; /* XXX remove */ 28 #define ls_scsparen ls_info.u_scsparen 29 } u_scsparen; 30 31 /* $((...)) */ 32 struct sasparen_info { 33 int nparen; /* count open parenthesis */ 34 int start; /* marks start of $(( in output str */ 35 #define ls_sasparen ls_info.u_sasparen 36 } u_sasparen; 37 38 /* ((...)) */ 39 struct sletparen_info { 40 int nparen; /* count open parenthesis */ 41 #define ls_sletparen ls_info.u_sletparen 42 } u_sletparen; 43 44 /* `...` */ 45 struct sbquote_info { 46 int indquotes; /* true if in double quotes: "`...`" */ 47 #define ls_sbquote ls_info.u_sbquote 48 } u_sbquote; 49 50 Lex_state *base; /* used to point to next state block */ 51 } ls_info; 52 }; 53 54 typedef struct State_info State_info; 55 struct State_info { 56 Lex_state *base; 57 Lex_state *end; 58 }; 59 60 61 static void readhere ARGS((struct ioword *iop)); 62 static int getsc__ ARGS((void)); 63 static void getsc_line ARGS((Source *s)); 64 static int getsc_bn ARGS((void)); 65 static char *get_brace_var ARGS((XString *wsp, char *wp)); 66 static int arraysub ARGS((char **strp)); 67 static const char *ungetsc ARGS((int c)); 68 static void gethere ARGS((void)); 69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end)); 70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end)); 71 72 static int backslash_skip; 73 static int ignore_backslash_newline; 74 75 /* optimized getsc_bn() */ 76 #define getsc() (*source->str != '\0' && *source->str != '\\' \ 77 && !backslash_skip ? *source->str++ : getsc_bn()) 78 /* optimized getsc__() */ 79 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__()) 80 81 #define STATE_BSIZE 32 82 83 #define PUSH_STATE(s) do { \ 84 if (++statep == state_info.end) \ 85 statep = push_state_(&state_info, statep); \ 86 state = statep->ls_state = (s); \ 87 } while (0) 88 89 #define POP_STATE() do { \ 90 if (--statep == state_info.base) \ 91 statep = pop_state_(&state_info, statep); \ 92 state = statep->ls_state; \ 93 } while (0) 94 95 96 97 /* 98 * Lexical analyzer 99 * 100 * tokens are not regular expressions, they are LL(1). 101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 102 * hence the state stack. 103 */ 104 105 int 106 yylex(cf) 107 int cf; 108 { 109 Lex_state states[STATE_BSIZE], *statep; 110 State_info state_info; 111 int c, state; 112 XString ws; /* expandable output word */ 113 char *wp; /* output word pointer */ 114 char *sp, *dp; 115 int c2; 116 117 118 Again: 119 states[0].ls_state = -1; 120 states[0].ls_info.base = (Lex_state *) 0; 121 statep = &states[1]; 122 state_info.base = states; 123 state_info.end = &states[STATE_BSIZE]; 124 125 Xinit(ws, wp, 64, ATEMP); 126 127 backslash_skip = 0; 128 ignore_backslash_newline = 0; 129 130 if (cf&ONEWORD) 131 state = SWORD; 132 #ifdef KSH 133 else if (cf&LETEXPR) { 134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */ 135 state = SLETPAREN; 136 statep->ls_sletparen.nparen = 0; 137 } 138 #endif /* KSH */ 139 else { /* normal lexing */ 140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 141 while ((c = getsc()) == ' ' || c == '\t') 142 ; 143 if (c == '#') { 144 ignore_backslash_newline++; 145 while ((c = getsc()) != '\0' && c != '\n') 146 ; 147 ignore_backslash_newline--; 148 } 149 ungetsc(c); 150 } 151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ 152 source->flags &= ~SF_ALIAS; 153 /* In POSIX mode, a trailing space only counts if we are 154 * parsing a simple command 155 */ 156 if (!Flag(FPOSIX) || (cf & CMDWORD)) 157 cf |= ALIAS; 158 } 159 160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ 161 statep->ls_state = state; 162 163 /* collect non-special or quoted characters to form word */ 164 while (!((c = getsc()) == 0 165 || ((state == SBASE || state == SHEREDELIM) 166 && ctype(c, C_LEX1)))) 167 { 168 Xcheck(ws, wp); 169 switch (state) { 170 case SBASE: 171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 172 *wp = EOS; /* temporary */ 173 if (is_wdvarname(Xstring(ws, wp), false)) 174 { 175 char *p, *tmp; 176 177 if (arraysub(&tmp)) { 178 *wp++ = CHAR; 179 *wp++ = c; 180 for (p = tmp; *p; ) { 181 Xcheck(ws, wp); 182 *wp++ = CHAR; 183 *wp++ = *p++; 184 } 185 afree(tmp, ATEMP); 186 break; 187 } else { 188 Source *s; 189 190 s = pushs(SREREAD, 191 source->areap); 192 s->start = s->str 193 = s->u.freeme = tmp; 194 s->next = source; 195 source = s; 196 } 197 } 198 *wp++ = CHAR; 199 *wp++ = c; 200 break; 201 } 202 /* fall through.. */ 203 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 204 #ifdef KSH 205 if (c == '*' || c == '@' || c == '+' || c == '?' 206 || c == '!') 207 { 208 c2 = getsc(); 209 if (c2 == '(' /*)*/ ) { 210 *wp++ = OPAT; 211 *wp++ = c; 212 PUSH_STATE(SPATTERN); 213 break; 214 } 215 ungetsc(c2); 216 } 217 #endif /* KSH */ 218 /* fall through.. */ 219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 220 switch (c) { 221 case '\\': 222 c = getsc(); 223 if (c) /* trailing \ is lost */ 224 *wp++ = QCHAR, *wp++ = c; 225 break; 226 case '\'': 227 *wp++ = OQUOTE; 228 ignore_backslash_newline++; 229 PUSH_STATE(SSQUOTE); 230 break; 231 case '"': 232 *wp++ = OQUOTE; 233 PUSH_STATE(SDQUOTE); 234 break; 235 default: 236 goto Subst; 237 } 238 break; 239 240 Subst: 241 switch (c) { 242 Lex_state *s; 243 Lex_state *base; 244 245 case '\\': 246 c = getsc(); 247 switch (c) { 248 case '\\': 249 case '$': case '`': 250 *wp++ = QCHAR, *wp++ = c; 251 break; 252 case '"': 253 if ((cf & HEREDOC) == 0) { 254 *wp++ = QCHAR, *wp++ = c; 255 break; 256 } 257 /* FALLTHROUGH */ 258 default: 259 Xcheck(ws, wp); 260 if (c) { /* trailing \ is lost */ 261 *wp++ = CHAR, *wp++ = '\\'; 262 *wp++ = CHAR, *wp++ = c; 263 } 264 break; 265 } 266 break; 267 case '$': 268 c = getsc(); 269 if (c == '(') /*)*/ { 270 c = getsc(); 271 if (c == '(') /*)*/ { 272 PUSH_STATE(SASPAREN); 273 statep->ls_sasparen.nparen = 2; 274 statep->ls_sasparen.start = 275 Xsavepos(ws, wp); 276 *wp++ = EXPRSUB; 277 } else { 278 ungetsc(c); 279 PUSH_STATE(SCSPAREN); 280 statep->ls_scsparen.nparen = 1; 281 statep->ls_scsparen.csstate = 0; 282 *wp++ = COMSUB; 283 } 284 } else if (c == '{') /*}*/ { 285 *wp++ = OSUBST; 286 *wp++ = '{'; /*}*/ 287 wp = get_brace_var(&ws, wp); 288 c = getsc(); 289 /* allow :# and :% (ksh88 compat) */ 290 if (c == ':') { 291 *wp++ = CHAR, *wp++ = c; 292 c = getsc(); 293 } 294 /* If this is a trim operation, 295 * treat (,|,) specially in STBRACE. 296 */ 297 if (c == '#' || c == '%') { 298 ungetsc(c); 299 PUSH_STATE(STBRACE); 300 } else { 301 ungetsc(c); 302 PUSH_STATE(SBRACE); 303 } 304 } else if (ctype(c, C_ALPHA)) { 305 *wp++ = OSUBST; 306 *wp++ = 'X'; 307 do { 308 Xcheck(ws, wp); 309 *wp++ = c; 310 c = getsc(); 311 } while (ctype(c, C_ALPHA|C_DIGIT)); 312 *wp++ = '\0'; 313 *wp++ = CSUBST; 314 *wp++ = 'X'; 315 ungetsc(c); 316 } else if (ctype(c, C_DIGIT|C_VAR1)) { 317 Xcheck(ws, wp); 318 *wp++ = OSUBST; 319 *wp++ = 'X'; 320 *wp++ = c; 321 *wp++ = '\0'; 322 *wp++ = CSUBST; 323 *wp++ = 'X'; 324 } else { 325 *wp++ = CHAR, *wp++ = '$'; 326 ungetsc(c); 327 } 328 break; 329 case '`': 330 PUSH_STATE(SBQUOTE); 331 *wp++ = COMSUB; 332 /* Need to know if we are inside double quotes 333 * since sh/at&t-ksh translate the \" to " in 334 * "`..\"..`". POSIX also requires this. 335 * An earlier version of ksh misinterpreted 336 * the POSIX specification and performed 337 * removal of backslash escapes only if 338 * posix mode was not in effect. 339 */ 340 statep->ls_sbquote.indquotes = 0; 341 s = statep; 342 base = state_info.base; 343 while (1) { 344 for (; s != base; s--) { 345 if (s->ls_state == SDQUOTE) { 346 statep->ls_sbquote.indquotes = 1; 347 break; 348 } 349 } 350 if (s != base) 351 break; 352 if (!(s = s->ls_info.base)) 353 break; 354 base = s-- - STATE_BSIZE; 355 } 356 break; 357 default: 358 *wp++ = CHAR, *wp++ = c; 359 } 360 break; 361 362 case SSQUOTE: 363 if (c == '\'') { 364 POP_STATE(); 365 *wp++ = CQUOTE; 366 ignore_backslash_newline--; 367 } else 368 *wp++ = QCHAR, *wp++ = c; 369 break; 370 371 case SDQUOTE: 372 if (c == '"') { 373 POP_STATE(); 374 *wp++ = CQUOTE; 375 } else 376 goto Subst; 377 break; 378 379 case SCSPAREN: /* $( .. ) */ 380 /* todo: deal with $(...) quoting properly 381 * kludge to partly fake quoting inside $(..): doesn't 382 * really work because nested $(..) or ${..} inside 383 * double quotes aren't dealt with. 384 */ 385 switch (statep->ls_scsparen.csstate) { 386 case 0: /* normal */ 387 switch (c) { 388 case '(': 389 statep->ls_scsparen.nparen++; 390 break; 391 case ')': 392 statep->ls_scsparen.nparen--; 393 break; 394 case '\\': 395 statep->ls_scsparen.csstate = 1; 396 break; 397 case '"': 398 statep->ls_scsparen.csstate = 2; 399 break; 400 case '\'': 401 statep->ls_scsparen.csstate = 4; 402 ignore_backslash_newline++; 403 break; 404 } 405 break; 406 407 case 1: /* backslash in normal mode */ 408 case 3: /* backslash in double quotes */ 409 --statep->ls_scsparen.csstate; 410 break; 411 412 case 2: /* double quotes */ 413 if (c == '"') 414 statep->ls_scsparen.csstate = 0; 415 else if (c == '\\') 416 statep->ls_scsparen.csstate = 3; 417 break; 418 419 case 4: /* single quotes */ 420 if (c == '\'') { 421 statep->ls_scsparen.csstate = 0; 422 ignore_backslash_newline--; 423 } 424 break; 425 } 426 if (statep->ls_scsparen.nparen == 0) { 427 POP_STATE(); 428 *wp++ = 0; /* end of COMSUB */ 429 } else 430 *wp++ = c; 431 break; 432 433 case SASPAREN: /* $(( .. )) */ 434 /* todo: deal with $((...); (...)) properly */ 435 /* XXX should nest using existing state machine 436 * (embed "..", $(...), etc.) */ 437 if (c == '(') 438 statep->ls_sasparen.nparen++; 439 else if (c == ')') { 440 statep->ls_sasparen.nparen--; 441 if (statep->ls_sasparen.nparen == 1) { 442 /*(*/ 443 if ((c2 = getsc()) == ')') { 444 POP_STATE(); 445 *wp++ = 0; /* end of EXPRSUB */ 446 break; 447 } else { 448 char *s; 449 450 ungetsc(c2); 451 /* mismatched parenthesis - 452 * assume we were really 453 * parsing a $(..) expression 454 */ 455 s = Xrestpos(ws, wp, 456 statep->ls_sasparen.start); 457 memmove(s + 1, s, wp - s); 458 *s++ = COMSUB; 459 *s = '('; /*)*/ 460 wp++; 461 statep->ls_scsparen.nparen = 1; 462 statep->ls_scsparen.csstate = 0; 463 state = statep->ls_state 464 = SCSPAREN; 465 466 } 467 } 468 } 469 *wp++ = c; 470 break; 471 472 case SBRACE: 473 /*{*/ 474 if (c == '}') { 475 POP_STATE(); 476 *wp++ = CSUBST; 477 *wp++ = /*{*/ '}'; 478 } else 479 goto Sbase1; 480 break; 481 482 case STBRACE: 483 /* Same as SBRACE, except (,|,) treated specially */ 484 /*{*/ 485 if (c == '}') { 486 POP_STATE(); 487 *wp++ = CSUBST; 488 *wp++ = /*{*/ '}'; 489 } else if (c == '|') { 490 *wp++ = SPAT; 491 } else if (c == '(') { 492 *wp++ = OPAT; 493 *wp++ = ' '; /* simile for @ */ 494 PUSH_STATE(SPATTERN); 495 } else 496 goto Sbase1; 497 break; 498 499 case SBQUOTE: 500 if (c == '`') { 501 *wp++ = 0; 502 POP_STATE(); 503 } else if (c == '\\') { 504 switch (c = getsc()) { 505 case '\\': 506 case '$': case '`': 507 *wp++ = c; 508 break; 509 case '"': 510 if (statep->ls_sbquote.indquotes) { 511 *wp++ = c; 512 break; 513 } 514 /* fall through.. */ 515 default: 516 if (c) { /* trailing \ is lost */ 517 *wp++ = '\\'; 518 *wp++ = c; 519 } 520 break; 521 } 522 } else 523 *wp++ = c; 524 break; 525 526 case SWORD: /* ONEWORD */ 527 goto Subst; 528 529 #ifdef KSH 530 case SLETPAREN: /* LETEXPR: (( ... )) */ 531 /*(*/ 532 if (c == ')') { 533 if (statep->ls_sletparen.nparen > 0) 534 --statep->ls_sletparen.nparen; 535 /*(*/ 536 else if ((c2 = getsc()) == ')') { 537 c = 0; 538 *wp++ = CQUOTE; 539 goto Done; 540 } else 541 ungetsc(c2); 542 } else if (c == '(') 543 /* parenthesis inside quotes and backslashes 544 * are lost, but at&t ksh doesn't count them 545 * either 546 */ 547 ++statep->ls_sletparen.nparen; 548 goto Sbase2; 549 #endif /* KSH */ 550 551 case SHEREDELIM: /* <<,<<- delimiter */ 552 /* XXX chuck this state (and the next) - use 553 * the existing states ($ and \`..` should be 554 * stripped of their specialness after the 555 * fact). 556 */ 557 /* here delimiters need a special case since 558 * $ and `..` are not to be treated specially 559 */ 560 if (c == '\\') { 561 c = getsc(); 562 if (c) { /* trailing \ is lost */ 563 *wp++ = QCHAR; 564 *wp++ = c; 565 } 566 } else if (c == '\'') { 567 PUSH_STATE(SSQUOTE); 568 *wp++ = OQUOTE; 569 ignore_backslash_newline++; 570 } else if (c == '"') { 571 state = statep->ls_state = SHEREDQUOTE; 572 *wp++ = OQUOTE; 573 } else { 574 *wp++ = CHAR; 575 *wp++ = c; 576 } 577 break; 578 579 case SHEREDQUOTE: /* " in <<,<<- delimiter */ 580 if (c == '"') { 581 *wp++ = CQUOTE; 582 state = statep->ls_state = SHEREDELIM; 583 } else { 584 if (c == '\\') { 585 switch (c = getsc()) { 586 case '\\': case '"': 587 case '$': case '`': 588 break; 589 default: 590 if (c) { /* trailing \ lost */ 591 *wp++ = CHAR; 592 *wp++ = '\\'; 593 } 594 break; 595 } 596 } 597 *wp++ = CHAR; 598 *wp++ = c; 599 } 600 break; 601 602 case SPATTERN: /* in *(...|...) pattern (*+?@!) */ 603 if ( /*(*/ c == ')') { 604 *wp++ = CPAT; 605 POP_STATE(); 606 } else if (c == '|') { 607 *wp++ = SPAT; 608 } else if (c == '(') { 609 *wp++ = OPAT; 610 *wp++ = ' '; /* simile for @ */ 611 PUSH_STATE(SPATTERN); 612 } else 613 goto Sbase1; 614 break; 615 } 616 } 617 Done: 618 Xcheck(ws, wp); 619 if (statep != &states[1]) 620 /* XXX figure out what is missing */ 621 yyerror("no closing quote\n"); 622 623 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 624 if (state == SHEREDELIM) 625 state = SBASE; 626 627 dp = Xstring(ws, wp); 628 if ((c == '<' || c == '>') && state == SBASE 629 && ((c2 = Xlength(ws, wp)) == 0 630 || (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) 631 { 632 struct ioword *iop = 633 (struct ioword *) alloc(sizeof(*iop), ATEMP); 634 635 if (c2 == 2) 636 iop->unit = dp[1] - '0'; 637 else 638 iop->unit = c == '>'; /* 0 for <, 1 for > */ 639 640 c2 = getsc(); 641 /* <<, >>, <> are ok, >< is not */ 642 if (c == c2 || (c == '<' && c2 == '>')) { 643 iop->flag = c == c2 ? 644 (c == '>' ? IOCAT : IOHERE) : IORDWR; 645 if (iop->flag == IOHERE) { 646 if ((c2 = getsc()) == '-') { 647 iop->flag |= IOSKIP; 648 } else { 649 ungetsc(c2); 650 } 651 } 652 } else if (c2 == '&') 653 iop->flag = IODUP | (c == '<' ? IORDUP : 0); 654 else { 655 iop->flag = c == '>' ? IOWRITE : IOREAD; 656 if (c == '>' && c2 == '|') 657 iop->flag |= IOCLOB; 658 else 659 ungetsc(c2); 660 } 661 662 iop->name = (char *) 0; 663 iop->delim = (char *) 0; 664 iop->heredoc = (char *) 0; 665 Xfree(ws, wp); /* free word */ 666 yylval.iop = iop; 667 return REDIR; 668 } 669 670 if (wp == dp && state == SBASE) { 671 Xfree(ws, wp); /* free word */ 672 /* no word, process LEX1 character */ 673 switch (c) { 674 default: 675 return c; 676 677 case '|': 678 case '&': 679 case ';': 680 if ((c2 = getsc()) == c) 681 c = (c == ';') ? BREAK : 682 (c == '|') ? LOGOR : 683 (c == '&') ? LOGAND : 684 YYERRCODE; 685 #ifdef KSH 686 else if (c == '|' && c2 == '&') 687 c = COPROC; 688 #endif /* KSH */ 689 else 690 ungetsc(c2); 691 return c; 692 693 case '\n': 694 gethere(); 695 if (cf & CONTIN) 696 goto Again; 697 return c; 698 699 case '(': /*)*/ 700 #ifdef KSH 701 if ((c2 = getsc()) == '(') /*)*/ 702 /* XXX need to handle ((...); (...)) */ 703 c = MDPAREN; 704 else 705 ungetsc(c2); 706 #endif /* KSH */ 707 return c; 708 /*(*/ 709 case ')': 710 return c; 711 } 712 } 713 714 *wp++ = EOS; /* terminate word */ 715 yylval.cp = Xclose(ws, wp); 716 if (state == SWORD 717 #ifdef KSH 718 || state == SLETPAREN 719 #endif /* KSH */ 720 ) /* ONEWORD? */ 721 return LWORD; 722 ungetsc(c); /* unget terminator */ 723 724 /* copy word to unprefixed string ident */ 725 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; ) 726 *dp++ = *sp++; 727 /* Make sure the ident array stays '\0' padded */ 728 memset(dp, 0, (ident+IDENT) - dp + 1); 729 if (c != EOS) 730 *ident = '\0'; /* word is not unquoted */ 731 732 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { 733 struct tbl *p; 734 int h = hash(ident); 735 736 /* { */ 737 if ((cf & KEYWORD) && (p = mytsearch(&keywords, ident, h)) 738 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) 739 { 740 afree(yylval.cp, ATEMP); 741 return p->val.i; 742 } 743 if ((cf & ALIAS) && (p = mytsearch(&aliases, ident, h)) 744 && (p->flag & ISSET)) 745 { 746 Source *s; 747 748 for (s = source; s->type == SALIAS; s = s->next) 749 if (s->u.tblp == p) 750 return LWORD; 751 /* push alias expansion */ 752 s = pushs(SALIAS, source->areap); 753 s->start = s->str = p->val.s; 754 s->u.tblp = p; 755 s->next = source; 756 source = s; 757 afree(yylval.cp, ATEMP); 758 goto Again; 759 } 760 } 761 762 return LWORD; 763 } 764 765 static void 766 gethere() 767 { 768 struct ioword **p; 769 770 for (p = heres; p < herep; p++) 771 readhere(*p); 772 herep = heres; 773 } 774 775 /* 776 * read "<<word" text into temp file 777 */ 778 779 static void 780 readhere(iop) 781 struct ioword *iop; 782 { 783 int c; 784 char *volatile eof; 785 char *eofp; 786 int skiptabs; 787 XString xs; 788 char *xp; 789 int xpos; 790 791 eof = evalstr(iop->delim, 0); 792 793 if (!(iop->flag & IOEVAL)) 794 ignore_backslash_newline++; 795 796 Xinit(xs, xp, 256, ATEMP); 797 798 for (;;) { 799 eofp = eof; 800 skiptabs = iop->flag & IOSKIP; 801 xpos = Xsavepos(xs, xp); 802 while ((c = getsc()) != 0) { 803 if (skiptabs) { 804 if (c == '\t') 805 continue; 806 skiptabs = 0; 807 } 808 if (c != *eofp) 809 break; 810 Xcheck(xs, xp); 811 Xput(xs, xp, c); 812 eofp++; 813 } 814 /* Allow EOF here so commands with out trailing newlines 815 * will work (eg, ksh -c '...', $(...), etc). 816 */ 817 if (*eofp == '\0' && (c == 0 || c == '\n')) { 818 xp = Xrestpos(xs, xp, xpos); 819 break; 820 } 821 ungetsc(c); 822 while ((c = getsc()) != '\n') { 823 if (c == 0) 824 yyerror("here document `%s' unclosed\n", eof); 825 Xcheck(xs, xp); 826 Xput(xs, xp, c); 827 } 828 Xcheck(xs, xp); 829 Xput(xs, xp, c); 830 } 831 Xput(xs, xp, '\0'); 832 iop->heredoc = Xclose(xs, xp); 833 834 if (!(iop->flag & IOEVAL)) 835 ignore_backslash_newline--; 836 } 837 838 void 839 yyerror(const char *fmt, ...) 840 { 841 va_list va; 842 843 /* pop aliases and re-reads */ 844 while (source->type == SALIAS || source->type == SREREAD) 845 source = source->next; 846 source->str = null; /* zap pending input */ 847 848 error_prefix(true); 849 va_start(va, fmt); 850 shf_vfprintf(shl_out, fmt, va); 851 va_end(va); 852 errorf("%s", null); 853 } 854 855 /* 856 * input for yylex with alias expansion 857 */ 858 859 Source * 860 pushs(type, areap) 861 int type; 862 Area *areap; 863 { 864 Source *s; 865 866 s = (Source *) alloc(sizeof(Source), areap); 867 s->type = type; 868 s->str = null; 869 s->start = NULL; 870 s->line = 0; 871 s->errline = 0; 872 s->file = NULL; 873 s->flags = 0; 874 s->next = NULL; 875 s->areap = areap; 876 if (type == SFILE || type == SSTDIN) { 877 char *dummy; 878 Xinit(s->xs, dummy, 256, s->areap); 879 } else 880 memset(&s->xs, 0, sizeof(s->xs)); 881 return s; 882 } 883 884 static int 885 getsc__() 886 { 887 Source *s = source; 888 int c; 889 890 while ((c = *s->str++) == 0) { 891 s->str = NULL; /* return 0 for EOF by default */ 892 switch (s->type) { 893 case SEOF: 894 s->str = null; 895 return 0; 896 897 case SSTDIN: 898 case SFILE: 899 getsc_line(s); 900 break; 901 902 case SWSTR: 903 break; 904 905 case SSTRING: 906 break; 907 908 case SWORDS: 909 s->start = s->str = *s->u.strv++; 910 s->type = SWORDSEP; 911 break; 912 913 case SWORDSEP: 914 if (*s->u.strv == NULL) { 915 s->start = s->str = newline; 916 s->type = SEOF; 917 } else { 918 s->start = s->str = space; 919 s->type = SWORDS; 920 } 921 break; 922 923 case SALIAS: 924 if (s->flags & SF_ALIASEND) { 925 /* pass on an unused SF_ALIAS flag */ 926 source = s->next; 927 source->flags |= s->flags & SF_ALIAS; 928 s = source; 929 } else if (*s->u.tblp->val.s 930 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) 931 { 932 source = s = s->next; /* pop source stack */ 933 /* Note that this alias ended with a space, 934 * enabling alias expansion on the following 935 * word. 936 */ 937 s->flags |= SF_ALIAS; 938 } else { 939 /* At this point, we need to keep the current 940 * alias in the source list so recursive 941 * aliases can be detected and we also need 942 * to return the next character. Do this 943 * by temporarily popping the alias to get 944 * the next character and then put it back 945 * in the source list with the SF_ALIASEND 946 * flag set. 947 */ 948 source = s->next; /* pop source stack */ 949 source->flags |= s->flags & SF_ALIAS; 950 c = getsc__(); 951 if (c) { 952 s->flags |= SF_ALIASEND; 953 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 954 s->start = s->str = s->ugbuf; 955 s->next = source; 956 source = s; 957 } else { 958 s = source; 959 /* avoid reading eof twice */ 960 s->str = NULL; 961 break; 962 } 963 } 964 continue; 965 966 case SREREAD: 967 if (s->start != s->ugbuf) /* yuck */ 968 afree(s->u.freeme, ATEMP); 969 source = s = s->next; 970 continue; 971 } 972 if (s->str == NULL) { 973 s->type = SEOF; 974 s->start = s->str = null; 975 return '\0'; 976 } 977 if (s->flags & SF_ECHO) { 978 shf_puts(s->str, shl_out); 979 shf_flush(shl_out); 980 } 981 } 982 return c; 983 } 984 985 static void 986 getsc_line(s) 987 Source *s; 988 { 989 char *xp = Xstring(s->xs, xp); 990 int interactive = Flag(FTALKING) && s->type == SSTDIN; 991 int have_tty = interactive && (s->flags & SF_TTY); 992 993 /* Done here to ensure nothing odd happens when a timeout occurs */ 994 XcheckN(s->xs, xp, LINE); 995 *xp = '\0'; 996 s->start = s->str = xp; 997 998 #ifdef KSH 999 if (have_tty && ksh_tmout) { 1000 ksh_tmout_state = TMOUT_READING; 1001 alarm(ksh_tmout); 1002 } 1003 #endif /* KSH */ 1004 #ifdef EDIT 1005 if (have_tty && (0 1006 # ifdef VI 1007 || Flag(FVI) 1008 # endif /* VI */ 1009 # ifdef EMACS 1010 || Flag(FEMACS) || Flag(FGMACS) 1011 # endif /* EMACS */ 1012 )) 1013 { 1014 int nread; 1015 1016 nread = x_read(xp, LINE); 1017 if (nread < 0) /* read error */ 1018 nread = 0; 1019 xp[nread] = '\0'; 1020 xp += nread; 1021 } 1022 else 1023 #endif /* EDIT */ 1024 { 1025 if (interactive) { 1026 pprompt(prompt, 0); 1027 } else 1028 s->line++; 1029 1030 while (1) { 1031 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1032 1033 if (!p && shf_error(s->u.shf) 1034 && shf_errno(s->u.shf) == EINTR) 1035 { 1036 shf_clearerr(s->u.shf); 1037 if (trap) 1038 runtraps(0); 1039 continue; 1040 } 1041 if (!p || (xp = p, xp[-1] == '\n')) 1042 break; 1043 /* double buffer size */ 1044 xp++; /* move past null so doubling works... */ 1045 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1046 xp--; /* ...and move back again */ 1047 } 1048 /* flush any unwanted input so other programs/builtins 1049 * can read it. Not very optimal, but less error prone 1050 * than flushing else where, dealing with redirections, 1051 * etc.. 1052 * todo: reduce size of shf buffer (~128?) if SSTDIN 1053 */ 1054 if (s->type == SSTDIN) 1055 shf_flush(s->u.shf); 1056 } 1057 /* XXX: temporary kludge to restore source after a 1058 * trap may have been executed. 1059 */ 1060 source = s; 1061 #ifdef KSH 1062 if (have_tty && ksh_tmout) 1063 { 1064 ksh_tmout_state = TMOUT_EXECUTING; 1065 alarm(0); 1066 } 1067 #endif /* KSH */ 1068 s->start = s->str = Xstring(s->xs, xp); 1069 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1070 /* Note: if input is all nulls, this is not eof */ 1071 if (Xlength(s->xs, xp) == 0) { /* EOF */ 1072 if (s->type == SFILE) 1073 shf_fdclose(s->u.shf); 1074 s->str = NULL; 1075 } else if (interactive) { 1076 #ifdef HISTORY 1077 char *p = Xstring(s->xs, xp); 1078 if (cur_prompt == PS1) 1079 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS)) 1080 p++; 1081 if (*p) { 1082 # ifdef EASY_HISTORY 1083 if (cur_prompt == PS2) 1084 histappend(Xstring(s->xs, xp), 1); 1085 else 1086 # endif /* EASY_HISTORY */ 1087 { 1088 s->line++; 1089 histsave(s->line, s->str, 1); 1090 } 1091 } 1092 #endif /* HISTORY */ 1093 } 1094 if (interactive) 1095 set_prompt(PS2, (Source *) 0); 1096 } 1097 1098 void 1099 set_prompt(to, s) 1100 int to; 1101 Source *s; 1102 { 1103 cur_prompt = to; 1104 1105 switch (to) { 1106 case PS1: /* command */ 1107 #ifdef KSH 1108 /* Substitute ! and !! here, before substitutions are done 1109 * so ! in expanded variables are not expanded. 1110 * NOTE: this is not what at&t ksh does (it does it after 1111 * substitutions, POSIX doesn't say which is to be done. 1112 */ 1113 { 1114 struct shf *shf; 1115 char * volatile ps1; 1116 Area *saved_atemp; 1117 1118 ps1 = str_val(global("PS1")); 1119 shf = shf_sopen((char *) 0, strlen(ps1) * 2, 1120 SHF_WR | SHF_DYNAMIC, (struct shf *) 0); 1121 while (*ps1) { 1122 if (*ps1 != '!' || *++ps1 == '!') 1123 shf_putchar(*ps1++, shf); 1124 else 1125 shf_fprintf(shf, "%d", 1126 s ? s->line + 1 : 0); 1127 } 1128 ps1 = shf_sclose(shf); 1129 saved_atemp = ATEMP; 1130 newenv(E_ERRH); 1131 if (ksh_sigsetjmp(e->jbuf, 0)) { 1132 prompt = safe_prompt; 1133 /* Don't print an error - assume it has already 1134 * been printed. Reason is we may have forked 1135 * to run a command and the child may be 1136 * unwinding its stack through this code as it 1137 * exits. 1138 */ 1139 } else 1140 prompt = str_save(substitute(ps1, 0), 1141 saved_atemp); 1142 quitenv(); 1143 } 1144 #else /* KSH */ 1145 prompt = str_val(global("PS1")); 1146 #endif /* KSH */ 1147 break; 1148 1149 case PS2: /* command continuation */ 1150 prompt = str_val(global("PS2")); 1151 break; 1152 } 1153 } 1154 1155 /* See also related routine, promptlen() in edit.c */ 1156 void 1157 pprompt(cp, ntruncate) 1158 const char *cp; 1159 int ntruncate; 1160 { 1161 #if 0 1162 char nbuf[32]; 1163 int c; 1164 1165 while (*cp != 0) { 1166 if (*cp != '!') 1167 c = *cp++; 1168 else if (*++cp == '!') 1169 c = *cp++; 1170 else { 1171 int len; 1172 char *p; 1173 1174 shf_snprintf(p = nbuf, sizeof(nbuf), "%d", 1175 source->line + 1); 1176 len = strlen(nbuf); 1177 if (ntruncate) { 1178 if (ntruncate >= len) { 1179 ntruncate -= len; 1180 continue; 1181 } 1182 p += ntruncate; 1183 len -= ntruncate; 1184 ntruncate = 0; 1185 } 1186 shf_write(p, len, shl_out); 1187 continue; 1188 } 1189 if (ntruncate) 1190 --ntruncate; 1191 else 1192 shf_putc(c, shl_out); 1193 } 1194 #endif /* 0 */ 1195 shf_puts(cp + ntruncate, shl_out); 1196 shf_flush(shl_out); 1197 } 1198 1199 /* Read the variable part of a ${...} expression (ie, up to but not including 1200 * the :[-+?=#%] or close-brace. 1201 */ 1202 static char * 1203 get_brace_var(wsp, wp) 1204 XString *wsp; 1205 char *wp; 1206 { 1207 enum parse_state { 1208 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1209 PS_NUMBER, PS_VAR1, PS_END 1210 } 1211 state; 1212 char c; 1213 1214 state = PS_INITIAL; 1215 while (1) { 1216 c = getsc(); 1217 /* State machine to figure out where the variable part ends. */ 1218 switch (state) { 1219 case PS_INITIAL: 1220 if (c == '#') { 1221 state = PS_SAW_HASH; 1222 break; 1223 } 1224 /* fall through.. */ 1225 case PS_SAW_HASH: 1226 if (letter(c)) 1227 state = PS_IDENT; 1228 else if (digit(c)) 1229 state = PS_NUMBER; 1230 else if (ctype(c, C_VAR1)) 1231 state = PS_VAR1; 1232 else 1233 state = PS_END; 1234 break; 1235 case PS_IDENT: 1236 if (!letnum(c)) { 1237 state = PS_END; 1238 if (c == '[') { 1239 char *tmp, *p; 1240 1241 if (!arraysub(&tmp)) 1242 yyerror("missing ]\n"); 1243 *wp++ = c; 1244 for (p = tmp; *p; ) { 1245 Xcheck(*wsp, wp); 1246 *wp++ = *p++; 1247 } 1248 afree(tmp, ATEMP); 1249 c = getsc(); /* the ] */ 1250 } 1251 } 1252 break; 1253 case PS_NUMBER: 1254 if (!digit(c)) 1255 state = PS_END; 1256 break; 1257 case PS_VAR1: 1258 state = PS_END; 1259 break; 1260 case PS_END: /* keep gcc happy */ 1261 break; 1262 } 1263 if (state == PS_END) { 1264 *wp++ = '\0'; /* end of variable part */ 1265 ungetsc(c); 1266 break; 1267 } 1268 Xcheck(*wsp, wp); 1269 *wp++ = c; 1270 } 1271 return wp; 1272 } 1273 1274 /* 1275 * Save an array subscript - returns true if matching bracket found, false 1276 * if eof or newline was found. 1277 * (Returned string double null terminated) 1278 */ 1279 static int 1280 arraysub(strp) 1281 char **strp; 1282 { 1283 XString ws; 1284 char *wp; 1285 char c; 1286 int depth = 1; /* we are just past the initial [ */ 1287 1288 Xinit(ws, wp, 32, ATEMP); 1289 1290 do { 1291 c = getsc(); 1292 Xcheck(ws, wp); 1293 *wp++ = c; 1294 if (c == '[') 1295 depth++; 1296 else if (c == ']') 1297 depth--; 1298 } while (depth > 0 && c && c != '\n'); 1299 1300 *wp++ = '\0'; 1301 *strp = Xclose(ws, wp); 1302 1303 return depth == 0 ? 1 : 0; 1304 } 1305 1306 /* Unget a char: handles case when we are already at the start of the buffer */ 1307 static const char * 1308 ungetsc(c) 1309 int c; 1310 { 1311 if (backslash_skip) 1312 backslash_skip--; 1313 /* Don't unget eof... */ 1314 if (source->str == null && c == '\0') 1315 return source->str; 1316 if (source->str > source->start) 1317 source->str--; 1318 else { 1319 Source *s; 1320 1321 s = pushs(SREREAD, source->areap); 1322 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1323 s->start = s->str = s->ugbuf; 1324 s->next = source; 1325 source = s; 1326 } 1327 return source->str; 1328 } 1329 1330 1331 /* Called to get a char that isn't a \newline sequence. */ 1332 static int 1333 getsc_bn ARGS((void)) 1334 { 1335 int c, c2; 1336 1337 if (ignore_backslash_newline) 1338 return getsc_(); 1339 1340 if (backslash_skip == 1) { 1341 backslash_skip = 2; 1342 return getsc_(); 1343 } 1344 1345 backslash_skip = 0; 1346 1347 while (1) { 1348 c = getsc_(); 1349 if (c == '\\') { 1350 if ((c2 = getsc_()) == '\n') 1351 /* ignore the \newline; get the next char... */ 1352 continue; 1353 ungetsc(c2); 1354 backslash_skip = 1; 1355 } 1356 return c; 1357 } 1358 } 1359 1360 static Lex_state * 1361 push_state_(si, old_end) 1362 State_info *si; 1363 Lex_state *old_end; 1364 { 1365 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP); 1366 1367 new[0].ls_info.base = old_end; 1368 si->base = &new[0]; 1369 si->end = &new[STATE_BSIZE]; 1370 return &new[1]; 1371 } 1372 1373 static Lex_state * 1374 pop_state_(si, old_end) 1375 State_info *si; 1376 Lex_state *old_end; 1377 { 1378 Lex_state *old_base = si->base; 1379 1380 si->base = old_end->ls_info.base - STATE_BSIZE; 1381 si->end = old_end->ls_info.base; 1382 1383 afree(old_base, ATEMP); 1384 1385 return si->base + STATE_BSIZE - 1; 1386 } 1387