re.c revision b9afefc9
1/* 2 * Copyright (c) 2002 by The XFree86 Project, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 * 22 * Except as contained in this notice, the name of the XFree86 Project shall 23 * not be used in advertising or otherwise to promote the sale, use or other 24 * dealings in this Software without prior written authorization from the 25 * XFree86 Project. 26 * 27 * Author: Paulo César Pereira de Andrade 28 */ 29 30/* $XFree86: xc/programs/xedit/lisp/re/re.c,v 1.8 2002/11/17 07:51:30 paulo Exp $ */ 31 32#include <stdio.h> 33#include "rep.h" 34 35/* 36 * Types 37 */ 38 39/* Information used when generating the final form of the compiled re. 40 */ 41struct _re_inf { 42 rec_alt *alt; 43 unsigned char *cod; 44 long len; 45 long spc; 46 47 /* Start offset of special repetition instruction */ 48 long sr[MAX_DEPTH]; 49 50 /* Jump offset of special repetition instruction */ 51 long sj[MAX_DEPTH]; 52 53 /* Just a flag, to know if this nesting is for a special repetition */ 54 char sp[MAX_DEPTH]; 55 56 int bas; /* Alternatives/repetitions depth */ 57 int par; /* Open parenthesis counter */ 58 int ref; /* Backreference counter */ 59 60 rec_pat *apat; /* Alternatives duplicate patterns 61 * if a special repetition is found, 62 * this is done to somewhat simplify 63 * the bytecode engine and still allow 64 * some complex (and time consuming) 65 * patterns. */ 66 67 int flags; 68 int ecode; 69}; 70 71/* This structure is not associated with re_cod as it's data only matters 72 * to the current match search. 73 */ 74struct _re_eng { 75 unsigned char *bas; /* Base string pointer */ 76 unsigned char *str; /* String to search for pattern */ 77 unsigned char *end; /* Where to stop searching */ 78 unsigned char *cod; /* Pointer in the re_cod structure */ 79 long off; /* Number of used entries in so/eo etc */ 80 81 /* Match offset/nesting information */ 82 long so[MAX_DEPTH]; /* (s)tart of (m)atch */ 83 long eo[MAX_DEPTH]; /* (e)nd of (m)atch */ 84 long sv[MAX_DEPTH]; /* (s)a(v)e match end offset */ 85 long re[MAX_DEPTH]; /* (re)petition count */ 86 long ss[MAX_DEPTH]; /* (s)ave (s)tart of match */ 87 unsigned char *rcod[MAX_DEPTH]; /* restart position in regex code */ 88 unsigned char *rstr[MAX_DEPTH]; /* restart position in string */ 89 90 /* Group/backreference information */ 91 long goff; 92 long gso[9]; 93 long geo[9]; 94}; 95 96/* 97 * Prototypes 98 */ 99static void reinit(void); 100static int rec_check(re_inf*, int); 101static int rec_code(re_inf*, ReCode); 102static int rec_byte(re_inf*, int); 103static int rec_byte_byte(re_inf*, int, int); 104static int rec_code_byte(re_inf*, ReCode, int); 105static int rec_length(re_inf*, int); 106static int rec_code_byte_byte(re_inf*, ReCode, int, int); 107static int rec_build_alt(re_inf*, rec_alt*); 108static int rec_build_pat(re_inf*, rec_pat*); 109static int rec_build_rng(re_inf*, rec_rng*); 110static int rec_build_grp(re_inf*, rec_grp*); 111static int rec_build_stl(re_inf*, rec_stl*); 112static int rec_build_rep(re_inf*, rec_rep*); 113static int rec_inc_spc(re_inf*); 114static int rec_dec_spc(re_inf*); 115static int rec_add_spc(re_inf*, int); 116static int rec_off_spc(re_inf*); 117static int rec_alt_spc(re_inf*, int); 118static int rec_rep_spc(re_inf*, int); 119#ifdef DEBUG 120static void redump(re_cod*); 121#endif 122 123/* 124 * Initialization 125 */ 126unsigned char re__alnum[256]; 127unsigned char re__odigit[256]; 128unsigned char re__ddigit[256]; 129unsigned char re__xdigit[256]; 130unsigned char re__control[256]; 131 132/* 133 * Implementation 134 */ 135int 136recomp(re_cod *preg, const char *pattern, int flags) 137{ 138 int i, ecode; 139 re_inf inf; 140 141 reinit(); 142 143 preg->cod = NULL; 144 inf.alt = irec_comp(pattern, 145 flags & RE_PEND ? preg->re_endp : 146 pattern + strlen(pattern), 147 flags, &ecode); 148 if (ecode != 0) 149 return (ecode); 150 151 inf.cod = NULL; 152 inf.len = inf.spc = 0; 153 inf.bas = 0; 154 inf.par = 0; 155 inf.ref = 0; 156 inf.apat = NULL; 157 inf.flags = flags; 158 inf.ecode = 0; 159 for (i = 0; i < MAX_DEPTH; i++) 160 inf.sp[i] = 0; 161 162 /* First byte is runtime modifier flags */ 163 if (rec_byte(&inf, flags & (RE_NEWLINE | RE_NOSUB)) == 0 && 164 rec_byte(&inf, 0xff) == 0 && 165 rec_build_alt(&inf, inf.alt) == 0 && 166 rec_rep_spc(&inf, 0) == 0 && 167 rec_code(&inf, Re_Done) == 0) { 168 /* Number of possible references, loops will not leave this 169 * value correct, but it is cheap to read it from the second 170 * byte, instead of adding several extra checks in the bytecode. */ 171 if (inf.ref) 172 inf.cod[1] = inf.ref - 1; 173 preg->cod = inf.cod; 174 /* Public structure member */ 175 preg->re_nsub = inf.ref; 176 } 177 178 irec_free_alt(inf.alt); 179 if (inf.ecode) 180 free(inf.cod); 181#ifdef DEBUG 182 else if (flags & RE_DUMP) 183 redump(preg); 184#endif 185 186 return (inf.ecode); 187} 188 189int 190reexec(const re_cod *preg, const char *string, 191 int nmatch, re_mat pmat[], int flags) 192{ 193 unsigned char *ptr, *str, newline, nosub; 194 int len, si, ci, bas, i, j, k, l, m; 195 re_eng eng; 196 197 if (preg == NULL || preg->cod == NULL || nmatch < 0 || 198 ((flags & RE_STARTEND) && 199 (pmat == NULL || pmat[0].rm_eo < pmat[0].rm_so))) 200 return (RE_INVARG); 201 202 eng.str = (unsigned char*)string; 203 if (flags & RE_STARTEND) { 204 eng.end = eng.str + pmat[0].rm_eo; 205 eng.str += pmat[0].rm_so; 206 } 207 else 208 eng.end = eng.str + strlen(string); 209 eng.bas = eng.str; 210 nosub = preg->cod[0] & RE_NOSUB; 211 newline = preg->cod[0] & RE_NEWLINE; 212 eng.cod = preg->cod + 2; 213 214 if (!nosub && preg->cod[1] != 0xff) { 215 for (i = 0; i <= preg->cod[1]; i++) { 216 eng.gso[i] = 0; 217 eng.geo[i] = -1; 218 } 219 } 220 221 /* Setup to search for start of match from the first character */ 222 eng.so[0] = 0; 223 eng.eo[0] = eng.sv[0] = -1; 224 eng.rcod[0] = eng.cod; 225 eng.rstr[0] = eng.str + 1; 226 eng.off = 0; 227 eng.goff = -1; 228 for (ci = si = 1;;) { 229reset: 230 switch (*eng.cod) { 231 /**************************************************** 232 * One byte codes * 233 ****************************************************/ 234 case Re_Any: 235 if (eng.str == eng.end || (newline && eng.str[0] == '\n')) 236 goto fail; 237 goto match; 238 case Re_AnyEatAnyTimes: 239 if (newline) { 240 for (ptr = eng.str; ptr < eng.end; ptr++) { 241 if (*ptr == '\n') 242 break; 243 } 244 si = ptr - eng.str; 245 } 246 else 247 si = eng.end - eng.str; 248 goto match; 249 case Re_AnyEatMaybe: 250 si = eng.end > eng.str; 251 if (newline && si && eng.str[0] == '\n') 252 si = 0; 253 goto match; 254 case Re_AnyEatAtLeast: 255 if (newline) { 256 for (ptr = eng.str; ptr < eng.end; ptr++) { 257 if (*ptr == '\n') 258 break; 259 } 260 si = ptr - eng.str; 261 } 262 else 263 si = eng.end - eng.str; 264 if (si == 0) { 265 si = 1; 266 goto fail; 267 } 268 goto match; 269 case Re_Odigit: 270 if (eng.str >= eng.end) 271 goto fail; 272 if (re__odigit[eng.str[0]]) 273 goto match; 274 goto fail; 275 case Re_OdigitNot: 276 if (eng.str >= eng.end || re__odigit[eng.str[0]]) 277 goto fail; 278 goto match; 279 case Re_Digit: 280 if (eng.str >= eng.end) 281 goto fail; 282 if (re__ddigit[eng.str[0]]) 283 goto match; 284 goto fail; 285 case Re_DigitNot: 286 if (eng.str >= eng.end || re__ddigit[eng.str[0]]) 287 goto fail; 288 goto match; 289 case Re_Xdigit: 290 if (eng.str >= eng.end) 291 goto fail; 292 if (re__xdigit[eng.str[0]]) 293 goto match; 294 goto fail; 295 case Re_XdigitNot: 296 if (eng.str >= eng.end || re__xdigit[eng.str[0]]) 297 goto fail; 298 goto match; 299 case Re_Space: 300 if (eng.str >= eng.end) 301 goto fail; 302 if (eng.str[0] == ' ' || eng.str[0] == '\t') 303 goto match; 304 goto fail; 305 case Re_SpaceNot: 306 if (eng.str >= eng.end) 307 goto fail; 308 if (eng.str[0] != ' ' && eng.str[0] != '\t') 309 goto match; 310 goto fail; 311 case Re_Tab: 312 if (eng.str >= eng.end) 313 goto fail; 314 if (eng.str[0] == '\t') 315 goto match; 316 goto fail; 317 case Re_Newline: 318 if (eng.str >= eng.end) 319 goto fail; 320 if (eng.str[0] == '\n') 321 goto match; 322 goto fail; 323 case Re_Lower: 324 if (eng.str >= eng.end) 325 goto fail; 326 if (eng.str[0] >= 'a' && eng.str[0] <= 'z') 327 goto match; 328 goto fail; 329 case Re_Upper: 330 if (eng.str >= eng.end) 331 goto fail; 332 if (eng.str[0] >= 'A' && eng.str[0] <= 'Z') 333 goto match; 334 goto fail; 335 case Re_Alnum: 336 if (eng.str >= eng.end) 337 goto fail; 338 if (re__alnum[eng.str[0]]) 339 goto match; 340 goto fail; 341 case Re_AlnumNot: 342 if (eng.str >= eng.end) 343 goto fail; 344 if (re__alnum[eng.str[0]]) 345 goto fail; 346 goto match; 347 case Re_Control: 348 if (eng.str >= eng.end) 349 goto fail; 350 if (re__control[eng.str[0]]) 351 goto match; 352 goto fail; 353 case Re_ControlNot: 354 if (eng.str >= eng.end || re__control[eng.str[0]]) 355 goto fail; 356 goto match; 357 358 /**************************************************** 359 * One byte codes, match special emtpy strings * 360 ****************************************************/ 361 case Re_Bol: 362 if (eng.str == eng.bas) { 363 if ((flags & RE_NOTBOL)) { 364 /* String does not start at the beginning of a line */ 365 if (newline) 366 goto fail; 367 goto wont; 368 } 369 si = 0; 370 goto match; 371 } 372 if (newline && eng.str[-1] == '\n') { 373 si = 0; 374 goto match; 375 } 376 goto fail; 377 case Re_Eol: 378 if (eng.str == eng.end) { 379 if (flags & RE_NOTEOL) 380 /* String does not finish at the end of a line */ 381 goto wont; 382 si = 0; 383 goto match; 384 } 385 if (newline && eng.str[0] == '\n') { 386 si = 0; 387 goto match; 388 } 389 goto fail; 390 case Re_Bow: 391 if (eng.str >= eng.end || 392 (eng.str > eng.bas && 393 (re__alnum[eng.str[-1]]))) 394 goto fail; 395 if (re__alnum[eng.str[0]]) { 396 si = 0; 397 goto match; 398 } 399 goto fail; 400 case Re_Eow: 401 if (eng.str == eng.bas || 402 (eng.str < eng.end && 403 re__alnum[eng.str[0]])) 404 goto fail; 405 if (re__alnum[eng.str[-1]]) { 406 si = 0; 407 goto match; 408 } 409 goto fail; 410 411 /**************************************************** 412 * One byte code, one byte argument * 413 ****************************************************/ 414 case Re_Literal: 415 if (eng.str >= eng.end) 416 goto fail; 417 if (eng.str[0] == eng.cod[1]) { 418 ci = 2; 419 goto match; 420 } 421 goto fail; 422 case Re_LiteralNot: 423 if (eng.str >= eng.end) 424 goto fail; 425 if (eng.str[0] != eng.cod[1]) { 426 ci = 2; 427 goto match; 428 } 429 goto fail; 430 case Re_SearchLiteral: 431 for (str = eng.str; str < eng.end; str++) { 432 if (*str == eng.cod[1]) { 433 ci = 2; 434 eng.str = str; 435 goto match; 436 } 437 } 438 /* This bytecode only happens in the toplevel */ 439 eng.so[0] = str - eng.bas; 440 eng.str = str; 441 goto fail; 442 443 /**************************************************** 444 * One byte code, two bytes argument * 445 ****************************************************/ 446 case Re_CaseLiteral: 447 if (eng.str >= eng.end) 448 goto fail; 449 if (eng.str[0] == eng.cod[1] || eng.str[0] == eng.cod[2]) { 450 ci = 3; 451 goto match; 452 } 453 goto fail; 454 case Re_CaseLiteralNot: 455 if (eng.str >= eng.end) 456 goto fail; 457 if (eng.str[0] != eng.cod[1] && eng.str[0] != eng.cod[2]) { 458 ci = 3; 459 goto match; 460 } 461 goto fail; 462 case Re_SearchCaseLiteral: 463 for (str = eng.str; str < eng.end; str++) { 464 if (*str == eng.cod[1] || *str == eng.cod[2]) { 465 ci = 3; 466 eng.str = str; 467 goto match; 468 } 469 } 470 eng.so[0] = str - eng.bas; 471 eng.str = str; 472 goto fail; 473 474 /**************************************************** 475 * One byte codes, two arguments, n bytes * 476 ****************************************************/ 477 case Re_String: 478 len = eng.cod[1]; 479 if (len & 0x80) { 480 i = 3; 481 len = (len & 0x7f) + (eng.cod[2] << 7); 482 } 483 else 484 i = 2; 485 if (eng.end - eng.str < len) 486 goto fail; 487 ptr = eng.cod + i; 488 str = eng.str; 489 for (k = len; k > 0; k--) { 490 if (*ptr++ != *str++) 491 goto fail; 492 } 493 ci = i + len; 494 si = len; 495 goto match; 496 case Re_SearchString: 497 len = eng.cod[1]; 498 if (len & 0x80) { 499 i = 3; 500 len = (len & 0x7f) + (eng.cod[2] << 7); 501 } 502 else 503 i = 2; 504 for (str = eng.str; eng.end - str >= len; str = eng.str++) { 505 for (ptr = eng.cod + i, str = eng.str, k = len; k > 0; k--) 506 if (*ptr++ != *str++) 507 break; 508 if (k == 0) { 509 /* Substring found */ 510 ci = i + len; 511 si = str - eng.str; 512 goto match; 513 } 514 } 515 eng.so[0] = eng.end - eng.bas; 516 eng.str = eng.end; 517 goto fail; 518 519 case Re_CaseString: 520 len = eng.cod[1]; 521 if (len & 0x80) { 522 i = 3; 523 len = (len & 0x7f) + (eng.cod[2] << 7); 524 } 525 else 526 i = 2; 527 528 len >>= 1; 529 /* Check if there are at least len/2 bytes left, string 530 * is represented as two bytes, lower and upper case */ 531 if (eng.end - eng.str < len) 532 goto fail; 533 ptr = eng.cod + i; 534 str = eng.str; 535 for (k = len; k > 0; str++, ptr += 2, k--) { 536 if (*str != ptr[0] && *str != ptr[1]) 537 goto fail; 538 } 539 ci = i + (len << 1); 540 si = len; 541 goto match; 542 case Re_SearchCaseString: 543 len = eng.cod[1]; 544 if (len & 0x80) { 545 i = 3; 546 len = (len & 0x7f) + (eng.cod[2] << 7); 547 } 548 else 549 i = 2; 550 len >>= 1; 551 for (str = eng.str; eng.end - str >= len; str = eng.str++) { 552 for (ptr = eng.cod + i, str = eng.str, k = len; 553 k > 0; k--, ptr += 2, str++) 554 if (ptr[0] != str[0] && ptr[1] != str[0]) 555 break; 556 if (k == 0) { 557 /* Substring found */ 558 ci = i + (len << 1); 559 si = str - eng.str; 560 goto match; 561 } 562 } 563 eng.so[0] = eng.end - eng.bas; 564 eng.str = eng.end; 565 goto fail; 566 567 case Re_StringList: 568 /* Number of strings */ 569 k = eng.cod[1]; 570 571 /* Where to jump after match */ 572 bas = eng.cod[2] | (eng.cod[3] << 8); 573 574 str = eng.str; 575 ptr = eng.cod + k + 4; 576 l = eng.end - eng.str; 577 for (j = 0; j < k; j++) { 578 len = eng.cod[j + 4]; 579 if (len <= l) { 580 for (i = 0; i < len; i++) 581 if (ptr[i] != str[i]) 582 goto next_stl; 583 goto stl_match; 584 } 585next_stl: 586 ptr += len; 587 } 588 goto fail; 589stl_match: 590 ci = bas; 591 si = len; 592 goto match; 593 594 case Re_CaseStringList: 595 /* Number of strings */ 596 k = eng.cod[1]; 597 598 /* Where to jump after match */ 599 bas = eng.cod[2] | (eng.cod[3] << 8); 600 601 str = eng.str; 602 ptr = eng.cod + k + 4; 603 l = eng.end - eng.str; 604 for (j = 0; j < k; j++) { 605 len = eng.cod[j + 4]; 606 if ((len >> 1) <= l) { 607 for (i = m = 0; i < len; m++, i += 2) 608 if (ptr[i] != str[m] && ptr[i + 1] != str[m]) 609 goto next_cstl; 610 goto cstl_match; 611 } 612next_cstl: 613 ptr += len; 614 } 615 goto fail; 616cstl_match: 617 ci = bas; 618 si = len >> 1; 619 goto match; 620 621 622 case Re_LargeStringList: 623 /* Where to jump after match */ 624 bas = eng.cod[1] | (eng.cod[2] << 8); 625 626 str = eng.str; 627 628 /* First entry in index map */ 629 ptr = eng.cod + 3; 630 i = (int)str[0] << 1; 631 j = ptr[i] | (ptr[i + 1] << 8); 632 if (j == 0xffff) 633 /* No entry with this byte */ 634 goto fail; 635 636 /* Bytes left in input */ 637 l = eng.end - eng.str; 638 639 /* First entry matching initial byte */ 640 ptr += 512 + j; 641 642 for (len = ptr[0]; 643 str[0] == ptr[1]; 644 ptr += len + 1, len = ptr[0]) { 645 if (len <= l) { 646 for (i = 1; i < len; i++) { 647 if (ptr[i + 1] != str[i]) 648 goto next_lstl; 649 } 650 ci = bas; 651 si = len; 652 goto match; 653 } 654next_lstl:; 655 } 656 goto fail; 657 658 case Re_LargeCaseStringList: 659 /* Where to jump after match */ 660 bas = eng.cod[1] | (eng.cod[2] << 8); 661 662 str = eng.str; 663 664 /* First entry in index map */ 665 ptr = eng.cod + 3; 666 i = (int)str[0] << 1; 667 j = ptr[i] | (ptr[i + 1] << 8); 668 if (j == 0xffff) 669 /* No entry with this byte */ 670 goto fail; 671 672 /* Bytes left in input */ 673 l = eng.end - eng.str; 674 675 /* First entry matching initial byte */ 676 ptr += 512 + j; 677 678 for (len = ptr[0]; 679 str[0] == ptr[1] || str[0] == ptr[2]; 680 ptr += len + 1, len = ptr[0]) { 681 if ((k = (len >> 1)) <= l) { 682 for (i = 2, j = 1; i < len; i += 2, j++) { 683 if (ptr[i + 1] != str[j] && ptr[i + 2] != str[j]) 684 goto next_lcstl; 685 } 686 ci = bas; 687 si = k; 688 goto match; 689 } 690next_lcstl:; 691 } 692 goto fail; 693 694 695 /**************************************************** 696 * Character range matching * 697 ****************************************************/ 698 case Re_Range: 699 if (eng.str < eng.end && eng.cod[eng.str[0] + 1]) { 700 ci = 257; 701 goto match; 702 } 703 goto fail; 704 case Re_RangeNot: 705 if (eng.str >= eng.end || eng.cod[eng.str[0] + 1]) 706 goto fail; 707 ci = 257; 708 goto match; 709 710 /**************************************************** 711 * Group handling * 712 ****************************************************/ 713 case Re_Open: 714 if (++eng.goff >= 9) 715 return (RE_ASSERT); 716 eng.gso[eng.goff] = eng.str - eng.bas; 717 ++eng.cod; 718 continue; 719 case Re_Close: 720 eng.geo[eng.goff] = eng.str - eng.bas; 721 ++eng.cod; 722 continue; 723 case Re_Update: 724 bas = eng.cod[1]; 725 eng.geo[eng.goff] = eng.str - eng.bas; 726 eng.cod += 2; /* + Update + bas */ 727 continue; 728 729 /**************************************************** 730 * Backreference * 731 ****************************************************/ 732 case Re_Backref: 733 i = eng.cod[1]; 734 j = eng.gso[i]; 735 k = eng.geo[i]; 736 len = k - j; 737 if (k < j || eng.end - eng.str < len) 738 goto fail; 739 ptr = eng.bas + j; 740 str = eng.str; 741 for (l = len; l > 0; l--) { 742 if (*ptr++ != *str++) 743 goto fail; 744 } 745 ci = 2; 746 si = len; 747 goto match; 748 749 /**************************************************** 750 * Alternatives handling * 751 ****************************************************/ 752 case Re_Alt: 753 bas = eng.off; 754 if (++eng.off >= MAX_DEPTH) 755 return (RE_ASSERT); 756 757 /* Get offset of next alternative */ 758 i = eng.cod[1] | (eng.cod[2] << 8); 759 760 /* Setup for next alternative if the current fails */ 761 eng.rcod[eng.off] = eng.cod + i + 1; /* + Alt */ 762 763 /* If fail, test the next alternative in the same string */ 764 eng.rstr[eng.off] = eng.str; 765 766 /* Setup match offsets */ 767 if (eng.so[bas] <= eng.eo[bas]) 768 eng.so[eng.off] = eng.eo[bas]; 769 else 770 eng.so[eng.off] = eng.so[bas]; 771 eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1; 772 773 /* Save start of possible previous matches */ 774 eng.ss[eng.off] = eng.so[bas]; 775 776 /* Skip code */ 777 eng.cod += 3; 778 779 /* Go try the first alternative */ 780 continue; 781 782 case Re_AltNext: 783 bas = eng.off - 1; 784 /* Check if matched and if it is a better match */ 785 if (eng.sv[eng.off] - eng.so[eng.off] < 786 eng.eo[eng.off] - eng.so[eng.off]) 787 eng.sv[eng.off] = eng.eo[eng.off]; 788 789 /* Get offset of next alternative */ 790 i = eng.cod[1] | (eng.cod[2] << 8); 791 792 /* Setup for next alternative if the current fails */ 793 eng.rcod[eng.off] = eng.cod + i + 1; /* + AltNext */ 794 795 /* Setup match offset */ 796 eng.eo[eng.off] = eng.so[eng.off] - 1; 797 798 /* Reset string for next alternative */ 799 eng.str = eng.rstr[eng.off]; 800 801 /* Skip code */ 802 eng.cod += 3; 803 804 /* Go try the next alternative */ 805 continue; 806 807 case Re_AltDone: 808 bas = eng.off - 1; 809 /* Check if matched and if it is a better match */ 810 if (eng.sv[eng.off] - eng.so[eng.off] < 811 eng.eo[eng.off] - eng.so[eng.off]) 812 eng.sv[eng.off] = eng.eo[eng.off]; 813 814 /* If there is a match */ 815 if (eng.sv[eng.off] >= eng.so[eng.off]) { 816 eng.so[bas] = eng.ss[eng.off]; 817 eng.eo[bas] = eng.sv[eng.off]; 818 eng.str = eng.bas + eng.eo[bas]; 819 820 /* Pop stack and skip code */ 821 --eng.off; 822 ++eng.cod; 823 824 /* Go try next regular expression pattern */ 825 continue; 826 } 827 828 /* Failed, reset string and pop stack */ 829 eng.str = eng.rstr[eng.off]; 830 --eng.off; 831 goto fail; 832 833 834 /**************************************************** 835 * Repetition * 836 ****************************************************/ 837 838 /* Note that the repetition counter is not 839 * updated for <re>*, <re>+ and <re>? 840 * it is easy to updated, but since it is not 841 * really useful, code to do it was removed 842 * to save a few cpu cicles. */ 843 844#define REPETITION_SETUP() \ 845 if (++eng.off >= MAX_DEPTH) \ 846 return (RE_ASSERT); \ 847 \ 848 /* Return here for recovery if match fail */ \ 849 eng.rcod[eng.off] = eng.cod; \ 850 \ 851 /* Setup match offsets */ \ 852 if (eng.so[bas] <= eng.eo[bas]) \ 853 eng.so[eng.off] = eng.eo[bas]; \ 854 else \ 855 eng.so[eng.off] = eng.so[bas]; \ 856 eng.ss[eng.off] = eng.so[bas]; \ 857 eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\ 858 \ 859 /* Skip repetition instruction */ \ 860 eng.cod += 4; 861 862 863 case Re_AnyTimes: 864 bas = eng.cod[1]; 865 if (eng.off == bas) { 866 /* First iteration */ 867 REPETITION_SETUP(); 868 } 869 else { 870 if (eng.eo[eng.off] >= eng.so[eng.off] && 871 eng.eo[eng.off] > eng.sv[eng.off]) { 872 /* Update offset of match */ 873 eng.sv[eng.off] = eng.eo[eng.off]; 874 875 /* Skip repetition instruction */ 876 eng.cod += 4; 877 } 878 else { 879 /* Match failed but it is ok */ 880 len = eng.cod[2] | (eng.cod[3] << 8); 881 eng.so[bas] = eng.ss[eng.off]; 882 if (eng.sv[eng.off] >= eng.so[eng.off]) 883 /* Something matched earlier, update */ 884 eng.eo[bas] = eng.sv[eng.off]; 885 else if (eng.eo[bas] < eng.so[bas]) 886 /* Empty match */ 887 eng.eo[bas] = eng.so[bas]; 888 889 /* Try next pattern at correct offset */ 890 eng.str = eng.bas + eng.eo[bas]; 891 892 /* Pop stack and skip code */ 893 --eng.off; 894 eng.cod += len; 895 } 896 } 897 continue; 898 899 case Re_Maybe: 900 bas = eng.cod[1]; 901 if (eng.off == bas) { 902 /* First iteration */ 903 REPETITION_SETUP(); 904 } 905 else { 906 /* Matched or first iteration is done */ 907 len = eng.cod[2] | (eng.cod[3] << 8); 908 eng.so[bas] = eng.ss[eng.off]; 909 if (eng.eo[eng.off] > eng.so[eng.off]) { 910 /* Something matched earlier, update */ 911 eng.eo[bas] = eng.eo[eng.off]; 912 eng.str = eng.bas + eng.eo[bas]; 913 /* Don't need to update counter */ 914 } 915 else { 916 /* Empty match */ 917 if (eng.eo[bas] < eng.so[bas]) 918 eng.eo[bas] = eng.so[bas]; 919 920 /* Try next pattern at correct offset */ 921 eng.str = eng.bas + eng.eo[bas]; 922 } 923 924 /* Pop stack */ 925 --eng.off; 926 927 /* Skip code */ 928 eng.cod += len; 929 } 930 continue; 931 932 case Re_AtLeast: 933 bas = eng.cod[1]; 934 if (eng.off == bas) { 935 /* First iteration */ 936 REPETITION_SETUP(); 937 } 938 else { 939 if (eng.eo[eng.off] >= eng.so[eng.off] && 940 eng.eo[eng.off] > eng.sv[eng.off]) { 941 /* Update offset of match */ 942 eng.sv[eng.off] = eng.eo[eng.off]; 943 944 /* Skip repetition instruction */ 945 eng.cod += 4; 946 } 947 else { 948 /* Last try failed */ 949 len = eng.cod[2] | (eng.cod[3] << 8); 950 if (eng.sv[eng.off] >= eng.so[eng.off]) { 951 /* Something matched earlier, update */ 952 eng.so[bas] = eng.ss[eng.off]; 953 eng.eo[bas] = eng.sv[eng.off]; 954 eng.str = eng.bas + eng.eo[bas]; 955 } 956 else { 957 /* Do it here, so that the fail label does 958 * not need to do too expensive work for 959 * simple patterns. */ 960 eng.so[bas] = eng.str - eng.bas; 961 962 /* Zero matches, pop stack and restart */ 963 --eng.off; 964 goto fail; 965 } 966 967 /* Pop stack and skip code */ 968 --eng.off; 969 eng.cod += len; 970 } 971 } 972 continue; 973 974 975 /**************************************************** 976 * Repetition with arguments * 977 ****************************************************/ 978 case Re_Exact: 979#define COMPLEX_REPETITION_SETUP_0() \ 980 i = eng.cod[1]; \ 981 bas = eng.cod[2]; 982 983#define COMPLEX_REPETITION_SETUP() \ 984 /* First iteration */ \ 985 if (++eng.off >= MAX_DEPTH) \ 986 return (RE_ASSERT); \ 987 \ 988 /* Remeber number or repetitions */ \ 989 eng.re[eng.off] = 0; \ 990 \ 991 /* Return here for recovery if match fail */ \ 992 eng.rcod[eng.off] = eng.cod; \ 993 \ 994 /* Setup match offsets */ \ 995 if (eng.so[bas] <= eng.eo[bas]) \ 996 eng.so[eng.off] = eng.eo[bas]; \ 997 else \ 998 eng.so[eng.off] = eng.so[bas]; \ 999 eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\ 1000 eng.ss[eng.off] = eng.so[bas]; \ 1001 \ 1002 /* Skip repetition instruction */ \ 1003 eng.cod += 5; 1004 1005 COMPLEX_REPETITION_SETUP_0(); 1006 if (eng.off == bas) { 1007 /* First iteration */ 1008 COMPLEX_REPETITION_SETUP(); 1009 } 1010 else { 1011 if (eng.eo[eng.off] >= eng.so[eng.off] && 1012 eng.eo[eng.off] > eng.sv[eng.off]) { 1013 /* Update offset of match */ 1014 eng.sv[eng.off] = eng.eo[eng.off]; 1015 1016 /* Update repetition counter */ 1017 if (++eng.re[eng.off] == i) { 1018 /* Matched the required times */ 1019 eng.so[bas] = eng.ss[eng.off]; 1020 eng.eo[bas] = eng.sv[eng.off]; 1021 eng.str = eng.bas + eng.eo[bas]; 1022 1023 /* Update code */ 1024 k = eng.cod[3] | (eng.cod[4] << 8); 1025 eng.cod += k; 1026 1027 /* Pop stack and go for next pattern */ 1028 --eng.off; 1029 continue; 1030 } 1031 1032 /* Skip repetition instruction */ 1033 eng.cod += 5; 1034 } 1035 else { 1036 /* Do it here, so that the fail label does 1037 * not need to do too expensive work for 1038 * simple patterns. */ 1039 eng.so[bas] = eng.str - eng.bas; 1040 1041 /* Pop stack and restart */ 1042 --eng.off; 1043 goto fail; 1044 } 1045 } 1046 continue; 1047 1048 case Re_Min: 1049 COMPLEX_REPETITION_SETUP_0(); 1050 if (eng.off == bas) { 1051 /* First iteration */ 1052 COMPLEX_REPETITION_SETUP(); 1053 } 1054 else { 1055 if (eng.eo[eng.off] >= eng.so[eng.off] && 1056 eng.eo[eng.off] > eng.sv[eng.off]) { 1057 /* Update offset of match */ 1058 eng.sv[eng.off] = eng.eo[eng.off]; 1059 1060 /* Update repetition counter */ 1061 ++eng.re[eng.off]; 1062 1063 /* Skip repetition instruction and try again */ 1064 eng.cod += 5; 1065 } 1066 else { 1067 /* Match failed! */ 1068 if (eng.re[eng.off] < i) { 1069 /* Do it here, so that the fail label does 1070 * not need to do too expensive work for 1071 * simple patterns. */ 1072 eng.so[bas] = eng.str - eng.bas; 1073 1074 /* Didn't match required number of times */ 1075 --eng.off; 1076 goto fail; 1077 } 1078 else { 1079 /* Matched minimum number of times */ 1080 eng.eo[bas] = eng.sv[eng.off]; 1081 eng.str = eng.bas + eng.eo[bas]; 1082 k = eng.cod[3] | (eng.cod[4] << 8); 1083 1084 /* Update code and pop stack */ 1085 eng.cod += k; 1086 --eng.off; 1087 } 1088 } 1089 } 1090 continue; 1091 1092 case Re_Max: 1093 COMPLEX_REPETITION_SETUP_0(); 1094 if (eng.off == bas) { 1095 /* First iteration */ 1096 COMPLEX_REPETITION_SETUP(); 1097 } 1098 else { 1099 if (eng.eo[eng.off] >= eng.so[eng.off] && 1100 eng.eo[eng.off] > eng.sv[eng.off]) { 1101 /* Update offset of match */ 1102 eng.sv[eng.off] = eng.eo[eng.off]; 1103 1104 /* Update repetition counter */ 1105 if (++eng.re[eng.off] == i) { 1106 /* Matched the maximum times */ 1107 eng.so[bas] = eng.ss[eng.off]; 1108 eng.eo[bas] = eng.sv[eng.off]; 1109 eng.str = eng.bas + eng.eo[bas]; 1110 1111 k = eng.cod[3] | (eng.cod[4] << 8); 1112 1113 /* Update code and pop stack */ 1114 eng.cod += k; 1115 --eng.off; 1116 continue; 1117 } 1118 1119 /* Skip repetition instruction and try again */ 1120 eng.cod += 5; 1121 } 1122 else { 1123 /* No matches, but zero matches are ok */ 1124 k = eng.cod[3] | (eng.cod[4] << 8); 1125 if (eng.sv[eng.off] >= eng.so[eng.off]) { 1126 /* Something matched earlier, update */ 1127 eng.so[bas] = eng.ss[eng.off]; 1128 eng.eo[bas] = eng.sv[eng.off]; 1129 eng.str = eng.bas + eng.eo[bas]; 1130 } 1131 else { 1132 /* Empty match */ 1133 if (eng.eo[bas] < eng.so[bas]) 1134 eng.eo[bas] = eng.so[bas]; 1135 1136 /* Try next pattern at correct offset */ 1137 eng.str = eng.bas + eng.eo[bas]; 1138 } 1139 1140 /* Pop stack and update code */ 1141 --eng.off; 1142 eng.cod += k; 1143 } 1144 } 1145 continue; 1146 1147 case Re_MinMax: 1148 bas = eng.cod[3]; 1149 if (eng.off == bas) { 1150 /* First iteration */ 1151 COMPLEX_REPETITION_SETUP(); 1152 } 1153 else { 1154 if (eng.eo[eng.off] >= eng.so[eng.off] && 1155 eng.eo[eng.off] > eng.sv[eng.off]) { 1156 /* Update offset of match */ 1157 eng.sv[eng.off] = eng.eo[eng.off]; 1158 1159 /* Update repetition counter */ 1160 if (++eng.re[eng.off] == eng.cod[2]) { 1161 /* Matched the maximum times */ 1162 eng.so[bas] = eng.ss[eng.off]; 1163 eng.eo[bas] = eng.sv[eng.off]; 1164 eng.str = eng.bas + eng.eo[bas]; 1165 k = eng.cod[4] | (eng.cod[5] << 8); 1166 1167 /* Update code and pop stack */ 1168 eng.cod += k; 1169 --eng.off; 1170 continue; 1171 } 1172 1173 /* Skip repetition instruction and try again */ 1174 eng.cod += 6; 1175 } 1176 else { 1177 /* Match failed! */ 1178 if (eng.re[eng.off] < eng.cod[1]) { 1179 /* Do it here, so that the fail label does 1180 * not need to do too expensive work for 1181 * simple patterns. */ 1182 eng.so[bas] = eng.str - eng.bas; 1183 1184 /* Didn't match required number of times */ 1185 --eng.off; 1186 goto fail; 1187 } 1188 else { 1189 /* Matched minimum number of times */ 1190 eng.so[bas] = eng.ss[eng.off]; 1191 eng.eo[bas] = eng.sv[eng.off]; 1192 eng.str = eng.bas + eng.eo[bas]; 1193 k = eng.cod[4] | (eng.cod[5] << 8); 1194 1195 /* Update code and pop stack */ 1196 eng.cod += k; 1197 --eng.off; 1198 } 1199 } 1200 } 1201 continue; 1202 1203 1204 /**************************************************** 1205 * Special repetition handling * 1206 ****************************************************/ 1207 case Re_AnyAnyTimes: 1208 /* code(1) + bas(1) + gbas(1) + jump(2) */ 1209 bas = eng.cod[1]; 1210 if (eng.off == bas) { 1211 /* First iteration */ 1212 if (++eng.off >= MAX_DEPTH) 1213 return (RE_ASSERT); 1214 1215 /* Return here for recovery if match fail */ 1216 eng.rcod[eng.off] = eng.cod; 1217 1218 /* If fail, test the next pattern at the same point */ 1219 eng.rstr[eng.off] = eng.str; 1220 1221 /* Setup match offsets */ 1222 eng.so[eng.off] = eng.str - eng.bas; 1223 eng.eo[eng.off] = eng.so[eng.off] - 1; 1224 1225 if (newline) 1226 /* Use the repetition counter to store start of 1227 * skipped string, to later check if skipping a 1228 * newline. */ 1229 eng.re[eng.off] = eng.so[eng.off]; 1230 1231 /* Save start of possible previous matches */ 1232 eng.ss[eng.off] = eng.so[bas]; 1233 1234 /* Skip repetition instruction */ 1235 eng.cod += 5; 1236 } 1237 else { 1238 /* -1 as an unsigned char */ 1239 if (eng.cod[2] != 0xff) 1240 eng.goff = eng.cod[2]; 1241 else 1242 eng.goff = -1; 1243 1244 if (newline) { 1245 ptr = eng.bas + eng.re[eng.off]; 1246 str = eng.bas + eng.so[eng.off]; 1247 for (; ptr < str; ptr++) 1248 if (*ptr == '\n') { 1249 eng.cod = eng.rcod[0]; 1250 eng.so[0] = ptr - eng.bas + 1; 1251 eng.eo[0] = eng.so[0] - 1; 1252 eng.rstr[0] = eng.str = ptr + 1; 1253 eng.off = 0; 1254 goto reset; 1255 } 1256 /* If looping, don't do too many noops */ 1257 eng.re[eng.off] = ptr - eng.bas; 1258 } 1259 1260 if (eng.eo[eng.off] >= eng.so[eng.off]) { 1261 /* Note that this is only true if all possibly 1262 * nested special repetitions also matched. */ 1263 1264 if (eng.goff >= 0) { 1265 if (eng.cod[5] == Re_Update) 1266 eng.gso[eng.goff] = eng.eo[bas] + 1267 (eng.so[bas] > eng.eo[bas]); 1268 else if (eng.geo[eng.goff] < eng.so[eng.off]) 1269 eng.geo[eng.goff] = eng.so[eng.off]; 1270 } 1271 1272 /* Jump relative offset */ 1273 len = eng.cod[3] | (eng.cod[4] << 8); 1274 1275 /* Restore offset from where started trying */ 1276 eng.so[bas] = eng.ss[eng.off]; 1277 eng.eo[bas] = eng.eo[eng.off]; 1278 1279 /* Pop stack and skip code */ 1280 --eng.off; 1281 eng.cod += len; 1282 } 1283 else { 1284 /* Only give up if the entire string was scanned */ 1285 if (eng.str < eng.end) { 1286 /* Update restart point for next pattern */ 1287 eng.str = ++eng.rstr[eng.off]; 1288 1289 /* Reset start of nested match */ 1290 eng.so[eng.off] = eng.str - eng.bas; 1291 1292 /* Skip repetition instruction */ 1293 eng.cod += 5; 1294 } 1295 else { 1296 /* Entire string scanned and failed */ 1297 1298 /* Jump relative offset */ 1299 len = eng.cod[3] | (eng.cod[4] << 8); 1300 1301 /* Restore offset from where started trying */ 1302 eng.so[bas] = eng.ss[eng.off]; 1303 eng.eo[bas] = eng.ss[eng.off] - 1; 1304 1305 /* Pop stack and skip code */ 1306 --eng.off; 1307 eng.cod += len; 1308 } 1309 } 1310 } 1311 continue; 1312 1313 /* This is significantly different than matching <re>.*<re> 1314 * because it may need to restart several times since it is 1315 * possible to find too many false positives, for example: 1316 * a.*b => once one "a" is found, scan all 1317 * the remaining string searching for a "b" 1318 * a.?b => the string may have too many "a"s, but the 1319 * first occurrences of "a" may not be followed 1320 * by any-character and a "b" or a single "b". 1321 */ 1322 case Re_AnyMaybe: 1323 bas = eng.cod[1]; 1324 if (eng.off == bas) { 1325 /* First iteration */ 1326 if (++eng.off >= MAX_DEPTH) 1327 return (RE_ASSERT); 1328 1329 /* Return here for recovery if match fail */ 1330 eng.rcod[eng.off] = eng.cod; 1331 1332 /* First try without eating a byte */ 1333 eng.rstr[eng.off] = eng.str; 1334 1335 /* Remember this is the first try if match fail */ 1336 eng.re[eng.off] = 0; 1337 1338 /* Setup match offsets */ 1339 eng.so[eng.off] = eng.str - eng.bas; 1340 eng.eo[eng.off] = eng.so[eng.off] - 1; 1341 1342 /* Save start of possible previous matches */ 1343 eng.ss[eng.off] = eng.so[bas]; 1344 1345 /* Skip repetition instruction */ 1346 eng.cod += 5; 1347 } 1348 else { 1349 /* -1 as an unsigned char */ 1350 if (eng.cod[2] != 0xff) 1351 eng.goff = eng.cod[2]; 1352 else 1353 eng.goff = -1; 1354 1355 if (eng.eo[eng.off] >= eng.so[eng.off]) { 1356 /* Something matched */ 1357 1358 if (eng.goff >= 0) { 1359 if (eng.cod[5] == Re_Update) 1360 eng.gso[eng.goff] = eng.eo[bas] + 1361 (eng.so[bas] > eng.eo[bas]); 1362 else if (eng.geo[eng.goff] < eng.so[eng.off]) 1363 eng.geo[eng.goff] = eng.so[eng.off]; 1364 } 1365 1366 /* Jump relative offset */ 1367 len = eng.cod[3] | (eng.cod[4] << 8); 1368 1369 /* Update offset of match */ 1370 eng.eo[bas] = eng.eo[eng.off]; 1371 1372 /* Pop stack and skip code */ 1373 --eng.off; 1374 eng.cod += len; 1375 } 1376 else if (eng.re[eng.off] == 0 && 1377 (!newline || eng.rstr[eng.off][1] != '\n')) { 1378 /* Try this time skiping a byte */ 1379 ++eng.re[eng.off]; 1380 1381 /* Reset string, skip code and go try one time more */ 1382 eng.str = ++eng.rstr[eng.off]; 1383 eng.cod += 5; 1384 } 1385 else { 1386 /* Failed to match */ 1387 1388 /* Update offsets */ 1389 eng.eo[bas] = eng.ss[eng.off]; 1390 eng.so[bas] = eng.eo[bas] + 1; 1391 1392 eng.str = eng.rstr[eng.off] + (eng.re[eng.off] == 0); 1393 1394 /* Pop stack and return to toplevel code */ 1395 --eng.off; 1396 if (eng.str >= eng.end) 1397 goto wont; 1398 eng.cod = eng.rcod[bas]; 1399 } 1400 } 1401 continue; 1402 1403 /* .+ almost identical to .* but requires eating at least one byte */ 1404 case Re_AnyAtLeast: 1405 bas = eng.cod[1]; 1406 if (eng.off == bas) { 1407 /* First iteration */ 1408 if (++eng.off >= MAX_DEPTH) 1409 return (RE_ASSERT); 1410 1411 /* Return here for recovery if match fail */ 1412 eng.rcod[eng.off] = eng.cod; 1413 1414 /* Skip one byte for the restart string */ 1415 if (newline && eng.str[0] == '\n') { 1416 /* Cannot skip newline */ 1417 eng.cod = eng.rcod[0]; 1418 eng.rstr[0] = ++eng.str; 1419 eng.so[0] = eng.str - eng.bas; 1420 eng.eo[0] = eng.so[0] - 1; 1421 eng.off = 0; 1422 goto reset; 1423 } 1424 eng.rstr[eng.off] = ++eng.str; 1425 1426 /* Setup match offsets */ 1427 eng.so[eng.off] = eng.str - eng.bas; 1428 eng.eo[eng.off] = eng.so[eng.off] - 1; 1429 1430 if (newline) 1431 /* Use the repetition counter to store start of 1432 * skipped string, to later check if skipping a 1433 * newline. */ 1434 eng.re[eng.off] = eng.so[eng.off]; 1435 1436 /* Save start of possible previous matches */ 1437 eng.ss[eng.off] = eng.so[bas]; 1438 1439 /* Skip repetition instruction */ 1440 eng.cod += 5; 1441 } 1442 else { 1443 /* -1 as an unsigned char */ 1444 if (eng.cod[2] != 0xff) 1445 eng.goff = eng.cod[2]; 1446 else 1447 eng.goff = -1; 1448 1449 if (newline) { 1450 ptr = eng.bas + eng.re[eng.off]; 1451 str = eng.bas + eng.so[eng.off]; 1452 for (; ptr < str; ptr++) 1453 if (*ptr == '\n') { 1454 eng.cod = eng.rcod[0]; 1455 eng.so[0] = ptr - eng.bas + 1; 1456 eng.eo[0] = eng.so[0] - 1; 1457 eng.rstr[0] = eng.str = ptr + 1; 1458 eng.off = 0; 1459 goto reset; 1460 } 1461 /* If looping, don't do too many noops */ 1462 eng.re[eng.off] = ptr - eng.bas; 1463 } 1464 1465 if (eng.eo[eng.off] >= eng.so[eng.off]) { 1466 /* Note that this is only true if all possibly 1467 * nested special repetitions also matched. */ 1468 1469 if (eng.goff >= 0) { 1470 if (eng.cod[5] == Re_Update) 1471 eng.gso[eng.goff] = eng.eo[bas] + 1472 (eng.so[bas] > eng.eo[bas]); 1473 else if (eng.geo[eng.goff] < eng.so[eng.off]) 1474 eng.geo[eng.goff] = eng.so[eng.off]; 1475 } 1476 1477 /* Jump relative offset */ 1478 len = eng.cod[3] | (eng.cod[4] << 8); 1479 1480 /* Restore offset from where started trying */ 1481 eng.so[bas] = eng.ss[eng.off]; 1482 eng.eo[bas] = eng.eo[eng.off]; 1483 1484 /* Pop stack and skip code */ 1485 --eng.off; 1486 eng.cod += len; 1487 } 1488 else { 1489 /* Only give up if the entire string was scanned */ 1490 if (eng.str < eng.end) { 1491 /* Update restart point for next pattern */ 1492 eng.str = ++eng.rstr[eng.off]; 1493 1494 /* Reset start of nested match */ 1495 eng.so[eng.off] = eng.str - eng.bas; 1496 1497 /* Skip repetition instruction */ 1498 eng.cod += 5; 1499 } 1500 else { 1501 /* Entire string scanned and failed */ 1502 1503 /* Jump relative offset */ 1504 len = eng.cod[3] | (eng.cod[4] << 8); 1505 1506 /* Restore offset from where started trying */ 1507 eng.so[bas] = eng.ss[eng.off]; 1508 eng.eo[bas] = eng.ss[eng.off] - 1; 1509 1510 /* Pop stack and skip code */ 1511 --eng.off; 1512 eng.cod += len; 1513 } 1514 } 1515 } 1516 continue; 1517 1518 1519 /**************************************************** 1520 * Repetition matched! * 1521 ****************************************************/ 1522 case Re_RepJump: 1523 /* eng.cod[1] is toplevel offset of repetition */ 1524 if (eng.off > eng.cod[1]) 1525 /* If still needs to try matches */ 1526 eng.cod -= eng.cod[2]; 1527 else 1528 eng.cod += 3; /* + RepJump + bas + len-size */ 1529 continue; 1530 1531 case Re_RepLongJump: 1532 /* eng.cod[1] is toplevel offset of repetition */ 1533 if (eng.off > eng.cod[1]) 1534 /* If still needs to try matches */ 1535 eng.cod -= eng.cod[2] | (eng.cod[3] << 8); 1536 else 1537 eng.cod += 4; /* + RepLongJump + bas + len-size */ 1538 continue; 1539 1540 /**************************************************** 1541 * Finished * 1542 ****************************************************/ 1543 case Re_DoneIf: 1544 if (eng.eo[eng.off] >= eng.so[eng.off]) { 1545 eng.so[0] = eng.ss[eng.off]; 1546 eng.eo[0] = eng.eo[eng.off]; 1547 goto done; 1548 } 1549 ++eng.cod; 1550 continue; 1551 case Re_MaybeDone: 1552 if (eng.eo[eng.off] >= eng.so[eng.off]) { 1553 eng.so[0] = eng.ss[eng.off]; 1554 eng.eo[0] = eng.eo[eng.off]; 1555 goto done; 1556 } 1557 ++eng.cod; 1558 continue; 1559 case Re_Done: 1560 goto done; 1561 1562 default: 1563 /* Fatal internal error */ 1564 return (RE_ASSERT); 1565 } 1566 1567 1568wont: 1569 /* Surely won't match */ 1570 if (eng.off == 0) { 1571 eng.eo[0] = eng.so[0] - 1; 1572 break; 1573 } 1574 1575 1576fail: 1577 if (eng.off == 0) { 1578 /* If the entire string scanned */ 1579 if (++eng.str > eng.end) { 1580 eng.eo[0] = eng.so[0] - 1; 1581 break; 1582 } 1583 eng.goff = -1; 1584 /* Update start of possible match after restart */ 1585 if (eng.eo[0] >= eng.so[0]) { 1586 /* If first fail */ 1587 eng.str = eng.rstr[0]; 1588 ++eng.rstr[0]; 1589 eng.so[0] = eng.str - eng.bas; 1590 eng.eo[0] = eng.so[eng.off] - 1; 1591 } 1592 else 1593 /* Just trying at next byte */ 1594 ++eng.so[0]; 1595 } 1596 else 1597 /* Remember this match failed */ 1598 eng.eo[eng.off] = eng.so[eng.off] - 1; 1599 1600 /* Restart code */ 1601 eng.cod = eng.rcod[eng.off]; 1602 continue; 1603 1604 1605match: 1606 /* If first match */ 1607 if (eng.eo[eng.off] < eng.so[eng.off]) { 1608 if (eng.off == 0) 1609 eng.rstr[0] = eng.str + 1; 1610 eng.so[eng.off] = eng.eo[eng.off] = eng.str - eng.bas; 1611 } 1612 eng.eo[eng.off] += si; 1613 eng.cod += ci; 1614 eng.str += si; 1615 ci = si = 1; 1616 continue; 1617 1618done: 1619 break; 1620 } 1621 1622 if (nmatch) { 1623 if (flags & RE_STARTEND) 1624 len = pmat[0].rm_so; 1625 else 1626 len = 0; 1627 if (!nosub) { 1628 if (preg->cod[1] != 0xff) 1629 eng.goff = preg->cod[1]; 1630 pmat[0].rm_so = eng.so[0]; 1631 pmat[0].rm_eo = eng.eo[0]; 1632 for (i = 1; i < nmatch; i++) { 1633 if (i - 1 <= eng.goff) { 1634 pmat[i].rm_so = eng.gso[i - 1]; 1635 pmat[i].rm_eo = eng.geo[i - 1]; 1636 } 1637 else { 1638 pmat[i].rm_so = 0; 1639 pmat[i].rm_eo = -1; 1640 } 1641 } 1642 if (len) { 1643 /* Update offsets, since the match was done in a substring */ 1644 j = eng.goff + 2 > nmatch ? nmatch : eng.goff + 2; 1645 for (i = 0; i < j; i++) { 1646 pmat[i].rm_so += len; 1647 pmat[i].rm_eo += len; 1648 } 1649 } 1650 } 1651 else { 1652 /* Already know these values, allow compiling the regex with 1653 * RE_NOSUB to use parenthesis only for grouping, but avoiding 1654 * the runtime overhead of keeping track of the subexpression 1655 * offsets. */ 1656 pmat[0].rm_so = eng.so[0] + len; 1657 pmat[0].rm_eo = eng.eo[0] + len; 1658 } 1659 } 1660 1661 return (eng.so[0] <= eng.eo[0] ? 0 : RE_NOMATCH); 1662} 1663 1664int 1665reerror(int ecode, const re_cod *preg, char *ebuffer, int ebuffer_size) 1666{ 1667 static char *errors[] = { 1668 "No error", 1669 "Failed to match", /* NOMATCH */ 1670 1671 /* Errors not generated */ 1672 "Invalid regular expression", /* BADPAT */ 1673 "Invalid collating element", /* ECOLLATE */ 1674 "Invalid character class", /* ECTYPE */ 1675 1676 "`\' applied to unescapable character", /* EESCAPE */ 1677 "Invalid backreference number", /* ESUBREG */ 1678 "Brackets `[ ]' not balanced", /* EBRACK */ 1679 "Parentheses `( )' not balanced", /* EPAREN */ 1680 "Braces `{ }' not balanced", /* EBRACE */ 1681 "Invalid repetition count(s) in `{ }'", /* BADBR */ 1682 "Invalid character range in `[ ]'", /* ERANGE */ 1683 "Out of memory", /* ESPACE */ 1684 "`?', `*', or `+' operand invalid", /* BADRPT */ 1685 "Empty (sub)expression", /* EMPTY */ 1686 "Assertion error - you found a bug", /* ASSERT */ 1687 "Invalid argument" /* INVARG */ 1688 }; 1689 char *str; 1690 1691 if (ecode >= 0 && ecode < sizeof(errors) / sizeof(errors[0])) 1692 str = errors[ecode]; 1693 else 1694 str = "Unknown error"; 1695 1696 return (snprintf(ebuffer, ebuffer_size, "%s", str)); 1697} 1698 1699void 1700refree(re_cod *cod) 1701{ 1702 free(cod->cod); 1703 cod->cod = NULL; 1704} 1705 1706static void 1707reinit(void) 1708{ 1709 int i; 1710 static int first = 1; 1711 1712 if (!first) 1713 return; 1714 first = 0; 1715 1716 re__alnum['_'] = 1; 1717 1718 for (i = '0'; i <= '7'; i++) 1719 re__alnum[i] = re__odigit[i] = re__ddigit[i] = re__xdigit[i] = 1; 1720 1721 for (; i <= '9'; i++) 1722 re__alnum[i] = re__ddigit[i] = re__xdigit[i] = 1; 1723 1724 for (i = 'a'; i <= 'f'; i++) 1725 re__alnum[i] = re__xdigit[i] = 1; 1726 for (; i <= 'z'; i++) 1727 re__alnum[i] = 1; 1728 1729 for (i = 'A'; i <= 'F'; i++) 1730 re__alnum[i] = re__xdigit[i] = 1; 1731 for (; i <= 'Z'; i++) 1732 re__alnum[i] = 1; 1733 1734 for (i = 1; i < 32; i++) 1735 re__control[i] = 1; 1736 re__control[127] = 1; 1737 /* Don't show tabs as control characters */ 1738 re__control['\t'] = 0; 1739} 1740 1741static int 1742rec_check(re_inf *inf, int count) 1743{ 1744 if (inf->len + count >= inf->spc) { 1745 int spc; 1746 unsigned char *cod; 1747 1748 if ((spc = (count % 64)) != 0) 1749 spc = 64 - spc; 1750 spc += count + inf->spc; 1751 if ((cod = realloc(inf->cod, spc)) == NULL) 1752 return (inf->ecode = RE_ESPACE); 1753 inf->cod = cod; 1754 inf->spc = spc; 1755 } 1756 1757 return (inf->ecode); 1758} 1759 1760static int 1761rec_code(re_inf *inf, ReCode code) 1762{ 1763 if (rec_check(inf, 1) == 0) 1764 inf->cod[inf->len++] = code; 1765 1766 return (inf->ecode); 1767} 1768 1769static int 1770rec_byte(re_inf *inf, int value) 1771{ 1772 if (rec_check(inf, 1) == 0) 1773 inf->cod[inf->len++] = value; 1774 1775 return (inf->ecode); 1776} 1777 1778static int 1779rec_code_byte(re_inf *inf, ReCode code, int value) 1780{ 1781 if (rec_check(inf, 2) == 0) { 1782 inf->cod[inf->len++] = code; 1783 inf->cod[inf->len++] = value; 1784 } 1785 1786 return (inf->ecode); 1787} 1788 1789static int 1790rec_length(re_inf *inf, int length) 1791{ 1792 int lo, hi, two; 1793 1794 if (length >= 16384) 1795 return (inf->ecode = RE_ESPACE); 1796 1797 lo = length & 0xff; 1798 hi = length & 0xff00; 1799 two = ((length > 0x7f) != 0) + 1; 1800 if (two == 2) { 1801 hi <<= 1; 1802 hi |= (lo & 0x80) != 0; 1803 lo |= 0x80; 1804 } 1805 1806 if (rec_check(inf, two) == 0) { 1807 inf->cod[inf->len++] = lo; 1808 if (two == 2) 1809 inf->cod[inf->len++] = hi >> 8; 1810 } 1811 1812 return (inf->ecode); 1813} 1814 1815static int 1816rec_byte_byte(re_inf *inf, int value0, int value1) 1817{ 1818 if (rec_check(inf, 2) == 0) { 1819 inf->cod[inf->len++] = value0; 1820 inf->cod[inf->len++] = value1; 1821 } 1822 1823 return (inf->ecode); 1824} 1825 1826static int 1827rec_code_byte_byte(re_inf *inf, ReCode code, int value0, int value1) 1828{ 1829 if (rec_check(inf, 3) == 0) { 1830 inf->cod[inf->len++] = code; 1831 inf->cod[inf->len++] = value0; 1832 inf->cod[inf->len++] = value1; 1833 } 1834 1835 return (inf->ecode); 1836} 1837 1838static int 1839rec_build_alt(re_inf *inf, rec_alt *alt) 1840{ 1841 int offset, value, bas = inf->bas + 1; 1842 1843 if (alt) { 1844 if (alt->next) { 1845 if (rec_inc_spc(inf)) 1846 return (inf->ecode); 1847 1848 /* A real a list of alternatives */ 1849 rec_code(inf, Re_Alt); 1850 1851 offset = inf->len; /* Remember current offset */ 1852 rec_byte_byte(inf, 0, 0); /* Reserve two bytes for retry address */ 1853 while (alt && inf->ecode == 0) { 1854 if (rec_build_pat(inf, alt->pat)) 1855 break; 1856 alt = alt->next; 1857 if (alt && inf->ecode == 0) { 1858 /* Handle (hyper)complex repetitions */ 1859 if (inf->bas != bas) { 1860 /* Duplicate patterns up to end of expression */ 1861 rec_build_pat(inf, inf->apat); 1862 /* Restore engine state for next alternative(s) */ 1863 rec_alt_spc(inf, bas - 1); 1864 } 1865 1866 /* If the jump would be so long */ 1867 if ((value = inf->len - offset) >= 16384) { 1868 inf->ecode = RE_ESPACE; 1869 break; 1870 } 1871 inf->cod[offset] = value & 0xff; 1872 inf->cod[offset + 1] = (value & 0xff00) >> 8; 1873 1874 rec_code(inf, Re_AltNext); 1875 offset = inf->len; 1876 rec_byte_byte(inf, 0, 0); 1877 } 1878 } 1879 if (inf->ecode == 0) { 1880 /* Handle (hyper)complex repetitions */ 1881 if (inf->bas != bas) { 1882 /* Duplicate patterns up to end of expression */ 1883 rec_build_pat(inf, inf->apat); 1884 /* Restore engine state for next alternative(s) */ 1885 rec_alt_spc(inf, bas - 1); 1886 } 1887 1888 /* If the jump would be so long */ 1889 if ((value = inf->len - offset) >= 16384) 1890 return (inf->ecode = RE_ESPACE); 1891 inf->cod[offset] = value & 0xff; 1892 inf->cod[offset + 1] = (value & 0xff00) >> 8; 1893 /* Last jump is here */ 1894 rec_code(inf, Re_AltDone); 1895 } 1896 rec_dec_spc(inf); 1897 } 1898 else 1899 /* Single alternative */ 1900 rec_build_pat(inf, alt->pat); 1901 } 1902 1903 return (inf->ecode); 1904} 1905 1906static int 1907rec_build_pat(re_inf *inf, rec_pat *pat) 1908{ 1909 rec_pat *apat; 1910 int length, offset = 0, distance, jump = 0, bas = 0; 1911 1912 while (pat && inf->ecode == 0) { 1913 if (pat->rep) { 1914 bas = inf->bas; 1915 if (pat->type == Rep_Group && !inf->par && rec_code(inf, Re_Open)) 1916 return (inf->ecode); 1917 if (rec_inc_spc(inf)) 1918 return (inf->ecode); 1919 offset = inf->len; 1920 if (rec_build_rep(inf, pat->rep)) 1921 break; 1922 /* Reserve space to jump after repetition done */ 1923 jump = inf->len; 1924 rec_byte_byte(inf, 0, 0); 1925 } 1926 switch (pat->type) { 1927 case Rep_AnyAnyTimes: 1928 case Rep_AnyMaybe: 1929 case Rep_AnyAtLeast: 1930 if (rec_add_spc(inf, pat->type == Rep_AnyMaybe)) 1931 return (inf->ecode); 1932 if (rec_code(inf, (ReCode)pat->type) == 0 && 1933 rec_byte(inf, inf->bas - 1) == 0 && 1934 rec_byte(inf, inf->ref - 1) == 0) 1935 rec_off_spc(inf); 1936 break; 1937 case Rep_Literal: 1938 case Rep_LiteralNot: 1939 case Rep_SearchLiteral: 1940 rec_code_byte(inf, (ReCode)pat->type, pat->data.chr); 1941 break; 1942 case Rep_CaseLiteral: 1943 case Rep_CaseLiteralNot: 1944 case Rep_SearchCaseLiteral: 1945 rec_code_byte_byte(inf, (ReCode)pat->type, 1946 pat->data.cse.lower, pat->data.cse.upper); 1947 break; 1948 case Rep_Range: 1949 case Rep_RangeNot: 1950 if (rec_code(inf, (ReCode)pat->type) == 0) 1951 rec_build_rng(inf, pat->data.rng); 1952 break; 1953 case Rep_String: 1954 case Rep_SearchString: 1955 case Rep_CaseString: 1956 case Rep_SearchCaseString: 1957 rec_code(inf, (ReCode)pat->type); 1958 length = strlen((char*)pat->data.str); 1959 if (rec_length(inf, length) == 0 && rec_check(inf, length) == 0) { 1960 memcpy(inf->cod + inf->len, pat->data.str, length); 1961 inf->len += length; 1962 } 1963 break; 1964 case Rep_Any: 1965 case Rep_AnyEatAnyTimes: 1966 case Rep_AnyEatMaybe: 1967 case Rep_AnyEatAtLeast: 1968 case Rep_Odigit: 1969 case Rep_OdigitNot: 1970 case Rep_Digit: 1971 case Rep_DigitNot: 1972 case Rep_Xdigit: 1973 case Rep_XdigitNot: 1974 case Rep_Space: 1975 case Rep_SpaceNot: 1976 case Rep_Tab: 1977 case Rep_Newline: 1978 case Rep_Lower: 1979 case Rep_Upper: 1980 case Rep_Alnum: 1981 case Rep_AlnumNot: 1982 case Rep_Control: 1983 case Rep_ControlNot: 1984 case Rep_Bol: 1985 case Rep_Eol: 1986 case Rep_Bow: 1987 case Rep_Eow: 1988 rec_code(inf, (ReCode)pat->type); 1989 break; 1990 case Rep_Backref: 1991 rec_code_byte(inf, Re_Backref, pat->data.chr); 1992 break; 1993 case Rep_Group: 1994 if (pat->rep == NULL && !inf->par && rec_code(inf, Re_Open)) 1995 break; 1996 apat = inf->apat; 1997 inf->apat = pat->next; 1998 rec_build_grp(inf, pat->data.grp); 1999 inf->apat = apat; 2000 break; 2001 case Rep_StringList: 2002 rec_build_stl(inf, pat->data.stl); 2003 break; 2004 } 2005 if (pat->rep) { 2006#if 0 2007 if (rec_dec_spc(inf)) 2008 return (inf->ecode); 2009#else 2010 if (rec_rep_spc(inf, bas)) 2011 return (inf->ecode); 2012#endif 2013 distance = inf->len - offset; 2014 if (distance > 255) { 2015 if (rec_code(inf, Re_RepLongJump) || 2016 rec_byte(inf, inf->bas) || 2017 rec_byte(inf, distance & 0xff) || 2018 rec_byte(inf, (distance & 0xff00) >> 8)) 2019 break; 2020 } 2021 else if (rec_code(inf, Re_RepJump) || 2022 rec_byte(inf, inf->bas) || 2023 rec_byte(inf, distance)) 2024 break; 2025 distance = inf->len - offset; 2026 inf->cod[jump] = distance & 0xff; 2027 inf->cod[jump + 1] = (distance & 0xff00) >> 8; 2028 } 2029 pat = pat->next; 2030 } 2031 2032 return (inf->ecode); 2033} 2034 2035static int 2036rec_build_rng(re_inf *inf, rec_rng *rng) 2037{ 2038 if (rec_check(inf, sizeof(rng->range)) == 0) { 2039 memcpy(inf->cod + inf->len, rng->range, sizeof(rng->range)); 2040 inf->len += sizeof(rng->range); 2041 } 2042 2043 return (inf->ecode); 2044} 2045 2046static int 2047rec_build_grp(re_inf *inf, rec_grp *grp) 2048{ 2049 int par = inf->par; 2050 2051 if (!(inf->flags & RE_NOSUB)) { 2052 ++inf->par; 2053 if (par == 0) 2054 ++inf->ref; 2055 if (rec_build_alt(inf, grp->alt) == 0) { 2056 if (par == 0) { 2057 if (grp->comp) 2058 rec_code_byte(inf, Re_Update, inf->ref - 1); 2059 else 2060 rec_code(inf, Re_Close); 2061 } 2062 } 2063 --inf->par; 2064 } 2065 else 2066 rec_build_alt(inf, grp->alt); 2067 2068 return (inf->ecode); 2069} 2070 2071static int 2072rec_build_stl(re_inf *inf, rec_stl *stl) 2073{ 2074 int i, len, rlen; 2075 ReCode code; 2076 2077 /* Calculate jump distance information */ 2078 rlen = stl->tlen + stl->nstrs + 4; 2079 /* + code + nstrs + place-offset + data-length */ 2080 2081 if (stl->nstrs >= LARGE_STL_COUNT) { 2082 rlen += 511; /* Don't write number of strings */ 2083 code = stl->type == Rep_StringList ? 2084 Re_LargeStringList : Re_LargeCaseStringList; 2085 } 2086 else 2087 code = (ReCode)stl->type; 2088 2089 if (rlen >= 16386) 2090 return (inf->ecode = RE_ESPACE); 2091 if (rec_check(inf, rlen) || 2092 rec_code(inf, code)) 2093 return (inf->ecode); 2094 2095 /* Space is allocated, just write the data */ 2096 if (stl->nstrs < LARGE_STL_COUNT) 2097 inf->cod[inf->len++] = stl->nstrs; 2098 2099 inf->cod[inf->len++] = rlen & 0xff; 2100 inf->cod[inf->len++] = (rlen & 0xff00) >> 8; 2101 2102 if (stl->nstrs < LARGE_STL_COUNT) { 2103 for (i = 0; i < stl->nstrs; i++) 2104 inf->cod[inf->len++] = stl->lens[i]; 2105 for (i = 0; i < stl->nstrs; i++) { 2106 len = stl->lens[i]; 2107 if (len > 2) { 2108 memcpy(inf->cod + inf->len, stl->strs[i], len); 2109 inf->len += len; 2110 } 2111 else { 2112 if (len == 1) 2113 inf->cod[inf->len++] = (long)stl->strs[i]; 2114 else { 2115 inf->cod[inf->len++] = (long)stl->strs[i] & 0xff; 2116 inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8; 2117 } 2118 } 2119 } 2120 } 2121 else { 2122 /* The string length goes before the string itself */ 2123 int j, chl, chu; 2124 2125 /* Fill everything with an invalid jump address */ 2126 memset(inf->cod + inf->len, 0xff, 512); 2127 for (i = len = 0, j = -1; i < stl->nstrs; i++) { 2128 chl = stl->lens[i] > 2 ? stl->strs[i][0] : (long)stl->strs[i] & 0xff; 2129 if (chl != j) { 2130 inf->cod[inf->len + (chl << 1)] = len & 0xff; 2131 inf->cod[inf->len + (chl << 1) + 1] = (len & 0xff00) >> 8; 2132 if (code == Re_LargeCaseStringList) { 2133 chu = stl->lens[i] > 2 ? 2134 stl->strs[i][1] : ((long)(stl->strs[i]) & 0xff00) >> 8; 2135 inf->cod[inf->len + (chu << 1)] = len & 0xff; 2136 inf->cod[inf->len + (chu << 1) + 1] = (len & 0xff00) >> 8; 2137 } 2138 j = chl; 2139 } 2140 len += stl->lens[i] + 1; 2141 } 2142 inf->len += 512; 2143 2144 for (i = 0; i < stl->nstrs; i++) { 2145 len = stl->lens[i]; 2146 inf->cod[inf->len++] = len; 2147 if (len > 2) { 2148 memcpy(inf->cod + inf->len, stl->strs[i], len); 2149 inf->len += len; 2150 } 2151 else { 2152 if (len == 1) 2153 inf->cod[inf->len++] = (long)stl->strs[i]; 2154 else { 2155 inf->cod[inf->len++] = (long)stl->strs[i] & 0xff; 2156 inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8; 2157 } 2158 } 2159 } 2160 } 2161 2162 return (inf->ecode); 2163} 2164 2165static int 2166rec_build_rep(re_inf *inf, rec_rep *rep) 2167{ 2168 if (rep) { 2169 switch (rep->type) { 2170 case Rer_AnyTimes: 2171 case Rer_AtLeast: 2172 case Rer_Maybe: 2173 rec_code(inf, (ReCode)rep->type); 2174 break; 2175 case Rer_Exact: 2176 if (rec_code(inf, Re_Exact) == 0) 2177 rec_byte(inf, rep->mine); 2178 break; 2179 case Rer_Min: 2180 if (rec_code(inf, Re_Min) == 0) 2181 rec_byte(inf, rep->mine); 2182 break; 2183 case Rer_Max: 2184 if (rec_code(inf, Re_Max) == 0) 2185 rec_byte(inf, rep->maxc); 2186 break; 2187 case Rer_MinMax: 2188 if (rec_code(inf, Re_MinMax) == 0 && 2189 rec_byte(inf, rep->mine) == 0) 2190 rec_byte(inf, rep->maxc); 2191 break; 2192 } 2193 /* It is incremented in rec_build_pat */ 2194 rec_byte(inf, inf->bas - 1); 2195 } 2196 2197 return (inf->ecode); 2198} 2199 2200static int 2201rec_inc_spc(re_inf *inf) 2202{ 2203 if (++inf->bas >= MAX_DEPTH) 2204 return (inf->ecode = RE_ESPACE); 2205 2206 return (inf->ecode); 2207} 2208 2209static int 2210rec_dec_spc(re_inf *inf) 2211{ 2212 if (--inf->bas < 0) 2213 return (inf->ecode = RE_ASSERT); 2214 2215 return (inf->ecode); 2216} 2217 2218static int 2219rec_add_spc(re_inf *inf, int maybe) 2220{ 2221 if (++inf->bas >= MAX_DEPTH) 2222 return (inf->ecode = RE_ESPACE); 2223 inf->sp[inf->bas] = maybe + 1; 2224 2225 return (inf->ecode); 2226} 2227 2228/* Could be joined with rec_rep_spc, code almost identical */ 2229static int 2230rec_alt_spc(re_inf *inf, int top) 2231{ 2232 int distance, i, bas = inf->bas; 2233 2234 while ((inf->bas > top) && inf->sp[inf->bas]) { 2235 /* Jump to this repetition for cleanup */ 2236 distance = inf->len - inf->sr[inf->bas]; 2237 2238 /* This will generate a jump to a jump decision opcode */ 2239 inf->sj[inf->bas] = inf->len; 2240 2241 if (distance > 255) { 2242 if (rec_code(inf, Re_RepLongJump) || 2243 rec_byte(inf, inf->bas - 1) || 2244 rec_byte(inf, distance & 0xff) || 2245 rec_byte(inf, (distance & 0xff00) >> 8)) 2246 break; 2247 } 2248 else if (rec_code(inf, Re_RepJump) || 2249 rec_byte(inf, inf->bas - 1) || 2250 rec_byte(inf, distance)) 2251 break; 2252 2253 /* Top of stack value before repetition, or end condition value */ 2254 --inf->bas; 2255 } 2256 2257 i = inf->bas + 1; 2258 2259 if (inf->ecode == 0 && i <= bas && inf->sp[i]) { 2260 /* Only the repetition at the bottom jump to code after testing 2261 * all possibilities */ 2262 distance = inf->len - inf->sr[i]; 2263 inf->cod[inf->sr[i] + 3] = distance & 0xff; 2264 inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 2265 2266 /* The bottom jump is here */ 2267 if (rec_code(inf, inf->sp[i] == 1 ? Re_DoneIf : Re_MaybeDone)) 2268 return (inf->ecode); 2269 2270 /* Generate jumps to the previous special repetition */ 2271 for (++i; i <= bas; i++) { 2272 if (inf->sp[i]) { 2273 distance = inf->sj[i] - inf->sr[i]; 2274 inf->cod[inf->sr[i] + 3] = distance & 0xff; 2275 inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 2276 } 2277 } 2278 } 2279 2280 return (inf->ecode); 2281} 2282 2283static int 2284rec_rep_spc(re_inf *inf, int top) 2285{ 2286 int distance, i, bas = inf->bas; 2287 2288 while (inf->bas > top) { 2289 if (inf->sp[inf->bas]) { 2290 /* Jump to this repetition for cleanup */ 2291 distance = inf->len - inf->sr[inf->bas]; 2292 2293 /* This will generate a jump to a jump decision opcode */ 2294 inf->sj[inf->bas] = inf->len; 2295 2296 if (distance > 255) { 2297 if (rec_code(inf, Re_RepLongJump) || 2298 rec_byte(inf, inf->bas - 1) || 2299 rec_byte(inf, distance & 0xff) || 2300 rec_byte(inf, (distance & 0xff00) >> 8)) 2301 break; 2302 } 2303 else if (rec_code(inf, Re_RepJump) || 2304 rec_byte(inf, inf->bas - 1) || 2305 rec_byte(inf, distance)) 2306 break; 2307 } 2308 2309 /* Top of stack value before repetition, or end condition value */ 2310 --inf->bas; 2311 } 2312 2313 /* Find first special repetition offset. XXX This should be a noop */ 2314 for (i = 0; i < bas; i++) 2315 if (inf->sp[i]) 2316 break; 2317 2318 if (inf->ecode == 0 && i <= bas && inf->sp[i]) { 2319 /* Only the repetition at the bottom jump to code after testing 2320 * all possibilities */ 2321 distance = inf->len - inf->sr[i]; 2322 inf->cod[inf->sr[i] + 3] = distance & 0xff; 2323 inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 2324 2325 /* Generate jumps to the previous special repetition */ 2326 for (++i; i <= bas; i++) { 2327 if (inf->sp[i]) { 2328 distance = inf->sj[i] - inf->sr[i]; 2329 inf->cod[inf->sr[i] + 3] = distance & 0xff; 2330 inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 2331 } 2332 } 2333 } 2334 2335 return (inf->ecode); 2336} 2337 2338static int 2339rec_off_spc(re_inf *inf) 2340{ 2341 /* The jump address before the three bytes instruction */ 2342 inf->sr[inf->bas] = inf->len - 3; 2343 /* Don't know yet where to go after done with the special 2344 * repetition, just reserve two bytes for the jump address. */ 2345 return (rec_byte_byte(inf, 0, 0)); 2346} 2347 2348#ifdef DEBUG 2349static void 2350redump(re_cod *code) 2351{ 2352 int i, j, k; 2353 unsigned char *cod = code->cod, *stl; 2354 2355 if (cod[0] & RE_NOSUB) 2356 printf("Nosub\n"); 2357 if (cod[0] & RE_NEWLINE) 2358 printf("Newline\n"); 2359 ++cod; 2360 if (cod[0] != 0xff) 2361 printf("%d backrefs\n", cod[0] + 1); 2362 ++cod; 2363 for (;;) { 2364 switch (*cod++) { 2365 case Re_Open: 2366 printf("Open"); 2367 break; 2368 case Re_Close: 2369 printf("Close"); 2370 break; 2371 case Re_Update: 2372 printf("Update (%d)", (int)*cod++); 2373 break; 2374 case Re_Alt: 2375 printf("Alt"); 2376 i = cod[0] | cod[1]; 2377 cod += 2; 2378 printf(" %d", i); 2379 break; 2380 case Re_AltNext: 2381 printf("Alt-next"); 2382 i = cod[0] | cod[1]; 2383 cod += 2; 2384 printf(" %d", i); 2385 break; 2386 case Re_AltDone: 2387 printf("Alt-done"); 2388 break; 2389 case Re_AnyTimes: 2390 printf("-> Anytimes %d", (int)*cod++); 2391 i = cod[0] | (cod[1] << 8); 2392 cod += 2; 2393 printf(" /%d", i); 2394 break; 2395 case Re_AnyEatAnyTimes: 2396 printf("Any-eat-anytimes"); 2397 break; 2398 case Re_AnyAnyTimes: 2399 printf("-> Any-anytimes %d", (int)*cod++); 2400 printf(" (%d)", (int)*cod++); 2401 i = cod[0] | (cod[1] << 8); 2402 cod += 2; 2403 printf(" /%d", i); 2404 break; 2405 case Re_AnyEatMaybe: 2406 printf("Any-eat-maybe"); 2407 break; 2408 case Re_AnyMaybe: 2409 printf("-> Any-maybe %d", (int)*cod++); 2410 printf(" (%d)", (int)*cod++); 2411 i = cod[0] | (cod[1] << 8); 2412 cod += 2; 2413 printf(" /%d", i); 2414 break; 2415 case Re_AnyAtLeast: 2416 printf("-> Any-atleast %d", (int)*cod++); 2417 printf(" (%d)", (int)*cod++); 2418 i = cod[0] | (cod[1] << 8); 2419 cod += 2; 2420 printf(" /%d", i); 2421 break; 2422 case Re_AnyEatAtLeast: 2423 printf("Any-eat-atleast"); 2424 break; 2425 case Re_Maybe: 2426 printf("-> Maybe %d", (int)*cod++); 2427 i = cod[0] | (cod[1] << 8); 2428 cod += 2; 2429 printf(" /%d", i); 2430 break; 2431 case Re_AtLeast: 2432 printf("-> Atleast %d", (int)*cod++); 2433 i = cod[0] | (cod[1] << 8); 2434 cod += 2; 2435 printf(" /%d", i); 2436 break; 2437 case Re_Exact: 2438 printf("-> Exact "); 2439 i = *cod++; 2440 printf("%d", i); 2441 printf(" %d", (int)*cod++); 2442 i = cod[0] | (cod[1] << 8); 2443 cod += 2; 2444 printf(" /%d", i); 2445 break; 2446 case Re_Min: 2447 printf("-> Min "); 2448 i = *cod++; 2449 printf("%d", i); 2450 printf(" %d", (int)*cod++); 2451 i = cod[0] | (cod[1] << 8); 2452 cod += 2; 2453 printf(" /%d", i); 2454 break; 2455 case Re_Max: 2456 printf("-> Max "); 2457 i = *cod++; 2458 printf("%d", i); 2459 printf(" %d", (int)*cod++); 2460 i = cod[0] | (cod[1] << 8); 2461 cod += 2; 2462 printf(" /%d", i); 2463 break; 2464 case Re_MinMax: 2465 printf("-> Min-max "); 2466 i = *cod++; 2467 printf("%d ", i); 2468 i = *cod++; 2469 printf("%d", i); 2470 printf(" %d", (int)*cod++); 2471 i = cod[0] | (cod[1] << 8); 2472 cod += 2; 2473 printf(" /%d", i); 2474 break; 2475 case Re_RepJump: 2476 printf("<- Rep-jump %d ", (int)*cod++); 2477 i = *cod++; 2478 printf("%d", i); 2479 break; 2480 case Re_RepLongJump: 2481 printf("<- Rep-long-jump %d ", (int)*cod++); 2482 i = cod[0] | (cod[1] << 8); 2483 printf("%d", i); 2484 break; 2485 case Re_Any: 2486 printf("Any"); 2487 break; 2488 case Re_Odigit: 2489 printf("Odigit"); 2490 break; 2491 case Re_OdigitNot: 2492 printf("Odigit-not"); 2493 break; 2494 case Re_Digit: 2495 printf("Digit"); 2496 break; 2497 case Re_DigitNot: 2498 printf("Digit-not"); 2499 break; 2500 case Re_Xdigit: 2501 printf("Xdigit"); 2502 break; 2503 case Re_XdigitNot: 2504 printf("Xdigit-not"); 2505 break; 2506 case Re_Space: 2507 printf("Space"); 2508 break; 2509 case Re_SpaceNot: 2510 printf("Space-not"); 2511 break; 2512 case Re_Tab: 2513 printf("Tab"); 2514 break; 2515 case Re_Newline: 2516 printf("Newline"); 2517 break; 2518 case Re_Lower: 2519 printf("Lower"); 2520 break; 2521 case Re_Upper: 2522 printf("Upper"); 2523 break; 2524 case Re_Alnum: 2525 printf("Alnum"); 2526 break; 2527 case Re_AlnumNot: 2528 printf("Alnum-not"); 2529 break; 2530 case Re_Control: 2531 printf("Control"); 2532 break; 2533 case Re_ControlNot: 2534 printf("Control-not"); 2535 break; 2536 case Re_Bol: 2537 printf("Bol"); 2538 break; 2539 case Re_Eol: 2540 printf("Eol"); 2541 break; 2542 case Re_Bow: 2543 printf("Bow"); 2544 break; 2545 case Re_Eow: 2546 printf("Eow"); 2547 break; 2548 case Re_Range: 2549 printf("Range "); 2550 goto range; 2551 case Re_RangeNot: 2552 printf("Range-not "); 2553range: 2554 for (i = 0; i < 256; i += 32) { 2555 for (j = k = 0; j < 32; j++) 2556 k |= (*cod++ & 1) << (31 - j); 2557 printf("%x ", k); 2558 } 2559 break; 2560 case Re_Literal: 2561 printf("Literal %c", *cod++); 2562 break; 2563 case Re_LiteralNot: 2564 printf("Literal-not %c", *cod++); 2565 break; 2566 case Re_SearchLiteral: 2567 printf("Search-literal %c", *cod++); 2568 break; 2569 case Re_CaseLiteral: 2570 printf("Case-literal %c", *cod++); 2571 putchar(*cod++); 2572 break; 2573 case Re_CaseLiteralNot: 2574 printf("Case-literal-not %c", *cod++); 2575 putchar(*cod++); 2576 break; 2577 case Re_SearchCaseLiteral: 2578 printf("Search-case-literal %c", *cod++); 2579 putchar(*cod++); 2580 break; 2581 case Re_String: 2582 printf("String "); 2583 goto string; 2584 case Re_SearchString: 2585 printf("Search-string "); 2586 goto string; 2587 case Re_CaseString: 2588 printf("Case-string "); 2589 goto string; 2590 case Re_SearchCaseString: 2591 printf("Search-case-string "); 2592string: 2593 i = *cod++; 2594 if (i & 0x80) 2595 i = (i & 0x7f) | (*cod++ << 7); 2596 for (j = 0; j < i; j++) 2597 putchar(*cod++); 2598 break; 2599 case Re_StringList: 2600 printf("String-list"); 2601 goto string_list; 2602 case Re_CaseStringList: 2603 printf("Case-string-list"); 2604string_list: 2605 j = *cod++; 2606 cod += 2; 2607 stl = cod + j; 2608 for (i = 0; i < j; i++) { 2609 k = *cod++; 2610 putchar(i ? ',' : ' '); 2611 fwrite(stl, k, 1, stdout); 2612 stl += k; 2613 } 2614 cod = stl; 2615 break; 2616 case Re_LargeStringList: 2617 printf("Large-string-list"); 2618large_string_list: 2619 i = cod[0] | (cod[1] << 8); 2620 stl = cod + i - 1; 2621 for (i = 0, cod += 514; cod < stl; i++) { 2622 k = *cod++; 2623 putchar(i ? ',' : ' '); 2624 fwrite(cod, k, 1, stdout); 2625 cod += k; 2626 } 2627 cod = stl; 2628 break; 2629 case Re_LargeCaseStringList: 2630 printf("Large-case-string-list"); 2631 goto large_string_list; 2632 case Re_Backref: 2633 printf("Backref %d", (int)*cod++); 2634 break; 2635 case Re_DoneIf: 2636 printf("Done-if"); 2637 break; 2638 case Re_MaybeDone: 2639 printf("Maybe-done"); 2640 break; 2641 case Re_Done: 2642 printf("Done\n"); 2643 return; 2644 } 2645 putchar('\n'); 2646 } 2647} 2648#endif 2649