1/* 2 * Copyright (c) 2002 by The XFree86 Project, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 * 22 * Except as contained in this notice, the name of the XFree86 Project shall 23 * not be used in advertising or otherwise to promote the sale, use or other 24 * dealings in this Software without prior written authorization from the 25 * XFree86 Project. 26 * 27 * Author: Paulo César Pereira de Andrade 28 */ 29 30/* $XFree86: xc/programs/xedit/lisp/re/rec.c,v 1.3 2002/11/15 07:01:33 paulo Exp $ */ 31 32#include "rep.h" 33 34/* 35 * Types 36 */ 37 38/* Information used while compiling the intermediate format of the re. */ 39typedef struct _irec_info { 40 unsigned char *ptr; /* Pointer in the given regex pattern */ 41 unsigned char *end; /* End of regex pattern */ 42 int flags; /* Compile flags */ 43 rec_alt *alt; /* Toplevel first/single alternative */ 44 45 rec_alt *palt; /* Current alternative being compiled */ 46 rec_grp *pgrp; /* Current group, if any */ 47 rec_pat *ppat; /* Current pattern, if any */ 48 49 /* Number of open parenthesis, for error checking */ 50 int nparens; 51 52 int ngrps; /* Number of groups, for backreference */ 53 54 int ecode; 55} irec_info; 56 57 58/* 59 * Prototypes 60 */ 61 62 /* (i)ntermediate (r)egular (e)xpression (c)ompile 63 * Generates an intermediate stage compiled regex from 64 * the specified pattern argument. Basically builds an 65 * intermediate data structure to analyse and do syntax 66 * error checking. 67 */ 68static void irec_simple_pattern(irec_info*, rec_pat_t); 69static void irec_literal_pattern(irec_info*, int); 70static void irec_case_literal_pattern(irec_info*, int); 71static void irec_open_group(irec_info*); 72static void irec_close_group(irec_info*); 73static void irec_range(irec_info*); 74static void irec_range_single(irec_info*, int); 75static void irec_range_complex(irec_info*, int, int); 76static void irec_escape(irec_info*); 77static void irec_simple_repetition(irec_info*, rec_rep_t); 78static void irec_complex_repetition(irec_info*); 79static void irec_add_repetition(irec_info*, rec_rep*); 80static void irec_free(irec_info*); 81static void irec_free_grp(rec_grp*); 82static void irec_free_pats(rec_pat*); 83 84 85/* 86 * Implementation 87 */ 88rec_alt * 89irec_comp(const char *pattern, const char *endp, int flags, int *ecode) 90{ 91 unsigned char *ptr; 92 rec_alt *alt; 93 irec_info inf; 94 95 if (pattern == NULL || endp < pattern) { 96 *ecode = RE_INVARG; 97 return (NULL); 98 } 99 100 if (endp == pattern) { 101 *ecode = RE_EMPTY; 102 return (NULL); 103 } 104 105 alt = calloc(1, sizeof(rec_alt)); 106 if (alt == NULL) { 107 *ecode = RE_ESPACE; 108 return (NULL); 109 } 110 111 inf.ptr = (unsigned char*)pattern; 112 inf.end = (unsigned char*)endp; 113 inf.flags = flags; 114 inf.alt = inf.palt = alt; 115 inf.pgrp = NULL; 116 inf.ppat = NULL; 117 inf.nparens = inf.ngrps = 0; 118 inf.ecode = 0; 119 120 if (flags & RE_NOSPEC) { 121 /* Just searching for a character or substring */ 122 for (; inf.ecode == 0 && inf.ptr < inf.end; inf.ptr++) { 123 if (!(flags & RE_ICASE) || 124 (!isupper(*inf.ptr) && !islower(*inf.ptr))) 125 irec_literal_pattern(&inf, *inf.ptr); 126 else 127 irec_case_literal_pattern(&inf, *inf.ptr); 128 } 129 } 130 /* inf.ptr = inf.end is nul if flags & RE_NOSPEC */ 131 for (; inf.ecode == 0 && inf.ptr < inf.end;) { 132 switch (*inf.ptr++) { 133 case '*': 134 irec_simple_repetition(&inf, Rer_AnyTimes); 135 break; 136 case '+': 137 irec_simple_repetition(&inf, Rer_AtLeast); 138 break; 139 case '?': 140 irec_simple_repetition(&inf, Rer_Maybe); 141 break; 142 case '.': 143 irec_simple_pattern(&inf, Rep_Any); 144 break; 145 case '^': 146 if (flags & RE_NEWLINE) 147 /* It is up to the user decide if this can match */ 148 irec_simple_pattern(&inf, Rep_Bol); 149 else { 150 for (ptr = inf.ptr - 1; 151 ptr > (unsigned char*)pattern && *ptr == '('; ptr--) 152 ; 153 /* If at the start of a pattern */ 154 if (ptr == (unsigned char*)pattern || *ptr == '|') 155 irec_simple_pattern(&inf, Rep_Bol); 156 else 157 /* In the middle of a pattern, treat as literal */ 158 irec_literal_pattern(&inf, '^'); 159 } 160 break; 161 case '$': 162 if (flags & RE_NEWLINE) 163 irec_simple_pattern(&inf, Rep_Eol); 164 else { 165 /* Look ahead to check if is the last char of a group */ 166 for (ptr = inf.ptr; ptr < inf.end && *ptr == ')'; ptr++) 167 ; 168 if (*ptr == '\0' || *ptr == '|') 169 /* Last character of pattern, an EOL match */ 170 irec_simple_pattern(&inf, Rep_Eol); 171 else 172 /* Normal character */ 173 irec_literal_pattern(&inf, '$'); 174 } 175 break; 176 case '(': 177 irec_open_group(&inf); 178 break; 179 case ')': 180 /* Look ahead to check if need to close the group now */ 181 ptr = inf.ptr; 182 if (*ptr != '*' && *ptr != '+' && *ptr != '?' && *ptr != '{') 183 /* If a repetition does not follow */ 184 irec_close_group(&inf); 185 else if (inf.pgrp == NULL) 186 /* A repetition follows, but current group is implicit */ 187 inf.ecode = RE_EPAREN; 188 else 189 /* Can do this as next character is known */ 190 inf.ppat = NULL; 191 break; 192 case '[': 193 irec_range(&inf); 194 break; 195 case ']': 196 irec_literal_pattern(&inf, ']'); 197 break; 198 case '{': 199 irec_complex_repetition(&inf); 200 break; 201 case '}': 202 irec_literal_pattern(&inf, '}'); 203 break; 204 case '|': 205 /* If first character in the pattern */ 206 if (inf.ptr - 1 == (unsigned char*)pattern || 207 /* If last character in the pattern */ 208 inf.ptr >= inf.end || 209 /* If empty pattern */ 210 inf.ptr[0] == '|' || 211 inf.ptr[0] == ')') 212 inf.ecode = RE_EMPTY; 213 else { 214 rec_alt *alt = calloc(1, sizeof(rec_alt)); 215 216 if (alt) { 217 alt->prev = inf.palt; 218 inf.palt->next = alt; 219 inf.palt = alt; 220 inf.ppat = NULL; 221 } 222 else 223 inf.ecode = RE_ESPACE; 224 } 225 break; 226 case '\\': 227 irec_escape(&inf); 228 break; 229 default: 230 if (!(flags & RE_ICASE) || 231 (!isupper(inf.ptr[-1]) && !islower(inf.ptr[-1]))) 232 irec_literal_pattern(&inf, inf.ptr[-1]); 233 else 234 irec_case_literal_pattern(&inf, inf.ptr[-1]); 235 break; 236 } 237 } 238 239 /* Check if not all groups closed */ 240 if (inf.ecode == 0 && inf.nparens) 241 inf.ecode = RE_EPAREN; 242 243 if (inf.ecode == 0) 244 inf.ecode = orec_comp(inf.alt, flags); 245 246 /* If an error generated */ 247 if (inf.ecode) { 248 irec_free(&inf); 249 alt = NULL; 250 } 251 252 *ecode = inf.ecode; 253 254 return (alt); 255} 256 257void 258irec_free_alt(rec_alt *alt) 259{ 260 rec_alt *next; 261 262 while (alt) { 263 next = alt->next; 264 irec_free_pats(alt->pat); 265 free(alt); 266 alt = next; 267 } 268} 269 270 271 272static void 273irec_simple_pattern(irec_info *inf, rec_pat_t type) 274{ 275 rec_pat *pat; 276 277 /* Always add a new pattern to list */ 278 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 279 inf->ecode = RE_ESPACE; 280 return; 281 } 282 283 pat->type = type; 284 if ((pat->prev = inf->ppat) != NULL) 285 inf->ppat->next = pat; 286 else 287 inf->palt->pat = pat; 288 inf->ppat = pat; 289} 290 291static void 292irec_literal_pattern(irec_info *inf, int value) 293{ 294 int length; 295 rec_pat *pat; 296 unsigned char chr, *str; 297 298 /* If there is a current pattern */ 299 if (inf->ppat && inf->ppat->rep == NULL) { 300 switch (inf->ppat->type) { 301 case Rep_Literal: 302 /* Start literal string */ 303 chr = inf->ppat->data.chr; 304 if ((str = malloc(16)) == NULL) { 305 inf->ecode = RE_ESPACE; 306 return; 307 } 308 inf->ppat->type = Rep_String; 309 inf->ppat->data.str = str; 310 str[0] = chr; 311 str[1] = value; 312 str[2] = '\0'; 313 return; 314 315 case Rep_String: 316 /* Augments literal string */ 317 length = strlen((char*)inf->ppat->data.str); 318 if ((length % 16) >= 14) { 319 if ((str = realloc(inf->ppat->data.str, 320 length + 18)) == NULL) { 321 inf->ecode = RE_ESPACE; 322 return; 323 } 324 inf->ppat->data.str = str; 325 } 326 inf->ppat->data.str[length] = value; 327 inf->ppat->data.str[length + 1] = '\0'; 328 return; 329 330 default: 331 /* Anything else is added as a new pattern list element */ 332 break; 333 } 334 } 335 336 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 337 inf->ecode = RE_ESPACE; 338 return; 339 } 340 341 pat->type = Rep_Literal; 342 pat->data.chr = value; 343 if ((pat->prev = inf->ppat) != NULL) 344 inf->ppat->next = pat; 345 else 346 inf->palt->pat = pat; 347 inf->ppat = pat; 348} 349 350static void 351irec_case_literal_pattern(irec_info *inf, int value) 352{ 353 int length; 354 rec_pat *pat; 355 unsigned char plower, pupper, lower, upper, *str; 356 357 lower = tolower(value); 358 upper = toupper(value); 359 360 /* If there is a current pattern */ 361 if (inf->ppat && inf->ppat->rep == NULL) { 362 switch (inf->ppat->type) { 363 case Rep_CaseLiteral: 364 /* Start case literal string */ 365 plower = inf->ppat->data.cse.lower; 366 pupper = inf->ppat->data.cse.upper; 367 if ((str = malloc(32)) == NULL) { 368 inf->ecode = RE_ESPACE; 369 return; 370 } 371 inf->ppat->type = Rep_CaseString; 372 inf->ppat->data.str = str; 373 str[0] = plower; 374 str[1] = pupper; 375 str[2] = lower; 376 str[3] = upper; 377 str[4] = '\0'; 378 return; 379 380 case Rep_CaseString: 381 /* Augments case literal string */ 382 length = strlen((char*)inf->ppat->data.str); 383 if (((length) % 32) >= 28) { 384 if ((str = realloc(inf->ppat->data.str, 385 length + 36)) == NULL) { 386 inf->ecode = RE_ESPACE; 387 return; 388 } 389 inf->ppat->data.str = str; 390 } 391 inf->ppat->data.str[length] = lower; 392 inf->ppat->data.str[length + 1] = upper; 393 inf->ppat->data.str[length + 2] = '\0'; 394 return; 395 396 default: 397 /* Anything else is added as a new pattern list element */ 398 break; 399 } 400 } 401 402 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 403 inf->ecode = RE_ESPACE; 404 return; 405 } 406 407 pat->type = Rep_CaseLiteral; 408 pat->data.cse.lower = lower; 409 pat->data.cse.upper = upper; 410 pat->prev = inf->ppat; 411 if ((pat->prev = inf->ppat) != NULL) 412 inf->ppat->next = pat; 413 else 414 inf->palt->pat = pat; 415 inf->ppat = pat; 416} 417 418static void 419irec_open_group(irec_info *inf) 420{ 421 rec_pat *pat; 422 rec_alt *alt; 423 rec_grp *grp; 424 425 if ((grp = calloc(1, sizeof(rec_grp))) == NULL) { 426 inf->ecode = RE_ESPACE; 427 return; 428 } 429 430 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 431 free(grp); 432 inf->ecode = RE_ESPACE; 433 return; 434 } 435 436 if ((alt = calloc(1, sizeof(rec_alt))) == NULL) { 437 free(grp); 438 free(pat); 439 inf->ecode = RE_ESPACE; 440 return; 441 } 442 443 pat->type = Rep_Group; 444 pat->data.grp = grp; 445 grp->parent = pat; 446 grp->palt = inf->palt; 447 grp->pgrp = inf->pgrp; 448 grp->alt = alt; 449 grp->comp = 0; 450 if ((pat->prev = inf->ppat) != NULL) 451 inf->ppat->next = pat; 452 else 453 inf->palt->pat = pat; 454 inf->palt = alt; 455 inf->ppat = NULL; 456 457 /* Only toplevel parenthesis supported */ 458 if (++inf->nparens == 1) 459 ++inf->ngrps; 460 461 inf->pgrp = grp; 462} 463 464static void 465irec_close_group(irec_info *inf) 466{ 467 if (inf->pgrp == NULL) { 468 inf->ecode = RE_EPAREN; 469 return; 470 } 471 472 inf->palt = inf->pgrp->palt; 473 inf->ppat = inf->pgrp->parent; 474 inf->pgrp = inf->pgrp->pgrp; 475 476 --inf->nparens; 477} 478 479static void 480irec_range(irec_info *inf) 481{ 482 int count; 483 rec_pat *pat; 484 rec_rng *rng; 485 int not = inf->ptr[0] == '^'; 486 487 if (not) 488 ++inf->ptr; 489 490 pat = calloc(1, sizeof(rec_pat)); 491 if (pat == NULL) { 492 inf->ecode = RE_ESPACE; 493 return; 494 } 495 496 rng = calloc(1, sizeof(rec_rng)); 497 if (pat == NULL) { 498 free(pat); 499 inf->ecode = RE_ESPACE; 500 return; 501 } 502 503 pat->data.rng = rng; 504 pat->type = not ? Rep_RangeNot : Rep_Range; 505 if ((pat->prev = inf->ppat) != NULL) 506 inf->ppat->next = pat; 507 else 508 inf->palt->pat = pat; 509 inf->ppat = pat; 510 511 /* First pass, add everything seen */ 512 for (count = 0; inf->ecode == 0; count++) { 513 /* If bracket not closed */ 514 if (inf->ptr == inf->end) { 515 inf->ecode = RE_EBRACK; 516 return; 517 } 518 /* If not the first character */ 519 else if (inf->ptr[0] == ']' && count) 520 break; 521 else { 522 /* If not a range of characters */ 523 if (inf->ptr[1] != '-' || inf->ptr[2] == ']') { 524 irec_range_single(inf, inf->ptr[0]); 525 ++inf->ptr; 526 } 527 else { 528 if ((inf->flags & RE_NEWLINE) && 529 inf->ptr[0] < '\n' && inf->ptr[2] > '\n') { 530 /* Unless it is forced to be a delimiter, don't allow 531 * a newline in a character range */ 532 if (inf->ptr[0] == '\n' - 1) 533 irec_range_single(inf, inf->ptr[0]); 534 else 535 irec_range_complex(inf, inf->ptr[0], '\n' - 1); 536 if (inf->ptr[2] == '\n' + 1) 537 irec_range_single(inf, inf->ptr[2]); 538 else 539 irec_range_complex(inf, '\n' + 1, inf->ptr[2]); 540 } 541 else 542 irec_range_complex(inf, inf->ptr[0], inf->ptr[2]); 543 inf->ptr += 3; 544 } 545 } 546 } 547 548 /* Skip ] */ 549 ++inf->ptr; 550} 551 552static void 553irec_range_single(irec_info *inf, int value) 554{ 555 if (value >= 0 && value <= 255) 556 inf->ppat->data.rng->range[value] = 1; 557 558 if (inf->flags & RE_ICASE) { 559 if (islower(value)) { 560 value = toupper(value); 561 if (value >= 0 && value <= 255) 562 inf->ppat->data.rng->range[value] = 1; 563 } 564 else if (isupper(value)) { 565 value = tolower(value); 566 if (value >= 0 && value <= 255) 567 inf->ppat->data.rng->range[value] = 1; 568 } 569 } 570} 571 572static void 573irec_range_complex(irec_info *inf, int chrf, int chrt) 574{ 575 if (chrf > chrt) { 576 inf->ecode = RE_ERANGE; 577 return; 578 } 579 580 for (; chrf <= chrt; chrf++) 581 irec_range_single(inf, chrf); 582} 583 584static void 585irec_escape(irec_info *inf) 586{ 587 rec_pat *pat; 588 unsigned char chr = inf->ptr[0]; 589 590 if (chr == 0) { 591 inf->ecode = RE_EESCAPE; 592 return; 593 } 594 ++inf->ptr; 595 switch (chr) { 596 case 'o': 597 irec_simple_pattern(inf, Rep_Odigit); 598 break; 599 case 'O': 600 irec_simple_pattern(inf, Rep_OdigitNot); 601 break; 602 case 'd': 603 irec_simple_pattern(inf, Rep_Digit); 604 break; 605 case 'D': 606 irec_simple_pattern(inf, Rep_DigitNot); 607 break; 608 case 'x': 609 irec_simple_pattern(inf, Rep_Xdigit); 610 break; 611 case 'X': 612 irec_simple_pattern(inf, Rep_XdigitNot); 613 break; 614 case 's': 615 irec_simple_pattern(inf, Rep_Space); 616 break; 617 case 'S': 618 irec_simple_pattern(inf, Rep_SpaceNot); 619 break; 620 case 't': 621 irec_simple_pattern(inf, Rep_Tab); 622 break; 623 case 'n': 624 irec_simple_pattern(inf, Rep_Newline); 625 break; 626 case 'l': 627 irec_simple_pattern(inf, Rep_Lower); 628 break; 629 case 'u': 630 irec_simple_pattern(inf, Rep_Upper); 631 break; 632 case 'w': 633 irec_simple_pattern(inf, Rep_Alnum); 634 break; 635 case 'W': 636 irec_simple_pattern(inf, Rep_AlnumNot); 637 break; 638 case 'c': 639 irec_simple_pattern(inf, Rep_Control); 640 break; 641 case 'C': 642 irec_simple_pattern(inf, Rep_ControlNot); 643 break; 644 case '<': 645 irec_simple_pattern(inf, Rep_Bow); 646 break; 647 case '>': 648 irec_simple_pattern(inf, Rep_Eow); 649 break; 650 case '1': case '2': case '3': 651 case '4': case '5': case '6': 652 case '7': case '8': case '9': 653 if ((inf->flags & RE_NOSUB) || (chr -= '1') >= inf->ngrps) { 654 inf->ecode = RE_ESUBREG; 655 return; 656 } 657 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 658 inf->ecode = RE_ESPACE; 659 return; 660 } 661 pat->type = Rep_Backref; 662 pat->data.chr = chr; 663 pat->prev = inf->ppat; 664 if (inf->ppat) 665 inf->ppat->next = pat; 666 else 667 inf->palt->pat = pat; 668 inf->ppat = pat; 669 break; 670 671 /* True literals */ 672 case '0': 673 irec_literal_pattern(inf, '\0'); 674 break; 675 case 'a': 676 irec_literal_pattern(inf, '\a'); 677 break; 678 case 'b': 679 irec_literal_pattern(inf, '\b'); 680 break; 681 case 'f': 682 irec_literal_pattern(inf, '\f'); 683 break; 684 case 'r': 685 irec_literal_pattern(inf, '\r'); 686 break; 687 case 'v': 688 irec_literal_pattern(inf, '\v'); 689 break; 690 691 default: 692 /* Don't check if case insensitive regular expression */ 693 irec_literal_pattern(inf, chr); 694 break; 695 } 696} 697 698static void 699irec_simple_repetition(irec_info *inf, rec_rep_t type) 700{ 701 rec_rep *rep; 702 703 /* If nowhere to add repetition */ 704 if ((inf->pgrp == NULL && inf->ppat == NULL) || 705 /* If repetition already added to last/current pattern */ 706 (inf->pgrp == NULL && inf->ppat->rep != NULL) || 707 /* If repetition already added to last/current group */ 708 (inf->ppat == NULL && inf->pgrp->parent->rep != NULL)) { 709 inf->ecode = RE_BADRPT; 710 return; 711 } 712 713 if ((rep = calloc(1, sizeof(rec_rep))) == NULL) { 714 inf->ecode = RE_ESPACE; 715 return; 716 } 717 718 rep->type = type; 719 irec_add_repetition(inf, rep); 720} 721 722static void 723irec_complex_repetition(irec_info *inf) 724{ 725 int exact; 726 rec_rep *rep; 727 long mine, maxc; 728 unsigned char *end; 729 730 /* If nowhere to add repetition */ 731 if ((inf->pgrp == NULL && inf->ppat == NULL) || 732 /* If repetition already added to last/current pattern */ 733 (inf->pgrp == NULL && inf->ppat->rep != NULL) || 734 /* If repetition already added to last/current group */ 735 (inf->ppat == NULL && inf->pgrp->parent->rep != NULL)) { 736 inf->ecode = RE_EBADBR; 737 return; 738 } 739 740 exact = 0; 741 mine = maxc = -1; 742 if (inf->ptr[0] == ',') 743 /* Specify max number of ocurrences only */ 744 goto domax; 745 else if (!isdigit(inf->ptr[0])) 746 goto badbr; 747 748 mine = strtol((char*)inf->ptr, (char**)&end, 10); 749 inf->ptr = end; 750 if (inf->ptr[0] == '}') { 751 exact = 1; 752 ++inf->ptr; 753 goto redone; 754 } 755 else if (inf->ptr[0] != ',') 756 goto badbr; 757 758domax: 759 /* Add one to skip comma */ 760 ++inf->ptr; 761 if (inf->ptr[0] == '}') { 762 ++inf->ptr; 763 goto redone; 764 } 765 else if (!isdigit(inf->ptr[0])) 766 goto badbr; 767 maxc = strtol((char*)inf->ptr, (char**)&end, 10); 768 inf->ptr = end; 769 if (inf->ptr[0] != '}') 770 goto badbr; 771 ++inf->ptr; 772 773redone: 774 if (mine == maxc) { 775 maxc = -1; 776 exact = 1; 777 } 778 779 /* Check range and if min-max parameters are valid */ 780 if (mine >= 255 || maxc >= 255 || 781 (mine >= 0 && maxc >= 0 && mine > maxc)) 782 goto badbr; 783 784 /* Check for noop */ 785 if (exact && mine == 1) 786 return; 787 788 if ((rep = calloc(1, sizeof(rec_rep))) == NULL) { 789 inf->ecode = RE_ESPACE; 790 return; 791 } 792 793 /* Convert {0,1} to ? */ 794 if (mine == 0 && maxc == 1) 795 rep->type = Rer_Maybe; 796 else if (exact) { 797 rep->type = Rer_Exact; 798 rep->mine = mine; 799 } 800 /* Convert {0,} to * */ 801 else if (mine == 0 && maxc == -1) 802 rep->type = Rer_AnyTimes; 803 /* Convert {1,} to + */ 804 else if (mine == 1 && maxc == -1) 805 rep->type = Rer_AtLeast; 806 else if (maxc == -1) { 807 rep->type = Rer_Min; 808 rep->mine = mine; 809 } 810 else if (mine < 1) { 811 rep->type = Rer_Max; 812 rep->maxc = maxc; 813 } 814 else { 815 rep->type = Rer_MinMax; 816 rep->mine = mine; 817 rep->maxc = maxc; 818 } 819 820 irec_add_repetition(inf, rep); 821 822 return; 823 824badbr: 825 inf->ecode = RE_EBADBR; 826} 827 828/* The rep argument is allocated and has no reference yet, 829 * if something fails it must be freed before returning. 830 */ 831static void 832irec_add_repetition(irec_info *inf, rec_rep *rep) 833{ 834 int length; 835 rec_pat *pat; 836 rec_grp *grp; 837 rec_rep_t rept; 838 unsigned char value, upper; 839 840 rept = rep->type; 841 842 if (inf->ppat == NULL) { 843 rec_pat *any; 844 rec_grp *grp = inf->pgrp; 845 846 if (rept == Rer_AnyTimes || rept == Rer_Maybe || rept == Re_AtLeast) { 847 /* Convert (.)* to (.*), ((.))* not handled and may not match */ 848 any = NULL; 849 850 if (grp->alt && grp->alt->pat) { 851 for (any = grp->alt->pat; any->next; any = any->next) 852 ; 853 switch (any->type) { 854 case Rep_Any: 855 break; 856 case Rep_AnyAnyTimes: 857 case Rep_AnyMaybe: 858 case Rep_AnyAtLeast: 859 free(rep); 860 inf->ecode = RE_BADRPT; 861 return; 862 default: 863 any = NULL; 864 break; 865 } 866 } 867 if (any) { 868 free(rep); 869 rep = NULL; 870 any->type = (rept == Rer_AnyTimes) ? Rep_AnyAnyTimes : 871 (rept == Rer_AtLeast) ? Rep_AnyAtLeast : 872 Rep_AnyMaybe; 873 while (grp) { 874 ++grp->comp; 875 grp = grp->pgrp; 876 } 877 } 878 } 879 inf->pgrp->parent->rep = rep; 880 irec_close_group(inf); 881 return; 882 } 883 884 switch (inf->ppat->type) { 885 case Rep_Bol: 886 case Rep_Eol: 887 case Rep_Bow: 888 case Rep_Eow: 889 case Rep_AnyAnyTimes: 890 case Rep_AnyMaybe: 891 case Rep_AnyAtLeast: 892 /* Markers that cannot repeat */ 893 free(rep); 894 inf->ecode = RE_BADRPT; 895 return; 896 897 case Rep_Any: 898 grp = inf->pgrp; 899 free(rep); 900 if (rept == Rer_AnyTimes || 901 rept == Rer_Maybe || 902 rept == Rer_AtLeast) { 903 inf->ppat->type = (rept == Rer_AnyTimes) ? 904 Rep_AnyAnyTimes : 905 (rept == Rer_Maybe) ? 906 Rep_AnyMaybe : 907 Rep_AnyAtLeast; 908 while (grp) { 909 ++grp->comp; 910 grp = grp->pgrp; 911 } 912 } 913 else 914 /* XXX Not (yet) implemented */ 915 inf->ecode = RE_BADRPT; 916 rep = NULL; 917 break; 918 919 case Rep_String: 920 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 921 free(rep); 922 inf->ecode = RE_ESPACE; 923 return; 924 } 925 926 length = strlen((char*)inf->ppat->data.str); 927 pat->type = Rep_Literal; 928 pat->prev = inf->ppat; 929 pat->data.chr = inf->ppat->data.str[length - 1]; 930 if (length == 2) { 931 /* Must convert to two Rep_Literals */ 932 value = inf->ppat->data.str[0]; 933 free(inf->ppat->data.str); 934 inf->ppat->data.chr = value; 935 inf->ppat->type = Rep_Literal; 936 } 937 else 938 /* Must remove last character from string */ 939 inf->ppat->data.str[length - 1] = '\0'; 940 inf->ppat->next = pat; 941 inf->ppat = pat; 942 break; 943 944 case Rep_CaseString: 945 if ((pat = calloc(1, sizeof(rec_pat))) == NULL) { 946 free(rep); 947 inf->ecode = RE_ESPACE; 948 return; 949 } 950 951 length = strlen((char*)inf->ppat->data.str); 952 pat->type = Rep_CaseLiteral; 953 pat->prev = inf->ppat; 954 pat->data.cse.lower = inf->ppat->data.str[length - 2]; 955 pat->data.cse.upper = inf->ppat->data.str[length - 1]; 956 if (length == 4) { 957 /* Must convert to two Rep_CaseLiterals */ 958 value = inf->ppat->data.str[0]; 959 upper = inf->ppat->data.str[1]; 960 free(inf->ppat->data.str); 961 inf->ppat->data.cse.lower = value; 962 inf->ppat->data.cse.upper = upper; 963 inf->ppat->next = pat; 964 inf->ppat->type = Rep_CaseLiteral; 965 } 966 else 967 /* Must remove last character pair from string */ 968 inf->ppat->data.str[length - 2] = '\0'; 969 inf->ppat->next = pat; 970 inf->ppat = pat; 971 break; 972 973 default: 974 /* Anything else does not need special handling */ 975 break; 976 } 977 978 inf->ppat->rep = rep; 979} 980 981static void 982irec_free(irec_info *inf) 983{ 984 irec_free_alt(inf->alt); 985} 986 987static void 988irec_free_grp(rec_grp *grp) 989{ 990 if (grp->alt) 991 irec_free_alt(grp->alt); 992 free(grp); 993} 994 995static void 996irec_free_pats(rec_pat *pat) 997{ 998 rec_pat *next; 999 rec_pat_t rect; 1000 1001 while (pat) { 1002 next = pat->next; 1003 if (pat->rep) 1004 free(pat->rep); 1005 rect = pat->type; 1006 if (rect == Rep_Range || rect == Rep_RangeNot) 1007 free(pat->data.rng); 1008 else if (rect == Rep_Group) 1009 irec_free_grp(pat->data.grp); 1010 else if (rect == Rep_StringList) 1011 orec_free_stl(pat->data.stl); 1012 free(pat); 1013 pat = next; 1014 } 1015} 1016