Home | History | Annotate | Line # | Download | only in re
      1 /*
      2  * Copyright (c) 2002 by The XFree86 Project, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  *
     22  * Except as contained in this notice, the name of the XFree86 Project shall
     23  * not be used in advertising or otherwise to promote the sale, use or other
     24  * dealings in this Software without prior written authorization from the
     25  * XFree86 Project.
     26  *
     27  * Author: Paulo Csar Pereira de Andrade
     28  */
     29 
     30 /* $XFree86: xc/programs/xedit/lisp/re/rec.c,v 1.3 2002/11/15 07:01:33 paulo Exp $ */
     31 
     32 #include "rep.h"
     33 
     34 /*
     35  * Types
     36  */
     37 
     38 /*  Information used while compiling the intermediate format of the re. */
     39 typedef struct _irec_info {
     40     unsigned char *ptr;		/* Pointer in the given regex pattern */
     41     unsigned char *end;		/* End of regex pattern */
     42     int flags;			/* Compile flags */
     43     rec_alt *alt;		/* Toplevel first/single alternative */
     44 
     45     rec_alt *palt;		/* Current alternative being compiled */
     46     rec_grp *pgrp;		/* Current group, if any */
     47     rec_pat *ppat;		/* Current pattern, if any */
     48 
     49     /*  Number of open parenthesis, for error checking */
     50     int nparens;
     51 
     52     int ngrps;			/* Number of groups, for backreference */
     53 
     54     int ecode;
     55 } irec_info;
     56 
     57 
     58 /*
     59  * Prototypes
     60  */
     61 
     62 	/* (i)ntermediate (r)egular (e)xpression (c)ompile
     63 	 *  Generates an intermediate stage compiled regex from
     64 	 * the specified pattern argument. Basically builds an
     65 	 * intermediate data structure to analyse and do syntax
     66 	 * error checking.
     67 	 */
     68 static void irec_simple_pattern(irec_info*, rec_pat_t);
     69 static void irec_literal_pattern(irec_info*, int);
     70 static void irec_case_literal_pattern(irec_info*, int);
     71 static void irec_open_group(irec_info*);
     72 static void irec_close_group(irec_info*);
     73 static void irec_range(irec_info*);
     74 static void irec_range_single(irec_info*, int);
     75 static void irec_range_complex(irec_info*, int, int);
     76 static void irec_escape(irec_info*);
     77 static void irec_simple_repetition(irec_info*, rec_rep_t);
     78 static void irec_complex_repetition(irec_info*);
     79 static void irec_add_repetition(irec_info*, rec_rep*);
     80 static void irec_free(irec_info*);
     81 static void irec_free_grp(rec_grp*);
     82 static void irec_free_pats(rec_pat*);
     83 
     84 
     85 /*
     86  * Implementation
     87  */
     88 rec_alt *
     89 irec_comp(const char *pattern, const char *endp, int flags, int *ecode)
     90 {
     91     unsigned char *ptr;
     92     rec_alt *alt;
     93     irec_info inf;
     94 
     95     if (pattern == NULL || endp < pattern) {
     96 	*ecode = RE_INVARG;
     97 	return (NULL);
     98     }
     99 
    100     if (endp == pattern) {
    101 	*ecode = RE_EMPTY;
    102 	return (NULL);
    103     }
    104 
    105     alt = calloc(1, sizeof(rec_alt));
    106     if (alt == NULL) {
    107 	*ecode = RE_ESPACE;
    108 	return (NULL);
    109     }
    110 
    111     inf.ptr = (unsigned char*)pattern;
    112     inf.end = (unsigned char*)endp;
    113     inf.flags = flags;
    114     inf.alt = inf.palt = alt;
    115     inf.pgrp = NULL;
    116     inf.ppat = NULL;
    117     inf.nparens = inf.ngrps = 0;
    118     inf.ecode = 0;
    119 
    120     if (flags & RE_NOSPEC) {
    121 	/* Just searching for a character or substring */
    122 	for (; inf.ecode == 0 && inf.ptr < inf.end; inf.ptr++) {
    123 	    if (!(flags & RE_ICASE) ||
    124 		(!isupper(*inf.ptr) && !islower(*inf.ptr)))
    125 		irec_literal_pattern(&inf, *inf.ptr);
    126 	    else
    127 		irec_case_literal_pattern(&inf, *inf.ptr);
    128 	}
    129     }
    130     /* inf.ptr = inf.end is nul if flags & RE_NOSPEC */
    131     for (; inf.ecode == 0 && inf.ptr < inf.end;) {
    132 	switch (*inf.ptr++) {
    133 	    case '*':
    134 		irec_simple_repetition(&inf, Rer_AnyTimes);
    135 		break;
    136 	    case '+':
    137 		irec_simple_repetition(&inf, Rer_AtLeast);
    138 		break;
    139 	    case '?':
    140 		irec_simple_repetition(&inf, Rer_Maybe);
    141 		break;
    142 	    case '.':
    143 		irec_simple_pattern(&inf, Rep_Any);
    144 		break;
    145 	    case '^':
    146 		if (flags & RE_NEWLINE)
    147 		    /* It is up to the user decide if this can match */
    148 		    irec_simple_pattern(&inf, Rep_Bol);
    149 		else {
    150 		    for (ptr = inf.ptr - 1;
    151 			 ptr > (unsigned char*)pattern && *ptr == '('; ptr--)
    152 			;
    153 		    /* If at the start of a pattern */
    154 		    if (ptr == (unsigned char*)pattern || *ptr == '|')
    155 			irec_simple_pattern(&inf, Rep_Bol);
    156 		    else
    157 			/* In the middle of a pattern, treat as literal */
    158 			irec_literal_pattern(&inf, '^');
    159 		}
    160 		break;
    161 	    case '$':
    162 		if (flags & RE_NEWLINE)
    163 		    irec_simple_pattern(&inf, Rep_Eol);
    164 		else {
    165 		    /* Look ahead to check if is the last char of a group */
    166 		    for (ptr = inf.ptr; ptr < inf.end && *ptr == ')'; ptr++)
    167 			;
    168 		    if (*ptr == '\0' || *ptr == '|')
    169 			/* Last character of pattern, an EOL match */
    170 			irec_simple_pattern(&inf, Rep_Eol);
    171 		    else
    172 			/* Normal character */
    173 			irec_literal_pattern(&inf, '$');
    174 		}
    175 		break;
    176 	    case '(':
    177 		irec_open_group(&inf);
    178 		break;
    179 	    case ')':
    180 		/* Look ahead to check if need to close the group now */
    181 		ptr = inf.ptr;
    182 		if (*ptr != '*' && *ptr != '+' && *ptr != '?' && *ptr != '{')
    183 		    /* If a repetition does not follow */
    184 		    irec_close_group(&inf);
    185 		else if (inf.pgrp == NULL)
    186 		    /* A repetition follows, but current group is implicit */
    187 		    inf.ecode = RE_EPAREN;
    188 		else
    189 		    /* Can do this as next character is known */
    190 		    inf.ppat = NULL;
    191 		break;
    192 	    case '[':
    193 		irec_range(&inf);
    194 		break;
    195 	    case ']':
    196 		irec_literal_pattern(&inf, ']');
    197 		break;
    198 	    case '{':
    199 		irec_complex_repetition(&inf);
    200 		break;
    201 	    case '}':
    202 		irec_literal_pattern(&inf, '}');
    203 		break;
    204 	    case '|':
    205 		    /* If first character in the pattern */
    206 		if (inf.ptr - 1 == (unsigned char*)pattern ||
    207 		    /* If last character in the pattern */
    208 		    inf.ptr >= inf.end ||
    209 		    /* If empty pattern */
    210 		    inf.ptr[0] == '|' ||
    211 		    inf.ptr[0] == ')')
    212 		    inf.ecode = RE_EMPTY;
    213 		else {
    214 		    rec_alt *alt = calloc(1, sizeof(rec_alt));
    215 
    216 		    if (alt) {
    217 			alt->prev = inf.palt;
    218 			inf.palt->next = alt;
    219 			inf.palt = alt;
    220 			inf.ppat = NULL;
    221 		    }
    222 		    else
    223 			inf.ecode = RE_ESPACE;
    224 		}
    225 		break;
    226 	    case '\\':
    227 		irec_escape(&inf);
    228 		break;
    229 	    default:
    230 		if (!(flags & RE_ICASE) ||
    231 		    (!isupper(inf.ptr[-1]) && !islower(inf.ptr[-1])))
    232 		    irec_literal_pattern(&inf, inf.ptr[-1]);
    233 		else
    234 		    irec_case_literal_pattern(&inf, inf.ptr[-1]);
    235 		break;
    236 	}
    237     }
    238 
    239     /* Check if not all groups closed */
    240     if (inf.ecode == 0 && inf.nparens)
    241 	inf.ecode = RE_EPAREN;
    242 
    243     if (inf.ecode == 0)
    244 	inf.ecode = orec_comp(inf.alt, flags);
    245 
    246     /* If an error generated */
    247     if (inf.ecode) {
    248 	irec_free(&inf);
    249 	alt = NULL;
    250     }
    251 
    252     *ecode = inf.ecode;
    253 
    254     return (alt);
    255 }
    256 
    257 void
    258 irec_free_alt(rec_alt *alt)
    259 {
    260     rec_alt *next;
    261 
    262     while (alt) {
    263 	next = alt->next;
    264 	irec_free_pats(alt->pat);
    265 	free(alt);
    266 	alt = next;
    267     }
    268 }
    269 
    270 
    271 
    272 static void
    273 irec_simple_pattern(irec_info *inf, rec_pat_t type)
    274 {
    275     rec_pat *pat;
    276 
    277     /* Always add a new pattern to list */
    278     if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    279 	inf->ecode = RE_ESPACE;
    280 	return;
    281     }
    282 
    283     pat->type = type;
    284     if ((pat->prev = inf->ppat) != NULL)
    285 	inf->ppat->next = pat;
    286     else
    287 	inf->palt->pat = pat;
    288     inf->ppat = pat;
    289 }
    290 
    291 static void
    292 irec_literal_pattern(irec_info *inf, int value)
    293 {
    294     int length;
    295     rec_pat *pat;
    296     unsigned char chr, *str;
    297 
    298 	/* If there is a current pattern */
    299     if (inf->ppat && inf->ppat->rep == NULL) {
    300 	switch (inf->ppat->type) {
    301 	    case Rep_Literal:
    302 		/* Start literal string */
    303 		chr = inf->ppat->data.chr;
    304 		if ((str = malloc(16)) == NULL) {
    305 		    inf->ecode = RE_ESPACE;
    306 		    return;
    307 		}
    308 		inf->ppat->type = Rep_String;
    309 		inf->ppat->data.str = str;
    310 		str[0] = chr;
    311 		str[1] = value;
    312 		str[2] = '\0';
    313 		return;
    314 
    315 	    case Rep_String:
    316 		/* Augments literal string */
    317 		length = strlen((char*)inf->ppat->data.str);
    318 		if ((length % 16) >= 14) {
    319 		    if ((str = realloc(inf->ppat->data.str,
    320 				       length + 18)) == NULL) {
    321 			inf->ecode = RE_ESPACE;
    322 			return;
    323 		    }
    324 		    inf->ppat->data.str = str;
    325 		}
    326 		inf->ppat->data.str[length] = value;
    327 		inf->ppat->data.str[length + 1] = '\0';
    328 		return;
    329 
    330 	    default:
    331 		/* Anything else is added as a new pattern list element */
    332 		break;
    333 	}
    334     }
    335 
    336     if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    337 	inf->ecode = RE_ESPACE;
    338 	return;
    339     }
    340 
    341     pat->type = Rep_Literal;
    342     pat->data.chr = value;
    343     if ((pat->prev = inf->ppat) != NULL)
    344 	inf->ppat->next = pat;
    345     else
    346 	inf->palt->pat = pat;
    347     inf->ppat = pat;
    348 }
    349 
    350 static void
    351 irec_case_literal_pattern(irec_info *inf, int value)
    352 {
    353     int length;
    354     rec_pat *pat;
    355     unsigned char plower, pupper, lower, upper, *str;
    356 
    357     lower = tolower(value);
    358     upper = toupper(value);
    359 
    360 	/* If there is a current pattern */
    361     if (inf->ppat && inf->ppat->rep == NULL) {
    362 	switch (inf->ppat->type) {
    363 	    case Rep_CaseLiteral:
    364 		/* Start case literal string */
    365 		plower = inf->ppat->data.cse.lower;
    366 		pupper = inf->ppat->data.cse.upper;
    367 		if ((str = malloc(32)) == NULL) {
    368 		    inf->ecode = RE_ESPACE;
    369 		    return;
    370 		}
    371 		inf->ppat->type = Rep_CaseString;
    372 		inf->ppat->data.str = str;
    373 		str[0] = plower;
    374 		str[1] = pupper;
    375 		str[2] = lower;
    376 		str[3] = upper;
    377 		str[4] = '\0';
    378 		return;
    379 
    380 	    case Rep_CaseString:
    381 		/* Augments case literal string */
    382 		length = strlen((char*)inf->ppat->data.str);
    383 		if (((length) % 32) >= 28) {
    384 		    if ((str = realloc(inf->ppat->data.str,
    385 				       length + 36)) == NULL) {
    386 			inf->ecode = RE_ESPACE;
    387 			return;
    388 		    }
    389 		    inf->ppat->data.str = str;
    390 		}
    391 		inf->ppat->data.str[length] = lower;
    392 		inf->ppat->data.str[length + 1] = upper;
    393 		inf->ppat->data.str[length + 2] = '\0';
    394 		return;
    395 
    396 	    default:
    397 		/* Anything else is added as a new pattern list element */
    398 		break;
    399 	}
    400     }
    401 
    402     if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    403 	inf->ecode = RE_ESPACE;
    404 	return;
    405     }
    406 
    407     pat->type = Rep_CaseLiteral;
    408     pat->data.cse.lower = lower;
    409     pat->data.cse.upper = upper;
    410     pat->prev = inf->ppat;
    411     if ((pat->prev = inf->ppat) != NULL)
    412 	inf->ppat->next = pat;
    413     else
    414 	inf->palt->pat = pat;
    415     inf->ppat = pat;
    416 }
    417 
    418 static void
    419 irec_open_group(irec_info *inf)
    420 {
    421     rec_pat *pat;
    422     rec_alt *alt;
    423     rec_grp *grp;
    424 
    425     if ((grp = calloc(1, sizeof(rec_grp))) == NULL) {
    426 	inf->ecode = RE_ESPACE;
    427 	return;
    428     }
    429 
    430     if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    431 	free(grp);
    432 	inf->ecode = RE_ESPACE;
    433 	return;
    434     }
    435 
    436     if ((alt = calloc(1, sizeof(rec_alt))) == NULL) {
    437 	free(grp);
    438 	free(pat);
    439 	inf->ecode = RE_ESPACE;
    440 	return;
    441     }
    442 
    443     pat->type = Rep_Group;
    444     pat->data.grp = grp;
    445     grp->parent = pat;
    446     grp->palt = inf->palt;
    447     grp->pgrp = inf->pgrp;
    448     grp->alt = alt;
    449     grp->comp = 0;
    450     if ((pat->prev = inf->ppat) != NULL)
    451 	inf->ppat->next = pat;
    452     else
    453 	inf->palt->pat = pat;
    454     inf->palt = alt;
    455     inf->ppat = NULL;
    456 
    457     /* Only toplevel parenthesis supported */
    458     if (++inf->nparens == 1)
    459 	++inf->ngrps;
    460 
    461     inf->pgrp = grp;
    462 }
    463 
    464 static void
    465 irec_close_group(irec_info *inf)
    466 {
    467     if (inf->pgrp == NULL) {
    468 	inf->ecode = RE_EPAREN;
    469 	return;
    470     }
    471 
    472     inf->palt = inf->pgrp->palt;
    473     inf->ppat = inf->pgrp->parent;
    474     inf->pgrp = inf->pgrp->pgrp;
    475 
    476     --inf->nparens;
    477 }
    478 
    479 static void
    480 irec_range(irec_info *inf)
    481 {
    482     int count;
    483     rec_pat *pat;
    484     rec_rng *rng;
    485     int not = inf->ptr[0] == '^';
    486 
    487     if (not)
    488 	++inf->ptr;
    489 
    490     pat = calloc(1, sizeof(rec_pat));
    491     if (pat == NULL) {
    492 	inf->ecode = RE_ESPACE;
    493 	return;
    494     }
    495 
    496     rng = calloc(1, sizeof(rec_rng));
    497     if (pat == NULL) {
    498 	free(pat);
    499 	inf->ecode = RE_ESPACE;
    500 	return;
    501     }
    502 
    503     pat->data.rng = rng;
    504     pat->type = not ? Rep_RangeNot : Rep_Range;
    505     if ((pat->prev = inf->ppat) != NULL)
    506 	inf->ppat->next = pat;
    507     else
    508 	inf->palt->pat = pat;
    509     inf->ppat = pat;
    510 
    511     /* First pass, add everything seen */
    512     for (count = 0; inf->ecode == 0; count++) {
    513 	/* If bracket not closed */
    514 	if (inf->ptr == inf->end) {
    515 	    inf->ecode = RE_EBRACK;
    516 	    return;
    517 	}
    518 	/* If not the first character */
    519 	else if (inf->ptr[0] == ']' && count)
    520 	    break;
    521 	else {
    522 	    /* If not a range of characters */
    523 	    if (inf->ptr[1] != '-' || inf->ptr[2] == ']') {
    524 		irec_range_single(inf, inf->ptr[0]);
    525 		++inf->ptr;
    526 	    }
    527 	    else {
    528 		if ((inf->flags & RE_NEWLINE) &&
    529 		    inf->ptr[0] < '\n' && inf->ptr[2] > '\n') {
    530 		    /*  Unless it is forced to be a delimiter, don't allow
    531 		     * a newline in a character range */
    532 		    if (inf->ptr[0] == '\n' - 1)
    533 			irec_range_single(inf, inf->ptr[0]);
    534 		    else
    535 			irec_range_complex(inf, inf->ptr[0], '\n' - 1);
    536 		    if (inf->ptr[2] == '\n' + 1)
    537 			irec_range_single(inf, inf->ptr[2]);
    538 		    else
    539 			irec_range_complex(inf, '\n' + 1, inf->ptr[2]);
    540 		}
    541 		else
    542 		    irec_range_complex(inf, inf->ptr[0], inf->ptr[2]);
    543 		inf->ptr += 3;
    544 	    }
    545 	}
    546     }
    547 
    548     /* Skip ] */
    549     ++inf->ptr;
    550 }
    551 
    552 static void
    553 irec_range_single(irec_info *inf, int value)
    554 {
    555     if (value >= 0 && value <= 255)
    556 	inf->ppat->data.rng->range[value] = 1;
    557 
    558     if (inf->flags & RE_ICASE) {
    559 	if (islower(value)) {
    560 	    value = toupper(value);
    561 	    if (value >= 0 && value <= 255)
    562 		inf->ppat->data.rng->range[value] = 1;
    563 	}
    564 	else if (isupper(value)) {
    565 	    value = tolower(value);
    566 	    if (value >= 0 && value <= 255)
    567 		inf->ppat->data.rng->range[value] = 1;
    568 	}
    569     }
    570 }
    571 
    572 static void
    573 irec_range_complex(irec_info *inf, int chrf, int chrt)
    574 {
    575     if (chrf > chrt) {
    576 	inf->ecode = RE_ERANGE;
    577 	return;
    578     }
    579 
    580     for (; chrf <= chrt; chrf++)
    581 	irec_range_single(inf, chrf);
    582 }
    583 
    584 static void
    585 irec_escape(irec_info *inf)
    586 {
    587     rec_pat *pat;
    588     unsigned char chr = inf->ptr[0];
    589 
    590     if (chr == 0) {
    591 	inf->ecode = RE_EESCAPE;
    592 	return;
    593     }
    594     ++inf->ptr;
    595     switch (chr) {
    596 	case 'o':
    597 	    irec_simple_pattern(inf, Rep_Odigit);
    598 	    break;
    599 	case 'O':
    600 	    irec_simple_pattern(inf, Rep_OdigitNot);
    601 	    break;
    602 	case 'd':
    603 	    irec_simple_pattern(inf, Rep_Digit);
    604 	    break;
    605 	case 'D':
    606 	    irec_simple_pattern(inf, Rep_DigitNot);
    607 	    break;
    608 	case 'x':
    609 	    irec_simple_pattern(inf, Rep_Xdigit);
    610 	    break;
    611 	case 'X':
    612 	    irec_simple_pattern(inf, Rep_XdigitNot);
    613 	    break;
    614 	case 's':
    615 	    irec_simple_pattern(inf, Rep_Space);
    616 	    break;
    617 	case 'S':
    618 	    irec_simple_pattern(inf, Rep_SpaceNot);
    619 	    break;
    620 	case 't':
    621 	    irec_simple_pattern(inf, Rep_Tab);
    622 	    break;
    623 	case 'n':
    624 	    irec_simple_pattern(inf, Rep_Newline);
    625 	    break;
    626 	case 'l':
    627 	    irec_simple_pattern(inf, Rep_Lower);
    628 	    break;
    629 	case 'u':
    630 	    irec_simple_pattern(inf, Rep_Upper);
    631 	    break;
    632 	case 'w':
    633 	    irec_simple_pattern(inf, Rep_Alnum);
    634 	    break;
    635 	case 'W':
    636 	    irec_simple_pattern(inf, Rep_AlnumNot);
    637 	    break;
    638 	case 'c':
    639 	    irec_simple_pattern(inf, Rep_Control);
    640 	    break;
    641 	case 'C':
    642 	    irec_simple_pattern(inf, Rep_ControlNot);
    643 	    break;
    644 	case '<':
    645 	    irec_simple_pattern(inf, Rep_Bow);
    646 	    break;
    647 	case '>':
    648 	    irec_simple_pattern(inf, Rep_Eow);
    649 	    break;
    650 	case '1':	case '2':	case '3':
    651 	case '4':	case '5':	case '6':
    652 	case '7':	case '8':	case '9':
    653 	    if ((inf->flags & RE_NOSUB) || (chr -= '1') >= inf->ngrps) {
    654 		inf->ecode = RE_ESUBREG;
    655 		return;
    656 	    }
    657 	    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    658 		inf->ecode = RE_ESPACE;
    659 		return;
    660 	    }
    661 	    pat->type = Rep_Backref;
    662 	    pat->data.chr = chr;
    663 	    pat->prev = inf->ppat;
    664 	    if (inf->ppat)
    665 		inf->ppat->next = pat;
    666 	    else
    667 		inf->palt->pat = pat;
    668 	    inf->ppat = pat;
    669 	    break;
    670 
    671 	/* True literals */
    672 	case '0':
    673 	    irec_literal_pattern(inf, '\0');
    674 	    break;
    675 	case 'a':
    676 	    irec_literal_pattern(inf, '\a');
    677 	    break;
    678 	case 'b':
    679 	    irec_literal_pattern(inf, '\b');
    680 	    break;
    681 	case 'f':
    682 	    irec_literal_pattern(inf, '\f');
    683 	    break;
    684 	case 'r':
    685 	    irec_literal_pattern(inf, '\r');
    686 	    break;
    687 	case 'v':
    688 	    irec_literal_pattern(inf, '\v');
    689 	    break;
    690 
    691 	default:
    692 	    /* Don't check if case insensitive regular expression */
    693 	    irec_literal_pattern(inf, chr);
    694 	    break;
    695     }
    696 }
    697 
    698 static void
    699 irec_simple_repetition(irec_info *inf, rec_rep_t type)
    700 {
    701     rec_rep *rep;
    702 
    703 	/* If nowhere to add repetition */
    704     if ((inf->pgrp == NULL && inf->ppat == NULL) ||
    705 	/* If repetition already added to last/current pattern */
    706 	(inf->pgrp == NULL && inf->ppat->rep != NULL) ||
    707 	/* If repetition already added to last/current group */
    708 	(inf->ppat == NULL && inf->pgrp->parent->rep != NULL)) {
    709 	inf->ecode = RE_BADRPT;
    710 	return;
    711     }
    712 
    713     if ((rep = calloc(1, sizeof(rec_rep))) == NULL) {
    714 	inf->ecode = RE_ESPACE;
    715 	return;
    716     }
    717 
    718     rep->type = type;
    719     irec_add_repetition(inf, rep);
    720 }
    721 
    722 static void
    723 irec_complex_repetition(irec_info *inf)
    724 {
    725     int exact;
    726     rec_rep *rep;
    727     long mine, maxc;
    728     unsigned char *end;
    729 
    730 	/* If nowhere to add repetition */
    731     if ((inf->pgrp == NULL && inf->ppat == NULL) ||
    732 	/* If repetition already added to last/current pattern */
    733 	(inf->pgrp == NULL && inf->ppat->rep != NULL) ||
    734 	/* If repetition already added to last/current group */
    735 	(inf->ppat == NULL && inf->pgrp->parent->rep != NULL)) {
    736 	inf->ecode = RE_EBADBR;
    737 	return;
    738     }
    739 
    740     exact = 0;
    741     mine = maxc = -1;
    742     if (inf->ptr[0] == ',')
    743 	/* Specify max number of ocurrences only */
    744 	goto domax;
    745     else if (!isdigit(inf->ptr[0]))
    746 	goto badbr;
    747 
    748     mine = strtol((char*)inf->ptr, (char**)&end, 10);
    749     inf->ptr = end;
    750     if (inf->ptr[0] == '}') {
    751 	exact = 1;
    752 	++inf->ptr;
    753 	goto redone;
    754     }
    755     else if (inf->ptr[0] != ',')
    756 	goto badbr;
    757 
    758 domax:
    759 	/* Add one to skip comma */
    760     ++inf->ptr;
    761     if (inf->ptr[0] == '}') {
    762 	++inf->ptr;
    763 	goto redone;
    764     }
    765     else if (!isdigit(inf->ptr[0]))
    766 	goto badbr;
    767     maxc = strtol((char*)inf->ptr, (char**)&end, 10);
    768     inf->ptr = end;
    769     if (inf->ptr[0] != '}')
    770 	goto badbr;
    771     ++inf->ptr;
    772 
    773 redone:
    774     if (mine == maxc) {
    775 	maxc = -1;
    776 	exact = 1;
    777     }
    778 
    779     /* Check range and if min-max parameters are valid */
    780     if (mine >= 255 || maxc >= 255 ||
    781 	(mine >= 0 && maxc >= 0 && mine > maxc))
    782 	goto badbr;
    783 
    784     /* Check for noop */
    785     if (exact && mine == 1)
    786 	return;
    787 
    788     if ((rep = calloc(1, sizeof(rec_rep))) == NULL) {
    789 	inf->ecode = RE_ESPACE;
    790 	return;
    791     }
    792 
    793 	/* Convert {0,1} to ? */
    794     if (mine == 0 && maxc == 1)
    795 	rep->type = Rer_Maybe;
    796     else if (exact) {
    797 	rep->type = Rer_Exact;
    798 	rep->mine = mine;
    799     }
    800 	/* Convert {0,} to * */
    801     else if (mine == 0 && maxc == -1)
    802 	rep->type = Rer_AnyTimes;
    803 	/* Convert {1,} to + */
    804     else if (mine == 1 && maxc == -1)
    805 	rep->type = Rer_AtLeast;
    806     else if (maxc == -1) {
    807 	rep->type = Rer_Min;
    808 	rep->mine = mine;
    809     }
    810     else if (mine < 1) {
    811 	rep->type = Rer_Max;
    812 	rep->maxc = maxc;
    813     }
    814     else {
    815 	rep->type = Rer_MinMax;
    816 	rep->mine = mine;
    817 	rep->maxc = maxc;
    818     }
    819 
    820     irec_add_repetition(inf, rep);
    821 
    822     return;
    823 
    824 badbr:
    825     inf->ecode = RE_EBADBR;
    826 }
    827 
    828 /*  The rep argument is allocated and has no reference yet,
    829  * if something fails it must be freed before returning.
    830  */
    831 static void
    832 irec_add_repetition(irec_info *inf, rec_rep *rep)
    833 {
    834     int length;
    835     rec_pat *pat;
    836     rec_grp *grp;
    837     rec_rep_t rept;
    838     unsigned char value, upper;
    839 
    840     rept = rep->type;
    841 
    842     if (inf->ppat == NULL) {
    843 	rec_pat *any;
    844 	rec_grp *grp = inf->pgrp;
    845 
    846 	if (rept == Rer_AnyTimes || rept == Rer_Maybe || rept == Re_AtLeast) {
    847 	    /* Convert (.)* to (.*), ((.))* not handled and may not match */
    848 	    any = NULL;
    849 
    850 	    if (grp->alt && grp->alt->pat) {
    851 		for (any = grp->alt->pat; any->next; any = any->next)
    852 		    ;
    853 		switch (any->type) {
    854 		    case Rep_Any:
    855 			break;
    856 		    case Rep_AnyAnyTimes:
    857 		    case Rep_AnyMaybe:
    858 		    case Rep_AnyAtLeast:
    859 			free(rep);
    860 			inf->ecode = RE_BADRPT;
    861 			return;
    862 		    default:
    863 			any = NULL;
    864 			break;
    865 		}
    866 	    }
    867 	    if (any) {
    868 		free(rep);
    869 		rep = NULL;
    870 		any->type = (rept == Rer_AnyTimes) ? Rep_AnyAnyTimes :
    871 			    (rept == Rer_AtLeast) ? Rep_AnyAtLeast :
    872 			    Rep_AnyMaybe;
    873 		while (grp) {
    874 		    ++grp->comp;
    875 		    grp = grp->pgrp;
    876 		}
    877 	    }
    878 	}
    879 	inf->pgrp->parent->rep = rep;
    880 	irec_close_group(inf);
    881 	return;
    882     }
    883 
    884     switch (inf->ppat->type) {
    885 	case Rep_Bol:
    886 	case Rep_Eol:
    887 	case Rep_Bow:
    888 	case Rep_Eow:
    889 	case Rep_AnyAnyTimes:
    890 	case Rep_AnyMaybe:
    891 	case Rep_AnyAtLeast:
    892 	    /* Markers that cannot repeat */
    893 	    free(rep);
    894 	    inf->ecode = RE_BADRPT;
    895 	    return;
    896 
    897 	case Rep_Any:
    898 	    grp = inf->pgrp;
    899 	    free(rep);
    900 	    if (rept == Rer_AnyTimes ||
    901 		rept == Rer_Maybe ||
    902 		rept == Rer_AtLeast) {
    903 		inf->ppat->type = (rept == Rer_AnyTimes) ?
    904 				   Rep_AnyAnyTimes :
    905 				  (rept == Rer_Maybe) ?
    906 				   Rep_AnyMaybe :
    907 				   Rep_AnyAtLeast;
    908 		while (grp) {
    909 		    ++grp->comp;
    910 		    grp = grp->pgrp;
    911 		}
    912 	    }
    913 	    else
    914 		/* XXX Not (yet) implemented */
    915 		inf->ecode = RE_BADRPT;
    916 	    rep = NULL;
    917 	    break;
    918 
    919 	case Rep_String:
    920 	    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    921 		free(rep);
    922 		inf->ecode = RE_ESPACE;
    923 		return;
    924 	    }
    925 
    926 	    length = strlen((char*)inf->ppat->data.str);
    927 	    pat->type = Rep_Literal;
    928 	    pat->prev = inf->ppat;
    929 	    pat->data.chr = inf->ppat->data.str[length - 1];
    930 	    if (length == 2) {
    931 		/* Must convert to two Rep_Literals */
    932 		value = inf->ppat->data.str[0];
    933 		free(inf->ppat->data.str);
    934 		inf->ppat->data.chr = value;
    935 		inf->ppat->type = Rep_Literal;
    936 	    }
    937 	    else
    938 		/* Must remove last character from string */
    939 		inf->ppat->data.str[length - 1] = '\0';
    940 	    inf->ppat->next = pat;
    941 	    inf->ppat = pat;
    942 	    break;
    943 
    944 	case Rep_CaseString:
    945 	    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
    946 		free(rep);
    947 		inf->ecode = RE_ESPACE;
    948 		return;
    949 	    }
    950 
    951 	    length = strlen((char*)inf->ppat->data.str);
    952 	    pat->type = Rep_CaseLiteral;
    953 	    pat->prev = inf->ppat;
    954 	    pat->data.cse.lower = inf->ppat->data.str[length - 2];
    955 	    pat->data.cse.upper = inf->ppat->data.str[length - 1];
    956 	    if (length == 4) {
    957 		/* Must convert to two Rep_CaseLiterals */
    958 		value = inf->ppat->data.str[0];
    959 		upper = inf->ppat->data.str[1];
    960 		free(inf->ppat->data.str);
    961 		inf->ppat->data.cse.lower = value;
    962 		inf->ppat->data.cse.upper = upper;
    963 		inf->ppat->next = pat;
    964 		inf->ppat->type = Rep_CaseLiteral;
    965 	    }
    966 	    else
    967 		/* Must remove last character pair from string */
    968 		inf->ppat->data.str[length - 2] = '\0';
    969 	    inf->ppat->next = pat;
    970 	    inf->ppat = pat;
    971 	    break;
    972 
    973 	default:
    974 	    /* Anything else does not need special handling */
    975 	    break;
    976     }
    977 
    978     inf->ppat->rep = rep;
    979 }
    980 
    981 static void
    982 irec_free(irec_info *inf)
    983 {
    984     irec_free_alt(inf->alt);
    985 }
    986 
    987 static void
    988 irec_free_grp(rec_grp *grp)
    989 {
    990     if (grp->alt)
    991 	irec_free_alt(grp->alt);
    992     free(grp);
    993 }
    994 
    995 static void
    996 irec_free_pats(rec_pat *pat)
    997 {
    998     rec_pat *next;
    999     rec_pat_t rect;
   1000 
   1001     while (pat) {
   1002 	next = pat->next;
   1003 	if (pat->rep)
   1004 	    free(pat->rep);
   1005 	rect = pat->type;
   1006 	if (rect == Rep_Range || rect == Rep_RangeNot)
   1007 	    free(pat->data.rng);
   1008 	else if (rect == Rep_Group)
   1009 	    irec_free_grp(pat->data.grp);
   1010 	else if (rect == Rep_StringList)
   1011 	    orec_free_stl(pat->data.stl);
   1012 	free(pat);
   1013 	pat = next;
   1014     }
   1015 }
   1016