1/*
2 * Copyright (c) 2002 by The XFree86 Project, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 * Except as contained in this notice, the name of the XFree86 Project shall
23 * not be used in advertising or otherwise to promote the sale, use or other
24 * dealings in this Software without prior written authorization from the
25 * XFree86 Project.
26 *
27 * Author: Paulo César Pereira de Andrade
28 */
29
30/* $XFree86: xc/programs/xedit/lisp/re/rec.c,v 1.3 2002/11/15 07:01:33 paulo Exp $ */
31
32#include "rep.h"
33
34/*
35 * Types
36 */
37
38/*  Information used while compiling the intermediate format of the re. */
39typedef struct _irec_info {
40    unsigned char *ptr;		/* Pointer in the given regex pattern */
41    unsigned char *end;		/* End of regex pattern */
42    int flags;			/* Compile flags */
43    rec_alt *alt;		/* Toplevel first/single alternative */
44
45    rec_alt *palt;		/* Current alternative being compiled */
46    rec_grp *pgrp;		/* Current group, if any */
47    rec_pat *ppat;		/* Current pattern, if any */
48
49    /*  Number of open parenthesis, for error checking */
50    int nparens;
51
52    int ngrps;			/* Number of groups, for backreference */
53
54    int ecode;
55} irec_info;
56
57
58/*
59 * Prototypes
60 */
61
62	/* (i)ntermediate (r)egular (e)xpression (c)ompile
63	 *  Generates an intermediate stage compiled regex from
64	 * the specified pattern argument. Basically builds an
65	 * intermediate data structure to analyse and do syntax
66	 * error checking.
67	 */
68static void irec_simple_pattern(irec_info*, rec_pat_t);
69static void irec_literal_pattern(irec_info*, int);
70static void irec_case_literal_pattern(irec_info*, int);
71static void irec_open_group(irec_info*);
72static void irec_close_group(irec_info*);
73static void irec_range(irec_info*);
74static void irec_range_single(irec_info*, int);
75static void irec_range_complex(irec_info*, int, int);
76static void irec_escape(irec_info*);
77static void irec_simple_repetition(irec_info*, rec_rep_t);
78static void irec_complex_repetition(irec_info*);
79static void irec_add_repetition(irec_info*, rec_rep*);
80static void irec_free(irec_info*);
81static void irec_free_grp(rec_grp*);
82static void irec_free_pats(rec_pat*);
83
84
85/*
86 * Implementation
87 */
88rec_alt *
89irec_comp(const char *pattern, const char *endp, int flags, int *ecode)
90{
91    unsigned char *ptr;
92    rec_alt *alt;
93    irec_info inf;
94
95    if (pattern == NULL || endp < pattern) {
96	*ecode = RE_INVARG;
97	return (NULL);
98    }
99
100    if (endp == pattern) {
101	*ecode = RE_EMPTY;
102	return (NULL);
103    }
104
105    alt = calloc(1, sizeof(rec_alt));
106    if (alt == NULL) {
107	*ecode = RE_ESPACE;
108	return (NULL);
109    }
110
111    inf.ptr = (unsigned char*)pattern;
112    inf.end = (unsigned char*)endp;
113    inf.flags = flags;
114    inf.alt = inf.palt = alt;
115    inf.pgrp = NULL;
116    inf.ppat = NULL;
117    inf.nparens = inf.ngrps = 0;
118    inf.ecode = 0;
119
120    if (flags & RE_NOSPEC) {
121	/* Just searching for a character or substring */
122	for (; inf.ecode == 0 && inf.ptr < inf.end; inf.ptr++) {
123	    if (!(flags & RE_ICASE) ||
124		(!isupper(*inf.ptr) && !islower(*inf.ptr)))
125		irec_literal_pattern(&inf, *inf.ptr);
126	    else
127		irec_case_literal_pattern(&inf, *inf.ptr);
128	}
129    }
130    /* inf.ptr = inf.end is nul if flags & RE_NOSPEC */
131    for (; inf.ecode == 0 && inf.ptr < inf.end;) {
132	switch (*inf.ptr++) {
133	    case '*':
134		irec_simple_repetition(&inf, Rer_AnyTimes);
135		break;
136	    case '+':
137		irec_simple_repetition(&inf, Rer_AtLeast);
138		break;
139	    case '?':
140		irec_simple_repetition(&inf, Rer_Maybe);
141		break;
142	    case '.':
143		irec_simple_pattern(&inf, Rep_Any);
144		break;
145	    case '^':
146		if (flags & RE_NEWLINE)
147		    /* It is up to the user decide if this can match */
148		    irec_simple_pattern(&inf, Rep_Bol);
149		else {
150		    for (ptr = inf.ptr - 1;
151			 ptr > (unsigned char*)pattern && *ptr == '('; ptr--)
152			;
153		    /* If at the start of a pattern */
154		    if (ptr == (unsigned char*)pattern || *ptr == '|')
155			irec_simple_pattern(&inf, Rep_Bol);
156		    else
157			/* In the middle of a pattern, treat as literal */
158			irec_literal_pattern(&inf, '^');
159		}
160		break;
161	    case '$':
162		if (flags & RE_NEWLINE)
163		    irec_simple_pattern(&inf, Rep_Eol);
164		else {
165		    /* Look ahead to check if is the last char of a group */
166		    for (ptr = inf.ptr; ptr < inf.end && *ptr == ')'; ptr++)
167			;
168		    if (*ptr == '\0' || *ptr == '|')
169			/* Last character of pattern, an EOL match */
170			irec_simple_pattern(&inf, Rep_Eol);
171		    else
172			/* Normal character */
173			irec_literal_pattern(&inf, '$');
174		}
175		break;
176	    case '(':
177		irec_open_group(&inf);
178		break;
179	    case ')':
180		/* Look ahead to check if need to close the group now */
181		ptr = inf.ptr;
182		if (*ptr != '*' && *ptr != '+' && *ptr != '?' && *ptr != '{')
183		    /* If a repetition does not follow */
184		    irec_close_group(&inf);
185		else if (inf.pgrp == NULL)
186		    /* A repetition follows, but current group is implicit */
187		    inf.ecode = RE_EPAREN;
188		else
189		    /* Can do this as next character is known */
190		    inf.ppat = NULL;
191		break;
192	    case '[':
193		irec_range(&inf);
194		break;
195	    case ']':
196		irec_literal_pattern(&inf, ']');
197		break;
198	    case '{':
199		irec_complex_repetition(&inf);
200		break;
201	    case '}':
202		irec_literal_pattern(&inf, '}');
203		break;
204	    case '|':
205		    /* If first character in the pattern */
206		if (inf.ptr - 1 == (unsigned char*)pattern ||
207		    /* If last character in the pattern */
208		    inf.ptr >= inf.end ||
209		    /* If empty pattern */
210		    inf.ptr[0] == '|' ||
211		    inf.ptr[0] == ')')
212		    inf.ecode = RE_EMPTY;
213		else {
214		    rec_alt *alt = calloc(1, sizeof(rec_alt));
215
216		    if (alt) {
217			alt->prev = inf.palt;
218			inf.palt->next = alt;
219			inf.palt = alt;
220			inf.ppat = NULL;
221		    }
222		    else
223			inf.ecode = RE_ESPACE;
224		}
225		break;
226	    case '\\':
227		irec_escape(&inf);
228		break;
229	    default:
230		if (!(flags & RE_ICASE) ||
231		    (!isupper(inf.ptr[-1]) && !islower(inf.ptr[-1])))
232		    irec_literal_pattern(&inf, inf.ptr[-1]);
233		else
234		    irec_case_literal_pattern(&inf, inf.ptr[-1]);
235		break;
236	}
237    }
238
239    /* Check if not all groups closed */
240    if (inf.ecode == 0 && inf.nparens)
241	inf.ecode = RE_EPAREN;
242
243    if (inf.ecode == 0)
244	inf.ecode = orec_comp(inf.alt, flags);
245
246    /* If an error generated */
247    if (inf.ecode) {
248	irec_free(&inf);
249	alt = NULL;
250    }
251
252    *ecode = inf.ecode;
253
254    return (alt);
255}
256
257void
258irec_free_alt(rec_alt *alt)
259{
260    rec_alt *next;
261
262    while (alt) {
263	next = alt->next;
264	irec_free_pats(alt->pat);
265	free(alt);
266	alt = next;
267    }
268}
269
270
271
272static void
273irec_simple_pattern(irec_info *inf, rec_pat_t type)
274{
275    rec_pat *pat;
276
277    /* Always add a new pattern to list */
278    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
279	inf->ecode = RE_ESPACE;
280	return;
281    }
282
283    pat->type = type;
284    if ((pat->prev = inf->ppat) != NULL)
285	inf->ppat->next = pat;
286    else
287	inf->palt->pat = pat;
288    inf->ppat = pat;
289}
290
291static void
292irec_literal_pattern(irec_info *inf, int value)
293{
294    int length;
295    rec_pat *pat;
296    unsigned char chr, *str;
297
298	/* If there is a current pattern */
299    if (inf->ppat && inf->ppat->rep == NULL) {
300	switch (inf->ppat->type) {
301	    case Rep_Literal:
302		/* Start literal string */
303		chr = inf->ppat->data.chr;
304		if ((str = malloc(16)) == NULL) {
305		    inf->ecode = RE_ESPACE;
306		    return;
307		}
308		inf->ppat->type = Rep_String;
309		inf->ppat->data.str = str;
310		str[0] = chr;
311		str[1] = value;
312		str[2] = '\0';
313		return;
314
315	    case Rep_String:
316		/* Augments literal string */
317		length = strlen((char*)inf->ppat->data.str);
318		if ((length % 16) >= 14) {
319		    if ((str = realloc(inf->ppat->data.str,
320				       length + 18)) == NULL) {
321			inf->ecode = RE_ESPACE;
322			return;
323		    }
324		    inf->ppat->data.str = str;
325		}
326		inf->ppat->data.str[length] = value;
327		inf->ppat->data.str[length + 1] = '\0';
328		return;
329
330	    default:
331		/* Anything else is added as a new pattern list element */
332		break;
333	}
334    }
335
336    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
337	inf->ecode = RE_ESPACE;
338	return;
339    }
340
341    pat->type = Rep_Literal;
342    pat->data.chr = value;
343    if ((pat->prev = inf->ppat) != NULL)
344	inf->ppat->next = pat;
345    else
346	inf->palt->pat = pat;
347    inf->ppat = pat;
348}
349
350static void
351irec_case_literal_pattern(irec_info *inf, int value)
352{
353    int length;
354    rec_pat *pat;
355    unsigned char plower, pupper, lower, upper, *str;
356
357    lower = tolower(value);
358    upper = toupper(value);
359
360	/* If there is a current pattern */
361    if (inf->ppat && inf->ppat->rep == NULL) {
362	switch (inf->ppat->type) {
363	    case Rep_CaseLiteral:
364		/* Start case literal string */
365		plower = inf->ppat->data.cse.lower;
366		pupper = inf->ppat->data.cse.upper;
367		if ((str = malloc(32)) == NULL) {
368		    inf->ecode = RE_ESPACE;
369		    return;
370		}
371		inf->ppat->type = Rep_CaseString;
372		inf->ppat->data.str = str;
373		str[0] = plower;
374		str[1] = pupper;
375		str[2] = lower;
376		str[3] = upper;
377		str[4] = '\0';
378		return;
379
380	    case Rep_CaseString:
381		/* Augments case literal string */
382		length = strlen((char*)inf->ppat->data.str);
383		if (((length) % 32) >= 28) {
384		    if ((str = realloc(inf->ppat->data.str,
385				       length + 36)) == NULL) {
386			inf->ecode = RE_ESPACE;
387			return;
388		    }
389		    inf->ppat->data.str = str;
390		}
391		inf->ppat->data.str[length] = lower;
392		inf->ppat->data.str[length + 1] = upper;
393		inf->ppat->data.str[length + 2] = '\0';
394		return;
395
396	    default:
397		/* Anything else is added as a new pattern list element */
398		break;
399	}
400    }
401
402    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
403	inf->ecode = RE_ESPACE;
404	return;
405    }
406
407    pat->type = Rep_CaseLiteral;
408    pat->data.cse.lower = lower;
409    pat->data.cse.upper = upper;
410    pat->prev = inf->ppat;
411    if ((pat->prev = inf->ppat) != NULL)
412	inf->ppat->next = pat;
413    else
414	inf->palt->pat = pat;
415    inf->ppat = pat;
416}
417
418static void
419irec_open_group(irec_info *inf)
420{
421    rec_pat *pat;
422    rec_alt *alt;
423    rec_grp *grp;
424
425    if ((grp = calloc(1, sizeof(rec_grp))) == NULL) {
426	inf->ecode = RE_ESPACE;
427	return;
428    }
429
430    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
431	free(grp);
432	inf->ecode = RE_ESPACE;
433	return;
434    }
435
436    if ((alt = calloc(1, sizeof(rec_alt))) == NULL) {
437	free(grp);
438	free(pat);
439	inf->ecode = RE_ESPACE;
440	return;
441    }
442
443    pat->type = Rep_Group;
444    pat->data.grp = grp;
445    grp->parent = pat;
446    grp->palt = inf->palt;
447    grp->pgrp = inf->pgrp;
448    grp->alt = alt;
449    grp->comp = 0;
450    if ((pat->prev = inf->ppat) != NULL)
451	inf->ppat->next = pat;
452    else
453	inf->palt->pat = pat;
454    inf->palt = alt;
455    inf->ppat = NULL;
456
457    /* Only toplevel parenthesis supported */
458    if (++inf->nparens == 1)
459	++inf->ngrps;
460
461    inf->pgrp = grp;
462}
463
464static void
465irec_close_group(irec_info *inf)
466{
467    if (inf->pgrp == NULL) {
468	inf->ecode = RE_EPAREN;
469	return;
470    }
471
472    inf->palt = inf->pgrp->palt;
473    inf->ppat = inf->pgrp->parent;
474    inf->pgrp = inf->pgrp->pgrp;
475
476    --inf->nparens;
477}
478
479static void
480irec_range(irec_info *inf)
481{
482    int count;
483    rec_pat *pat;
484    rec_rng *rng;
485    int not = inf->ptr[0] == '^';
486
487    if (not)
488	++inf->ptr;
489
490    pat = calloc(1, sizeof(rec_pat));
491    if (pat == NULL) {
492	inf->ecode = RE_ESPACE;
493	return;
494    }
495
496    rng = calloc(1, sizeof(rec_rng));
497    if (pat == NULL) {
498	free(pat);
499	inf->ecode = RE_ESPACE;
500	return;
501    }
502
503    pat->data.rng = rng;
504    pat->type = not ? Rep_RangeNot : Rep_Range;
505    if ((pat->prev = inf->ppat) != NULL)
506	inf->ppat->next = pat;
507    else
508	inf->palt->pat = pat;
509    inf->ppat = pat;
510
511    /* First pass, add everything seen */
512    for (count = 0; inf->ecode == 0; count++) {
513	/* If bracket not closed */
514	if (inf->ptr == inf->end) {
515	    inf->ecode = RE_EBRACK;
516	    return;
517	}
518	/* If not the first character */
519	else if (inf->ptr[0] == ']' && count)
520	    break;
521	else {
522	    /* If not a range of characters */
523	    if (inf->ptr[1] != '-' || inf->ptr[2] == ']') {
524		irec_range_single(inf, inf->ptr[0]);
525		++inf->ptr;
526	    }
527	    else {
528		if ((inf->flags & RE_NEWLINE) &&
529		    inf->ptr[0] < '\n' && inf->ptr[2] > '\n') {
530		    /*  Unless it is forced to be a delimiter, don't allow
531		     * a newline in a character range */
532		    if (inf->ptr[0] == '\n' - 1)
533			irec_range_single(inf, inf->ptr[0]);
534		    else
535			irec_range_complex(inf, inf->ptr[0], '\n' - 1);
536		    if (inf->ptr[2] == '\n' + 1)
537			irec_range_single(inf, inf->ptr[2]);
538		    else
539			irec_range_complex(inf, '\n' + 1, inf->ptr[2]);
540		}
541		else
542		    irec_range_complex(inf, inf->ptr[0], inf->ptr[2]);
543		inf->ptr += 3;
544	    }
545	}
546    }
547
548    /* Skip ] */
549    ++inf->ptr;
550}
551
552static void
553irec_range_single(irec_info *inf, int value)
554{
555    if (value >= 0 && value <= 255)
556	inf->ppat->data.rng->range[value] = 1;
557
558    if (inf->flags & RE_ICASE) {
559	if (islower(value)) {
560	    value = toupper(value);
561	    if (value >= 0 && value <= 255)
562		inf->ppat->data.rng->range[value] = 1;
563	}
564	else if (isupper(value)) {
565	    value = tolower(value);
566	    if (value >= 0 && value <= 255)
567		inf->ppat->data.rng->range[value] = 1;
568	}
569    }
570}
571
572static void
573irec_range_complex(irec_info *inf, int chrf, int chrt)
574{
575    if (chrf > chrt) {
576	inf->ecode = RE_ERANGE;
577	return;
578    }
579
580    for (; chrf <= chrt; chrf++)
581	irec_range_single(inf, chrf);
582}
583
584static void
585irec_escape(irec_info *inf)
586{
587    rec_pat *pat;
588    unsigned char chr = inf->ptr[0];
589
590    if (chr == 0) {
591	inf->ecode = RE_EESCAPE;
592	return;
593    }
594    ++inf->ptr;
595    switch (chr) {
596	case 'o':
597	    irec_simple_pattern(inf, Rep_Odigit);
598	    break;
599	case 'O':
600	    irec_simple_pattern(inf, Rep_OdigitNot);
601	    break;
602	case 'd':
603	    irec_simple_pattern(inf, Rep_Digit);
604	    break;
605	case 'D':
606	    irec_simple_pattern(inf, Rep_DigitNot);
607	    break;
608	case 'x':
609	    irec_simple_pattern(inf, Rep_Xdigit);
610	    break;
611	case 'X':
612	    irec_simple_pattern(inf, Rep_XdigitNot);
613	    break;
614	case 's':
615	    irec_simple_pattern(inf, Rep_Space);
616	    break;
617	case 'S':
618	    irec_simple_pattern(inf, Rep_SpaceNot);
619	    break;
620	case 't':
621	    irec_simple_pattern(inf, Rep_Tab);
622	    break;
623	case 'n':
624	    irec_simple_pattern(inf, Rep_Newline);
625	    break;
626	case 'l':
627	    irec_simple_pattern(inf, Rep_Lower);
628	    break;
629	case 'u':
630	    irec_simple_pattern(inf, Rep_Upper);
631	    break;
632	case 'w':
633	    irec_simple_pattern(inf, Rep_Alnum);
634	    break;
635	case 'W':
636	    irec_simple_pattern(inf, Rep_AlnumNot);
637	    break;
638	case 'c':
639	    irec_simple_pattern(inf, Rep_Control);
640	    break;
641	case 'C':
642	    irec_simple_pattern(inf, Rep_ControlNot);
643	    break;
644	case '<':
645	    irec_simple_pattern(inf, Rep_Bow);
646	    break;
647	case '>':
648	    irec_simple_pattern(inf, Rep_Eow);
649	    break;
650	case '1':	case '2':	case '3':
651	case '4':	case '5':	case '6':
652	case '7':	case '8':	case '9':
653	    if ((inf->flags & RE_NOSUB) || (chr -= '1') >= inf->ngrps) {
654		inf->ecode = RE_ESUBREG;
655		return;
656	    }
657	    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
658		inf->ecode = RE_ESPACE;
659		return;
660	    }
661	    pat->type = Rep_Backref;
662	    pat->data.chr = chr;
663	    pat->prev = inf->ppat;
664	    if (inf->ppat)
665		inf->ppat->next = pat;
666	    else
667		inf->palt->pat = pat;
668	    inf->ppat = pat;
669	    break;
670
671	/* True literals */
672	case '0':
673	    irec_literal_pattern(inf, '\0');
674	    break;
675	case 'a':
676	    irec_literal_pattern(inf, '\a');
677	    break;
678	case 'b':
679	    irec_literal_pattern(inf, '\b');
680	    break;
681	case 'f':
682	    irec_literal_pattern(inf, '\f');
683	    break;
684	case 'r':
685	    irec_literal_pattern(inf, '\r');
686	    break;
687	case 'v':
688	    irec_literal_pattern(inf, '\v');
689	    break;
690
691	default:
692	    /* Don't check if case insensitive regular expression */
693	    irec_literal_pattern(inf, chr);
694	    break;
695    }
696}
697
698static void
699irec_simple_repetition(irec_info *inf, rec_rep_t type)
700{
701    rec_rep *rep;
702
703	/* If nowhere to add repetition */
704    if ((inf->pgrp == NULL && inf->ppat == NULL) ||
705	/* If repetition already added to last/current pattern */
706	(inf->pgrp == NULL && inf->ppat->rep != NULL) ||
707	/* If repetition already added to last/current group */
708	(inf->ppat == NULL && inf->pgrp->parent->rep != NULL)) {
709	inf->ecode = RE_BADRPT;
710	return;
711    }
712
713    if ((rep = calloc(1, sizeof(rec_rep))) == NULL) {
714	inf->ecode = RE_ESPACE;
715	return;
716    }
717
718    rep->type = type;
719    irec_add_repetition(inf, rep);
720}
721
722static void
723irec_complex_repetition(irec_info *inf)
724{
725    int exact;
726    rec_rep *rep;
727    long mine, maxc;
728    unsigned char *end;
729
730	/* If nowhere to add repetition */
731    if ((inf->pgrp == NULL && inf->ppat == NULL) ||
732	/* If repetition already added to last/current pattern */
733	(inf->pgrp == NULL && inf->ppat->rep != NULL) ||
734	/* If repetition already added to last/current group */
735	(inf->ppat == NULL && inf->pgrp->parent->rep != NULL)) {
736	inf->ecode = RE_EBADBR;
737	return;
738    }
739
740    exact = 0;
741    mine = maxc = -1;
742    if (inf->ptr[0] == ',')
743	/* Specify max number of ocurrences only */
744	goto domax;
745    else if (!isdigit(inf->ptr[0]))
746	goto badbr;
747
748    mine = strtol((char*)inf->ptr, (char**)&end, 10);
749    inf->ptr = end;
750    if (inf->ptr[0] == '}') {
751	exact = 1;
752	++inf->ptr;
753	goto redone;
754    }
755    else if (inf->ptr[0] != ',')
756	goto badbr;
757
758domax:
759	/* Add one to skip comma */
760    ++inf->ptr;
761    if (inf->ptr[0] == '}') {
762	++inf->ptr;
763	goto redone;
764    }
765    else if (!isdigit(inf->ptr[0]))
766	goto badbr;
767    maxc = strtol((char*)inf->ptr, (char**)&end, 10);
768    inf->ptr = end;
769    if (inf->ptr[0] != '}')
770	goto badbr;
771    ++inf->ptr;
772
773redone:
774    if (mine == maxc) {
775	maxc = -1;
776	exact = 1;
777    }
778
779    /* Check range and if min-max parameters are valid */
780    if (mine >= 255 || maxc >= 255 ||
781	(mine >= 0 && maxc >= 0 && mine > maxc))
782	goto badbr;
783
784    /* Check for noop */
785    if (exact && mine == 1)
786	return;
787
788    if ((rep = calloc(1, sizeof(rec_rep))) == NULL) {
789	inf->ecode = RE_ESPACE;
790	return;
791    }
792
793	/* Convert {0,1} to ? */
794    if (mine == 0 && maxc == 1)
795	rep->type = Rer_Maybe;
796    else if (exact) {
797	rep->type = Rer_Exact;
798	rep->mine = mine;
799    }
800	/* Convert {0,} to * */
801    else if (mine == 0 && maxc == -1)
802	rep->type = Rer_AnyTimes;
803	/* Convert {1,} to + */
804    else if (mine == 1 && maxc == -1)
805	rep->type = Rer_AtLeast;
806    else if (maxc == -1) {
807	rep->type = Rer_Min;
808	rep->mine = mine;
809    }
810    else if (mine < 1) {
811	rep->type = Rer_Max;
812	rep->maxc = maxc;
813    }
814    else {
815	rep->type = Rer_MinMax;
816	rep->mine = mine;
817	rep->maxc = maxc;
818    }
819
820    irec_add_repetition(inf, rep);
821
822    return;
823
824badbr:
825    inf->ecode = RE_EBADBR;
826}
827
828/*  The rep argument is allocated and has no reference yet,
829 * if something fails it must be freed before returning.
830 */
831static void
832irec_add_repetition(irec_info *inf, rec_rep *rep)
833{
834    int length;
835    rec_pat *pat;
836    rec_grp *grp;
837    rec_rep_t rept;
838    unsigned char value, upper;
839
840    rept = rep->type;
841
842    if (inf->ppat == NULL) {
843	rec_pat *any;
844	rec_grp *grp = inf->pgrp;
845
846	if (rept == Rer_AnyTimes || rept == Rer_Maybe || rept == Re_AtLeast) {
847	    /* Convert (.)* to (.*), ((.))* not handled and may not match */
848	    any = NULL;
849
850	    if (grp->alt && grp->alt->pat) {
851		for (any = grp->alt->pat; any->next; any = any->next)
852		    ;
853		switch (any->type) {
854		    case Rep_Any:
855			break;
856		    case Rep_AnyAnyTimes:
857		    case Rep_AnyMaybe:
858		    case Rep_AnyAtLeast:
859			free(rep);
860			inf->ecode = RE_BADRPT;
861			return;
862		    default:
863			any = NULL;
864			break;
865		}
866	    }
867	    if (any) {
868		free(rep);
869		rep = NULL;
870		any->type = (rept == Rer_AnyTimes) ? Rep_AnyAnyTimes :
871			    (rept == Rer_AtLeast) ? Rep_AnyAtLeast :
872			    Rep_AnyMaybe;
873		while (grp) {
874		    ++grp->comp;
875		    grp = grp->pgrp;
876		}
877	    }
878	}
879	inf->pgrp->parent->rep = rep;
880	irec_close_group(inf);
881	return;
882    }
883
884    switch (inf->ppat->type) {
885	case Rep_Bol:
886	case Rep_Eol:
887	case Rep_Bow:
888	case Rep_Eow:
889	case Rep_AnyAnyTimes:
890	case Rep_AnyMaybe:
891	case Rep_AnyAtLeast:
892	    /* Markers that cannot repeat */
893	    free(rep);
894	    inf->ecode = RE_BADRPT;
895	    return;
896
897	case Rep_Any:
898	    grp = inf->pgrp;
899	    free(rep);
900	    if (rept == Rer_AnyTimes ||
901		rept == Rer_Maybe ||
902		rept == Rer_AtLeast) {
903		inf->ppat->type = (rept == Rer_AnyTimes) ?
904				   Rep_AnyAnyTimes :
905				  (rept == Rer_Maybe) ?
906				   Rep_AnyMaybe :
907				   Rep_AnyAtLeast;
908		while (grp) {
909		    ++grp->comp;
910		    grp = grp->pgrp;
911		}
912	    }
913	    else
914		/* XXX Not (yet) implemented */
915		inf->ecode = RE_BADRPT;
916	    rep = NULL;
917	    break;
918
919	case Rep_String:
920	    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
921		free(rep);
922		inf->ecode = RE_ESPACE;
923		return;
924	    }
925
926	    length = strlen((char*)inf->ppat->data.str);
927	    pat->type = Rep_Literal;
928	    pat->prev = inf->ppat;
929	    pat->data.chr = inf->ppat->data.str[length - 1];
930	    if (length == 2) {
931		/* Must convert to two Rep_Literals */
932		value = inf->ppat->data.str[0];
933		free(inf->ppat->data.str);
934		inf->ppat->data.chr = value;
935		inf->ppat->type = Rep_Literal;
936	    }
937	    else
938		/* Must remove last character from string */
939		inf->ppat->data.str[length - 1] = '\0';
940	    inf->ppat->next = pat;
941	    inf->ppat = pat;
942	    break;
943
944	case Rep_CaseString:
945	    if ((pat = calloc(1, sizeof(rec_pat))) == NULL) {
946		free(rep);
947		inf->ecode = RE_ESPACE;
948		return;
949	    }
950
951	    length = strlen((char*)inf->ppat->data.str);
952	    pat->type = Rep_CaseLiteral;
953	    pat->prev = inf->ppat;
954	    pat->data.cse.lower = inf->ppat->data.str[length - 2];
955	    pat->data.cse.upper = inf->ppat->data.str[length - 1];
956	    if (length == 4) {
957		/* Must convert to two Rep_CaseLiterals */
958		value = inf->ppat->data.str[0];
959		upper = inf->ppat->data.str[1];
960		free(inf->ppat->data.str);
961		inf->ppat->data.cse.lower = value;
962		inf->ppat->data.cse.upper = upper;
963		inf->ppat->next = pat;
964		inf->ppat->type = Rep_CaseLiteral;
965	    }
966	    else
967		/* Must remove last character pair from string */
968		inf->ppat->data.str[length - 2] = '\0';
969	    inf->ppat->next = pat;
970	    inf->ppat = pat;
971	    break;
972
973	default:
974	    /* Anything else does not need special handling */
975	    break;
976    }
977
978    inf->ppat->rep = rep;
979}
980
981static void
982irec_free(irec_info *inf)
983{
984    irec_free_alt(inf->alt);
985}
986
987static void
988irec_free_grp(rec_grp *grp)
989{
990    if (grp->alt)
991	irec_free_alt(grp->alt);
992    free(grp);
993}
994
995static void
996irec_free_pats(rec_pat *pat)
997{
998    rec_pat *next;
999    rec_pat_t rect;
1000
1001    while (pat) {
1002	next = pat->next;
1003	if (pat->rep)
1004	    free(pat->rep);
1005	rect = pat->type;
1006	if (rect == Rep_Range || rect == Rep_RangeNot)
1007	    free(pat->data.rng);
1008	else if (rect == Rep_Group)
1009	    irec_free_grp(pat->data.grp);
1010	else if (rect == Rep_StringList)
1011	    orec_free_stl(pat->data.stl);
1012	free(pat);
1013	pat = next;
1014    }
1015}
1016