re.c revision b9afefc9
1/*
2 * Copyright (c) 2002 by The XFree86 Project, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 * Except as contained in this notice, the name of the XFree86 Project shall
23 * not be used in advertising or otherwise to promote the sale, use or other
24 * dealings in this Software without prior written authorization from the
25 * XFree86 Project.
26 *
27 * Author: Paulo César Pereira de Andrade
28 */
29
30/* $XFree86: xc/programs/xedit/lisp/re/re.c,v 1.8 2002/11/17 07:51:30 paulo Exp $ */
31
32#include <stdio.h>
33#include "rep.h"
34
35/*
36 * Types
37 */
38
39/*  Information used when generating the final form of the compiled re.
40 */
41struct _re_inf {
42    rec_alt *alt;
43    unsigned char *cod;
44    long len;
45    long spc;
46
47    /* Start offset of special repetition instruction */
48    long sr[MAX_DEPTH];
49
50    /* Jump offset of special repetition instruction */
51    long sj[MAX_DEPTH];
52
53    /* Just a flag, to know if this nesting is for a special repetition */
54    char sp[MAX_DEPTH];
55
56    int bas;			/* Alternatives/repetitions depth */
57    int par;			/* Open parenthesis counter */
58    int ref;			/* Backreference counter */
59
60    rec_pat *apat;		/*  Alternatives duplicate patterns
61				 * if a special repetition is found,
62				 * this is done to somewhat simplify
63				 * the bytecode engine and still allow
64				 * some complex (and time consuming)
65				 * patterns. */
66
67    int flags;
68    int ecode;
69};
70
71/*  This structure is not associated with re_cod as it's data only matters
72 * to the current match search.
73 */
74struct _re_eng {
75    unsigned char *bas;		/* Base string pointer */
76    unsigned char *str;		/* String to search for pattern */
77    unsigned char *end;		/* Where to stop searching */
78    unsigned char *cod;		/* Pointer in the re_cod structure */
79    long off;			/* Number of used entries in so/eo etc */
80
81    /* Match offset/nesting information */
82    long so[MAX_DEPTH];		/* (s)tart of (m)atch */
83    long eo[MAX_DEPTH];		/* (e)nd of (m)atch */
84    long sv[MAX_DEPTH];		/* (s)a(v)e match end offset */
85    long re[MAX_DEPTH];		/* (re)petition count */
86    long ss[MAX_DEPTH];		/* (s)ave (s)tart of match */
87    unsigned char *rcod[MAX_DEPTH];	/* restart position in regex code */
88    unsigned char *rstr[MAX_DEPTH];	/* restart position in string */
89
90    /* Group/backreference information */
91    long goff;
92    long gso[9];
93    long geo[9];
94};
95
96/*
97 * Prototypes
98 */
99static void reinit(void);
100static int rec_check(re_inf*, int);
101static int rec_code(re_inf*, ReCode);
102static int rec_byte(re_inf*, int);
103static int rec_byte_byte(re_inf*, int, int);
104static int rec_code_byte(re_inf*, ReCode, int);
105static int rec_length(re_inf*, int);
106static int rec_code_byte_byte(re_inf*, ReCode, int, int);
107static int rec_build_alt(re_inf*, rec_alt*);
108static int rec_build_pat(re_inf*, rec_pat*);
109static int rec_build_rng(re_inf*, rec_rng*);
110static int rec_build_grp(re_inf*, rec_grp*);
111static int rec_build_stl(re_inf*, rec_stl*);
112static int rec_build_rep(re_inf*, rec_rep*);
113static int rec_inc_spc(re_inf*);
114static int rec_dec_spc(re_inf*);
115static int rec_add_spc(re_inf*, int);
116static int rec_off_spc(re_inf*);
117static int rec_alt_spc(re_inf*, int);
118static int rec_rep_spc(re_inf*, int);
119#ifdef DEBUG
120static void redump(re_cod*);
121#endif
122
123/*
124 * Initialization
125 */
126unsigned char re__alnum[256];
127unsigned char re__odigit[256];
128unsigned char re__ddigit[256];
129unsigned char re__xdigit[256];
130unsigned char re__control[256];
131
132/*
133 * Implementation
134 */
135int
136recomp(re_cod *preg, const char *pattern, int flags)
137{
138    int i, ecode;
139    re_inf inf;
140
141    reinit();
142
143    preg->cod = NULL;
144    inf.alt = irec_comp(pattern,
145			flags & RE_PEND ? preg->re_endp :
146				pattern + strlen(pattern),
147			flags, &ecode);
148    if (ecode != 0)
149	return (ecode);
150
151    inf.cod = NULL;
152    inf.len = inf.spc = 0;
153    inf.bas = 0;
154    inf.par = 0;
155    inf.ref = 0;
156    inf.apat = NULL;
157    inf.flags = flags;
158    inf.ecode = 0;
159    for (i = 0; i < MAX_DEPTH; i++)
160	inf.sp[i] = 0;
161
162    /* First byte is runtime modifier flags */
163    if (rec_byte(&inf, flags & (RE_NEWLINE | RE_NOSUB)) == 0 &&
164	rec_byte(&inf, 0xff) == 0 &&
165	rec_build_alt(&inf, inf.alt) == 0 &&
166	rec_rep_spc(&inf, 0) == 0 &&
167	rec_code(&inf, Re_Done) == 0) {
168	/*  Number of possible references, loops will not leave this
169	 * value correct, but it is cheap to read it from the second
170	 * byte, instead of adding several extra checks in the bytecode. */
171	if (inf.ref)
172	    inf.cod[1] = inf.ref - 1;
173	preg->cod = inf.cod;
174	/*  Public structure member */
175	preg->re_nsub = inf.ref;
176    }
177
178    irec_free_alt(inf.alt);
179    if (inf.ecode)
180	free(inf.cod);
181#ifdef DEBUG
182    else if (flags & RE_DUMP)
183	redump(preg);
184#endif
185
186    return (inf.ecode);
187}
188
189int
190reexec(const re_cod *preg, const char *string,
191       int nmatch, re_mat pmat[], int flags)
192{
193    unsigned char *ptr, *str, newline, nosub;
194    int len, si, ci, bas, i, j, k, l, m;
195    re_eng eng;
196
197    if (preg == NULL || preg->cod == NULL || nmatch < 0 ||
198	((flags & RE_STARTEND) &&
199	 (pmat == NULL || pmat[0].rm_eo < pmat[0].rm_so)))
200	return (RE_INVARG);
201
202    eng.str = (unsigned char*)string;
203    if (flags & RE_STARTEND) {
204	eng.end = eng.str + pmat[0].rm_eo;
205	eng.str += pmat[0].rm_so;
206    }
207    else
208	eng.end = eng.str + strlen(string);
209    eng.bas = eng.str;
210    nosub = preg->cod[0] & RE_NOSUB;
211    newline = preg->cod[0] & RE_NEWLINE;
212    eng.cod = preg->cod + 2;
213
214    if (!nosub && preg->cod[1] != 0xff) {
215	for (i = 0; i <= preg->cod[1]; i++) {
216	    eng.gso[i] = 0;
217	    eng.geo[i] = -1;
218	}
219    }
220
221    /* Setup to search for start of match from the first character */
222    eng.so[0] = 0;
223    eng.eo[0] = eng.sv[0] = -1;
224    eng.rcod[0] = eng.cod;
225    eng.rstr[0] = eng.str + 1;
226    eng.off = 0;
227    eng.goff = -1;
228    for (ci = si = 1;;) {
229reset:
230	switch (*eng.cod) {
231	    /****************************************************
232	     * One byte codes					*
233	     ****************************************************/
234	    case Re_Any:
235		if (eng.str == eng.end || (newline && eng.str[0] == '\n'))
236		    goto fail;
237		goto match;
238	    case Re_AnyEatAnyTimes:
239		if (newline) {
240		    for (ptr = eng.str; ptr < eng.end; ptr++) {
241			if (*ptr == '\n')
242			    break;
243		    }
244		    si = ptr - eng.str;
245		}
246		else
247		    si = eng.end - eng.str;
248		goto match;
249	    case Re_AnyEatMaybe:
250		si = eng.end > eng.str;
251		if (newline && si && eng.str[0] == '\n')
252		    si = 0;
253		goto match;
254	    case Re_AnyEatAtLeast:
255		if (newline) {
256		    for (ptr = eng.str; ptr < eng.end; ptr++) {
257			if (*ptr == '\n')
258			    break;
259		    }
260		    si = ptr - eng.str;
261		}
262		else
263		    si = eng.end - eng.str;
264		if (si == 0) {
265		    si = 1;
266		    goto fail;
267		}
268		goto match;
269	    case Re_Odigit:
270		if (eng.str >= eng.end)
271		    goto fail;
272		if (re__odigit[eng.str[0]])
273		    goto match;
274		goto fail;
275	    case Re_OdigitNot:
276		if (eng.str >= eng.end || re__odigit[eng.str[0]])
277		    goto fail;
278		goto match;
279	    case Re_Digit:
280		if (eng.str >= eng.end)
281		    goto fail;
282		if (re__ddigit[eng.str[0]])
283		    goto match;
284		goto fail;
285	    case Re_DigitNot:
286		if (eng.str >= eng.end || re__ddigit[eng.str[0]])
287		    goto fail;
288		goto match;
289	    case Re_Xdigit:
290		if (eng.str >= eng.end)
291		    goto fail;
292		if (re__xdigit[eng.str[0]])
293		    goto match;
294		goto fail;
295	    case Re_XdigitNot:
296		if (eng.str >= eng.end || re__xdigit[eng.str[0]])
297		    goto fail;
298		goto match;
299	    case Re_Space:
300		if (eng.str >= eng.end)
301		    goto fail;
302		if (eng.str[0] == ' ' || eng.str[0] == '\t')
303		    goto match;
304		goto fail;
305	    case Re_SpaceNot:
306		if (eng.str >= eng.end)
307		    goto fail;
308		if (eng.str[0] != ' ' && eng.str[0] != '\t')
309		    goto match;
310		goto fail;
311	    case Re_Tab:
312		if (eng.str >= eng.end)
313		    goto fail;
314		if (eng.str[0] == '\t')
315		    goto match;
316		goto fail;
317	    case Re_Newline:
318		if (eng.str >= eng.end)
319		    goto fail;
320		if (eng.str[0] == '\n')
321		    goto match;
322		goto fail;
323	    case Re_Lower:
324		if (eng.str >= eng.end)
325		    goto fail;
326		if (eng.str[0] >= 'a' && eng.str[0] <= 'z')
327		    goto match;
328		goto fail;
329	    case Re_Upper:
330		if (eng.str >= eng.end)
331		    goto fail;
332		if (eng.str[0] >= 'A' && eng.str[0] <= 'Z')
333		    goto match;
334		goto fail;
335	    case Re_Alnum:
336		if (eng.str >= eng.end)
337		    goto fail;
338		if (re__alnum[eng.str[0]])
339		    goto match;
340		goto fail;
341	    case Re_AlnumNot:
342		if (eng.str >= eng.end)
343		    goto fail;
344		if (re__alnum[eng.str[0]])
345		    goto fail;
346		goto match;
347	    case Re_Control:
348		if (eng.str >= eng.end)
349		    goto fail;
350		if (re__control[eng.str[0]])
351		    goto match;
352		goto fail;
353	    case Re_ControlNot:
354		if (eng.str >= eng.end || re__control[eng.str[0]])
355		    goto fail;
356		goto match;
357
358	    /****************************************************
359	     * One byte codes, match special emtpy strings	*
360	     ****************************************************/
361	    case Re_Bol:
362		if (eng.str == eng.bas) {
363		    if ((flags & RE_NOTBOL)) {
364			/* String does not start at the beginning of a line */
365			if (newline)
366			    goto fail;
367			goto wont;
368		    }
369		    si = 0;
370		    goto match;
371		}
372		if (newline && eng.str[-1] == '\n') {
373		    si = 0;
374		    goto match;
375		}
376		goto fail;
377	    case Re_Eol:
378		if (eng.str == eng.end) {
379		    if (flags & RE_NOTEOL)
380			/* String does not finish at the end of a line */
381			goto wont;
382		    si = 0;
383		    goto match;
384		}
385		if (newline && eng.str[0] == '\n') {
386		    si = 0;
387		    goto match;
388		}
389		goto fail;
390	    case Re_Bow:
391		if (eng.str >= eng.end ||
392		    (eng.str > eng.bas &&
393		     (re__alnum[eng.str[-1]])))
394		    goto fail;
395		if (re__alnum[eng.str[0]]) {
396		    si = 0;
397		    goto match;
398		}
399		goto fail;
400	    case Re_Eow:
401		if (eng.str == eng.bas ||
402		    (eng.str < eng.end &&
403		     re__alnum[eng.str[0]]))
404		    goto fail;
405		if (re__alnum[eng.str[-1]]) {
406		    si = 0;
407		    goto match;
408		}
409		goto fail;
410
411	    /****************************************************
412	     * One byte code, one byte argument			*
413	     ****************************************************/
414	    case Re_Literal:
415		if (eng.str >= eng.end)
416		    goto fail;
417		if (eng.str[0] == eng.cod[1]) {
418		    ci = 2;
419		    goto match;
420		}
421		goto fail;
422	    case Re_LiteralNot:
423		if (eng.str >= eng.end)
424		    goto fail;
425		if (eng.str[0] != eng.cod[1]) {
426		    ci = 2;
427		    goto match;
428		}
429		goto fail;
430	    case Re_SearchLiteral:
431		for (str = eng.str; str < eng.end; str++) {
432		    if (*str == eng.cod[1]) {
433			ci = 2;
434			eng.str = str;
435			goto match;
436		    }
437		}
438		/* This bytecode only happens in the toplevel */
439		eng.so[0] = str - eng.bas;
440		eng.str = str;
441		goto fail;
442
443	    /****************************************************
444	     * One byte code, two bytes argument		*
445	     ****************************************************/
446	    case Re_CaseLiteral:
447		if (eng.str >= eng.end)
448		    goto fail;
449		if (eng.str[0] == eng.cod[1] || eng.str[0] == eng.cod[2]) {
450		    ci = 3;
451		    goto match;
452		}
453		goto fail;
454	    case Re_CaseLiteralNot:
455		if (eng.str >= eng.end)
456		    goto fail;
457		if (eng.str[0] != eng.cod[1] && eng.str[0] != eng.cod[2]) {
458		    ci = 3;
459		    goto match;
460		}
461		goto fail;
462	    case Re_SearchCaseLiteral:
463		for (str = eng.str; str < eng.end; str++) {
464		    if (*str == eng.cod[1] || *str == eng.cod[2]) {
465			ci = 3;
466			eng.str = str;
467			goto match;
468		    }
469		}
470		eng.so[0] = str - eng.bas;
471		eng.str = str;
472		goto fail;
473
474	    /****************************************************
475	     * One byte codes, two arguments, n bytes		*
476	     ****************************************************/
477	    case Re_String:
478		len = eng.cod[1];
479		if (len & 0x80) {
480		    i = 3;
481		    len = (len & 0x7f) + (eng.cod[2] << 7);
482		}
483		else
484		    i = 2;
485		if (eng.end - eng.str < len)
486		    goto fail;
487		ptr = eng.cod + i;
488		str = eng.str;
489		for (k = len; k > 0; k--) {
490		    if (*ptr++ != *str++)
491			goto fail;
492		}
493		ci = i + len;
494		si = len;
495		goto match;
496	    case Re_SearchString:
497		len = eng.cod[1];
498		if (len & 0x80) {
499		    i = 3;
500		    len = (len & 0x7f) + (eng.cod[2] << 7);
501		}
502		else
503		    i = 2;
504		for (str = eng.str; eng.end - str >= len; str = eng.str++) {
505		    for (ptr = eng.cod + i, str = eng.str, k = len; k > 0; k--)
506			if (*ptr++ != *str++)
507			    break;
508		    if (k == 0) {
509			/* Substring found */
510			ci = i + len;
511			si = str - eng.str;
512			goto match;
513		    }
514		}
515		eng.so[0] = eng.end - eng.bas;
516		eng.str = eng.end;
517		goto fail;
518
519	    case Re_CaseString:
520		len = eng.cod[1];
521		if (len & 0x80) {
522		    i = 3;
523		    len = (len & 0x7f) + (eng.cod[2] << 7);
524		}
525		else
526		    i = 2;
527
528		len >>= 1;
529		/*  Check if there are at least len/2 bytes left, string
530		 * is represented as two bytes, lower and upper case */
531		if (eng.end - eng.str < len)
532		    goto fail;
533		ptr = eng.cod + i;
534		str = eng.str;
535		for (k = len; k > 0; str++, ptr += 2, k--) {
536		    if (*str != ptr[0] && *str != ptr[1])
537			goto fail;
538		}
539		ci = i + (len << 1);
540		si = len;
541		goto match;
542	    case Re_SearchCaseString:
543		len = eng.cod[1];
544		if (len & 0x80) {
545		    i = 3;
546		    len = (len & 0x7f) + (eng.cod[2] << 7);
547		}
548		else
549		    i = 2;
550		len >>= 1;
551		for (str = eng.str; eng.end - str >= len; str = eng.str++) {
552		    for (ptr = eng.cod + i, str = eng.str, k = len;
553			 k > 0; k--, ptr += 2, str++)
554			if (ptr[0] != str[0] && ptr[1] != str[0])
555			    break;
556		    if (k == 0) {
557			/* Substring found */
558			ci = i + (len << 1);
559			si = str - eng.str;
560			goto match;
561		    }
562		}
563		eng.so[0] = eng.end - eng.bas;
564		eng.str = eng.end;
565		goto fail;
566
567	    case Re_StringList:
568		/* Number of strings */
569		k = eng.cod[1];
570
571		/* Where to jump after match */
572		bas = eng.cod[2] | (eng.cod[3] << 8);
573
574		str = eng.str;
575		ptr = eng.cod + k + 4;
576		l = eng.end - eng.str;
577		for (j = 0; j < k; j++) {
578		    len = eng.cod[j + 4];
579		    if (len <= l) {
580			for (i = 0; i < len; i++)
581			    if (ptr[i] != str[i])
582				goto next_stl;
583			goto stl_match;
584		    }
585next_stl:
586		    ptr += len;
587		}
588		goto fail;
589stl_match:
590		ci = bas;
591		si = len;
592		goto match;
593
594	    case Re_CaseStringList:
595		/* Number of strings */
596		k = eng.cod[1];
597
598		/* Where to jump after match */
599		bas = eng.cod[2] | (eng.cod[3] << 8);
600
601		str = eng.str;
602		ptr = eng.cod + k + 4;
603		l = eng.end - eng.str;
604		for (j = 0; j < k; j++) {
605		    len = eng.cod[j + 4];
606		    if ((len >> 1) <= l) {
607			for (i = m = 0; i < len; m++, i += 2)
608			    if (ptr[i] != str[m] && ptr[i + 1] != str[m])
609				goto next_cstl;
610			goto cstl_match;
611		    }
612next_cstl:
613		    ptr += len;
614		}
615		goto fail;
616cstl_match:
617		ci = bas;
618		si = len >> 1;
619		goto match;
620
621
622	    case Re_LargeStringList:
623		/* Where to jump after match */
624		bas = eng.cod[1] | (eng.cod[2] << 8);
625
626		str = eng.str;
627
628		/* First entry in index map */
629		ptr = eng.cod + 3;
630		i = (int)str[0] << 1;
631		j = ptr[i] | (ptr[i + 1] << 8);
632		if (j == 0xffff)
633		    /* No entry with this byte */
634		    goto fail;
635
636		/* Bytes left in input */
637		l = eng.end - eng.str;
638
639		/* First entry matching initial byte */
640		ptr += 512 + j;
641
642		for (len = ptr[0];
643		     str[0] == ptr[1];
644		     ptr += len + 1, len = ptr[0]) {
645		    if (len <= l) {
646			for (i = 1; i < len; i++) {
647			    if (ptr[i + 1] != str[i])
648				goto next_lstl;
649			}
650			ci = bas;
651			si = len;
652			goto match;
653		    }
654next_lstl:;
655		}
656		goto fail;
657
658	    case Re_LargeCaseStringList:
659		/* Where to jump after match */
660		bas = eng.cod[1] | (eng.cod[2] << 8);
661
662		str = eng.str;
663
664		/* First entry in index map */
665		ptr = eng.cod + 3;
666		i = (int)str[0] << 1;
667		j = ptr[i] | (ptr[i + 1] << 8);
668		if (j == 0xffff)
669		    /* No entry with this byte */
670		    goto fail;
671
672		/* Bytes left in input */
673		l = eng.end - eng.str;
674
675		/* First entry matching initial byte */
676		ptr += 512 + j;
677
678		for (len = ptr[0];
679		     str[0] == ptr[1] || str[0] == ptr[2];
680		     ptr += len + 1, len = ptr[0]) {
681		    if ((k = (len >> 1)) <= l) {
682			for (i = 2, j = 1; i < len; i += 2, j++) {
683			    if (ptr[i + 1] != str[j] && ptr[i + 2] != str[j])
684				goto next_lcstl;
685			}
686			ci = bas;
687			si = k;
688			goto match;
689		    }
690next_lcstl:;
691		}
692		goto fail;
693
694
695	    /****************************************************
696	     * Character range matching				*
697	     ****************************************************/
698	    case Re_Range:
699		if (eng.str < eng.end && eng.cod[eng.str[0] + 1]) {
700		    ci = 257;
701		    goto match;
702		}
703		goto fail;
704	    case Re_RangeNot:
705		if (eng.str >= eng.end || eng.cod[eng.str[0] + 1])
706		    goto fail;
707		ci = 257;
708		goto match;
709
710	    /****************************************************
711	     * Group handling					*
712	     ****************************************************/
713	    case Re_Open:
714		if (++eng.goff >= 9)
715		    return (RE_ASSERT);
716		eng.gso[eng.goff] = eng.str - eng.bas;
717		++eng.cod;
718		continue;
719	    case Re_Close:
720		eng.geo[eng.goff] = eng.str - eng.bas;
721		++eng.cod;
722		continue;
723	    case Re_Update:
724		bas = eng.cod[1];
725		eng.geo[eng.goff] = eng.str - eng.bas;
726		eng.cod += 2;		/* + Update + bas */
727		continue;
728
729	    /****************************************************
730	     * Backreference					*
731	     ****************************************************/
732	    case Re_Backref:
733		i = eng.cod[1];
734		j = eng.gso[i];
735		k = eng.geo[i];
736		len = k - j;
737		if (k < j || eng.end - eng.str < len)
738		    goto fail;
739		ptr = eng.bas + j;
740		str = eng.str;
741		for (l = len; l > 0; l--) {
742		    if (*ptr++ != *str++)
743			goto fail;
744		}
745		ci = 2;
746		si = len;
747		goto match;
748
749	    /****************************************************
750	     * Alternatives handling				*
751	     ****************************************************/
752	    case Re_Alt:
753		bas = eng.off;
754		if (++eng.off >= MAX_DEPTH)
755		    return (RE_ASSERT);
756
757		/* Get offset of next alternative */
758		i = eng.cod[1] | (eng.cod[2] << 8);
759
760		/* Setup for next alternative if the current fails */
761		eng.rcod[eng.off] = eng.cod + i + 1;	/* + Alt */
762
763		/* If fail, test the next alternative in the same string */
764		eng.rstr[eng.off] = eng.str;
765
766		/* Setup match offsets */
767		if (eng.so[bas] <= eng.eo[bas])
768		    eng.so[eng.off] = eng.eo[bas];
769		else
770		    eng.so[eng.off] = eng.so[bas];
771		eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;
772
773		/* Save start of possible previous matches */
774		eng.ss[eng.off] = eng.so[bas];
775
776		/* Skip code */
777		eng.cod += 3;
778
779		/* Go try the first alternative */
780		continue;
781
782	    case Re_AltNext:
783		bas = eng.off - 1;
784		/* Check if matched and if it is a better match */
785		if (eng.sv[eng.off] - eng.so[eng.off] <
786		    eng.eo[eng.off] - eng.so[eng.off])
787		    eng.sv[eng.off] = eng.eo[eng.off];
788
789		/* Get offset of next alternative */
790		i = eng.cod[1] | (eng.cod[2] << 8);
791
792		/* Setup for next alternative if the current fails */
793		eng.rcod[eng.off] = eng.cod + i + 1;	/* + AltNext */
794
795		/* Setup match offset */
796		eng.eo[eng.off] = eng.so[eng.off] - 1;
797
798		/* Reset string for next alternative */
799		eng.str = eng.rstr[eng.off];
800
801		/* Skip code */
802		eng.cod += 3;
803
804		/* Go try the next alternative */
805		continue;
806
807	    case Re_AltDone:
808		bas = eng.off - 1;
809		/* Check if matched and if it is a better match */
810		if (eng.sv[eng.off] - eng.so[eng.off] <
811		    eng.eo[eng.off] - eng.so[eng.off])
812		    eng.sv[eng.off] = eng.eo[eng.off];
813
814		/* If there is a match */
815		if (eng.sv[eng.off] >= eng.so[eng.off]) {
816		    eng.so[bas] = eng.ss[eng.off];
817		    eng.eo[bas] = eng.sv[eng.off];
818		    eng.str = eng.bas + eng.eo[bas];
819
820		    /* Pop stack and skip code */
821		    --eng.off;
822		    ++eng.cod;
823
824		    /* Go try next regular expression pattern */
825		    continue;
826		}
827
828		/* Failed, reset string and pop stack */
829		eng.str = eng.rstr[eng.off];
830		--eng.off;
831		goto fail;
832
833
834	    /****************************************************
835	     * Repetition					*
836	     ****************************************************/
837
838		/*  Note that the repetition counter is not
839		 * updated for <re>*, <re>+ and <re>?
840		 * it is easy to updated, but since it is not
841		 * really useful, code to do it was removed
842		 * to save a few cpu cicles. */
843
844#define REPETITION_SETUP()					\
845	if (++eng.off >= MAX_DEPTH)				\
846	    return (RE_ASSERT);					\
847								\
848	/* Return here for recovery if match fail */		\
849	eng.rcod[eng.off] = eng.cod;				\
850								\
851	/* Setup match offsets */				\
852	if (eng.so[bas] <= eng.eo[bas])				\
853	    eng.so[eng.off] = eng.eo[bas];			\
854	else							\
855	    eng.so[eng.off] = eng.so[bas];			\
856	eng.ss[eng.off] = eng.so[bas];				\
857	eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\
858								\
859	/* Skip repetition instruction */			\
860	eng.cod += 4;
861
862
863	    case Re_AnyTimes:
864		bas = eng.cod[1];
865		if (eng.off == bas) {
866		    /* First iteration */
867		    REPETITION_SETUP();
868		}
869		else {
870		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
871			eng.eo[eng.off] > eng.sv[eng.off]) {
872			/* Update offset of match */
873			eng.sv[eng.off] = eng.eo[eng.off];
874
875			/* Skip repetition instruction */
876			eng.cod += 4;
877		    }
878		    else {
879			/* Match failed but it is ok */
880			len = eng.cod[2] | (eng.cod[3] << 8);
881			eng.so[bas] = eng.ss[eng.off];
882			if (eng.sv[eng.off] >= eng.so[eng.off])
883			    /* Something matched earlier, update */
884			    eng.eo[bas] = eng.sv[eng.off];
885			else if (eng.eo[bas] < eng.so[bas])
886			    /* Empty match */
887			    eng.eo[bas] = eng.so[bas];
888
889			/* Try next pattern at correct offset */
890			eng.str = eng.bas + eng.eo[bas];
891
892			/* Pop stack and skip code */
893			--eng.off;
894			eng.cod += len;
895		    }
896		}
897		continue;
898
899	    case Re_Maybe:
900		bas = eng.cod[1];
901		if (eng.off == bas) {
902		    /* First iteration */
903		    REPETITION_SETUP();
904		}
905		else {
906		    /* Matched or first iteration is done */
907		    len = eng.cod[2] | (eng.cod[3] << 8);
908		    eng.so[bas] = eng.ss[eng.off];
909		    if (eng.eo[eng.off] > eng.so[eng.off]) {
910			/* Something matched earlier, update */
911			eng.eo[bas] = eng.eo[eng.off];
912			eng.str = eng.bas + eng.eo[bas];
913			/* Don't need to update counter */
914		    }
915		    else {
916			/* Empty match */
917			if (eng.eo[bas] < eng.so[bas])
918			    eng.eo[bas] = eng.so[bas];
919
920			/* Try next pattern at correct offset */
921			eng.str = eng.bas + eng.eo[bas];
922		    }
923
924		    /* Pop stack */
925		    --eng.off;
926
927		    /* Skip code */
928		    eng.cod += len;
929		}
930		continue;
931
932	    case Re_AtLeast:
933		bas = eng.cod[1];
934		if (eng.off == bas) {
935		    /* First iteration */
936		    REPETITION_SETUP();
937		}
938		else {
939		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
940			eng.eo[eng.off] > eng.sv[eng.off]) {
941			/* Update offset of match */
942			eng.sv[eng.off] = eng.eo[eng.off];
943
944			/* Skip repetition instruction */
945			eng.cod += 4;
946		    }
947		    else {
948			/* Last try failed */
949			len = eng.cod[2] | (eng.cod[3] << 8);
950			if (eng.sv[eng.off] >= eng.so[eng.off]) {
951			    /* Something matched earlier, update */
952			    eng.so[bas] = eng.ss[eng.off];
953			    eng.eo[bas] = eng.sv[eng.off];
954			    eng.str = eng.bas + eng.eo[bas];
955			}
956			else {
957			    /*	Do it here, so that the fail label does
958			     * not need to do too expensive work for
959			     * simple patterns. */
960			    eng.so[bas] = eng.str - eng.bas;
961
962			    /* Zero matches, pop stack and restart */
963			    --eng.off;
964			    goto fail;
965			}
966
967			/* Pop stack and skip code */
968			--eng.off;
969			eng.cod += len;
970		    }
971		}
972		continue;
973
974
975	    /****************************************************
976	     * Repetition with arguments			*
977	     ****************************************************/
978	    case Re_Exact:
979#define COMPLEX_REPETITION_SETUP_0()				\
980	i = eng.cod[1];						\
981	bas = eng.cod[2];
982
983#define COMPLEX_REPETITION_SETUP()				\
984	/* First iteration */					\
985	if (++eng.off >= MAX_DEPTH)				\
986	    return (RE_ASSERT);					\
987								\
988	/*  Remeber number or repetitions */			\
989	eng.re[eng.off] = 0;					\
990								\
991	/* Return here for recovery if match fail */		\
992	eng.rcod[eng.off] = eng.cod;				\
993								\
994	/* Setup match offsets */				\
995	if (eng.so[bas] <= eng.eo[bas])				\
996	    eng.so[eng.off] = eng.eo[bas];			\
997	else							\
998	    eng.so[eng.off] = eng.so[bas];			\
999	eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\
1000	eng.ss[eng.off] = eng.so[bas];				\
1001								\
1002	/* Skip repetition instruction */			\
1003	eng.cod += 5;
1004
1005		COMPLEX_REPETITION_SETUP_0();
1006		if (eng.off == bas) {
1007		    /* First iteration */
1008		    COMPLEX_REPETITION_SETUP();
1009		}
1010		else {
1011		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1012			eng.eo[eng.off] > eng.sv[eng.off]) {
1013			/* Update offset of match */
1014			eng.sv[eng.off] = eng.eo[eng.off];
1015
1016			/* Update repetition counter */
1017			if (++eng.re[eng.off] == i) {
1018			    /* Matched the required times */
1019			    eng.so[bas] = eng.ss[eng.off];
1020			    eng.eo[bas] = eng.sv[eng.off];
1021			    eng.str = eng.bas + eng.eo[bas];
1022
1023			    /* Update code */
1024			    k = eng.cod[3] | (eng.cod[4] << 8);
1025			    eng.cod += k;
1026
1027			    /* Pop stack and go for next pattern */
1028			    --eng.off;
1029			    continue;
1030			}
1031
1032			/* Skip repetition instruction */
1033			eng.cod += 5;
1034		    }
1035		    else {
1036			/*  Do it here, so that the fail label does
1037			 * not need to do too expensive work for
1038			 * simple patterns. */
1039			eng.so[bas] = eng.str - eng.bas;
1040
1041			/* Pop stack and restart */
1042			--eng.off;
1043			goto fail;
1044		    }
1045		}
1046		continue;
1047
1048	    case Re_Min:
1049		COMPLEX_REPETITION_SETUP_0();
1050		if (eng.off == bas) {
1051		    /* First iteration */
1052		    COMPLEX_REPETITION_SETUP();
1053		}
1054		else {
1055		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1056			eng.eo[eng.off] > eng.sv[eng.off]) {
1057			/* Update offset of match */
1058			eng.sv[eng.off] = eng.eo[eng.off];
1059
1060			/* Update repetition counter */
1061			++eng.re[eng.off];
1062
1063			/* Skip repetition instruction and try again */
1064			eng.cod += 5;
1065		    }
1066		    else {
1067			/* Match failed! */
1068			if (eng.re[eng.off] < i) {
1069			    /*	Do it here, so that the fail label does
1070			     * not need to do too expensive work for
1071			     * simple patterns. */
1072			    eng.so[bas] = eng.str - eng.bas;
1073
1074			    /* Didn't match required number of times */
1075			    --eng.off;
1076			    goto fail;
1077			}
1078			else {
1079			    /* Matched minimum number of times */
1080			    eng.eo[bas] = eng.sv[eng.off];
1081			    eng.str = eng.bas + eng.eo[bas];
1082			    k = eng.cod[3] | (eng.cod[4] << 8);
1083
1084			    /* Update code and pop stack */
1085			    eng.cod += k;
1086			    --eng.off;
1087			}
1088		    }
1089		}
1090		continue;
1091
1092	    case Re_Max:
1093		COMPLEX_REPETITION_SETUP_0();
1094		if (eng.off == bas) {
1095		    /* First iteration */
1096		    COMPLEX_REPETITION_SETUP();
1097		}
1098		else {
1099		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1100			eng.eo[eng.off] > eng.sv[eng.off]) {
1101			/* Update offset of match */
1102			eng.sv[eng.off] = eng.eo[eng.off];
1103
1104			/* Update repetition counter */
1105			if (++eng.re[eng.off] == i) {
1106			    /* Matched the maximum times */
1107			    eng.so[bas] = eng.ss[eng.off];
1108			    eng.eo[bas] = eng.sv[eng.off];
1109			    eng.str = eng.bas + eng.eo[bas];
1110
1111			    k = eng.cod[3] | (eng.cod[4] << 8);
1112
1113			    /* Update code and pop stack */
1114			    eng.cod += k;
1115			    --eng.off;
1116			    continue;
1117			}
1118
1119			/* Skip repetition instruction and try again */
1120			eng.cod += 5;
1121		    }
1122		    else {
1123			/* No matches, but zero matches are ok */
1124			k = eng.cod[3] | (eng.cod[4] << 8);
1125			if (eng.sv[eng.off] >= eng.so[eng.off]) {
1126			    /* Something matched earlier, update */
1127			    eng.so[bas] = eng.ss[eng.off];
1128			    eng.eo[bas] = eng.sv[eng.off];
1129			    eng.str = eng.bas + eng.eo[bas];
1130			}
1131			else {
1132			    /* Empty match */
1133			    if (eng.eo[bas] < eng.so[bas])
1134				eng.eo[bas] = eng.so[bas];
1135
1136			    /* Try next pattern at correct offset */
1137			    eng.str = eng.bas + eng.eo[bas];
1138			}
1139
1140			/* Pop stack and update code */
1141			--eng.off;
1142			eng.cod += k;
1143		    }
1144		}
1145		continue;
1146
1147	    case Re_MinMax:
1148		bas = eng.cod[3];
1149		if (eng.off == bas) {
1150		    /* First iteration */
1151		    COMPLEX_REPETITION_SETUP();
1152		}
1153		else {
1154		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1155			eng.eo[eng.off] > eng.sv[eng.off]) {
1156			/* Update offset of match */
1157			eng.sv[eng.off] = eng.eo[eng.off];
1158
1159			/* Update repetition counter */
1160			if (++eng.re[eng.off] == eng.cod[2]) {
1161			    /* Matched the maximum times */
1162			    eng.so[bas] = eng.ss[eng.off];
1163			    eng.eo[bas] = eng.sv[eng.off];
1164			    eng.str = eng.bas + eng.eo[bas];
1165			    k = eng.cod[4] | (eng.cod[5] << 8);
1166
1167			    /* Update code and pop stack */
1168			    eng.cod += k;
1169			    --eng.off;
1170			    continue;
1171			}
1172
1173			/* Skip repetition instruction and try again */
1174			eng.cod += 6;
1175		    }
1176		    else {
1177			/* Match failed! */
1178			if (eng.re[eng.off] < eng.cod[1]) {
1179			    /*	Do it here, so that the fail label does
1180			     * not need to do too expensive work for
1181			     * simple patterns. */
1182			    eng.so[bas] = eng.str - eng.bas;
1183
1184			    /* Didn't match required number of times */
1185			    --eng.off;
1186			    goto fail;
1187			}
1188			else {
1189			    /* Matched minimum number of times */
1190			    eng.so[bas] = eng.ss[eng.off];
1191			    eng.eo[bas] = eng.sv[eng.off];
1192			    eng.str = eng.bas + eng.eo[bas];
1193			    k = eng.cod[4] | (eng.cod[5] << 8);
1194
1195			    /* Update code and pop stack */
1196			    eng.cod += k;
1197			    --eng.off;
1198			}
1199		    }
1200		}
1201		continue;
1202
1203
1204	    /****************************************************
1205	     * Special repetition handling			*
1206	     ****************************************************/
1207	    case Re_AnyAnyTimes:
1208		/* code(1) + bas(1) + gbas(1) + jump(2) */
1209		bas = eng.cod[1];
1210		if (eng.off == bas) {
1211		    /* First iteration */
1212		    if (++eng.off >= MAX_DEPTH)
1213			return (RE_ASSERT);
1214
1215		    /* Return here for recovery if match fail */
1216		    eng.rcod[eng.off] = eng.cod;
1217
1218		    /* If fail, test the next pattern at the same point */
1219		    eng.rstr[eng.off] = eng.str;
1220
1221		    /* Setup match offsets */
1222		    eng.so[eng.off] = eng.str - eng.bas;
1223		    eng.eo[eng.off] = eng.so[eng.off] - 1;
1224
1225		    if (newline)
1226			/*  Use the repetition counter to store start of
1227			 * skipped string, to later check if skipping a
1228			 * newline. */
1229			eng.re[eng.off] = eng.so[eng.off];
1230
1231		    /* Save start of possible previous matches */
1232		    eng.ss[eng.off] = eng.so[bas];
1233
1234		    /* Skip repetition instruction */
1235		    eng.cod += 5;
1236		}
1237		else {
1238		    /* -1 as an unsigned char */
1239		    if (eng.cod[2] != 0xff)
1240			eng.goff = eng.cod[2];
1241		    else
1242			eng.goff = -1;
1243
1244		    if (newline) {
1245			ptr = eng.bas + eng.re[eng.off];
1246			str = eng.bas + eng.so[eng.off];
1247			for (; ptr < str; ptr++)
1248			    if (*ptr == '\n') {
1249				eng.cod = eng.rcod[0];
1250				eng.so[0] = ptr - eng.bas + 1;
1251				eng.eo[0] = eng.so[0] - 1;
1252				eng.rstr[0] = eng.str = ptr + 1;
1253				eng.off = 0;
1254				goto reset;
1255			    }
1256			/* If looping, don't do too many noops */
1257			eng.re[eng.off] = ptr - eng.bas;
1258		    }
1259
1260		    if (eng.eo[eng.off] >= eng.so[eng.off]) {
1261			/* Note that this is only true if all possibly
1262			 * nested special repetitions also matched. */
1263
1264			if (eng.goff >= 0) {
1265			    if (eng.cod[5] == Re_Update)
1266				eng.gso[eng.goff] = eng.eo[bas] +
1267						    (eng.so[bas] > eng.eo[bas]);
1268			    else if (eng.geo[eng.goff] < eng.so[eng.off])
1269				eng.geo[eng.goff] = eng.so[eng.off];
1270			}
1271
1272			/* Jump relative offset */
1273			len = eng.cod[3] | (eng.cod[4] << 8);
1274
1275			/* Restore offset from where started trying */
1276			eng.so[bas] = eng.ss[eng.off];
1277			eng.eo[bas] = eng.eo[eng.off];
1278
1279			/* Pop stack and skip code */
1280			--eng.off;
1281			eng.cod += len;
1282		    }
1283		    else {
1284			/* Only give up if the entire string was scanned */
1285			if (eng.str < eng.end) {
1286			    /* Update restart point for next pattern */
1287			    eng.str = ++eng.rstr[eng.off];
1288
1289			    /* Reset start of nested match */
1290			    eng.so[eng.off] = eng.str - eng.bas;
1291
1292			    /* Skip repetition instruction */
1293			    eng.cod += 5;
1294			}
1295			else {
1296			    /* Entire string scanned and failed */
1297
1298			    /* Jump relative offset */
1299			    len = eng.cod[3] | (eng.cod[4] << 8);
1300
1301			    /* Restore offset from where started trying */
1302			    eng.so[bas] = eng.ss[eng.off];
1303			    eng.eo[bas] = eng.ss[eng.off] - 1;
1304
1305			    /* Pop stack and skip code */
1306			    --eng.off;
1307			    eng.cod += len;
1308			}
1309		    }
1310		}
1311		continue;
1312
1313	    /*  This is significantly different than matching <re>.*<re>
1314	     * because it may need to restart several times since it is
1315	     * possible to find too many false positives, for example:
1316	     *	a.*b	=> once one "a" is found, scan all
1317	     *		   the remaining string searching for a "b"
1318	     *  a.?b	=> the string may have too many "a"s, but the
1319	     *		   first occurrences of "a" may not be followed
1320	     *		   by any-character and a "b" or a single "b".
1321	     */
1322	    case Re_AnyMaybe:
1323		bas = eng.cod[1];
1324		if (eng.off == bas) {
1325		    /* First iteration */
1326		    if (++eng.off >= MAX_DEPTH)
1327			return (RE_ASSERT);
1328
1329		    /* Return here for recovery if match fail */
1330		    eng.rcod[eng.off] = eng.cod;
1331
1332		    /* First try without eating a byte */
1333		    eng.rstr[eng.off] = eng.str;
1334
1335		    /* Remember this is the first try if match fail */
1336		    eng.re[eng.off] = 0;
1337
1338		    /* Setup match offsets */
1339		    eng.so[eng.off] = eng.str - eng.bas;
1340		    eng.eo[eng.off] = eng.so[eng.off] - 1;
1341
1342		    /* Save start of possible previous matches */
1343		    eng.ss[eng.off] = eng.so[bas];
1344
1345		    /* Skip repetition instruction */
1346		    eng.cod += 5;
1347		}
1348		else {
1349		    /* -1 as an unsigned char */
1350		    if (eng.cod[2] != 0xff)
1351			eng.goff = eng.cod[2];
1352		    else
1353			eng.goff = -1;
1354
1355		    if (eng.eo[eng.off] >= eng.so[eng.off]) {
1356			/* Something matched */
1357
1358			if (eng.goff >= 0) {
1359			    if (eng.cod[5] == Re_Update)
1360				eng.gso[eng.goff] = eng.eo[bas] +
1361						    (eng.so[bas] > eng.eo[bas]);
1362			    else if (eng.geo[eng.goff] < eng.so[eng.off])
1363				eng.geo[eng.goff] = eng.so[eng.off];
1364			}
1365
1366			/* Jump relative offset */
1367			len = eng.cod[3] | (eng.cod[4] << 8);
1368
1369			/* Update offset of match */
1370			eng.eo[bas] = eng.eo[eng.off];
1371
1372			/* Pop stack and skip code */
1373			--eng.off;
1374			eng.cod += len;
1375		    }
1376		    else if (eng.re[eng.off] == 0 &&
1377			     (!newline || eng.rstr[eng.off][1] != '\n')) {
1378			/* Try this time skiping a byte */
1379			++eng.re[eng.off];
1380
1381			/* Reset string, skip code and go try one time more */
1382			eng.str = ++eng.rstr[eng.off];
1383			eng.cod += 5;
1384		    }
1385		    else {
1386			/* Failed to match */
1387
1388			/* Update offsets */
1389			eng.eo[bas] = eng.ss[eng.off];
1390			eng.so[bas] = eng.eo[bas] + 1;
1391
1392			eng.str = eng.rstr[eng.off] + (eng.re[eng.off] == 0);
1393
1394			/* Pop stack and return to toplevel code */
1395			--eng.off;
1396			if (eng.str >= eng.end)
1397			    goto wont;
1398			eng.cod = eng.rcod[bas];
1399		    }
1400		}
1401		continue;
1402
1403	    /* .+ almost identical to .* but requires eating at least one byte */
1404	    case Re_AnyAtLeast:
1405		bas = eng.cod[1];
1406		if (eng.off == bas) {
1407		    /* First iteration */
1408		    if (++eng.off >= MAX_DEPTH)
1409			return (RE_ASSERT);
1410
1411		    /* Return here for recovery if match fail */
1412		    eng.rcod[eng.off] = eng.cod;
1413
1414		    /* Skip one byte for the restart string */
1415		    if (newline && eng.str[0] == '\n') {
1416			/* Cannot skip newline */
1417			eng.cod = eng.rcod[0];
1418			eng.rstr[0] = ++eng.str;
1419			eng.so[0] = eng.str - eng.bas;
1420			eng.eo[0] = eng.so[0] - 1;
1421			eng.off = 0;
1422			goto reset;
1423		    }
1424		    eng.rstr[eng.off] = ++eng.str;
1425
1426		    /* Setup match offsets */
1427		    eng.so[eng.off] = eng.str - eng.bas;
1428		    eng.eo[eng.off] = eng.so[eng.off] - 1;
1429
1430		    if (newline)
1431			/*  Use the repetition counter to store start of
1432			 * skipped string, to later check if skipping a
1433			 * newline. */
1434			eng.re[eng.off] = eng.so[eng.off];
1435
1436		    /* Save start of possible previous matches */
1437		    eng.ss[eng.off] = eng.so[bas];
1438
1439		    /* Skip repetition instruction */
1440		    eng.cod += 5;
1441		}
1442		else {
1443		    /* -1 as an unsigned char */
1444		    if (eng.cod[2] != 0xff)
1445			eng.goff = eng.cod[2];
1446		    else
1447			eng.goff = -1;
1448
1449		    if (newline) {
1450			ptr = eng.bas + eng.re[eng.off];
1451			str = eng.bas + eng.so[eng.off];
1452			for (; ptr < str; ptr++)
1453			    if (*ptr == '\n') {
1454				eng.cod = eng.rcod[0];
1455				eng.so[0] = ptr - eng.bas + 1;
1456				eng.eo[0] = eng.so[0] - 1;
1457				eng.rstr[0] = eng.str = ptr + 1;
1458				eng.off = 0;
1459				goto reset;
1460			    }
1461			/* If looping, don't do too many noops */
1462			eng.re[eng.off] = ptr - eng.bas;
1463		    }
1464
1465		    if (eng.eo[eng.off] >= eng.so[eng.off]) {
1466			/* Note that this is only true if all possibly
1467			 * nested special repetitions also matched. */
1468
1469			if (eng.goff >= 0) {
1470			    if (eng.cod[5] == Re_Update)
1471				eng.gso[eng.goff] = eng.eo[bas] +
1472						    (eng.so[bas] > eng.eo[bas]);
1473			    else if (eng.geo[eng.goff] < eng.so[eng.off])
1474				eng.geo[eng.goff] = eng.so[eng.off];
1475			}
1476
1477			/* Jump relative offset */
1478			len = eng.cod[3] | (eng.cod[4] << 8);
1479
1480			/* Restore offset from where started trying */
1481			eng.so[bas] = eng.ss[eng.off];
1482			eng.eo[bas] = eng.eo[eng.off];
1483
1484			/* Pop stack and skip code */
1485			--eng.off;
1486			eng.cod += len;
1487		    }
1488		    else {
1489			/* Only give up if the entire string was scanned */
1490			if (eng.str < eng.end) {
1491			    /* Update restart point for next pattern */
1492			    eng.str = ++eng.rstr[eng.off];
1493
1494			    /* Reset start of nested match */
1495			    eng.so[eng.off] = eng.str - eng.bas;
1496
1497			    /* Skip repetition instruction */
1498			    eng.cod += 5;
1499			}
1500			else {
1501			    /* Entire string scanned and failed */
1502
1503			    /* Jump relative offset */
1504			    len = eng.cod[3] | (eng.cod[4] << 8);
1505
1506			    /* Restore offset from where started trying */
1507			    eng.so[bas] = eng.ss[eng.off];
1508			    eng.eo[bas] = eng.ss[eng.off] - 1;
1509
1510			    /* Pop stack and skip code */
1511			    --eng.off;
1512			    eng.cod += len;
1513			}
1514		    }
1515		}
1516		continue;
1517
1518
1519	    /****************************************************
1520	     * Repetition matched!				*
1521	     ****************************************************/
1522	    case Re_RepJump:
1523		/* eng.cod[1] is toplevel offset of repetition */
1524		if (eng.off > eng.cod[1])
1525		    /* If still needs to try matches */
1526		    eng.cod -= eng.cod[2];
1527		else
1528		    eng.cod += 3;	/* + RepJump + bas + len-size */
1529		continue;
1530
1531	    case Re_RepLongJump:
1532		/* eng.cod[1] is toplevel offset of repetition */
1533		if (eng.off > eng.cod[1])
1534		    /* If still needs to try matches */
1535		    eng.cod -= eng.cod[2] | (eng.cod[3] << 8);
1536		else
1537		    eng.cod += 4;	/* + RepLongJump + bas + len-size */
1538		continue;
1539
1540	    /****************************************************
1541	     * Finished						*
1542	     ****************************************************/
1543	    case Re_DoneIf:
1544		if (eng.eo[eng.off] >= eng.so[eng.off]) {
1545		    eng.so[0] = eng.ss[eng.off];
1546		    eng.eo[0] = eng.eo[eng.off];
1547		    goto done;
1548		}
1549		++eng.cod;
1550		continue;
1551	    case Re_MaybeDone:
1552		if (eng.eo[eng.off] >= eng.so[eng.off]) {
1553		    eng.so[0] = eng.ss[eng.off];
1554		    eng.eo[0] = eng.eo[eng.off];
1555		    goto done;
1556		}
1557		++eng.cod;
1558		continue;
1559	    case Re_Done:
1560		goto done;
1561
1562	    default:
1563		/* Fatal internal error */
1564		return (RE_ASSERT);
1565	}
1566
1567
1568wont:
1569	/* Surely won't match */
1570	if (eng.off == 0) {
1571	    eng.eo[0] = eng.so[0] - 1;
1572	    break;
1573	}
1574
1575
1576fail:
1577	if (eng.off == 0) {
1578	    /* If the entire string scanned */
1579	    if (++eng.str > eng.end) {
1580		eng.eo[0] = eng.so[0] - 1;
1581		break;
1582	    }
1583	    eng.goff = -1;
1584	    /* Update start of possible match after restart */
1585	    if (eng.eo[0] >= eng.so[0]) {
1586		/* If first fail */
1587		eng.str = eng.rstr[0];
1588		++eng.rstr[0];
1589		eng.so[0] = eng.str - eng.bas;
1590		eng.eo[0] = eng.so[eng.off] - 1;
1591	    }
1592	    else
1593		/* Just trying at next byte */
1594		++eng.so[0];
1595	}
1596	else
1597	    /* Remember this match failed */
1598	    eng.eo[eng.off] = eng.so[eng.off] - 1;
1599
1600	/* Restart code */
1601	eng.cod = eng.rcod[eng.off];
1602	continue;
1603
1604
1605match:
1606	/* If first match */
1607	if (eng.eo[eng.off] < eng.so[eng.off]) {
1608	    if (eng.off == 0)
1609		eng.rstr[0] = eng.str + 1;
1610	    eng.so[eng.off] = eng.eo[eng.off] = eng.str - eng.bas;
1611	}
1612	eng.eo[eng.off] += si;
1613	eng.cod += ci;
1614	eng.str += si;
1615	ci = si = 1;
1616	continue;
1617
1618done:
1619	break;
1620    }
1621
1622    if (nmatch) {
1623	if (flags & RE_STARTEND)
1624	    len = pmat[0].rm_so;
1625	else
1626	    len = 0;
1627	if (!nosub) {
1628	    if (preg->cod[1] != 0xff)
1629		eng.goff = preg->cod[1];
1630	    pmat[0].rm_so = eng.so[0];
1631	    pmat[0].rm_eo = eng.eo[0];
1632	    for (i = 1; i < nmatch; i++) {
1633		if (i - 1 <= eng.goff) {
1634		    pmat[i].rm_so = eng.gso[i - 1];
1635		    pmat[i].rm_eo = eng.geo[i - 1];
1636		}
1637		else {
1638		    pmat[i].rm_so = 0;
1639		    pmat[i].rm_eo = -1;
1640		}
1641	    }
1642	    if (len) {
1643		/* Update offsets, since the match was done in a substring */
1644		j = eng.goff + 2 > nmatch ? nmatch : eng.goff + 2;
1645		for (i = 0; i < j; i++) {
1646		    pmat[i].rm_so += len;
1647		    pmat[i].rm_eo += len;
1648		}
1649	    }
1650	}
1651	else {
1652	    /*	Already know these values, allow compiling the regex with
1653	     * RE_NOSUB to use parenthesis only for grouping, but avoiding
1654	     * the runtime overhead of keeping track of the subexpression
1655	     * offsets. */
1656	    pmat[0].rm_so = eng.so[0] + len;
1657	    pmat[0].rm_eo = eng.eo[0] + len;
1658	}
1659    }
1660
1661    return (eng.so[0] <= eng.eo[0] ? 0 : RE_NOMATCH);
1662}
1663
1664int
1665reerror(int ecode, const re_cod *preg, char *ebuffer, int ebuffer_size)
1666{
1667    static char *errors[] = {
1668	"No error",
1669	"Failed to match",			/* NOMATCH */
1670
1671	/* Errors not generated */
1672	"Invalid regular expression",		/* BADPAT */
1673	"Invalid collating element",		/* ECOLLATE */
1674	"Invalid character class",		/* ECTYPE */
1675
1676	"`\' applied to unescapable character",	/* EESCAPE */
1677	"Invalid backreference number",		/* ESUBREG */
1678	"Brackets `[ ]' not balanced",		/* EBRACK */
1679	"Parentheses `( )' not balanced",	/* EPAREN */
1680	"Braces `{ }' not balanced",		/* EBRACE */
1681	"Invalid repetition count(s) in `{ }'",	/* BADBR */
1682	"Invalid character range in `[ ]'",	/* ERANGE */
1683	"Out of memory",			/* ESPACE */
1684	"`?', `*', or `+' operand invalid",	/* BADRPT */
1685	"Empty (sub)expression",		/* EMPTY */
1686	"Assertion error - you found a bug",	/* ASSERT */
1687	"Invalid argument"			/* INVARG */
1688    };
1689    char *str;
1690
1691    if (ecode >= 0 && ecode < sizeof(errors) / sizeof(errors[0]))
1692	str = errors[ecode];
1693    else
1694	str = "Unknown error";
1695
1696    return (snprintf(ebuffer, ebuffer_size, "%s", str));
1697}
1698
1699void
1700refree(re_cod *cod)
1701{
1702    free(cod->cod);
1703    cod->cod = NULL;
1704}
1705
1706static void
1707reinit(void)
1708{
1709    int i;
1710    static int first = 1;
1711
1712    if (!first)
1713	return;
1714    first = 0;
1715
1716    re__alnum['_'] = 1;
1717
1718    for (i = '0'; i <= '7'; i++)
1719	re__alnum[i] = re__odigit[i] = re__ddigit[i] = re__xdigit[i] = 1;
1720
1721    for (; i <= '9'; i++)
1722	re__alnum[i] = re__ddigit[i] = re__xdigit[i] = 1;
1723
1724    for (i = 'a'; i <= 'f'; i++)
1725	re__alnum[i] = re__xdigit[i] = 1;
1726    for (; i <= 'z'; i++)
1727	re__alnum[i] = 1;
1728
1729    for (i = 'A'; i <= 'F'; i++)
1730	re__alnum[i] = re__xdigit[i] = 1;
1731    for (; i <= 'Z'; i++)
1732	re__alnum[i] = 1;
1733
1734    for (i = 1; i < 32; i++)
1735	re__control[i] = 1;
1736    re__control[127] = 1;
1737    /* Don't show tabs as control characters */
1738    re__control['\t'] = 0;
1739}
1740
1741static int
1742rec_check(re_inf *inf, int count)
1743{
1744    if (inf->len + count >= inf->spc) {
1745	int spc;
1746	unsigned char *cod;
1747
1748	if ((spc = (count % 64)) != 0)
1749	    spc = 64 - spc;
1750	spc += count + inf->spc;
1751	if ((cod = realloc(inf->cod, spc)) == NULL)
1752	    return (inf->ecode = RE_ESPACE);
1753	inf->cod = cod;
1754	inf->spc = spc;
1755    }
1756
1757    return (inf->ecode);
1758}
1759
1760static int
1761rec_code(re_inf *inf, ReCode code)
1762{
1763    if (rec_check(inf, 1) == 0)
1764	inf->cod[inf->len++] = code;
1765
1766    return (inf->ecode);
1767}
1768
1769static int
1770rec_byte(re_inf *inf, int value)
1771{
1772    if (rec_check(inf, 1) == 0)
1773	inf->cod[inf->len++] = value;
1774
1775    return (inf->ecode);
1776}
1777
1778static int
1779rec_code_byte(re_inf *inf, ReCode code, int value)
1780{
1781    if (rec_check(inf, 2) == 0) {
1782	inf->cod[inf->len++] = code;
1783	inf->cod[inf->len++] = value;
1784    }
1785
1786    return (inf->ecode);
1787}
1788
1789static int
1790rec_length(re_inf *inf, int length)
1791{
1792    int lo, hi, two;
1793
1794    if (length >= 16384)
1795	return (inf->ecode = RE_ESPACE);
1796
1797    lo = length & 0xff;
1798    hi = length & 0xff00;
1799    two = ((length > 0x7f) != 0) + 1;
1800    if (two == 2) {
1801	hi <<= 1;
1802	hi |= (lo & 0x80) != 0;
1803	lo |= 0x80;
1804    }
1805
1806    if (rec_check(inf, two) == 0) {
1807	inf->cod[inf->len++] = lo;
1808	if (two == 2)
1809	    inf->cod[inf->len++] = hi >> 8;
1810    }
1811
1812    return (inf->ecode);
1813}
1814
1815static int
1816rec_byte_byte(re_inf *inf, int value0, int value1)
1817{
1818    if (rec_check(inf, 2) == 0) {
1819	inf->cod[inf->len++] = value0;
1820	inf->cod[inf->len++] = value1;
1821    }
1822
1823    return (inf->ecode);
1824}
1825
1826static int
1827rec_code_byte_byte(re_inf *inf, ReCode code, int value0, int value1)
1828{
1829    if (rec_check(inf, 3) == 0) {
1830	inf->cod[inf->len++] = code;
1831	inf->cod[inf->len++] = value0;
1832	inf->cod[inf->len++] = value1;
1833    }
1834
1835    return (inf->ecode);
1836}
1837
1838static int
1839rec_build_alt(re_inf *inf, rec_alt *alt)
1840{
1841    int offset, value, bas = inf->bas + 1;
1842
1843    if (alt) {
1844	if (alt->next) {
1845	    if (rec_inc_spc(inf))
1846		return (inf->ecode);
1847
1848	    /* A real a list of alternatives */
1849	    rec_code(inf, Re_Alt);
1850
1851	    offset = inf->len;		/* Remember current offset */
1852	    rec_byte_byte(inf, 0, 0);	/* Reserve two bytes for retry address */
1853	    while (alt && inf->ecode == 0) {
1854		if (rec_build_pat(inf, alt->pat))
1855		    break;
1856		alt = alt->next;
1857		if (alt && inf->ecode == 0) {
1858		    /* Handle (hyper)complex repetitions */
1859		    if (inf->bas != bas) {
1860			/* Duplicate patterns up to end of expression */
1861			rec_build_pat(inf, inf->apat);
1862			/* Restore engine state for next alternative(s) */
1863			rec_alt_spc(inf, bas - 1);
1864		    }
1865
1866		    /* If the jump would be so long */
1867		    if ((value = inf->len - offset) >= 16384) {
1868			inf->ecode = RE_ESPACE;
1869			break;
1870		    }
1871		    inf->cod[offset] = value & 0xff;
1872		    inf->cod[offset + 1] = (value & 0xff00) >> 8;
1873
1874		    rec_code(inf, Re_AltNext);
1875		    offset = inf->len;
1876		    rec_byte_byte(inf, 0, 0);
1877		}
1878	    }
1879	    if (inf->ecode == 0) {
1880		/* Handle (hyper)complex repetitions */
1881		if (inf->bas != bas) {
1882		    /* Duplicate patterns up to end of expression */
1883		    rec_build_pat(inf, inf->apat);
1884		    /* Restore engine state for next alternative(s) */
1885		    rec_alt_spc(inf, bas - 1);
1886		}
1887
1888		/* If the jump would be so long */
1889		if ((value = inf->len - offset) >= 16384)
1890		    return (inf->ecode = RE_ESPACE);
1891		inf->cod[offset] = value & 0xff;
1892		inf->cod[offset + 1] = (value & 0xff00) >> 8;
1893		/* Last jump is here */
1894		rec_code(inf, Re_AltDone);
1895	    }
1896	    rec_dec_spc(inf);
1897	}
1898	else
1899	    /* Single alternative */
1900	    rec_build_pat(inf, alt->pat);
1901    }
1902
1903    return (inf->ecode);
1904}
1905
1906static int
1907rec_build_pat(re_inf *inf, rec_pat *pat)
1908{
1909    rec_pat *apat;
1910    int length, offset = 0, distance, jump = 0, bas = 0;
1911
1912    while (pat && inf->ecode == 0) {
1913	if (pat->rep) {
1914	    bas = inf->bas;
1915	    if (pat->type == Rep_Group && !inf->par && rec_code(inf, Re_Open))
1916		return (inf->ecode);
1917	    if (rec_inc_spc(inf))
1918		return (inf->ecode);
1919	    offset = inf->len;
1920	    if (rec_build_rep(inf, pat->rep))
1921		break;
1922	    /* Reserve space to jump after repetition done */
1923	    jump = inf->len;
1924	    rec_byte_byte(inf, 0, 0);
1925	}
1926	switch (pat->type) {
1927	    case Rep_AnyAnyTimes:
1928	    case Rep_AnyMaybe:
1929	    case Rep_AnyAtLeast:
1930		if (rec_add_spc(inf, pat->type == Rep_AnyMaybe))
1931		    return (inf->ecode);
1932		if (rec_code(inf, (ReCode)pat->type) == 0 &&
1933		    rec_byte(inf, inf->bas - 1) == 0 &&
1934		    rec_byte(inf, inf->ref - 1) == 0)
1935		    rec_off_spc(inf);
1936		break;
1937	    case Rep_Literal:
1938	    case Rep_LiteralNot:
1939	    case Rep_SearchLiteral:
1940		rec_code_byte(inf, (ReCode)pat->type, pat->data.chr);
1941		break;
1942	    case Rep_CaseLiteral:
1943	    case Rep_CaseLiteralNot:
1944	    case Rep_SearchCaseLiteral:
1945		rec_code_byte_byte(inf, (ReCode)pat->type,
1946				   pat->data.cse.lower, pat->data.cse.upper);
1947		break;
1948	    case Rep_Range:
1949	    case Rep_RangeNot:
1950		if (rec_code(inf, (ReCode)pat->type) == 0)
1951		    rec_build_rng(inf, pat->data.rng);
1952		break;
1953	    case Rep_String:
1954	    case Rep_SearchString:
1955	    case Rep_CaseString:
1956	    case Rep_SearchCaseString:
1957		rec_code(inf, (ReCode)pat->type);
1958		length = strlen((char*)pat->data.str);
1959		if (rec_length(inf, length) == 0 && rec_check(inf, length) == 0) {
1960		    memcpy(inf->cod + inf->len, pat->data.str, length);
1961		    inf->len += length;
1962		}
1963		break;
1964	    case Rep_Any:
1965	    case Rep_AnyEatAnyTimes:
1966	    case Rep_AnyEatMaybe:
1967	    case Rep_AnyEatAtLeast:
1968	    case Rep_Odigit:
1969	    case Rep_OdigitNot:
1970	    case Rep_Digit:
1971	    case Rep_DigitNot:
1972	    case Rep_Xdigit:
1973	    case Rep_XdigitNot:
1974	    case Rep_Space:
1975	    case Rep_SpaceNot:
1976	    case Rep_Tab:
1977	    case Rep_Newline:
1978	    case Rep_Lower:
1979	    case Rep_Upper:
1980	    case Rep_Alnum:
1981	    case Rep_AlnumNot:
1982	    case Rep_Control:
1983	    case Rep_ControlNot:
1984	    case Rep_Bol:
1985	    case Rep_Eol:
1986	    case Rep_Bow:
1987	    case Rep_Eow:
1988		rec_code(inf, (ReCode)pat->type);
1989		break;
1990	    case Rep_Backref:
1991		rec_code_byte(inf, Re_Backref, pat->data.chr);
1992		break;
1993	    case Rep_Group:
1994		if (pat->rep == NULL && !inf->par && rec_code(inf, Re_Open))
1995		    break;
1996		apat = inf->apat;
1997		inf->apat = pat->next;
1998		rec_build_grp(inf, pat->data.grp);
1999		inf->apat = apat;
2000		break;
2001	    case Rep_StringList:
2002		rec_build_stl(inf, pat->data.stl);
2003		break;
2004	}
2005	if (pat->rep) {
2006#if 0
2007	    if (rec_dec_spc(inf))
2008		return (inf->ecode);
2009#else
2010	    if (rec_rep_spc(inf, bas))
2011		return (inf->ecode);
2012#endif
2013	    distance = inf->len - offset;
2014	    if (distance > 255) {
2015		if (rec_code(inf, Re_RepLongJump) ||
2016		    rec_byte(inf, inf->bas) ||
2017		    rec_byte(inf, distance & 0xff) ||
2018		    rec_byte(inf, (distance & 0xff00) >> 8))
2019		break;
2020	    }
2021	    else if (rec_code(inf, Re_RepJump) ||
2022		     rec_byte(inf, inf->bas) ||
2023		     rec_byte(inf, distance))
2024		break;
2025	    distance = inf->len - offset;
2026	    inf->cod[jump] = distance & 0xff;
2027	    inf->cod[jump + 1] = (distance & 0xff00) >> 8;
2028	}
2029	pat = pat->next;
2030    }
2031
2032    return (inf->ecode);
2033}
2034
2035static int
2036rec_build_rng(re_inf *inf, rec_rng *rng)
2037{
2038    if (rec_check(inf, sizeof(rng->range)) == 0) {
2039	memcpy(inf->cod + inf->len, rng->range, sizeof(rng->range));
2040	inf->len += sizeof(rng->range);
2041    }
2042
2043    return (inf->ecode);
2044}
2045
2046static int
2047rec_build_grp(re_inf *inf, rec_grp *grp)
2048{
2049    int par = inf->par;
2050
2051    if (!(inf->flags & RE_NOSUB)) {
2052	++inf->par;
2053	if (par == 0)
2054	    ++inf->ref;
2055	if (rec_build_alt(inf, grp->alt) == 0) {
2056	    if (par == 0) {
2057		if (grp->comp)
2058		    rec_code_byte(inf, Re_Update, inf->ref - 1);
2059		else
2060		    rec_code(inf, Re_Close);
2061	    }
2062	}
2063	--inf->par;
2064    }
2065    else
2066	rec_build_alt(inf, grp->alt);
2067
2068    return (inf->ecode);
2069}
2070
2071static int
2072rec_build_stl(re_inf *inf, rec_stl *stl)
2073{
2074    int i, len, rlen;
2075    ReCode code;
2076
2077    /* Calculate jump distance information */
2078    rlen = stl->tlen + stl->nstrs + 4;
2079    /* + code + nstrs + place-offset + data-length */
2080
2081    if (stl->nstrs >= LARGE_STL_COUNT) {
2082	rlen += 511;		/* Don't write number of strings */
2083	code = stl->type == Rep_StringList ?
2084		Re_LargeStringList : Re_LargeCaseStringList;
2085    }
2086    else
2087	code = (ReCode)stl->type;
2088
2089    if (rlen >= 16386)
2090	return (inf->ecode = RE_ESPACE);
2091    if (rec_check(inf, rlen) ||
2092	rec_code(inf, code))
2093	return (inf->ecode);
2094
2095    /* Space is allocated, just write the data */
2096    if (stl->nstrs < LARGE_STL_COUNT)
2097	inf->cod[inf->len++] = stl->nstrs;
2098
2099    inf->cod[inf->len++] = rlen & 0xff;
2100    inf->cod[inf->len++] = (rlen & 0xff00) >> 8;
2101
2102    if (stl->nstrs < LARGE_STL_COUNT) {
2103	for (i = 0; i < stl->nstrs; i++)
2104	    inf->cod[inf->len++] = stl->lens[i];
2105	for (i = 0; i < stl->nstrs; i++) {
2106	    len = stl->lens[i];
2107	    if (len > 2) {
2108		memcpy(inf->cod + inf->len, stl->strs[i], len);
2109		inf->len += len;
2110	    }
2111	    else {
2112		if (len == 1)
2113		    inf->cod[inf->len++] = (long)stl->strs[i];
2114		else {
2115		    inf->cod[inf->len++] = (long)stl->strs[i] & 0xff;
2116		    inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8;
2117		}
2118	    }
2119	}
2120    }
2121    else {
2122	/* The string length goes before the string itself */
2123	int j, chl, chu;
2124
2125	/* Fill everything with an invalid jump address */
2126	memset(inf->cod + inf->len, 0xff, 512);
2127	for (i = len = 0, j = -1; i < stl->nstrs; i++) {
2128	    chl = stl->lens[i] > 2 ? stl->strs[i][0] : (long)stl->strs[i] & 0xff;
2129	    if (chl != j) {
2130		inf->cod[inf->len + (chl << 1)] = len & 0xff;
2131		inf->cod[inf->len + (chl << 1) + 1] = (len & 0xff00) >> 8;
2132		if (code == Re_LargeCaseStringList) {
2133		    chu = stl->lens[i] > 2 ?
2134			stl->strs[i][1] : ((long)(stl->strs[i]) & 0xff00) >> 8;
2135		    inf->cod[inf->len + (chu << 1)] = len & 0xff;
2136		    inf->cod[inf->len + (chu << 1) + 1] = (len & 0xff00) >> 8;
2137		}
2138		j = chl;
2139	    }
2140	    len += stl->lens[i] + 1;
2141	}
2142	inf->len += 512;
2143
2144	for (i = 0; i < stl->nstrs; i++) {
2145	    len = stl->lens[i];
2146	    inf->cod[inf->len++] = len;
2147	    if (len > 2) {
2148		memcpy(inf->cod + inf->len, stl->strs[i], len);
2149		inf->len += len;
2150	    }
2151	    else {
2152		if (len == 1)
2153		    inf->cod[inf->len++] = (long)stl->strs[i];
2154		else {
2155		    inf->cod[inf->len++] = (long)stl->strs[i] & 0xff;
2156		    inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8;
2157		}
2158	    }
2159	}
2160    }
2161
2162    return (inf->ecode);
2163}
2164
2165static int
2166rec_build_rep(re_inf *inf, rec_rep *rep)
2167{
2168    if (rep) {
2169	switch (rep->type) {
2170	    case Rer_AnyTimes:
2171	    case Rer_AtLeast:
2172	    case Rer_Maybe:
2173		rec_code(inf, (ReCode)rep->type);
2174		break;
2175	    case Rer_Exact:
2176		if (rec_code(inf, Re_Exact) == 0)
2177		    rec_byte(inf, rep->mine);
2178		break;
2179	    case Rer_Min:
2180		if (rec_code(inf, Re_Min) == 0)
2181		    rec_byte(inf, rep->mine);
2182		break;
2183	    case Rer_Max:
2184		if (rec_code(inf, Re_Max) == 0)
2185		    rec_byte(inf, rep->maxc);
2186		break;
2187	    case Rer_MinMax:
2188		if (rec_code(inf, Re_MinMax) == 0 &&
2189		    rec_byte(inf, rep->mine) == 0)
2190		    rec_byte(inf, rep->maxc);
2191		break;
2192	}
2193	/* It is incremented in rec_build_pat */
2194	rec_byte(inf, inf->bas - 1);
2195    }
2196
2197    return (inf->ecode);
2198}
2199
2200static int
2201rec_inc_spc(re_inf *inf)
2202{
2203    if (++inf->bas >= MAX_DEPTH)
2204	return (inf->ecode = RE_ESPACE);
2205
2206    return (inf->ecode);
2207}
2208
2209static int
2210rec_dec_spc(re_inf *inf)
2211{
2212    if (--inf->bas < 0)
2213	return (inf->ecode = RE_ASSERT);
2214
2215    return (inf->ecode);
2216}
2217
2218static int
2219rec_add_spc(re_inf *inf, int maybe)
2220{
2221    if (++inf->bas >= MAX_DEPTH)
2222	return (inf->ecode = RE_ESPACE);
2223    inf->sp[inf->bas] = maybe + 1;
2224
2225    return (inf->ecode);
2226}
2227
2228/* Could be joined with rec_rep_spc, code almost identical */
2229static int
2230rec_alt_spc(re_inf *inf, int top)
2231{
2232    int distance, i, bas = inf->bas;
2233
2234    while ((inf->bas > top) && inf->sp[inf->bas]) {
2235	/* Jump to this repetition for cleanup */
2236	distance = inf->len - inf->sr[inf->bas];
2237
2238	/* This will generate a jump to a jump decision opcode */
2239	inf->sj[inf->bas] = inf->len;
2240
2241	if (distance > 255) {
2242	    if (rec_code(inf, Re_RepLongJump) ||
2243		rec_byte(inf, inf->bas - 1) ||
2244		rec_byte(inf, distance & 0xff) ||
2245		rec_byte(inf, (distance & 0xff00) >> 8))
2246	    break;
2247	}
2248	else if (rec_code(inf, Re_RepJump) ||
2249		 rec_byte(inf, inf->bas - 1) ||
2250		 rec_byte(inf, distance))
2251 	    break;
2252
2253	/* Top of stack value before repetition, or end condition value */
2254	--inf->bas;
2255    }
2256
2257    i = inf->bas + 1;
2258
2259    if (inf->ecode == 0 && i <= bas && inf->sp[i]) {
2260	/*  Only the repetition at the bottom jump to code after testing
2261	 * all possibilities */
2262	distance = inf->len - inf->sr[i];
2263	inf->cod[inf->sr[i] + 3] = distance & 0xff;
2264	inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2265
2266	/* The bottom jump is here */
2267	if (rec_code(inf, inf->sp[i] == 1 ? Re_DoneIf : Re_MaybeDone))
2268	   return (inf->ecode);
2269
2270	/*  Generate jumps to the previous special repetition */
2271	for (++i; i <= bas; i++) {
2272	    if (inf->sp[i]) {
2273		distance = inf->sj[i] - inf->sr[i];
2274		inf->cod[inf->sr[i] + 3] = distance & 0xff;
2275		inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2276	    }
2277	}
2278    }
2279
2280    return (inf->ecode);
2281}
2282
2283static int
2284rec_rep_spc(re_inf *inf, int top)
2285{
2286    int distance, i, bas = inf->bas;
2287
2288    while (inf->bas > top) {
2289	if (inf->sp[inf->bas]) {
2290	    /* Jump to this repetition for cleanup */
2291	    distance = inf->len - inf->sr[inf->bas];
2292
2293	    /* This will generate a jump to a jump decision opcode */
2294	    inf->sj[inf->bas] = inf->len;
2295
2296	    if (distance > 255) {
2297		if (rec_code(inf, Re_RepLongJump) ||
2298		    rec_byte(inf, inf->bas - 1) ||
2299		    rec_byte(inf, distance & 0xff) ||
2300		    rec_byte(inf, (distance & 0xff00) >> 8))
2301		break;
2302	    }
2303	    else if (rec_code(inf, Re_RepJump) ||
2304		     rec_byte(inf, inf->bas - 1) ||
2305		     rec_byte(inf, distance))
2306		break;
2307	}
2308
2309	/* Top of stack value before repetition, or end condition value */
2310	--inf->bas;
2311    }
2312
2313    /* Find first special repetition offset. XXX This should be a noop */
2314    for (i = 0; i < bas; i++)
2315	if (inf->sp[i])
2316	    break;
2317
2318    if (inf->ecode == 0 && i <= bas && inf->sp[i]) {
2319	/*  Only the repetition at the bottom jump to code after testing
2320	 * all possibilities */
2321	distance = inf->len - inf->sr[i];
2322	inf->cod[inf->sr[i] + 3] = distance & 0xff;
2323	inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2324
2325	/*  Generate jumps to the previous special repetition */
2326	for (++i; i <= bas; i++) {
2327	    if (inf->sp[i]) {
2328		distance = inf->sj[i] - inf->sr[i];
2329		inf->cod[inf->sr[i] + 3] = distance & 0xff;
2330		inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2331	    }
2332	}
2333    }
2334
2335    return (inf->ecode);
2336}
2337
2338static int
2339rec_off_spc(re_inf *inf)
2340{
2341    /* The jump address before the three bytes instruction */
2342    inf->sr[inf->bas] = inf->len - 3;
2343    /*  Don't know yet where to go after done with the special
2344     * repetition, just reserve two bytes for the jump address. */
2345    return (rec_byte_byte(inf, 0, 0));
2346}
2347
2348#ifdef DEBUG
2349static void
2350redump(re_cod *code)
2351{
2352    int i, j, k;
2353    unsigned char *cod = code->cod, *stl;
2354
2355    if (cod[0] & RE_NOSUB)
2356	printf("Nosub\n");
2357    if (cod[0] & RE_NEWLINE)
2358	printf("Newline\n");
2359    ++cod;
2360    if (cod[0] != 0xff)
2361	printf("%d backrefs\n", cod[0] + 1);
2362    ++cod;
2363    for (;;) {
2364	switch (*cod++) {
2365	    case Re_Open:
2366		printf("Open");
2367		break;
2368	    case Re_Close:
2369		printf("Close");
2370		break;
2371	    case Re_Update:
2372		printf("Update (%d)", (int)*cod++);
2373		break;
2374	    case Re_Alt:
2375		printf("Alt");
2376		i = cod[0] | cod[1];
2377		cod += 2;
2378		printf(" %d", i);
2379		break;
2380	    case Re_AltNext:
2381		printf("Alt-next");
2382		i = cod[0] | cod[1];
2383		cod += 2;
2384		printf(" %d", i);
2385		break;
2386	    case Re_AltDone:
2387		printf("Alt-done");
2388		break;
2389	    case Re_AnyTimes:
2390		printf("-> Anytimes %d", (int)*cod++);
2391		i = cod[0] | (cod[1] << 8);
2392		cod += 2;
2393		printf(" /%d", i);
2394		break;
2395	    case Re_AnyEatAnyTimes:
2396		printf("Any-eat-anytimes");
2397		break;
2398	    case Re_AnyAnyTimes:
2399		printf("-> Any-anytimes %d", (int)*cod++);
2400		printf(" (%d)", (int)*cod++);
2401		i = cod[0] | (cod[1] << 8);
2402		cod += 2;
2403		printf(" /%d", i);
2404		break;
2405	    case Re_AnyEatMaybe:
2406		printf("Any-eat-maybe");
2407		break;
2408	    case Re_AnyMaybe:
2409		printf("-> Any-maybe %d", (int)*cod++);
2410		printf(" (%d)", (int)*cod++);
2411		i = cod[0] | (cod[1] << 8);
2412		cod += 2;
2413		printf(" /%d", i);
2414		break;
2415	    case Re_AnyAtLeast:
2416		printf("-> Any-atleast %d", (int)*cod++);
2417		printf(" (%d)", (int)*cod++);
2418		i = cod[0] | (cod[1] << 8);
2419		cod += 2;
2420		printf(" /%d", i);
2421		break;
2422	    case Re_AnyEatAtLeast:
2423		printf("Any-eat-atleast");
2424		break;
2425	    case Re_Maybe:
2426		printf("-> Maybe %d", (int)*cod++);
2427		i = cod[0] | (cod[1] << 8);
2428		cod += 2;
2429		printf(" /%d", i);
2430		break;
2431	    case Re_AtLeast:
2432		printf("-> Atleast %d", (int)*cod++);
2433		i = cod[0] | (cod[1] << 8);
2434		cod += 2;
2435		printf(" /%d", i);
2436		break;
2437	    case Re_Exact:
2438		printf("-> Exact ");
2439		i = *cod++;
2440		printf("%d", i);
2441		printf(" %d", (int)*cod++);
2442		i = cod[0] | (cod[1] << 8);
2443		cod += 2;
2444		printf(" /%d", i);
2445		break;
2446	    case Re_Min:
2447		printf("-> Min ");
2448		i = *cod++;
2449		printf("%d", i);
2450		printf(" %d", (int)*cod++);
2451		i = cod[0] | (cod[1] << 8);
2452		cod += 2;
2453		printf(" /%d", i);
2454		break;
2455	    case Re_Max:
2456		printf("-> Max ");
2457		i = *cod++;
2458		printf("%d", i);
2459		printf(" %d", (int)*cod++);
2460		i = cod[0] | (cod[1] << 8);
2461		cod += 2;
2462		printf(" /%d", i);
2463		break;
2464	    case Re_MinMax:
2465		printf("-> Min-max ");
2466		i = *cod++;
2467		printf("%d ", i);
2468		i = *cod++;
2469		printf("%d", i);
2470		printf(" %d", (int)*cod++);
2471		i = cod[0] | (cod[1] << 8);
2472		cod += 2;
2473		printf(" /%d", i);
2474		break;
2475	    case Re_RepJump:
2476		printf("<- Rep-jump %d ", (int)*cod++);
2477		i = *cod++;
2478		printf("%d", i);
2479		break;
2480	    case Re_RepLongJump:
2481		printf("<- Rep-long-jump %d ", (int)*cod++);
2482		i = cod[0] | (cod[1] << 8);
2483		printf("%d", i);
2484		break;
2485	    case Re_Any:
2486		printf("Any");
2487		break;
2488	    case Re_Odigit:
2489		printf("Odigit");
2490		break;
2491	    case Re_OdigitNot:
2492		printf("Odigit-not");
2493		break;
2494	    case Re_Digit:
2495		printf("Digit");
2496		break;
2497	    case Re_DigitNot:
2498		printf("Digit-not");
2499		break;
2500	    case Re_Xdigit:
2501		printf("Xdigit");
2502		break;
2503	    case Re_XdigitNot:
2504		printf("Xdigit-not");
2505		break;
2506	    case Re_Space:
2507		printf("Space");
2508		break;
2509	    case Re_SpaceNot:
2510		printf("Space-not");
2511		break;
2512	    case Re_Tab:
2513		printf("Tab");
2514		break;
2515	    case Re_Newline:
2516		printf("Newline");
2517		break;
2518	    case Re_Lower:
2519		printf("Lower");
2520		break;
2521	    case Re_Upper:
2522		printf("Upper");
2523		break;
2524	    case Re_Alnum:
2525		printf("Alnum");
2526		break;
2527	    case Re_AlnumNot:
2528		printf("Alnum-not");
2529		break;
2530	    case Re_Control:
2531		printf("Control");
2532		break;
2533	    case Re_ControlNot:
2534		printf("Control-not");
2535		break;
2536	    case Re_Bol:
2537		printf("Bol");
2538		break;
2539	    case Re_Eol:
2540		printf("Eol");
2541		break;
2542	    case Re_Bow:
2543		printf("Bow");
2544		break;
2545	    case Re_Eow:
2546		printf("Eow");
2547		break;
2548	    case Re_Range:
2549		printf("Range ");
2550		goto range;
2551	    case Re_RangeNot:
2552		printf("Range-not ");
2553range:
2554		for (i = 0; i < 256; i += 32) {
2555		    for (j = k = 0; j < 32; j++)
2556			k |= (*cod++ & 1) << (31 - j);
2557		    printf("%x ", k);
2558		}
2559		break;
2560	    case Re_Literal:
2561		printf("Literal %c", *cod++);
2562		break;
2563	    case Re_LiteralNot:
2564		printf("Literal-not %c", *cod++);
2565		break;
2566	    case Re_SearchLiteral:
2567		printf("Search-literal %c", *cod++);
2568		break;
2569	    case Re_CaseLiteral:
2570		printf("Case-literal %c", *cod++);
2571		putchar(*cod++);
2572		break;
2573	    case Re_CaseLiteralNot:
2574		printf("Case-literal-not %c", *cod++);
2575		putchar(*cod++);
2576		break;
2577	    case Re_SearchCaseLiteral:
2578		printf("Search-case-literal %c", *cod++);
2579		putchar(*cod++);
2580		break;
2581	    case Re_String:
2582		printf("String ");
2583		goto string;
2584	    case Re_SearchString:
2585		printf("Search-string ");
2586		goto string;
2587	    case Re_CaseString:
2588		printf("Case-string ");
2589		goto string;
2590	    case Re_SearchCaseString:
2591		printf("Search-case-string ");
2592string:
2593		i = *cod++;
2594		if (i & 0x80)
2595		    i = (i & 0x7f) | (*cod++ << 7);
2596		for (j = 0; j < i; j++)
2597		    putchar(*cod++);
2598		break;
2599	    case Re_StringList:
2600		printf("String-list");
2601		goto string_list;
2602	    case Re_CaseStringList:
2603		printf("Case-string-list");
2604string_list:
2605		j = *cod++;
2606		cod += 2;
2607		stl = cod + j;
2608		for (i = 0; i < j; i++) {
2609		    k = *cod++;
2610		    putchar(i ? ',' : ' ');
2611		    fwrite(stl, k, 1, stdout);
2612		    stl += k;
2613		}
2614		cod = stl;
2615		break;
2616	    case Re_LargeStringList:
2617		printf("Large-string-list");
2618large_string_list:
2619		i = cod[0] | (cod[1] << 8);
2620		stl = cod + i - 1;
2621		for (i = 0, cod += 514; cod < stl; i++) {
2622		    k = *cod++;
2623		    putchar(i ? ',' : ' ');
2624		    fwrite(cod, k, 1, stdout);
2625		    cod += k;
2626		}
2627		cod = stl;
2628		break;
2629	    case Re_LargeCaseStringList:
2630		printf("Large-case-string-list");
2631		goto large_string_list;
2632	    case Re_Backref:
2633		printf("Backref %d", (int)*cod++);
2634		break;
2635	    case Re_DoneIf:
2636		printf("Done-if");
2637		break;
2638	    case Re_MaybeDone:
2639		printf("Maybe-done");
2640		break;
2641	    case Re_Done:
2642		printf("Done\n");
2643		return;
2644	}
2645	putchar('\n');
2646    }
2647}
2648#endif
2649