1/*
2 * Copyright (c) 2002 by The XFree86 Project, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 * Except as contained in this notice, the name of the XFree86 Project shall
23 * not be used in advertising or otherwise to promote the sale, use or other
24 * dealings in this Software without prior written authorization from the
25 * XFree86 Project.
26 *
27 * Author: Paulo César Pereira de Andrade
28 */
29
30/* $XFree86: xc/programs/xedit/lisp/re/re.c,v 1.8 2002/11/17 07:51:30 paulo Exp $ */
31
32#include <stdio.h>
33#include "rep.h"
34
35/*
36 * Types
37 */
38
39/*  Information used when generating the final form of the compiled re.
40 */
41struct _re_inf {
42    rec_alt *alt;
43    unsigned char *cod;
44    long len;
45    long spc;
46
47    /* Start offset of special repetition instruction */
48    long sr[MAX_DEPTH];
49
50    /* Jump offset of special repetition instruction */
51    long sj[MAX_DEPTH];
52
53    /* Just a flag, to know if this nesting is for a special repetition */
54    char sp[MAX_DEPTH];
55
56    int bas;			/* Alternatives/repetitions depth */
57    int par;			/* Open parenthesis counter */
58    int ref;			/* Backreference counter */
59
60    rec_pat *apat;		/*  Alternatives duplicate patterns
61				 * if a special repetition is found,
62				 * this is done to somewhat simplify
63				 * the bytecode engine and still allow
64				 * some complex (and time consuming)
65				 * patterns. */
66
67    int flags;
68    int ecode;
69};
70
71/*  This structure is not associated with re_cod as it's data only matters
72 * to the current match search.
73 */
74struct _re_eng {
75    unsigned char *bas;		/* Base string pointer */
76    unsigned char *str;		/* String to search for pattern */
77    unsigned char *end;		/* Where to stop searching */
78    unsigned char *cod;		/* Pointer in the re_cod structure */
79    long off;			/* Number of used entries in so/eo etc */
80
81    /* Match offset/nesting information */
82    long so[MAX_DEPTH];		/* (s)tart of (m)atch */
83    long eo[MAX_DEPTH];		/* (e)nd of (m)atch */
84    long sv[MAX_DEPTH];		/* (s)a(v)e match end offset */
85    long re[MAX_DEPTH];		/* (re)petition count */
86    long ss[MAX_DEPTH];		/* (s)ave (s)tart of match */
87    unsigned char *rcod[MAX_DEPTH];	/* restart position in regex code */
88    unsigned char *rstr[MAX_DEPTH];	/* restart position in string */
89
90    /* Group/backreference information */
91    long goff;
92    long gso[9];
93    long geo[9];
94};
95
96/*
97 * Prototypes
98 */
99static void reinit(void);
100static int rec_check(re_inf*, int);
101static int rec_code(re_inf*, ReCode);
102static int rec_byte(re_inf*, int);
103static int rec_byte_byte(re_inf*, int, int);
104static int rec_code_byte(re_inf*, ReCode, int);
105static int rec_length(re_inf*, int);
106static int rec_code_byte_byte(re_inf*, ReCode, int, int);
107static int rec_build_alt(re_inf*, rec_alt*);
108static int rec_build_pat(re_inf*, rec_pat*);
109static int rec_build_rng(re_inf*, rec_rng*);
110static int rec_build_grp(re_inf*, rec_grp*);
111static int rec_build_stl(re_inf*, rec_stl*);
112static int rec_build_rep(re_inf*, rec_rep*);
113static int rec_inc_spc(re_inf*);
114static int rec_dec_spc(re_inf*);
115static int rec_add_spc(re_inf*, int);
116static int rec_off_spc(re_inf*);
117static int rec_alt_spc(re_inf*, int);
118static int rec_rep_spc(re_inf*, int);
119#ifdef DEBUG
120static void redump(re_cod*);
121#endif
122
123/*
124 * Initialization
125 */
126unsigned char re__alnum[256];
127unsigned char re__odigit[256];
128unsigned char re__ddigit[256];
129unsigned char re__xdigit[256];
130unsigned char re__control[256];
131
132/*
133 * Implementation
134 */
135int
136recomp(re_cod *preg, const char *pattern, int flags)
137{
138    int i, ecode;
139    re_inf inf;
140
141    reinit();
142
143    preg->cod = NULL;
144    inf.alt = irec_comp(pattern,
145			flags & RE_PEND ? preg->re_endp :
146				pattern + strlen(pattern),
147			flags, &ecode);
148    if (ecode != 0)
149	return (ecode);
150
151    inf.cod = NULL;
152    inf.len = inf.spc = 0;
153    inf.bas = 0;
154    inf.par = 0;
155    inf.ref = 0;
156    inf.apat = NULL;
157    inf.flags = flags;
158    inf.ecode = 0;
159    for (i = 0; i < MAX_DEPTH; i++)
160	inf.sp[i] = 0;
161
162    /* First byte is runtime modifier flags */
163    if (rec_byte(&inf, flags & (RE_NEWLINE | RE_NOSUB)) == 0 &&
164	rec_byte(&inf, 0xff) == 0 &&
165	rec_build_alt(&inf, inf.alt) == 0 &&
166	rec_rep_spc(&inf, 0) == 0 &&
167	rec_code(&inf, Re_Done) == 0) {
168	/*  Number of possible references, loops will not leave this
169	 * value correct, but it is cheap to read it from the second
170	 * byte, instead of adding several extra checks in the bytecode. */
171	if (inf.ref)
172	    inf.cod[1] = inf.ref - 1;
173	preg->cod = inf.cod;
174	/*  Public structure member */
175	preg->re_nsub = inf.ref;
176    }
177
178    irec_free_alt(inf.alt);
179    if (inf.ecode)
180	free(inf.cod);
181#ifdef DEBUG
182    else if (flags & RE_DUMP)
183	redump(preg);
184#endif
185
186    return (inf.ecode);
187}
188
189int
190reexec(const re_cod *preg, const char *string,
191       int nmatch, re_mat pmat[], int flags)
192{
193    unsigned char *ptr, *str, newline, nosub;
194    int len, si, ci, bas, i, j, k, l, m;
195    re_eng eng;
196
197    if (preg == NULL || preg->cod == NULL || nmatch < 0 ||
198	((flags & RE_STARTEND) &&
199	 (pmat == NULL || pmat[0].rm_eo < pmat[0].rm_so)))
200	return (RE_INVARG);
201
202    eng.str = (unsigned char*)string;
203    if (flags & RE_STARTEND) {
204	eng.end = eng.str + pmat[0].rm_eo;
205	eng.str += pmat[0].rm_so;
206    }
207    else
208	eng.end = eng.str + strlen(string);
209    eng.bas = eng.str;
210    nosub = preg->cod[0] & RE_NOSUB;
211    newline = preg->cod[0] & RE_NEWLINE;
212    eng.cod = preg->cod + 2;
213
214    if (!nosub && preg->cod[1] != 0xff) {
215	for (i = 0; i <= preg->cod[1]; i++) {
216	    eng.gso[i] = 0;
217	    eng.geo[i] = -1;
218	}
219    }
220
221    /* Setup to search for start of match from the first character */
222    eng.so[0] = 0;
223    eng.eo[0] = eng.sv[0] = -1;
224    eng.rcod[0] = eng.cod;
225    eng.rstr[0] = eng.str + 1;
226    eng.off = 0;
227    eng.goff = -1;
228    for (ci = si = 1;;) {
229reset:
230	switch (*eng.cod) {
231	    /****************************************************
232	     * One byte codes					*
233	     ****************************************************/
234	    case Re_Any:
235		if (eng.str == eng.end || (newline && eng.str[0] == '\n'))
236		    goto fail;
237		goto match;
238	    case Re_AnyEatAnyTimes:
239		if (newline) {
240		    for (ptr = eng.str; ptr < eng.end; ptr++) {
241			if (*ptr == '\n')
242			    break;
243		    }
244		    si = ptr - eng.str;
245		}
246		else
247		    si = eng.end - eng.str;
248		goto match;
249	    case Re_AnyEatMaybe:
250		si = eng.end > eng.str;
251		if (newline && si && eng.str[0] == '\n')
252		    si = 0;
253		goto match;
254	    case Re_AnyEatAtLeast:
255		if (newline) {
256		    for (ptr = eng.str; ptr < eng.end; ptr++) {
257			if (*ptr == '\n')
258			    break;
259		    }
260		    si = ptr - eng.str;
261		}
262		else
263		    si = eng.end - eng.str;
264		if (si == 0) {
265		    si = 1;
266		    goto fail;
267		}
268		goto match;
269	    case Re_Odigit:
270		if (eng.str >= eng.end)
271		    goto fail;
272		if (re__odigit[eng.str[0]])
273		    goto match;
274		goto fail;
275	    case Re_OdigitNot:
276		if (eng.str >= eng.end || re__odigit[eng.str[0]])
277		    goto fail;
278		goto match;
279	    case Re_Digit:
280		if (eng.str >= eng.end)
281		    goto fail;
282		if (re__ddigit[eng.str[0]])
283		    goto match;
284		goto fail;
285	    case Re_DigitNot:
286		if (eng.str >= eng.end || re__ddigit[eng.str[0]])
287		    goto fail;
288		goto match;
289	    case Re_Xdigit:
290		if (eng.str >= eng.end)
291		    goto fail;
292		if (re__xdigit[eng.str[0]])
293		    goto match;
294		goto fail;
295	    case Re_XdigitNot:
296		if (eng.str >= eng.end || re__xdigit[eng.str[0]])
297		    goto fail;
298		goto match;
299	    case Re_Space:
300		if (eng.str >= eng.end)
301		    goto fail;
302		if (eng.str[0] == ' ' || eng.str[0] == '\t')
303		    goto match;
304		goto fail;
305	    case Re_SpaceNot:
306		if (eng.str >= eng.end)
307		    goto fail;
308		if (eng.str[0] != ' ' && eng.str[0] != '\t')
309		    goto match;
310		goto fail;
311	    case Re_Tab:
312		if (eng.str >= eng.end)
313		    goto fail;
314		if (eng.str[0] == '\t')
315		    goto match;
316		goto fail;
317	    case Re_Newline:
318		if (eng.str >= eng.end)
319		    goto fail;
320		if (eng.str[0] == '\n')
321		    goto match;
322		goto fail;
323	    case Re_Lower:
324		if (eng.str >= eng.end)
325		    goto fail;
326		if (eng.str[0] >= 'a' && eng.str[0] <= 'z')
327		    goto match;
328		goto fail;
329	    case Re_Upper:
330		if (eng.str >= eng.end)
331		    goto fail;
332		if (eng.str[0] >= 'A' && eng.str[0] <= 'Z')
333		    goto match;
334		goto fail;
335	    case Re_Alnum:
336		if (eng.str >= eng.end)
337		    goto fail;
338		if (re__alnum[eng.str[0]])
339		    goto match;
340		goto fail;
341	    case Re_AlnumNot:
342		if (eng.str >= eng.end)
343		    goto fail;
344		if (re__alnum[eng.str[0]])
345		    goto fail;
346		goto match;
347	    case Re_Control:
348		if (eng.str >= eng.end)
349		    goto fail;
350		if (re__control[eng.str[0]])
351		    goto match;
352		goto fail;
353	    case Re_ControlNot:
354		if (eng.str >= eng.end || re__control[eng.str[0]])
355		    goto fail;
356		goto match;
357
358	    /****************************************************
359	     * One byte codes, match special emtpy strings	*
360	     ****************************************************/
361	    case Re_Bol:
362		if (eng.str == eng.bas) {
363		    if ((flags & RE_NOTBOL)) {
364			/* String does not start at the beginning of a line */
365			if (newline)
366			    goto fail;
367			goto wont;
368		    }
369		    si = 0;
370		    goto match;
371		}
372		if (newline && eng.str[-1] == '\n') {
373		    si = 0;
374		    goto match;
375		}
376		goto fail;
377	    case Re_Eol:
378		if (eng.str == eng.end) {
379		    if (flags & RE_NOTEOL)
380			/* String does not finish at the end of a line */
381			goto wont;
382		    si = 0;
383		    goto match;
384		}
385		if (newline && eng.str[0] == '\n') {
386		    si = 0;
387		    goto match;
388		}
389		goto fail;
390	    case Re_Bow:
391		if (eng.str >= eng.end ||
392		    (eng.str > eng.bas &&
393		     (re__alnum[eng.str[-1]])))
394		    goto fail;
395		if (re__alnum[eng.str[0]]) {
396		    si = 0;
397		    goto match;
398		}
399		goto fail;
400	    case Re_Eow:
401		if (eng.str == eng.bas ||
402		    (eng.str < eng.end &&
403		     re__alnum[eng.str[0]]))
404		    goto fail;
405		if (re__alnum[eng.str[-1]]) {
406		    si = 0;
407		    goto match;
408		}
409		goto fail;
410
411	    /****************************************************
412	     * One byte code, one byte argument			*
413	     ****************************************************/
414	    case Re_Literal:
415		if (eng.str >= eng.end)
416		    goto fail;
417		if (eng.str[0] == eng.cod[1]) {
418		    ci = 2;
419		    goto match;
420		}
421		goto fail;
422	    case Re_LiteralNot:
423		if (eng.str >= eng.end)
424		    goto fail;
425		if (eng.str[0] != eng.cod[1]) {
426		    ci = 2;
427		    goto match;
428		}
429		goto fail;
430	    case Re_SearchLiteral:
431		for (str = eng.str; str < eng.end; str++) {
432		    if (*str == eng.cod[1]) {
433			ci = 2;
434			eng.str = str;
435			goto match;
436		    }
437		}
438		/* This bytecode only happens in the toplevel */
439		eng.so[0] = str - eng.bas;
440		eng.str = str;
441		goto fail;
442
443	    /****************************************************
444	     * One byte code, two bytes argument		*
445	     ****************************************************/
446	    case Re_CaseLiteral:
447		if (eng.str >= eng.end)
448		    goto fail;
449		if (eng.str[0] == eng.cod[1] || eng.str[0] == eng.cod[2]) {
450		    ci = 3;
451		    goto match;
452		}
453		goto fail;
454	    case Re_CaseLiteralNot:
455		if (eng.str >= eng.end)
456		    goto fail;
457		if (eng.str[0] != eng.cod[1] && eng.str[0] != eng.cod[2]) {
458		    ci = 3;
459		    goto match;
460		}
461		goto fail;
462	    case Re_SearchCaseLiteral:
463		for (str = eng.str; str < eng.end; str++) {
464		    if (*str == eng.cod[1] || *str == eng.cod[2]) {
465			ci = 3;
466			eng.str = str;
467			goto match;
468		    }
469		}
470		eng.so[0] = str - eng.bas;
471		eng.str = str;
472		goto fail;
473
474	    /****************************************************
475	     * One byte codes, two arguments, n bytes		*
476	     ****************************************************/
477	    case Re_String:
478		len = eng.cod[1];
479		if (len & 0x80) {
480		    i = 3;
481		    len = (len & 0x7f) + (eng.cod[2] << 7);
482		}
483		else
484		    i = 2;
485		if (eng.end - eng.str < len)
486		    goto fail;
487		ptr = eng.cod + i;
488		str = eng.str;
489		for (k = len; k > 0; k--) {
490		    if (*ptr++ != *str++)
491			goto fail;
492		}
493		ci = i + len;
494		si = len;
495		goto match;
496	    case Re_SearchString:
497		len = eng.cod[1];
498		if (len & 0x80) {
499		    i = 3;
500		    len = (len & 0x7f) + (eng.cod[2] << 7);
501		}
502		else
503		    i = 2;
504		for (str = eng.str; eng.end - str >= len; str = eng.str++) {
505		    for (ptr = eng.cod + i, str = eng.str, k = len; k > 0; k--)
506			if (*ptr++ != *str++)
507			    break;
508		    if (k == 0) {
509			/* Substring found */
510			ci = i + len;
511			si = str - eng.str;
512			goto match;
513		    }
514		}
515		eng.so[0] = eng.end - eng.bas;
516		eng.str = eng.end;
517		goto fail;
518
519	    case Re_CaseString:
520		len = eng.cod[1];
521		if (len & 0x80) {
522		    i = 3;
523		    len = (len & 0x7f) + (eng.cod[2] << 7);
524		}
525		else
526		    i = 2;
527
528		len >>= 1;
529		/*  Check if there are at least len/2 bytes left, string
530		 * is represented as two bytes, lower and upper case */
531		if (eng.end - eng.str < len)
532		    goto fail;
533		ptr = eng.cod + i;
534		str = eng.str;
535		for (k = len; k > 0; str++, ptr += 2, k--) {
536		    if (*str != ptr[0] && *str != ptr[1])
537			goto fail;
538		}
539		ci = i + (len << 1);
540		si = len;
541		goto match;
542	    case Re_SearchCaseString:
543		len = eng.cod[1];
544		if (len & 0x80) {
545		    i = 3;
546		    len = (len & 0x7f) + (eng.cod[2] << 7);
547		}
548		else
549		    i = 2;
550		len >>= 1;
551		for (str = eng.str; eng.end - str >= len; str = eng.str++) {
552		    for (ptr = eng.cod + i, str = eng.str, k = len;
553			 k > 0; k--, ptr += 2, str++)
554			if (ptr[0] != str[0] && ptr[1] != str[0])
555			    break;
556		    if (k == 0) {
557			/* Substring found */
558			ci = i + (len << 1);
559			si = str - eng.str;
560			goto match;
561		    }
562		}
563		eng.so[0] = eng.end - eng.bas;
564		eng.str = eng.end;
565		goto fail;
566
567	    case Re_StringList:
568		/* Number of strings */
569		k = eng.cod[1];
570
571		/* Where to jump after match */
572		bas = eng.cod[2] | (eng.cod[3] << 8);
573
574		str = eng.str;
575		ptr = eng.cod + k + 4;
576		l = eng.end - eng.str;
577		for (j = 0; j < k; j++) {
578		    len = eng.cod[j + 4];
579		    if (len <= l) {
580			for (i = 0; i < len; i++)
581			    if (ptr[i] != str[i])
582				goto next_stl;
583			goto stl_match;
584		    }
585next_stl:
586		    ptr += len;
587		}
588		goto fail;
589stl_match:
590		ci = bas;
591		si = len;
592		goto match;
593
594	    case Re_CaseStringList:
595		/* Number of strings */
596		k = eng.cod[1];
597
598		/* Where to jump after match */
599		bas = eng.cod[2] | (eng.cod[3] << 8);
600
601		str = eng.str;
602		ptr = eng.cod + k + 4;
603		l = eng.end - eng.str;
604		for (j = 0; j < k; j++) {
605		    len = eng.cod[j + 4];
606		    if ((len >> 1) <= l) {
607			for (i = m = 0; i < len; m++, i += 2)
608			    if (ptr[i] != str[m] && ptr[i + 1] != str[m])
609				goto next_cstl;
610			goto cstl_match;
611		    }
612next_cstl:
613		    ptr += len;
614		}
615		goto fail;
616cstl_match:
617		ci = bas;
618		si = len >> 1;
619		goto match;
620
621
622	    case Re_LargeStringList:
623		/* Where to jump after match */
624		bas = eng.cod[1] | (eng.cod[2] << 8);
625
626		str = eng.str;
627
628		/* First entry in index map */
629		ptr = eng.cod + 3;
630		i = (int)str[0] << 1;
631		j = ptr[i] | (ptr[i + 1] << 8);
632		if (j == 0xffff)
633		    /* No entry with this byte */
634		    goto fail;
635
636		/* Bytes left in input */
637		l = eng.end - eng.str;
638
639		/* First entry matching initial byte */
640		ptr += 512 + j;
641
642		for (len = ptr[0];
643		     str[0] == ptr[1];
644		     ptr += len + 1, len = ptr[0]) {
645		    if (len <= l) {
646			for (i = 1; i < len; i++) {
647			    if (ptr[i + 1] != str[i])
648				goto next_lstl;
649			}
650			ci = bas;
651			si = len;
652			goto match;
653		    }
654next_lstl:;
655		}
656		goto fail;
657
658	    case Re_LargeCaseStringList:
659		/* Where to jump after match */
660		bas = eng.cod[1] | (eng.cod[2] << 8);
661
662		str = eng.str;
663
664		/* First entry in index map */
665		ptr = eng.cod + 3;
666		i = (int)str[0] << 1;
667		j = ptr[i] | (ptr[i + 1] << 8);
668		if (j == 0xffff)
669		    /* No entry with this byte */
670		    goto fail;
671
672		/* Bytes left in input */
673		l = eng.end - eng.str;
674
675		/* First entry matching initial byte */
676		ptr += 512 + j;
677
678		for (len = ptr[0];
679		     str[0] == ptr[1] || str[0] == ptr[2];
680		     ptr += len + 1, len = ptr[0]) {
681		    if ((k = (len >> 1)) <= l) {
682			for (i = 2, j = 1; i < len; i += 2, j++) {
683			    if (ptr[i + 1] != str[j] && ptr[i + 2] != str[j])
684				goto next_lcstl;
685			}
686			ci = bas;
687			si = k;
688			goto match;
689		    }
690next_lcstl:;
691		}
692		goto fail;
693
694
695	    /****************************************************
696	     * Character range matching				*
697	     ****************************************************/
698	    case Re_Range:
699		if (eng.str < eng.end && eng.cod[eng.str[0] + 1]) {
700		    ci = 257;
701		    goto match;
702		}
703		goto fail;
704	    case Re_RangeNot:
705		if (eng.str >= eng.end || eng.cod[eng.str[0] + 1])
706		    goto fail;
707		ci = 257;
708		goto match;
709
710	    /****************************************************
711	     * Group handling					*
712	     ****************************************************/
713	    case Re_Open:
714		if (++eng.goff >= 9)
715		    return (RE_ASSERT);
716		eng.gso[eng.goff] = eng.str - eng.bas;
717		++eng.cod;
718		continue;
719	    case Re_Close:
720		eng.geo[eng.goff] = eng.str - eng.bas;
721		++eng.cod;
722		continue;
723	    case Re_Update:
724		bas = eng.cod[1];
725		eng.geo[eng.goff] = eng.str - eng.bas;
726		eng.cod += 2;		/* + Update + bas */
727		continue;
728
729	    /****************************************************
730	     * Backreference					*
731	     ****************************************************/
732	    case Re_Backref:
733		i = eng.cod[1];
734		j = eng.gso[i];
735		k = eng.geo[i];
736		len = k - j;
737		if (k < j || eng.end - eng.str < len)
738		    goto fail;
739		ptr = eng.bas + j;
740		str = eng.str;
741		for (l = len; l > 0; l--) {
742		    if (*ptr++ != *str++)
743			goto fail;
744		}
745		ci = 2;
746		si = len;
747		goto match;
748
749	    /****************************************************
750	     * Alternatives handling				*
751	     ****************************************************/
752	    case Re_Alt:
753		bas = eng.off;
754		if (++eng.off >= MAX_DEPTH)
755		    return (RE_ASSERT);
756
757		/* Get offset of next alternative */
758		i = eng.cod[1] | (eng.cod[2] << 8);
759
760		/* Setup for next alternative if the current fails */
761		eng.rcod[eng.off] = eng.cod + i + 1;	/* + Alt */
762
763		/* If fail, test the next alternative in the same string */
764		eng.rstr[eng.off] = eng.str;
765
766		/* Setup match offsets */
767		if (eng.so[bas] <= eng.eo[bas])
768		    eng.so[eng.off] = eng.eo[bas];
769		else
770		    eng.so[eng.off] = eng.so[bas];
771		eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;
772
773		/* Save start of possible previous matches */
774		eng.ss[eng.off] = eng.so[bas];
775
776		/* Skip code */
777		eng.cod += 3;
778
779		/* Go try the first alternative */
780		continue;
781
782	    case Re_AltNext:
783		bas = eng.off - 1;
784		/* Check if matched and if it is a better match */
785		if (eng.eo[eng.off] >= eng.so[eng.off] &&
786		    eng.sv[eng.off] - eng.so[eng.off] <
787		    eng.eo[eng.off] - eng.so[eng.off])
788		    eng.sv[eng.off] = eng.eo[eng.off];
789
790		/* Get offset of next alternative */
791		i = eng.cod[1] | (eng.cod[2] << 8);
792
793		/* Setup for next alternative if the current fails */
794		eng.rcod[eng.off] = eng.cod + i + 1;	/* + AltNext */
795
796		/* Setup match offset */
797		eng.eo[eng.off] = eng.so[eng.off] - 1;
798
799		/* Reset string for next alternative */
800		eng.str = eng.rstr[eng.off];
801
802		/* Skip code */
803		eng.cod += 3;
804
805		/* Go try the next alternative */
806		continue;
807
808	    case Re_AltDone:
809		bas = eng.off - 1;
810		/* Check if matched and if it is a better match */
811		if (eng.sv[eng.off] - eng.so[eng.off] <
812		    eng.eo[eng.off] - eng.so[eng.off])
813		    eng.sv[eng.off] = eng.eo[eng.off];
814
815		/* If there is a match */
816		if (eng.sv[eng.off] >= eng.so[eng.off]) {
817		    eng.so[bas] = eng.ss[eng.off];
818		    eng.eo[bas] = eng.sv[eng.off];
819		    eng.str = eng.bas + eng.eo[bas];
820
821		    /* Pop stack and skip code */
822		    --eng.off;
823		    ++eng.cod;
824
825		    /* Go try next regular expression pattern */
826		    continue;
827		}
828
829		/* Failed, reset string and pop stack */
830		eng.str = eng.rstr[eng.off];
831		--eng.off;
832		goto fail;
833
834
835	    /****************************************************
836	     * Repetition					*
837	     ****************************************************/
838
839		/*  Note that the repetition counter is not
840		 * updated for <re>*, <re>+ and <re>?
841		 * it is easy to updated, but since it is not
842		 * really useful, code to do it was removed
843		 * to save a few cpu cicles. */
844
845#define REPETITION_SETUP()					\
846	if (++eng.off >= MAX_DEPTH)				\
847	    return (RE_ASSERT);					\
848								\
849	/* Return here for recovery if match fail */		\
850	eng.rcod[eng.off] = eng.cod;				\
851								\
852	/* Setup match offsets */				\
853	if (eng.so[bas] <= eng.eo[bas])				\
854	    eng.so[eng.off] = eng.eo[bas];			\
855	else							\
856	    eng.so[eng.off] = eng.so[bas];			\
857	eng.ss[eng.off] = eng.so[bas];				\
858	eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\
859								\
860	/* Skip repetition instruction */			\
861	eng.cod += 4;
862
863
864	    case Re_AnyTimes:
865		bas = eng.cod[1];
866		if (eng.off == bas) {
867		    /* First iteration */
868		    REPETITION_SETUP();
869		}
870		else {
871		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
872			eng.eo[eng.off] > eng.sv[eng.off]) {
873			/* Update offset of match */
874			eng.sv[eng.off] = eng.eo[eng.off];
875
876			/* Skip repetition instruction */
877			eng.cod += 4;
878		    }
879		    else {
880			/* Match failed but it is ok */
881			len = eng.cod[2] | (eng.cod[3] << 8);
882			eng.so[bas] = eng.ss[eng.off];
883			if (eng.sv[eng.off] >= eng.so[eng.off])
884			    /* Something matched earlier, update */
885			    eng.eo[bas] = eng.sv[eng.off];
886			else if (eng.eo[bas] < eng.so[bas])
887			    /* Empty match */
888			    eng.eo[bas] = eng.so[bas];
889
890			/* Try next pattern at correct offset */
891			eng.str = eng.bas + eng.eo[bas];
892
893			/* Pop stack and skip code */
894			--eng.off;
895			eng.cod += len;
896		    }
897		}
898		continue;
899
900	    case Re_Maybe:
901		bas = eng.cod[1];
902		if (eng.off == bas) {
903		    /* First iteration */
904		    REPETITION_SETUP();
905		}
906		else {
907		    /* Matched or first iteration is done */
908		    len = eng.cod[2] | (eng.cod[3] << 8);
909		    eng.so[bas] = eng.ss[eng.off];
910		    if (eng.eo[eng.off] > eng.so[eng.off]) {
911			/* Something matched earlier, update */
912			eng.eo[bas] = eng.eo[eng.off];
913			eng.str = eng.bas + eng.eo[bas];
914			/* Don't need to update counter */
915		    }
916		    else {
917			/* Empty match */
918			if (eng.eo[bas] < eng.so[bas])
919			    eng.eo[bas] = eng.so[bas];
920
921			/* Try next pattern at correct offset */
922			eng.str = eng.bas + eng.eo[bas];
923		    }
924
925		    /* Pop stack */
926		    --eng.off;
927
928		    /* Skip code */
929		    eng.cod += len;
930		}
931		continue;
932
933	    case Re_AtLeast:
934		bas = eng.cod[1];
935		if (eng.off == bas) {
936		    /* First iteration */
937		    REPETITION_SETUP();
938		}
939		else {
940		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
941			eng.eo[eng.off] > eng.sv[eng.off]) {
942			/* Update offset of match */
943			eng.sv[eng.off] = eng.eo[eng.off];
944
945			/* Skip repetition instruction */
946			eng.cod += 4;
947		    }
948		    else {
949			/* Last try failed */
950			len = eng.cod[2] | (eng.cod[3] << 8);
951			if (eng.sv[eng.off] >= eng.so[eng.off]) {
952			    /* Something matched earlier, update */
953			    eng.so[bas] = eng.ss[eng.off];
954			    eng.eo[bas] = eng.sv[eng.off];
955			    eng.str = eng.bas + eng.eo[bas];
956			}
957			else {
958			    /*	Do it here, so that the fail label does
959			     * not need to do too expensive work for
960			     * simple patterns. */
961			    eng.so[bas] = eng.str - eng.bas;
962
963			    /* Zero matches, pop stack and restart */
964			    --eng.off;
965			    goto fail;
966			}
967
968			/* Pop stack and skip code */
969			--eng.off;
970			eng.cod += len;
971		    }
972		}
973		continue;
974
975
976	    /****************************************************
977	     * Repetition with arguments			*
978	     ****************************************************/
979	    case Re_Exact:
980#define COMPLEX_REPETITION_SETUP_0()				\
981	i = eng.cod[1];						\
982	bas = eng.cod[2];
983
984#define COMPLEX_REPETITION_SETUP()				\
985	/* First iteration */					\
986	if (++eng.off >= MAX_DEPTH)				\
987	    return (RE_ASSERT);					\
988								\
989	/*  Remeber number or repetitions */			\
990	eng.re[eng.off] = 0;					\
991								\
992	/* Return here for recovery if match fail */		\
993	eng.rcod[eng.off] = eng.cod;				\
994								\
995	/* Setup match offsets */				\
996	if (eng.so[bas] <= eng.eo[bas])				\
997	    eng.so[eng.off] = eng.eo[bas];			\
998	else							\
999	    eng.so[eng.off] = eng.so[bas];			\
1000	eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\
1001	eng.ss[eng.off] = eng.so[bas];				\
1002								\
1003	/* Skip repetition instruction */			\
1004	eng.cod += 5;
1005
1006		COMPLEX_REPETITION_SETUP_0();
1007		if (eng.off == bas) {
1008		    /* First iteration */
1009		    COMPLEX_REPETITION_SETUP();
1010		}
1011		else {
1012		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1013			eng.eo[eng.off] > eng.sv[eng.off]) {
1014			/* Update offset of match */
1015			eng.sv[eng.off] = eng.eo[eng.off];
1016
1017			/* Update repetition counter */
1018			if (++eng.re[eng.off] == i) {
1019			    /* Matched the required times */
1020			    eng.so[bas] = eng.ss[eng.off];
1021			    eng.eo[bas] = eng.sv[eng.off];
1022			    eng.str = eng.bas + eng.eo[bas];
1023
1024			    /* Update code */
1025			    k = eng.cod[3] | (eng.cod[4] << 8);
1026			    eng.cod += k;
1027
1028			    /* Pop stack and go for next pattern */
1029			    --eng.off;
1030			    continue;
1031			}
1032
1033			/* Skip repetition instruction */
1034			eng.cod += 5;
1035		    }
1036		    else {
1037			/*  Do it here, so that the fail label does
1038			 * not need to do too expensive work for
1039			 * simple patterns. */
1040			eng.so[bas] = eng.str - eng.bas;
1041
1042			/* Pop stack and restart */
1043			--eng.off;
1044			goto fail;
1045		    }
1046		}
1047		continue;
1048
1049	    case Re_Min:
1050		COMPLEX_REPETITION_SETUP_0();
1051		if (eng.off == bas) {
1052		    /* First iteration */
1053		    COMPLEX_REPETITION_SETUP();
1054		}
1055		else {
1056		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1057			eng.eo[eng.off] > eng.sv[eng.off]) {
1058			/* Update offset of match */
1059			eng.sv[eng.off] = eng.eo[eng.off];
1060
1061			/* Update repetition counter */
1062			++eng.re[eng.off];
1063
1064			/* Skip repetition instruction and try again */
1065			eng.cod += 5;
1066		    }
1067		    else {
1068			/* Match failed! */
1069			if (eng.re[eng.off] < i) {
1070			    /*	Do it here, so that the fail label does
1071			     * not need to do too expensive work for
1072			     * simple patterns. */
1073			    eng.so[bas] = eng.str - eng.bas;
1074
1075			    /* Didn't match required number of times */
1076			    --eng.off;
1077			    goto fail;
1078			}
1079			else {
1080			    /* Matched minimum number of times */
1081			    eng.eo[bas] = eng.sv[eng.off];
1082			    eng.str = eng.bas + eng.eo[bas];
1083			    k = eng.cod[3] | (eng.cod[4] << 8);
1084
1085			    /* Update code and pop stack */
1086			    eng.cod += k;
1087			    --eng.off;
1088			}
1089		    }
1090		}
1091		continue;
1092
1093	    case Re_Max:
1094		COMPLEX_REPETITION_SETUP_0();
1095		if (eng.off == bas) {
1096		    /* First iteration */
1097		    COMPLEX_REPETITION_SETUP();
1098		}
1099		else {
1100		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1101			eng.eo[eng.off] > eng.sv[eng.off]) {
1102			/* Update offset of match */
1103			eng.sv[eng.off] = eng.eo[eng.off];
1104
1105			/* Update repetition counter */
1106			if (++eng.re[eng.off] == i) {
1107			    /* Matched the maximum times */
1108			    eng.so[bas] = eng.ss[eng.off];
1109			    eng.eo[bas] = eng.sv[eng.off];
1110			    eng.str = eng.bas + eng.eo[bas];
1111
1112			    k = eng.cod[3] | (eng.cod[4] << 8);
1113
1114			    /* Update code and pop stack */
1115			    eng.cod += k;
1116			    --eng.off;
1117			    continue;
1118			}
1119
1120			/* Skip repetition instruction and try again */
1121			eng.cod += 5;
1122		    }
1123		    else {
1124			/* No matches, but zero matches are ok */
1125			k = eng.cod[3] | (eng.cod[4] << 8);
1126			if (eng.sv[eng.off] >= eng.so[eng.off]) {
1127			    /* Something matched earlier, update */
1128			    eng.so[bas] = eng.ss[eng.off];
1129			    eng.eo[bas] = eng.sv[eng.off];
1130			    eng.str = eng.bas + eng.eo[bas];
1131			}
1132			else {
1133			    /* Empty match */
1134			    if (eng.eo[bas] < eng.so[bas])
1135				eng.eo[bas] = eng.so[bas];
1136
1137			    /* Try next pattern at correct offset */
1138			    eng.str = eng.bas + eng.eo[bas];
1139			}
1140
1141			/* Pop stack and update code */
1142			--eng.off;
1143			eng.cod += k;
1144		    }
1145		}
1146		continue;
1147
1148	    case Re_MinMax:
1149		bas = eng.cod[3];
1150		if (eng.off == bas) {
1151		    /* First iteration */
1152		    COMPLEX_REPETITION_SETUP();
1153		}
1154		else {
1155		    if (eng.eo[eng.off] >= eng.so[eng.off] &&
1156			eng.eo[eng.off] > eng.sv[eng.off]) {
1157			/* Update offset of match */
1158			eng.sv[eng.off] = eng.eo[eng.off];
1159
1160			/* Update repetition counter */
1161			if (++eng.re[eng.off] == eng.cod[2]) {
1162			    /* Matched the maximum times */
1163			    eng.so[bas] = eng.ss[eng.off];
1164			    eng.eo[bas] = eng.sv[eng.off];
1165			    eng.str = eng.bas + eng.eo[bas];
1166			    k = eng.cod[4] | (eng.cod[5] << 8);
1167
1168			    /* Update code and pop stack */
1169			    eng.cod += k;
1170			    --eng.off;
1171			    continue;
1172			}
1173
1174			/* Skip repetition instruction and try again */
1175			eng.cod += 6;
1176		    }
1177		    else {
1178			/* Match failed! */
1179			if (eng.re[eng.off] < eng.cod[1]) {
1180			    /*	Do it here, so that the fail label does
1181			     * not need to do too expensive work for
1182			     * simple patterns. */
1183			    eng.so[bas] = eng.str - eng.bas;
1184
1185			    /* Didn't match required number of times */
1186			    --eng.off;
1187			    goto fail;
1188			}
1189			else {
1190			    /* Matched minimum number of times */
1191			    eng.so[bas] = eng.ss[eng.off];
1192			    eng.eo[bas] = eng.sv[eng.off];
1193			    eng.str = eng.bas + eng.eo[bas];
1194			    k = eng.cod[4] | (eng.cod[5] << 8);
1195
1196			    /* Update code and pop stack */
1197			    eng.cod += k;
1198			    --eng.off;
1199			}
1200		    }
1201		}
1202		continue;
1203
1204
1205	    /****************************************************
1206	     * Special repetition handling			*
1207	     ****************************************************/
1208	    case Re_AnyAnyTimes:
1209		/* code(1) + bas(1) + gbas(1) + jump(2) */
1210		bas = eng.cod[1];
1211		if (eng.off == bas) {
1212		    /* First iteration */
1213		    if (++eng.off >= MAX_DEPTH)
1214			return (RE_ASSERT);
1215
1216		    /* Return here for recovery if match fail */
1217		    eng.rcod[eng.off] = eng.cod;
1218
1219		    /* If fail, test the next pattern at the same point */
1220		    eng.rstr[eng.off] = eng.str;
1221
1222		    /* Setup match offsets */
1223		    eng.so[eng.off] = eng.str - eng.bas;
1224		    eng.eo[eng.off] = eng.so[eng.off] - 1;
1225
1226		    if (newline)
1227			/*  Use the repetition counter to store start of
1228			 * skipped string, to later check if skipping a
1229			 * newline. */
1230			eng.re[eng.off] = eng.so[eng.off];
1231
1232		    /* Save start of possible previous matches */
1233		    eng.ss[eng.off] = eng.so[bas];
1234
1235		    /* Skip repetition instruction */
1236		    eng.cod += 5;
1237		}
1238		else {
1239		    /* -1 as an unsigned char */
1240		    if (eng.cod[2] != 0xff)
1241			eng.goff = eng.cod[2];
1242		    else
1243			eng.goff = -1;
1244
1245		    if (newline) {
1246			ptr = eng.bas + eng.re[eng.off];
1247			str = eng.bas + eng.so[eng.off];
1248			for (; ptr < str; ptr++)
1249			    if (*ptr == '\n') {
1250				eng.cod = eng.rcod[0];
1251				eng.so[0] = ptr - eng.bas + 1;
1252				eng.eo[0] = eng.so[0] - 1;
1253				eng.rstr[0] = eng.str = ptr + 1;
1254				eng.off = 0;
1255				goto reset;
1256			    }
1257			/* If looping, don't do too many noops */
1258			eng.re[eng.off] = ptr - eng.bas;
1259		    }
1260
1261		    if (eng.eo[eng.off] >= eng.so[eng.off]) {
1262			/* Note that this is only true if all possibly
1263			 * nested special repetitions also matched. */
1264
1265			if (eng.goff >= 0) {
1266			    if (eng.cod[5] == Re_Update)
1267				eng.gso[eng.goff] = eng.eo[bas] +
1268						    (eng.so[bas] > eng.eo[bas]);
1269			    else if (eng.geo[eng.goff] < eng.so[eng.off])
1270				eng.geo[eng.goff] = eng.so[eng.off];
1271			}
1272
1273			/* Jump relative offset */
1274			len = eng.cod[3] | (eng.cod[4] << 8);
1275
1276			/* Restore offset from where started trying */
1277			eng.so[bas] = eng.ss[eng.off];
1278			eng.eo[bas] = eng.eo[eng.off];
1279
1280			/* Pop stack and skip code */
1281			--eng.off;
1282			eng.cod += len;
1283		    }
1284		    else {
1285			/* Only give up if the entire string was scanned */
1286			if (eng.str < eng.end) {
1287			    /* Update restart point for next pattern */
1288			    eng.str = ++eng.rstr[eng.off];
1289
1290			    /* Reset start of nested match */
1291			    eng.so[eng.off] = eng.str - eng.bas;
1292
1293			    /* Skip repetition instruction */
1294			    eng.cod += 5;
1295			}
1296			else {
1297			    /* Entire string scanned and failed */
1298
1299			    /* Jump relative offset */
1300			    len = eng.cod[3] | (eng.cod[4] << 8);
1301
1302			    /* Restore offset from where started trying */
1303			    eng.so[bas] = eng.ss[eng.off];
1304			    eng.eo[bas] = eng.ss[eng.off] - 1;
1305
1306			    /* Pop stack and skip code */
1307			    --eng.off;
1308			    eng.cod += len;
1309			}
1310		    }
1311		}
1312		continue;
1313
1314	    /*  This is significantly different than matching <re>.*<re>
1315	     * because it may need to restart several times since it is
1316	     * possible to find too many false positives, for example:
1317	     *	a.*b	=> once one "a" is found, scan all
1318	     *		   the remaining string searching for a "b"
1319	     *  a.?b	=> the string may have too many "a"s, but the
1320	     *		   first occurrences of "a" may not be followed
1321	     *		   by any-character and a "b" or a single "b".
1322	     */
1323	    case Re_AnyMaybe:
1324		bas = eng.cod[1];
1325		if (eng.off == bas) {
1326		    /* First iteration */
1327		    if (++eng.off >= MAX_DEPTH)
1328			return (RE_ASSERT);
1329
1330		    /* Return here for recovery if match fail */
1331		    eng.rcod[eng.off] = eng.cod;
1332
1333		    /* First try without eating a byte */
1334		    eng.rstr[eng.off] = eng.str;
1335
1336		    /* Remember this is the first try if match fail */
1337		    eng.re[eng.off] = 0;
1338
1339		    /* Setup match offsets */
1340		    eng.so[eng.off] = eng.str - eng.bas;
1341		    eng.eo[eng.off] = eng.so[eng.off] - 1;
1342
1343		    /* Save start of possible previous matches */
1344		    eng.ss[eng.off] = eng.so[bas];
1345
1346		    /* Skip repetition instruction */
1347		    eng.cod += 5;
1348		}
1349		else {
1350		    /* -1 as an unsigned char */
1351		    if (eng.cod[2] != 0xff)
1352			eng.goff = eng.cod[2];
1353		    else
1354			eng.goff = -1;
1355
1356		    if (eng.eo[eng.off] >= eng.so[eng.off]) {
1357			/* Something matched */
1358
1359			if (eng.goff >= 0) {
1360			    if (eng.cod[5] == Re_Update)
1361				eng.gso[eng.goff] = eng.eo[bas] +
1362						    (eng.so[bas] > eng.eo[bas]);
1363			    else if (eng.geo[eng.goff] < eng.so[eng.off])
1364				eng.geo[eng.goff] = eng.so[eng.off];
1365			}
1366
1367			/* Jump relative offset */
1368			len = eng.cod[3] | (eng.cod[4] << 8);
1369
1370			/* Update offset of match */
1371			eng.eo[bas] = eng.eo[eng.off];
1372
1373			/* Pop stack and skip code */
1374			--eng.off;
1375			eng.cod += len;
1376		    }
1377		    else if (eng.re[eng.off] == 0 &&
1378			     (!newline || eng.rstr[eng.off][1] != '\n')) {
1379			/* Try this time skiping a byte */
1380			++eng.re[eng.off];
1381
1382			/* Reset string, skip code and go try one time more */
1383			eng.str = ++eng.rstr[eng.off];
1384			eng.cod += 5;
1385		    }
1386		    else {
1387			/* Failed to match */
1388
1389			/* Update offsets */
1390			eng.eo[bas] = eng.ss[eng.off];
1391			eng.so[bas] = eng.eo[bas] + 1;
1392
1393			eng.str = eng.rstr[eng.off] + (eng.re[eng.off] == 0);
1394
1395			/* Pop stack and return to toplevel code */
1396			--eng.off;
1397			if (eng.str >= eng.end)
1398			    goto wont;
1399			eng.cod = eng.rcod[bas];
1400		    }
1401		}
1402		continue;
1403
1404	    /* .+ almost identical to .* but requires eating at least one byte */
1405	    case Re_AnyAtLeast:
1406		bas = eng.cod[1];
1407		if (eng.off == bas) {
1408		    /* First iteration */
1409		    if (++eng.off >= MAX_DEPTH)
1410			return (RE_ASSERT);
1411
1412		    /* Return here for recovery if match fail */
1413		    eng.rcod[eng.off] = eng.cod;
1414
1415		    /* Skip one byte for the restart string */
1416		    if (newline && eng.str[0] == '\n') {
1417			/* Cannot skip newline */
1418			eng.cod = eng.rcod[0];
1419			eng.rstr[0] = ++eng.str;
1420			eng.so[0] = eng.str - eng.bas;
1421			eng.eo[0] = eng.so[0] - 1;
1422			eng.off = 0;
1423			goto reset;
1424		    }
1425		    eng.rstr[eng.off] = ++eng.str;
1426
1427		    /* Setup match offsets */
1428		    eng.so[eng.off] = eng.str - eng.bas;
1429		    eng.eo[eng.off] = eng.so[eng.off] - 1;
1430
1431		    if (newline)
1432			/*  Use the repetition counter to store start of
1433			 * skipped string, to later check if skipping a
1434			 * newline. */
1435			eng.re[eng.off] = eng.so[eng.off];
1436
1437		    /* Save start of possible previous matches */
1438		    eng.ss[eng.off] = eng.so[bas];
1439
1440		    /* Skip repetition instruction */
1441		    eng.cod += 5;
1442		}
1443		else {
1444		    /* -1 as an unsigned char */
1445		    if (eng.cod[2] != 0xff)
1446			eng.goff = eng.cod[2];
1447		    else
1448			eng.goff = -1;
1449
1450		    if (newline) {
1451			ptr = eng.bas + eng.re[eng.off];
1452			str = eng.bas + eng.so[eng.off];
1453			for (; ptr < str; ptr++)
1454			    if (*ptr == '\n') {
1455				eng.cod = eng.rcod[0];
1456				eng.so[0] = ptr - eng.bas + 1;
1457				eng.eo[0] = eng.so[0] - 1;
1458				eng.rstr[0] = eng.str = ptr + 1;
1459				eng.off = 0;
1460				goto reset;
1461			    }
1462			/* If looping, don't do too many noops */
1463			eng.re[eng.off] = ptr - eng.bas;
1464		    }
1465
1466		    if (eng.eo[eng.off] >= eng.so[eng.off]) {
1467			/* Note that this is only true if all possibly
1468			 * nested special repetitions also matched. */
1469
1470			if (eng.goff >= 0) {
1471			    if (eng.cod[5] == Re_Update)
1472				eng.gso[eng.goff] = eng.eo[bas] +
1473						    (eng.so[bas] > eng.eo[bas]);
1474			    else if (eng.geo[eng.goff] < eng.so[eng.off])
1475				eng.geo[eng.goff] = eng.so[eng.off];
1476			}
1477
1478			/* Jump relative offset */
1479			len = eng.cod[3] | (eng.cod[4] << 8);
1480
1481			/* Restore offset from where started trying */
1482			eng.so[bas] = eng.ss[eng.off];
1483			eng.eo[bas] = eng.eo[eng.off];
1484
1485			/* Pop stack and skip code */
1486			--eng.off;
1487			eng.cod += len;
1488		    }
1489		    else {
1490			/* Only give up if the entire string was scanned */
1491			if (eng.str < eng.end) {
1492			    /* Update restart point for next pattern */
1493			    eng.str = ++eng.rstr[eng.off];
1494
1495			    /* Reset start of nested match */
1496			    eng.so[eng.off] = eng.str - eng.bas;
1497
1498			    /* Skip repetition instruction */
1499			    eng.cod += 5;
1500			}
1501			else {
1502			    /* Entire string scanned and failed */
1503
1504			    /* Jump relative offset */
1505			    len = eng.cod[3] | (eng.cod[4] << 8);
1506
1507			    /* Restore offset from where started trying */
1508			    eng.so[bas] = eng.ss[eng.off];
1509			    eng.eo[bas] = eng.ss[eng.off] - 1;
1510
1511			    /* Pop stack and skip code */
1512			    --eng.off;
1513			    eng.cod += len;
1514			}
1515		    }
1516		}
1517		continue;
1518
1519
1520	    /****************************************************
1521	     * Repetition matched!				*
1522	     ****************************************************/
1523	    case Re_RepJump:
1524		/* eng.cod[1] is toplevel offset of repetition */
1525		if (eng.off > eng.cod[1])
1526		    /* If still needs to try matches */
1527		    eng.cod -= eng.cod[2];
1528		else
1529		    eng.cod += 3;	/* + RepJump + bas + len-size */
1530		continue;
1531
1532	    case Re_RepLongJump:
1533		/* eng.cod[1] is toplevel offset of repetition */
1534		if (eng.off > eng.cod[1])
1535		    /* If still needs to try matches */
1536		    eng.cod -= eng.cod[2] | (eng.cod[3] << 8);
1537		else
1538		    eng.cod += 4;	/* + RepLongJump + bas + len-size */
1539		continue;
1540
1541	    /****************************************************
1542	     * Finished						*
1543	     ****************************************************/
1544	    case Re_DoneIf:
1545		if (eng.eo[eng.off] >= eng.so[eng.off]) {
1546		    eng.so[0] = eng.ss[eng.off];
1547		    eng.eo[0] = eng.eo[eng.off];
1548		    goto done;
1549		}
1550		++eng.cod;
1551		continue;
1552	    case Re_MaybeDone:
1553		if (eng.eo[eng.off] >= eng.so[eng.off]) {
1554		    eng.so[0] = eng.ss[eng.off];
1555		    eng.eo[0] = eng.eo[eng.off];
1556		    goto done;
1557		}
1558		++eng.cod;
1559		continue;
1560	    case Re_Done:
1561		goto done;
1562
1563	    default:
1564		/* Fatal internal error */
1565		return (RE_ASSERT);
1566	}
1567
1568
1569wont:
1570	/* Surely won't match */
1571	if (eng.off == 0) {
1572	    eng.eo[0] = eng.so[0] - 1;
1573	    break;
1574	}
1575
1576
1577fail:
1578	if (eng.off == 0) {
1579	    /* If the entire string scanned */
1580	    if (++eng.str > eng.end) {
1581		eng.eo[0] = eng.so[0] - 1;
1582		break;
1583	    }
1584	    eng.goff = -1;
1585	    /* Update start of possible match after restart */
1586	    if (eng.eo[0] >= eng.so[0]) {
1587		/* If first fail */
1588		eng.str = eng.rstr[0];
1589		++eng.rstr[0];
1590		eng.so[0] = eng.str - eng.bas;
1591		eng.eo[0] = eng.so[eng.off] - 1;
1592	    }
1593	    else
1594		/* Just trying at next byte */
1595		++eng.so[0];
1596	}
1597	else
1598	    /* Remember this match failed */
1599	    eng.eo[eng.off] = eng.so[eng.off] - 1;
1600
1601	/* Restart code */
1602	eng.cod = eng.rcod[eng.off];
1603	continue;
1604
1605
1606match:
1607	/* If first match */
1608	if (eng.eo[eng.off] < eng.so[eng.off]) {
1609	    if (eng.off == 0)
1610		eng.rstr[0] = eng.str + 1;
1611	    eng.so[eng.off] = eng.eo[eng.off] = eng.str - eng.bas;
1612	}
1613	eng.eo[eng.off] += si;
1614	eng.cod += ci;
1615	eng.str += si;
1616	ci = si = 1;
1617	continue;
1618
1619done:
1620	break;
1621    }
1622
1623    if (nmatch) {
1624	if (flags & RE_STARTEND)
1625	    len = pmat[0].rm_so;
1626	else
1627	    len = 0;
1628	if (!nosub) {
1629	    if (preg->cod[1] != 0xff)
1630		eng.goff = preg->cod[1];
1631	    pmat[0].rm_so = eng.so[0];
1632	    pmat[0].rm_eo = eng.eo[0];
1633	    for (i = 1; i < nmatch; i++) {
1634		if (i - 1 <= eng.goff) {
1635		    pmat[i].rm_so = eng.gso[i - 1];
1636		    pmat[i].rm_eo = eng.geo[i - 1];
1637		}
1638		else {
1639		    pmat[i].rm_so = 0;
1640		    pmat[i].rm_eo = -1;
1641		}
1642	    }
1643	    if (len) {
1644		/* Update offsets, since the match was done in a substring */
1645		j = eng.goff + 2 > nmatch ? nmatch : eng.goff + 2;
1646		for (i = 0; i < j; i++) {
1647		    pmat[i].rm_so += len;
1648		    pmat[i].rm_eo += len;
1649		}
1650	    }
1651	}
1652	else {
1653	    /*	Already know these values, allow compiling the regex with
1654	     * RE_NOSUB to use parenthesis only for grouping, but avoiding
1655	     * the runtime overhead of keeping track of the subexpression
1656	     * offsets. */
1657	    pmat[0].rm_so = eng.so[0] + len;
1658	    pmat[0].rm_eo = eng.eo[0] + len;
1659	}
1660    }
1661
1662    return (eng.so[0] <= eng.eo[0] ? 0 : RE_NOMATCH);
1663}
1664
1665int
1666reerror(int ecode, const re_cod *preg, char *ebuffer, int ebuffer_size)
1667{
1668    static const char *errors[] = {
1669	"No error",
1670	"Failed to match",			/* NOMATCH */
1671
1672	/* Errors not generated */
1673	"Invalid regular expression",		/* BADPAT */
1674	"Invalid collating element",		/* ECOLLATE */
1675	"Invalid character class",		/* ECTYPE */
1676
1677	"`\' applied to unescapable character",	/* EESCAPE */
1678	"Invalid backreference number",		/* ESUBREG */
1679	"Brackets `[ ]' not balanced",		/* EBRACK */
1680	"Parentheses `( )' not balanced",	/* EPAREN */
1681	"Braces `{ }' not balanced",		/* EBRACE */
1682	"Invalid repetition count(s) in `{ }'",	/* BADBR */
1683	"Invalid character range in `[ ]'",	/* ERANGE */
1684	"Out of memory",			/* ESPACE */
1685	"`?', `*', or `+' operand invalid",	/* BADRPT */
1686	"Empty (sub)expression",		/* EMPTY */
1687	"Assertion error - you found a bug",	/* ASSERT */
1688	"Invalid argument"			/* INVARG */
1689    };
1690    const char *str;
1691
1692    if (ecode >= 0 && ecode < sizeof(errors) / sizeof(errors[0]))
1693	str = errors[ecode];
1694    else
1695	str = "Unknown error";
1696
1697    return (snprintf(ebuffer, ebuffer_size, "%s", str));
1698}
1699
1700void
1701refree(re_cod *cod)
1702{
1703    free(cod->cod);
1704    cod->cod = NULL;
1705}
1706
1707static void
1708reinit(void)
1709{
1710    int i;
1711    static int first = 1;
1712
1713    if (!first)
1714	return;
1715    first = 0;
1716
1717    re__alnum['_'] = 1;
1718
1719    for (i = '0'; i <= '7'; i++)
1720	re__alnum[i] = re__odigit[i] = re__ddigit[i] = re__xdigit[i] = 1;
1721
1722    for (; i <= '9'; i++)
1723	re__alnum[i] = re__ddigit[i] = re__xdigit[i] = 1;
1724
1725    for (i = 'a'; i <= 'f'; i++)
1726	re__alnum[i] = re__xdigit[i] = 1;
1727    for (; i <= 'z'; i++)
1728	re__alnum[i] = 1;
1729
1730    for (i = 'A'; i <= 'F'; i++)
1731	re__alnum[i] = re__xdigit[i] = 1;
1732    for (; i <= 'Z'; i++)
1733	re__alnum[i] = 1;
1734
1735    for (i = 1; i < 32; i++)
1736	re__control[i] = 1;
1737    re__control[127] = 1;
1738    /* Don't show tabs as control characters */
1739    re__control['\t'] = 0;
1740}
1741
1742static int
1743rec_check(re_inf *inf, int count)
1744{
1745    if (inf->len + count >= inf->spc) {
1746	int spc;
1747	unsigned char *cod;
1748
1749	if ((spc = (count % 64)) != 0)
1750	    spc = 64 - spc;
1751	spc += count + inf->spc;
1752	if ((cod = realloc(inf->cod, spc)) == NULL)
1753	    return (inf->ecode = RE_ESPACE);
1754	inf->cod = cod;
1755	inf->spc = spc;
1756    }
1757
1758    return (inf->ecode);
1759}
1760
1761static int
1762rec_code(re_inf *inf, ReCode code)
1763{
1764    if (rec_check(inf, 1) == 0)
1765	inf->cod[inf->len++] = code;
1766
1767    return (inf->ecode);
1768}
1769
1770static int
1771rec_byte(re_inf *inf, int value)
1772{
1773    if (rec_check(inf, 1) == 0)
1774	inf->cod[inf->len++] = value;
1775
1776    return (inf->ecode);
1777}
1778
1779static int
1780rec_code_byte(re_inf *inf, ReCode code, int value)
1781{
1782    if (rec_check(inf, 2) == 0) {
1783	inf->cod[inf->len++] = code;
1784	inf->cod[inf->len++] = value;
1785    }
1786
1787    return (inf->ecode);
1788}
1789
1790static int
1791rec_length(re_inf *inf, int length)
1792{
1793    int lo, hi, two;
1794
1795    if (length >= 16384)
1796	return (inf->ecode = RE_ESPACE);
1797
1798    lo = length & 0xff;
1799    hi = length & 0xff00;
1800    two = ((length > 0x7f) != 0) + 1;
1801    if (two == 2) {
1802	hi <<= 1;
1803	hi |= (lo & 0x80) != 0;
1804	lo |= 0x80;
1805    }
1806
1807    if (rec_check(inf, two) == 0) {
1808	inf->cod[inf->len++] = lo;
1809	if (two == 2)
1810	    inf->cod[inf->len++] = hi >> 8;
1811    }
1812
1813    return (inf->ecode);
1814}
1815
1816static int
1817rec_byte_byte(re_inf *inf, int value0, int value1)
1818{
1819    if (rec_check(inf, 2) == 0) {
1820	inf->cod[inf->len++] = value0;
1821	inf->cod[inf->len++] = value1;
1822    }
1823
1824    return (inf->ecode);
1825}
1826
1827static int
1828rec_code_byte_byte(re_inf *inf, ReCode code, int value0, int value1)
1829{
1830    if (rec_check(inf, 3) == 0) {
1831	inf->cod[inf->len++] = code;
1832	inf->cod[inf->len++] = value0;
1833	inf->cod[inf->len++] = value1;
1834    }
1835
1836    return (inf->ecode);
1837}
1838
1839static int
1840rec_build_alt(re_inf *inf, rec_alt *alt)
1841{
1842    int offset, value, bas = inf->bas + 1;
1843
1844    if (alt) {
1845	if (alt->next) {
1846	    if (rec_inc_spc(inf))
1847		return (inf->ecode);
1848
1849	    /* A real a list of alternatives */
1850	    rec_code(inf, Re_Alt);
1851
1852	    offset = inf->len;		/* Remember current offset */
1853	    rec_byte_byte(inf, 0, 0);	/* Reserve two bytes for retry address */
1854	    while (alt && inf->ecode == 0) {
1855		if (rec_build_pat(inf, alt->pat))
1856		    break;
1857		alt = alt->next;
1858		if (alt && inf->ecode == 0) {
1859		    /* Handle (hyper)complex repetitions */
1860		    if (inf->bas != bas) {
1861			/* Duplicate patterns up to end of expression */
1862			rec_build_pat(inf, inf->apat);
1863			/* Restore engine state for next alternative(s) */
1864			rec_alt_spc(inf, bas - 1);
1865		    }
1866
1867		    /* If the jump would be so long */
1868		    if ((value = inf->len - offset) >= 16384) {
1869			inf->ecode = RE_ESPACE;
1870			break;
1871		    }
1872		    inf->cod[offset] = value & 0xff;
1873		    inf->cod[offset + 1] = (value & 0xff00) >> 8;
1874
1875		    rec_code(inf, Re_AltNext);
1876		    offset = inf->len;
1877		    rec_byte_byte(inf, 0, 0);
1878		}
1879	    }
1880	    if (inf->ecode == 0) {
1881		/* Handle (hyper)complex repetitions */
1882		if (inf->bas != bas) {
1883		    /* Duplicate patterns up to end of expression */
1884		    rec_build_pat(inf, inf->apat);
1885		    /* Restore engine state for next alternative(s) */
1886		    rec_alt_spc(inf, bas - 1);
1887		}
1888
1889		/* If the jump would be so long */
1890		if ((value = inf->len - offset) >= 16384)
1891		    return (inf->ecode = RE_ESPACE);
1892		inf->cod[offset] = value & 0xff;
1893		inf->cod[offset + 1] = (value & 0xff00) >> 8;
1894		/* Last jump is here */
1895		rec_code(inf, Re_AltDone);
1896	    }
1897	    rec_dec_spc(inf);
1898	}
1899	else
1900	    /* Single alternative */
1901	    rec_build_pat(inf, alt->pat);
1902    }
1903
1904    return (inf->ecode);
1905}
1906
1907static int
1908rec_build_pat(re_inf *inf, rec_pat *pat)
1909{
1910    rec_pat *apat;
1911    int length, offset = 0, distance, jump = 0, bas = 0;
1912
1913    while (pat && inf->ecode == 0) {
1914	if (pat->rep) {
1915	    bas = inf->bas;
1916	    if (pat->type == Rep_Group && !inf->par && rec_code(inf, Re_Open))
1917		return (inf->ecode);
1918	    if (rec_inc_spc(inf))
1919		return (inf->ecode);
1920	    offset = inf->len;
1921	    if (rec_build_rep(inf, pat->rep))
1922		break;
1923	    /* Reserve space to jump after repetition done */
1924	    jump = inf->len;
1925	    rec_byte_byte(inf, 0, 0);
1926	}
1927	switch (pat->type) {
1928	    case Rep_AnyAnyTimes:
1929	    case Rep_AnyMaybe:
1930	    case Rep_AnyAtLeast:
1931		if (rec_add_spc(inf, pat->type == Rep_AnyMaybe))
1932		    return (inf->ecode);
1933		if (rec_code(inf, (ReCode)pat->type) == 0 &&
1934		    rec_byte(inf, inf->bas - 1) == 0 &&
1935		    rec_byte(inf, inf->ref - 1) == 0)
1936		    rec_off_spc(inf);
1937		break;
1938	    case Rep_Literal:
1939	    case Rep_LiteralNot:
1940	    case Rep_SearchLiteral:
1941		rec_code_byte(inf, (ReCode)pat->type, pat->data.chr);
1942		break;
1943	    case Rep_CaseLiteral:
1944	    case Rep_CaseLiteralNot:
1945	    case Rep_SearchCaseLiteral:
1946		rec_code_byte_byte(inf, (ReCode)pat->type,
1947				   pat->data.cse.lower, pat->data.cse.upper);
1948		break;
1949	    case Rep_Range:
1950	    case Rep_RangeNot:
1951		if (rec_code(inf, (ReCode)pat->type) == 0)
1952		    rec_build_rng(inf, pat->data.rng);
1953		break;
1954	    case Rep_String:
1955	    case Rep_SearchString:
1956	    case Rep_CaseString:
1957	    case Rep_SearchCaseString:
1958		rec_code(inf, (ReCode)pat->type);
1959		length = strlen((char*)pat->data.str);
1960		if (rec_length(inf, length) == 0 && rec_check(inf, length) == 0) {
1961		    memcpy(inf->cod + inf->len, pat->data.str, length);
1962		    inf->len += length;
1963		}
1964		break;
1965	    case Rep_Any:
1966	    case Rep_AnyEatAnyTimes:
1967	    case Rep_AnyEatMaybe:
1968	    case Rep_AnyEatAtLeast:
1969	    case Rep_Odigit:
1970	    case Rep_OdigitNot:
1971	    case Rep_Digit:
1972	    case Rep_DigitNot:
1973	    case Rep_Xdigit:
1974	    case Rep_XdigitNot:
1975	    case Rep_Space:
1976	    case Rep_SpaceNot:
1977	    case Rep_Tab:
1978	    case Rep_Newline:
1979	    case Rep_Lower:
1980	    case Rep_Upper:
1981	    case Rep_Alnum:
1982	    case Rep_AlnumNot:
1983	    case Rep_Control:
1984	    case Rep_ControlNot:
1985	    case Rep_Bol:
1986	    case Rep_Eol:
1987	    case Rep_Bow:
1988	    case Rep_Eow:
1989		rec_code(inf, (ReCode)pat->type);
1990		break;
1991	    case Rep_Backref:
1992		rec_code_byte(inf, Re_Backref, pat->data.chr);
1993		break;
1994	    case Rep_Group:
1995		if (pat->rep == NULL && !inf->par && rec_code(inf, Re_Open))
1996		    break;
1997		apat = inf->apat;
1998		inf->apat = pat->next;
1999		rec_build_grp(inf, pat->data.grp);
2000		inf->apat = apat;
2001		break;
2002	    case Rep_StringList:
2003		rec_build_stl(inf, pat->data.stl);
2004		break;
2005	}
2006	if (pat->rep) {
2007#if 0
2008	    if (rec_dec_spc(inf))
2009		return (inf->ecode);
2010#else
2011	    if (rec_rep_spc(inf, bas))
2012		return (inf->ecode);
2013#endif
2014	    distance = inf->len - offset;
2015	    if (distance > 255) {
2016		if (rec_code(inf, Re_RepLongJump) ||
2017		    rec_byte(inf, inf->bas) ||
2018		    rec_byte(inf, distance & 0xff) ||
2019		    rec_byte(inf, (distance & 0xff00) >> 8))
2020		break;
2021	    }
2022	    else if (rec_code(inf, Re_RepJump) ||
2023		     rec_byte(inf, inf->bas) ||
2024		     rec_byte(inf, distance))
2025		break;
2026	    distance = inf->len - offset;
2027	    inf->cod[jump] = distance & 0xff;
2028	    inf->cod[jump + 1] = (distance & 0xff00) >> 8;
2029	}
2030	pat = pat->next;
2031    }
2032
2033    return (inf->ecode);
2034}
2035
2036static int
2037rec_build_rng(re_inf *inf, rec_rng *rng)
2038{
2039    if (rec_check(inf, sizeof(rng->range)) == 0) {
2040	memcpy(inf->cod + inf->len, rng->range, sizeof(rng->range));
2041	inf->len += sizeof(rng->range);
2042    }
2043
2044    return (inf->ecode);
2045}
2046
2047static int
2048rec_build_grp(re_inf *inf, rec_grp *grp)
2049{
2050    int par = inf->par;
2051
2052    if (!(inf->flags & RE_NOSUB)) {
2053	++inf->par;
2054	if (par == 0)
2055	    ++inf->ref;
2056	if (rec_build_alt(inf, grp->alt) == 0) {
2057	    if (par == 0) {
2058		if (grp->comp)
2059		    rec_code_byte(inf, Re_Update, inf->ref - 1);
2060		else
2061		    rec_code(inf, Re_Close);
2062	    }
2063	}
2064	--inf->par;
2065    }
2066    else
2067	rec_build_alt(inf, grp->alt);
2068
2069    return (inf->ecode);
2070}
2071
2072static int
2073rec_build_stl(re_inf *inf, rec_stl *stl)
2074{
2075    int i, len, rlen;
2076    ReCode code;
2077
2078    /* Calculate jump distance information */
2079    rlen = stl->tlen + stl->nstrs + 4;
2080    /* + code + nstrs + place-offset + data-length */
2081
2082    if (stl->nstrs >= LARGE_STL_COUNT) {
2083	rlen += 511;		/* Don't write number of strings */
2084	code = stl->type == Rep_StringList ?
2085		Re_LargeStringList : Re_LargeCaseStringList;
2086    }
2087    else
2088	code = (ReCode)stl->type;
2089
2090    if (rlen >= 16386)
2091	return (inf->ecode = RE_ESPACE);
2092    if (rec_check(inf, rlen) ||
2093	rec_code(inf, code))
2094	return (inf->ecode);
2095
2096    /* Space is allocated, just write the data */
2097    if (stl->nstrs < LARGE_STL_COUNT)
2098	inf->cod[inf->len++] = stl->nstrs;
2099
2100    inf->cod[inf->len++] = rlen & 0xff;
2101    inf->cod[inf->len++] = (rlen & 0xff00) >> 8;
2102
2103    if (stl->nstrs < LARGE_STL_COUNT) {
2104	for (i = 0; i < stl->nstrs; i++)
2105	    inf->cod[inf->len++] = stl->lens[i];
2106	for (i = 0; i < stl->nstrs; i++) {
2107	    len = stl->lens[i];
2108	    if (len > 2) {
2109		memcpy(inf->cod + inf->len, stl->strs[i], len);
2110		inf->len += len;
2111	    }
2112	    else {
2113		if (len == 1)
2114		    inf->cod[inf->len++] = (long)stl->strs[i];
2115		else {
2116		    inf->cod[inf->len++] = (long)stl->strs[i] & 0xff;
2117		    inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8;
2118		}
2119	    }
2120	}
2121    }
2122    else {
2123	/* The string length goes before the string itself */
2124	int j, chl, chu;
2125
2126	/* Fill everything with an invalid jump address */
2127	memset(inf->cod + inf->len, 0xff, 512);
2128	for (i = len = 0, j = -1; i < stl->nstrs; i++) {
2129	    chl = stl->lens[i] > 2 ? stl->strs[i][0] : (long)stl->strs[i] & 0xff;
2130	    if (chl != j) {
2131		inf->cod[inf->len + (chl << 1)] = len & 0xff;
2132		inf->cod[inf->len + (chl << 1) + 1] = (len & 0xff00) >> 8;
2133		if (code == Re_LargeCaseStringList) {
2134		    chu = stl->lens[i] > 2 ?
2135			stl->strs[i][1] : ((long)(stl->strs[i]) & 0xff00) >> 8;
2136		    inf->cod[inf->len + (chu << 1)] = len & 0xff;
2137		    inf->cod[inf->len + (chu << 1) + 1] = (len & 0xff00) >> 8;
2138		}
2139		j = chl;
2140	    }
2141	    len += stl->lens[i] + 1;
2142	}
2143	inf->len += 512;
2144
2145	for (i = 0; i < stl->nstrs; i++) {
2146	    len = stl->lens[i];
2147	    inf->cod[inf->len++] = len;
2148	    if (len > 2) {
2149		memcpy(inf->cod + inf->len, stl->strs[i], len);
2150		inf->len += len;
2151	    }
2152	    else {
2153		if (len == 1)
2154		    inf->cod[inf->len++] = (long)stl->strs[i];
2155		else {
2156		    inf->cod[inf->len++] = (long)stl->strs[i] & 0xff;
2157		    inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8;
2158		}
2159	    }
2160	}
2161    }
2162
2163    return (inf->ecode);
2164}
2165
2166static int
2167rec_build_rep(re_inf *inf, rec_rep *rep)
2168{
2169    if (rep) {
2170	switch (rep->type) {
2171	    case Rer_AnyTimes:
2172	    case Rer_AtLeast:
2173	    case Rer_Maybe:
2174		rec_code(inf, (ReCode)rep->type);
2175		break;
2176	    case Rer_Exact:
2177		if (rec_code(inf, Re_Exact) == 0)
2178		    rec_byte(inf, rep->mine);
2179		break;
2180	    case Rer_Min:
2181		if (rec_code(inf, Re_Min) == 0)
2182		    rec_byte(inf, rep->mine);
2183		break;
2184	    case Rer_Max:
2185		if (rec_code(inf, Re_Max) == 0)
2186		    rec_byte(inf, rep->maxc);
2187		break;
2188	    case Rer_MinMax:
2189		if (rec_code(inf, Re_MinMax) == 0 &&
2190		    rec_byte(inf, rep->mine) == 0)
2191		    rec_byte(inf, rep->maxc);
2192		break;
2193	}
2194	/* It is incremented in rec_build_pat */
2195	rec_byte(inf, inf->bas - 1);
2196    }
2197
2198    return (inf->ecode);
2199}
2200
2201static int
2202rec_inc_spc(re_inf *inf)
2203{
2204    if (++inf->bas >= MAX_DEPTH)
2205	return (inf->ecode = RE_ESPACE);
2206
2207    return (inf->ecode);
2208}
2209
2210static int
2211rec_dec_spc(re_inf *inf)
2212{
2213    if (--inf->bas < 0)
2214	return (inf->ecode = RE_ASSERT);
2215
2216    return (inf->ecode);
2217}
2218
2219static int
2220rec_add_spc(re_inf *inf, int maybe)
2221{
2222    if (++inf->bas >= MAX_DEPTH)
2223	return (inf->ecode = RE_ESPACE);
2224    inf->sp[inf->bas] = maybe + 1;
2225
2226    return (inf->ecode);
2227}
2228
2229/* Could be joined with rec_rep_spc, code almost identical */
2230static int
2231rec_alt_spc(re_inf *inf, int top)
2232{
2233    int distance, i, bas = inf->bas;
2234
2235    while ((inf->bas > top) && inf->sp[inf->bas]) {
2236	/* Jump to this repetition for cleanup */
2237	distance = inf->len - inf->sr[inf->bas];
2238
2239	/* This will generate a jump to a jump decision opcode */
2240	inf->sj[inf->bas] = inf->len;
2241
2242	if (distance > 255) {
2243	    if (rec_code(inf, Re_RepLongJump) ||
2244		rec_byte(inf, inf->bas - 1) ||
2245		rec_byte(inf, distance & 0xff) ||
2246		rec_byte(inf, (distance & 0xff00) >> 8))
2247	    break;
2248	}
2249	else if (rec_code(inf, Re_RepJump) ||
2250		 rec_byte(inf, inf->bas - 1) ||
2251		 rec_byte(inf, distance))
2252 	    break;
2253
2254	/* Top of stack value before repetition, or end condition value */
2255	--inf->bas;
2256    }
2257
2258    i = inf->bas + 1;
2259
2260    if (inf->ecode == 0 && i <= bas && inf->sp[i]) {
2261	/*  Only the repetition at the bottom jump to code after testing
2262	 * all possibilities */
2263	distance = inf->len - inf->sr[i];
2264	inf->cod[inf->sr[i] + 3] = distance & 0xff;
2265	inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2266
2267	/* The bottom jump is here */
2268	if (rec_code(inf, inf->sp[i] == 1 ? Re_DoneIf : Re_MaybeDone))
2269	   return (inf->ecode);
2270
2271	/*  Generate jumps to the previous special repetition */
2272	for (++i; i <= bas; i++) {
2273	    if (inf->sp[i]) {
2274		distance = inf->sj[i] - inf->sr[i];
2275		inf->cod[inf->sr[i] + 3] = distance & 0xff;
2276		inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2277	    }
2278	}
2279    }
2280
2281    return (inf->ecode);
2282}
2283
2284static int
2285rec_rep_spc(re_inf *inf, int top)
2286{
2287    int distance, i, bas = inf->bas;
2288
2289    while (inf->bas > top) {
2290	if (inf->sp[inf->bas]) {
2291	    /* Jump to this repetition for cleanup */
2292	    distance = inf->len - inf->sr[inf->bas];
2293
2294	    /* This will generate a jump to a jump decision opcode */
2295	    inf->sj[inf->bas] = inf->len;
2296
2297	    if (distance > 255) {
2298		if (rec_code(inf, Re_RepLongJump) ||
2299		    rec_byte(inf, inf->bas - 1) ||
2300		    rec_byte(inf, distance & 0xff) ||
2301		    rec_byte(inf, (distance & 0xff00) >> 8))
2302		break;
2303	    }
2304	    else if (rec_code(inf, Re_RepJump) ||
2305		     rec_byte(inf, inf->bas - 1) ||
2306		     rec_byte(inf, distance))
2307		break;
2308	}
2309
2310	/* Top of stack value before repetition, or end condition value */
2311	--inf->bas;
2312    }
2313
2314    /* Find first special repetition offset. XXX This should be a noop */
2315    for (i = 0; i < bas; i++)
2316	if (inf->sp[i])
2317	    break;
2318
2319    if (inf->ecode == 0 && i <= bas && inf->sp[i]) {
2320	/*  Only the repetition at the bottom jump to code after testing
2321	 * all possibilities */
2322	distance = inf->len - inf->sr[i];
2323	inf->cod[inf->sr[i] + 3] = distance & 0xff;
2324	inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2325
2326	/*  Generate jumps to the previous special repetition */
2327	for (++i; i <= bas; i++) {
2328	    if (inf->sp[i]) {
2329		distance = inf->sj[i] - inf->sr[i];
2330		inf->cod[inf->sr[i] + 3] = distance & 0xff;
2331		inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2332	    }
2333	}
2334    }
2335
2336    return (inf->ecode);
2337}
2338
2339static int
2340rec_off_spc(re_inf *inf)
2341{
2342    /* The jump address before the three bytes instruction */
2343    inf->sr[inf->bas] = inf->len - 3;
2344    /*  Don't know yet where to go after done with the special
2345     * repetition, just reserve two bytes for the jump address. */
2346    return (rec_byte_byte(inf, 0, 0));
2347}
2348
2349#ifdef DEBUG
2350static void
2351redump(re_cod *code)
2352{
2353    int i, j, k;
2354    unsigned char *cod = code->cod, *stl;
2355
2356    if (cod[0] & RE_NOSUB)
2357	printf("Nosub\n");
2358    if (cod[0] & RE_NEWLINE)
2359	printf("Newline\n");
2360    ++cod;
2361    if (cod[0] != 0xff)
2362	printf("%d backrefs\n", cod[0] + 1);
2363    ++cod;
2364    for (;;) {
2365	switch (*cod++) {
2366	    case Re_Open:
2367		printf("Open");
2368		break;
2369	    case Re_Close:
2370		printf("Close");
2371		break;
2372	    case Re_Update:
2373		printf("Update (%d)", (int)*cod++);
2374		break;
2375	    case Re_Alt:
2376		printf("Alt");
2377		i = cod[0] | cod[1];
2378		cod += 2;
2379		printf(" %d", i);
2380		break;
2381	    case Re_AltNext:
2382		printf("Alt-next");
2383		i = cod[0] | cod[1];
2384		cod += 2;
2385		printf(" %d", i);
2386		break;
2387	    case Re_AltDone:
2388		printf("Alt-done");
2389		break;
2390	    case Re_AnyTimes:
2391		printf("-> Anytimes %d", (int)*cod++);
2392		i = cod[0] | (cod[1] << 8);
2393		cod += 2;
2394		printf(" /%d", i);
2395		break;
2396	    case Re_AnyEatAnyTimes:
2397		printf("Any-eat-anytimes");
2398		break;
2399	    case Re_AnyAnyTimes:
2400		printf("-> Any-anytimes %d", (int)*cod++);
2401		printf(" (%d)", (int)*cod++);
2402		i = cod[0] | (cod[1] << 8);
2403		cod += 2;
2404		printf(" /%d", i);
2405		break;
2406	    case Re_AnyEatMaybe:
2407		printf("Any-eat-maybe");
2408		break;
2409	    case Re_AnyMaybe:
2410		printf("-> Any-maybe %d", (int)*cod++);
2411		printf(" (%d)", (int)*cod++);
2412		i = cod[0] | (cod[1] << 8);
2413		cod += 2;
2414		printf(" /%d", i);
2415		break;
2416	    case Re_AnyAtLeast:
2417		printf("-> Any-atleast %d", (int)*cod++);
2418		printf(" (%d)", (int)*cod++);
2419		i = cod[0] | (cod[1] << 8);
2420		cod += 2;
2421		printf(" /%d", i);
2422		break;
2423	    case Re_AnyEatAtLeast:
2424		printf("Any-eat-atleast");
2425		break;
2426	    case Re_Maybe:
2427		printf("-> Maybe %d", (int)*cod++);
2428		i = cod[0] | (cod[1] << 8);
2429		cod += 2;
2430		printf(" /%d", i);
2431		break;
2432	    case Re_AtLeast:
2433		printf("-> Atleast %d", (int)*cod++);
2434		i = cod[0] | (cod[1] << 8);
2435		cod += 2;
2436		printf(" /%d", i);
2437		break;
2438	    case Re_Exact:
2439		printf("-> Exact ");
2440		i = *cod++;
2441		printf("%d", i);
2442		printf(" %d", (int)*cod++);
2443		i = cod[0] | (cod[1] << 8);
2444		cod += 2;
2445		printf(" /%d", i);
2446		break;
2447	    case Re_Min:
2448		printf("-> Min ");
2449		i = *cod++;
2450		printf("%d", i);
2451		printf(" %d", (int)*cod++);
2452		i = cod[0] | (cod[1] << 8);
2453		cod += 2;
2454		printf(" /%d", i);
2455		break;
2456	    case Re_Max:
2457		printf("-> Max ");
2458		i = *cod++;
2459		printf("%d", i);
2460		printf(" %d", (int)*cod++);
2461		i = cod[0] | (cod[1] << 8);
2462		cod += 2;
2463		printf(" /%d", i);
2464		break;
2465	    case Re_MinMax:
2466		printf("-> Min-max ");
2467		i = *cod++;
2468		printf("%d ", i);
2469		i = *cod++;
2470		printf("%d", i);
2471		printf(" %d", (int)*cod++);
2472		i = cod[0] | (cod[1] << 8);
2473		cod += 2;
2474		printf(" /%d", i);
2475		break;
2476	    case Re_RepJump:
2477		printf("<- Rep-jump %d ", (int)*cod++);
2478		i = *cod++;
2479		printf("%d", i);
2480		break;
2481	    case Re_RepLongJump:
2482		printf("<- Rep-long-jump %d ", (int)*cod++);
2483		i = cod[0] | (cod[1] << 8);
2484		printf("%d", i);
2485		break;
2486	    case Re_Any:
2487		printf("Any");
2488		break;
2489	    case Re_Odigit:
2490		printf("Odigit");
2491		break;
2492	    case Re_OdigitNot:
2493		printf("Odigit-not");
2494		break;
2495	    case Re_Digit:
2496		printf("Digit");
2497		break;
2498	    case Re_DigitNot:
2499		printf("Digit-not");
2500		break;
2501	    case Re_Xdigit:
2502		printf("Xdigit");
2503		break;
2504	    case Re_XdigitNot:
2505		printf("Xdigit-not");
2506		break;
2507	    case Re_Space:
2508		printf("Space");
2509		break;
2510	    case Re_SpaceNot:
2511		printf("Space-not");
2512		break;
2513	    case Re_Tab:
2514		printf("Tab");
2515		break;
2516	    case Re_Newline:
2517		printf("Newline");
2518		break;
2519	    case Re_Lower:
2520		printf("Lower");
2521		break;
2522	    case Re_Upper:
2523		printf("Upper");
2524		break;
2525	    case Re_Alnum:
2526		printf("Alnum");
2527		break;
2528	    case Re_AlnumNot:
2529		printf("Alnum-not");
2530		break;
2531	    case Re_Control:
2532		printf("Control");
2533		break;
2534	    case Re_ControlNot:
2535		printf("Control-not");
2536		break;
2537	    case Re_Bol:
2538		printf("Bol");
2539		break;
2540	    case Re_Eol:
2541		printf("Eol");
2542		break;
2543	    case Re_Bow:
2544		printf("Bow");
2545		break;
2546	    case Re_Eow:
2547		printf("Eow");
2548		break;
2549	    case Re_Range:
2550		printf("Range ");
2551		goto range;
2552	    case Re_RangeNot:
2553		printf("Range-not ");
2554range:
2555		for (i = 0; i < 256; i += 32) {
2556		    for (j = k = 0; j < 32; j++)
2557			k |= (*cod++ & 1) << (31 - j);
2558		    printf("%x ", k);
2559		}
2560		break;
2561	    case Re_Literal:
2562		printf("Literal %c", *cod++);
2563		break;
2564	    case Re_LiteralNot:
2565		printf("Literal-not %c", *cod++);
2566		break;
2567	    case Re_SearchLiteral:
2568		printf("Search-literal %c", *cod++);
2569		break;
2570	    case Re_CaseLiteral:
2571		printf("Case-literal %c", *cod++);
2572		putchar(*cod++);
2573		break;
2574	    case Re_CaseLiteralNot:
2575		printf("Case-literal-not %c", *cod++);
2576		putchar(*cod++);
2577		break;
2578	    case Re_SearchCaseLiteral:
2579		printf("Search-case-literal %c", *cod++);
2580		putchar(*cod++);
2581		break;
2582	    case Re_String:
2583		printf("String ");
2584		goto string;
2585	    case Re_SearchString:
2586		printf("Search-string ");
2587		goto string;
2588	    case Re_CaseString:
2589		printf("Case-string ");
2590		goto string;
2591	    case Re_SearchCaseString:
2592		printf("Search-case-string ");
2593string:
2594		i = *cod++;
2595		if (i & 0x80)
2596		    i = (i & 0x7f) | (*cod++ << 7);
2597		for (j = 0; j < i; j++)
2598		    putchar(*cod++);
2599		break;
2600	    case Re_StringList:
2601		printf("String-list");
2602		goto string_list;
2603	    case Re_CaseStringList:
2604		printf("Case-string-list");
2605string_list:
2606		j = *cod++;
2607		cod += 2;
2608		stl = cod + j;
2609		for (i = 0; i < j; i++) {
2610		    k = *cod++;
2611		    putchar(i ? ',' : ' ');
2612		    fwrite(stl, k, 1, stdout);
2613		    stl += k;
2614		}
2615		cod = stl;
2616		break;
2617	    case Re_LargeStringList:
2618		printf("Large-string-list");
2619large_string_list:
2620		i = cod[0] | (cod[1] << 8);
2621		stl = cod + i - 1;
2622		for (i = 0, cod += 514; cod < stl; i++) {
2623		    k = *cod++;
2624		    putchar(i ? ',' : ' ');
2625		    fwrite(cod, k, 1, stdout);
2626		    cod += k;
2627		}
2628		cod = stl;
2629		break;
2630	    case Re_LargeCaseStringList:
2631		printf("Large-case-string-list");
2632		goto large_string_list;
2633	    case Re_Backref:
2634		printf("Backref %d", (int)*cod++);
2635		break;
2636	    case Re_DoneIf:
2637		printf("Done-if");
2638		break;
2639	    case Re_MaybeDone:
2640		printf("Maybe-done");
2641		break;
2642	    case Re_Done:
2643		printf("Done\n");
2644		return;
2645	}
2646	putchar('\n');
2647    }
2648}
2649#endif
2650