Home | History | Annotate | Line # | Download | only in regex
engine.c revision 1.23.2.1
      1  1.23.2.1      yamt /*	$NetBSD: engine.c,v 1.23.2.1 2012/04/17 00:05:22 yamt Exp $	*/
      2       1.5       cgd 
      3       1.4       cgd /*-
      4       1.4       cgd  * Copyright (c) 1992, 1993, 1994
      5       1.4       cgd  *	The Regents of the University of California.  All rights reserved.
      6      1.15       agc  *
      7      1.15       agc  * This code is derived from software contributed to Berkeley by
      8      1.15       agc  * Henry Spencer.
      9      1.15       agc  *
     10      1.15       agc  * Redistribution and use in source and binary forms, with or without
     11      1.15       agc  * modification, are permitted provided that the following conditions
     12      1.15       agc  * are met:
     13      1.15       agc  * 1. Redistributions of source code must retain the above copyright
     14      1.15       agc  *    notice, this list of conditions and the following disclaimer.
     15      1.15       agc  * 2. Redistributions in binary form must reproduce the above copyright
     16      1.15       agc  *    notice, this list of conditions and the following disclaimer in the
     17      1.15       agc  *    documentation and/or other materials provided with the distribution.
     18      1.15       agc  * 3. Neither the name of the University nor the names of its contributors
     19      1.15       agc  *    may be used to endorse or promote products derived from this software
     20      1.15       agc  *    without specific prior written permission.
     21      1.15       agc  *
     22      1.15       agc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23      1.15       agc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24      1.15       agc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25      1.15       agc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26      1.15       agc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27      1.15       agc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28      1.15       agc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29      1.15       agc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30      1.15       agc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31      1.15       agc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32      1.15       agc  * SUCH DAMAGE.
     33      1.15       agc  *
     34      1.15       agc  *	@(#)engine.c	8.5 (Berkeley) 3/20/94
     35      1.15       agc  */
     36      1.15       agc 
     37      1.15       agc /*-
     38      1.15       agc  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
     39       1.4       cgd  *
     40       1.4       cgd  * This code is derived from software contributed to Berkeley by
     41       1.4       cgd  * Henry Spencer.
     42       1.4       cgd  *
     43       1.4       cgd  * Redistribution and use in source and binary forms, with or without
     44       1.4       cgd  * modification, are permitted provided that the following conditions
     45       1.4       cgd  * are met:
     46       1.4       cgd  * 1. Redistributions of source code must retain the above copyright
     47       1.4       cgd  *    notice, this list of conditions and the following disclaimer.
     48       1.4       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     49       1.4       cgd  *    notice, this list of conditions and the following disclaimer in the
     50       1.4       cgd  *    documentation and/or other materials provided with the distribution.
     51       1.4       cgd  * 3. All advertising materials mentioning features or use of this software
     52       1.4       cgd  *    must display the following acknowledgement:
     53       1.4       cgd  *	This product includes software developed by the University of
     54       1.4       cgd  *	California, Berkeley and its contributors.
     55       1.4       cgd  * 4. Neither the name of the University nor the names of its contributors
     56       1.4       cgd  *    may be used to endorse or promote products derived from this software
     57       1.4       cgd  *    without specific prior written permission.
     58       1.4       cgd  *
     59       1.4       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     60       1.4       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     61       1.4       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     62       1.4       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     63       1.4       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     64       1.4       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     65       1.4       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     66       1.4       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     67       1.4       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     68       1.4       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     69       1.4       cgd  * SUCH DAMAGE.
     70       1.4       cgd  *
     71       1.4       cgd  *	@(#)engine.c	8.5 (Berkeley) 3/20/94
     72       1.4       cgd  */
     73       1.4       cgd 
     74       1.1       jtc /*
     75       1.1       jtc  * The matching engine and friends.  This file is #included by regexec.c
     76       1.1       jtc  * after suitable #defines of a variety of macros used herein, so that
     77       1.1       jtc  * different state representations can be used without duplicating masses
     78       1.1       jtc  * of code.
     79       1.1       jtc  */
     80       1.1       jtc 
     81       1.1       jtc #ifdef SNAMES
     82       1.1       jtc #define	matcher	smatcher
     83       1.1       jtc #define	fast	sfast
     84       1.1       jtc #define	slow	sslow
     85       1.1       jtc #define	dissect	sdissect
     86       1.1       jtc #define	backref	sbackref
     87       1.1       jtc #define	step	sstep
     88       1.1       jtc #define	print	sprint
     89       1.1       jtc #define	at	sat
     90       1.1       jtc #define	match	smat
     91      1.14  christos #define	nope	snope
     92       1.1       jtc #endif
     93       1.1       jtc #ifdef LNAMES
     94       1.1       jtc #define	matcher	lmatcher
     95       1.1       jtc #define	fast	lfast
     96       1.1       jtc #define	slow	lslow
     97       1.1       jtc #define	dissect	ldissect
     98       1.1       jtc #define	backref	lbackref
     99       1.1       jtc #define	step	lstep
    100       1.1       jtc #define	print	lprint
    101       1.1       jtc #define	at	lat
    102       1.1       jtc #define	match	lmat
    103      1.14  christos #define	nope	lnope
    104       1.1       jtc #endif
    105       1.1       jtc 
    106       1.1       jtc /* another structure passed up and down to avoid zillions of parameters */
    107       1.1       jtc struct match {
    108       1.1       jtc 	struct re_guts *g;
    109       1.1       jtc 	int eflags;
    110       1.1       jtc 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
    111      1.19      yamt 	const char *offp;	/* offsets work from here */
    112      1.19      yamt 	const char *beginp;	/* start of string -- virtual NUL precedes */
    113      1.19      yamt 	const char *endp;	/* end of string -- virtual NUL here */
    114      1.19      yamt 	const char *coldp;	/* can be no match starting before here */
    115      1.19      yamt 	const char **lastpos;	/* [nplus+1] */
    116       1.1       jtc 	STATEVARS;
    117       1.1       jtc 	states st;		/* current states */
    118       1.1       jtc 	states fresh;		/* states for a fresh start */
    119       1.1       jtc 	states tmp;		/* temporary */
    120       1.1       jtc 	states empty;		/* empty set of states */
    121       1.1       jtc };
    122       1.1       jtc 
    123       1.4       cgd /* ========= begin header generated by ./mkh ========= */
    124       1.4       cgd #ifdef __cplusplus
    125       1.4       cgd extern "C" {
    126       1.4       cgd #endif
    127       1.4       cgd 
    128       1.4       cgd /* === engine.c === */
    129      1.20  junyoung static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
    130      1.20  junyoung static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
    131      1.20  junyoung static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev);
    132      1.20  junyoung static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
    133      1.20  junyoung static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
    134      1.20  junyoung static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
    135       1.4       cgd #define	BOL	(OUT+1)
    136       1.4       cgd #define	EOL	(BOL+1)
    137       1.4       cgd #define	BOLEOL	(BOL+2)
    138       1.4       cgd #define	NOTHING	(BOL+3)
    139       1.4       cgd #define	BOW	(BOL+4)
    140       1.4       cgd #define	EOW	(BOL+5)
    141       1.4       cgd #define	CODEMAX	(BOL+5)		/* highest code used */
    142       1.4       cgd #define	NONCHAR(c)	((c) > CHAR_MAX)
    143       1.4       cgd #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    144       1.4       cgd #ifdef REDEBUG
    145      1.20  junyoung static void print(struct match *m, char *caption, states st, int ch, FILE *d);
    146       1.4       cgd #endif
    147       1.4       cgd #ifdef REDEBUG
    148      1.20  junyoung static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
    149       1.4       cgd #endif
    150       1.4       cgd #ifdef REDEBUG
    151      1.20  junyoung static char *pchar(int ch);
    152       1.4       cgd #endif
    153       1.4       cgd 
    154       1.4       cgd #ifdef __cplusplus
    155       1.4       cgd }
    156       1.4       cgd #endif
    157       1.4       cgd /* ========= end header generated by ./mkh ========= */
    158       1.1       jtc 
    159       1.1       jtc #ifdef REDEBUG
    160       1.1       jtc #define	SP(t, s, c)	print(m, t, s, c, stdout)
    161       1.1       jtc #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
    162       1.1       jtc #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
    163      1.14  christos static int nope = 0;
    164       1.1       jtc #else
    165       1.1       jtc #define	SP(t, s, c)	/* nothing */
    166       1.1       jtc #define	AT(t, p1, p2, s1, s2)	/* nothing */
    167       1.1       jtc #define	NOTE(s)	/* nothing */
    168       1.1       jtc #endif
    169       1.1       jtc 
    170       1.1       jtc /*
    171       1.1       jtc  - matcher - the actual matching engine
    172       1.8     perry  == static int matcher(struct re_guts *g, char *string, \
    173       1.1       jtc  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
    174       1.1       jtc  */
    175       1.1       jtc static int			/* 0 success, REG_NOMATCH failure */
    176      1.21  junyoung matcher(
    177      1.21  junyoung     struct re_guts *g,
    178      1.21  junyoung     const char *string,
    179      1.21  junyoung     size_t nmatch,
    180      1.21  junyoung     regmatch_t pmatch[],
    181      1.21  junyoung     int eflags)
    182       1.1       jtc {
    183      1.19      yamt 	const char *endp;
    184      1.22     lukem 	size_t i;
    185       1.1       jtc 	struct match mv;
    186       1.8     perry 	struct match *m = &mv;
    187      1.19      yamt 	const char *dp;
    188       1.8     perry 	const sopno gf = g->firststate+1;	/* +1 for OEND */
    189       1.8     perry 	const sopno gl = g->laststate;
    190      1.19      yamt 	const char *start;
    191      1.19      yamt 	const char *stop;
    192      1.13  christos 	int error = 0;
    193       1.1       jtc 
    194      1.12     lukem 	_DIAGASSERT(g != NULL);
    195      1.12     lukem 	_DIAGASSERT(string != NULL);
    196      1.12     lukem 	/* pmatch checked below */
    197      1.12     lukem 
    198       1.1       jtc 	/* simplify the situation where possible */
    199       1.1       jtc 	if (g->cflags&REG_NOSUB)
    200       1.1       jtc 		nmatch = 0;
    201       1.1       jtc 	if (eflags&REG_STARTEND) {
    202      1.12     lukem 		_DIAGASSERT(pmatch != NULL);
    203      1.11  christos 		start = string + (size_t)pmatch[0].rm_so;
    204      1.11  christos 		stop = string + (size_t)pmatch[0].rm_eo;
    205       1.1       jtc 	} else {
    206       1.1       jtc 		start = string;
    207       1.1       jtc 		stop = start + strlen(start);
    208       1.1       jtc 	}
    209       1.1       jtc 	if (stop < start)
    210       1.1       jtc 		return(REG_INVARG);
    211       1.1       jtc 
    212       1.1       jtc 	/* prescreening; this does wonders for this rather slow code */
    213       1.1       jtc 	if (g->must != NULL) {
    214       1.1       jtc 		for (dp = start; dp < stop; dp++)
    215      1.23  christos 			if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
    216      1.23  christos 				memcmp(dp, g->must, g->mlen) == 0)
    217       1.1       jtc 				break;
    218       1.1       jtc 		if (dp == stop)		/* we didn't find g->must */
    219       1.1       jtc 			return(REG_NOMATCH);
    220       1.1       jtc 	}
    221       1.1       jtc 
    222       1.1       jtc 	/* match struct setup */
    223       1.1       jtc 	m->g = g;
    224       1.1       jtc 	m->eflags = eflags;
    225       1.1       jtc 	m->pmatch = NULL;
    226       1.1       jtc 	m->lastpos = NULL;
    227       1.1       jtc 	m->offp = string;
    228       1.1       jtc 	m->beginp = start;
    229       1.1       jtc 	m->endp = stop;
    230       1.1       jtc 	STATESETUP(m, 4);
    231       1.1       jtc 	SETUP(m->st);
    232       1.1       jtc 	SETUP(m->fresh);
    233       1.1       jtc 	SETUP(m->tmp);
    234       1.1       jtc 	SETUP(m->empty);
    235       1.1       jtc 	CLEAR(m->empty);
    236       1.1       jtc 
    237       1.1       jtc 	/* this loop does only one repetition except for backrefs */
    238       1.1       jtc 	for (;;) {
    239       1.1       jtc 		endp = fast(m, start, stop, gf, gl);
    240       1.1       jtc 		if (endp == NULL) {		/* a miss */
    241      1.13  christos 			error = REG_NOMATCH;
    242      1.13  christos 			goto done;
    243       1.1       jtc 		}
    244       1.1       jtc 		if (nmatch == 0 && !g->backrefs)
    245       1.1       jtc 			break;		/* no further info needed */
    246       1.1       jtc 
    247       1.1       jtc 		/* where? */
    248       1.1       jtc 		assert(m->coldp != NULL);
    249       1.1       jtc 		for (;;) {
    250       1.1       jtc 			NOTE("finding start");
    251       1.1       jtc 			endp = slow(m, m->coldp, stop, gf, gl);
    252       1.1       jtc 			if (endp != NULL)
    253       1.1       jtc 				break;
    254       1.1       jtc 			assert(m->coldp < m->endp);
    255       1.1       jtc 			m->coldp++;
    256       1.1       jtc 		}
    257       1.1       jtc 		if (nmatch == 1 && !g->backrefs)
    258       1.1       jtc 			break;		/* no further info needed */
    259       1.1       jtc 
    260       1.1       jtc 		/* oh my, he wants the subexpressions... */
    261       1.1       jtc 		if (m->pmatch == NULL)
    262       1.1       jtc 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
    263       1.1       jtc 							sizeof(regmatch_t));
    264       1.1       jtc 		if (m->pmatch == NULL) {
    265      1.13  christos 			error = REG_ESPACE;
    266      1.13  christos 			goto done;
    267       1.1       jtc 		}
    268       1.1       jtc 		for (i = 1; i <= m->g->nsub; i++)
    269      1.10  drochner 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
    270       1.1       jtc 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
    271       1.1       jtc 			NOTE("dissecting");
    272       1.1       jtc 			dp = dissect(m, m->coldp, endp, gf, gl);
    273       1.1       jtc 		} else {
    274       1.1       jtc 			if (g->nplus > 0 && m->lastpos == NULL)
    275      1.19      yamt 				m->lastpos = malloc((g->nplus+1) *
    276      1.19      yamt 							sizeof(const char *));
    277       1.1       jtc 			if (g->nplus > 0 && m->lastpos == NULL) {
    278      1.13  christos 				error = REG_ESPACE;
    279      1.13  christos 				goto done;
    280       1.1       jtc 			}
    281       1.1       jtc 			NOTE("backref dissect");
    282       1.1       jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    283       1.1       jtc 		}
    284       1.1       jtc 		if (dp != NULL)
    285       1.1       jtc 			break;
    286       1.1       jtc 
    287       1.1       jtc 		/* uh-oh... we couldn't find a subexpression-level match */
    288       1.1       jtc 		assert(g->backrefs);	/* must be back references doing it */
    289       1.1       jtc 		assert(g->nplus == 0 || m->lastpos != NULL);
    290       1.1       jtc 		for (;;) {
    291       1.1       jtc 			if (dp != NULL || endp <= m->coldp)
    292       1.1       jtc 				break;		/* defeat */
    293       1.1       jtc 			NOTE("backoff");
    294       1.1       jtc 			endp = slow(m, m->coldp, endp-1, gf, gl);
    295       1.1       jtc 			if (endp == NULL)
    296       1.1       jtc 				break;		/* defeat */
    297       1.1       jtc 			/* try it on a shorter possibility */
    298       1.1       jtc #ifndef NDEBUG
    299       1.1       jtc 			for (i = 1; i <= m->g->nsub; i++) {
    300      1.10  drochner 				assert(m->pmatch[i].rm_so == (regoff_t)-1);
    301      1.10  drochner 				assert(m->pmatch[i].rm_eo == (regoff_t)-1);
    302       1.1       jtc 			}
    303       1.1       jtc #endif
    304       1.1       jtc 			NOTE("backoff dissect");
    305       1.1       jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    306       1.1       jtc 		}
    307       1.1       jtc 		assert(dp == NULL || dp == endp);
    308       1.1       jtc 		if (dp != NULL)		/* found a shorter one */
    309       1.1       jtc 			break;
    310       1.1       jtc 
    311       1.1       jtc 		/* despite initial appearances, there is no match here */
    312       1.1       jtc 		NOTE("false alarm");
    313       1.1       jtc 		start = m->coldp + 1;	/* recycle starting later */
    314       1.1       jtc 		assert(start <= stop);
    315       1.1       jtc 	}
    316       1.1       jtc 
    317       1.1       jtc 	/* fill in the details if requested */
    318       1.1       jtc 	if (nmatch > 0) {
    319      1.12     lukem 		_DIAGASSERT(pmatch != NULL);
    320       1.1       jtc 		pmatch[0].rm_so = m->coldp - m->offp;
    321       1.1       jtc 		pmatch[0].rm_eo = endp - m->offp;
    322       1.1       jtc 	}
    323       1.1       jtc 	if (nmatch > 1) {
    324       1.1       jtc 		assert(m->pmatch != NULL);
    325       1.1       jtc 		for (i = 1; i < nmatch; i++)
    326       1.1       jtc 			if (i <= m->g->nsub)
    327       1.1       jtc 				pmatch[i] = m->pmatch[i];
    328       1.1       jtc 			else {
    329      1.10  drochner 				pmatch[i].rm_so = (regoff_t)-1;
    330      1.10  drochner 				pmatch[i].rm_eo = (regoff_t)-1;
    331       1.1       jtc 			}
    332       1.1       jtc 	}
    333       1.1       jtc 
    334      1.13  christos done:
    335      1.13  christos 	if (m->pmatch != NULL) {
    336       1.9  christos 		free(m->pmatch);
    337      1.13  christos 		m->pmatch = NULL;
    338      1.13  christos 	}
    339      1.13  christos 	if (m->lastpos != NULL) {
    340       1.9  christos 		free(m->lastpos);
    341      1.13  christos 		m->lastpos = NULL;
    342      1.13  christos 	}
    343       1.1       jtc 	STATETEARDOWN(m);
    344      1.13  christos 	return error;
    345       1.1       jtc }
    346       1.1       jtc 
    347       1.1       jtc /*
    348       1.1       jtc  - dissect - figure out what matched what, no back references
    349      1.19      yamt  == static const char *dissect(struct match *m, const char *start, \
    350      1.19      yamt  ==	const char *stop, sopno startst, sopno stopst);
    351       1.1       jtc  */
    352      1.19      yamt static const char *			/* == stop (success) always */
    353      1.21  junyoung dissect(
    354      1.21  junyoung     struct match *m,
    355      1.21  junyoung     const char *start,
    356      1.21  junyoung     const char *stop,
    357      1.21  junyoung     sopno startst,
    358      1.21  junyoung     sopno stopst)
    359       1.1       jtc {
    360       1.8     perry 	int i;
    361       1.8     perry 	sopno ss;	/* start sop of current subRE */
    362       1.8     perry 	sopno es;	/* end sop of current subRE */
    363      1.19      yamt 	const char *sp;	/* start of string matched by it */
    364      1.19      yamt 	const char *stp; /* string matched by it cannot pass here */
    365      1.19      yamt 	const char *rest; /* start of rest of string */
    366      1.19      yamt 	const char *tail; /* string unmatched by rest of RE */
    367       1.8     perry 	sopno ssub;	/* start sop of subsubRE */
    368       1.8     perry 	sopno esub;	/* end sop of subsubRE */
    369      1.19      yamt 	const char *ssp; /* start of string matched by subsubRE */
    370      1.19      yamt 	const char *sep; /* end of string matched by subsubRE */
    371      1.19      yamt 	const char *oldssp; /* previous ssp */
    372       1.9  christos #ifndef NDEBUG
    373      1.19      yamt 	const char *dp;
    374       1.9  christos #endif
    375       1.1       jtc 
    376      1.12     lukem 	_DIAGASSERT(m != NULL);
    377      1.12     lukem 	_DIAGASSERT(start != NULL);
    378      1.12     lukem 	_DIAGASSERT(stop != NULL);
    379      1.12     lukem 
    380       1.1       jtc 	AT("diss", start, stop, startst, stopst);
    381       1.1       jtc 	sp = start;
    382       1.1       jtc 	for (ss = startst; ss < stopst; ss = es) {
    383       1.1       jtc 		/* identify end of subRE */
    384       1.1       jtc 		es = ss;
    385       1.1       jtc 		switch (OP(m->g->strip[es])) {
    386       1.1       jtc 		case OPLUS_:
    387       1.1       jtc 		case OQUEST_:
    388       1.1       jtc 			es += OPND(m->g->strip[es]);
    389       1.1       jtc 			break;
    390       1.1       jtc 		case OCH_:
    391       1.1       jtc 			while (OP(m->g->strip[es]) != O_CH)
    392       1.1       jtc 				es += OPND(m->g->strip[es]);
    393       1.1       jtc 			break;
    394       1.1       jtc 		}
    395       1.1       jtc 		es++;
    396       1.1       jtc 
    397       1.1       jtc 		/* figure out what it matched */
    398       1.1       jtc 		switch (OP(m->g->strip[ss])) {
    399       1.1       jtc 		case OEND:
    400       1.1       jtc 			assert(nope);
    401       1.1       jtc 			break;
    402       1.1       jtc 		case OCHAR:
    403       1.1       jtc 			sp++;
    404       1.1       jtc 			break;
    405       1.1       jtc 		case OBOL:
    406       1.1       jtc 		case OEOL:
    407       1.1       jtc 		case OBOW:
    408       1.1       jtc 		case OEOW:
    409       1.1       jtc 			break;
    410       1.1       jtc 		case OANY:
    411       1.1       jtc 		case OANYOF:
    412       1.1       jtc 			sp++;
    413       1.1       jtc 			break;
    414       1.1       jtc 		case OBACK_:
    415       1.1       jtc 		case O_BACK:
    416       1.1       jtc 			assert(nope);
    417       1.1       jtc 			break;
    418       1.1       jtc 		/* cases where length of match is hard to find */
    419       1.1       jtc 		case OQUEST_:
    420       1.1       jtc 			stp = stop;
    421       1.1       jtc 			for (;;) {
    422       1.1       jtc 				/* how long could this one be? */
    423       1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    424       1.1       jtc 				assert(rest != NULL);	/* it did match */
    425       1.1       jtc 				/* could the rest match the rest? */
    426       1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    427       1.1       jtc 				if (tail == stop)
    428       1.1       jtc 					break;		/* yes! */
    429       1.1       jtc 				/* no -- try a shorter match for this one */
    430       1.1       jtc 				stp = rest - 1;
    431       1.1       jtc 				assert(stp >= sp);	/* it did work */
    432       1.1       jtc 			}
    433       1.1       jtc 			ssub = ss + 1;
    434       1.1       jtc 			esub = es - 1;
    435       1.1       jtc 			/* did innards match? */
    436       1.1       jtc 			if (slow(m, sp, rest, ssub, esub) != NULL) {
    437       1.9  christos #ifdef NDEBUG
    438       1.9  christos 				(void)
    439       1.9  christos #else
    440       1.9  christos 				dp =
    441       1.9  christos #endif
    442       1.9  christos 				    dissect(m, sp, rest, ssub, esub);
    443       1.1       jtc 				assert(dp == rest);
    444       1.1       jtc 			} else		/* no */
    445       1.1       jtc 				assert(sp == rest);
    446       1.1       jtc 			sp = rest;
    447       1.1       jtc 			break;
    448       1.1       jtc 		case OPLUS_:
    449       1.1       jtc 			stp = stop;
    450       1.1       jtc 			for (;;) {
    451       1.1       jtc 				/* how long could this one be? */
    452       1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    453       1.1       jtc 				assert(rest != NULL);	/* it did match */
    454       1.1       jtc 				/* could the rest match the rest? */
    455       1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    456       1.1       jtc 				if (tail == stop)
    457       1.1       jtc 					break;		/* yes! */
    458       1.1       jtc 				/* no -- try a shorter match for this one */
    459       1.1       jtc 				stp = rest - 1;
    460       1.1       jtc 				assert(stp >= sp);	/* it did work */
    461       1.1       jtc 			}
    462       1.1       jtc 			ssub = ss + 1;
    463       1.1       jtc 			esub = es - 1;
    464       1.1       jtc 			ssp = sp;
    465       1.1       jtc 			oldssp = ssp;
    466       1.1       jtc 			for (;;) {	/* find last match of innards */
    467       1.1       jtc 				sep = slow(m, ssp, rest, ssub, esub);
    468       1.1       jtc 				if (sep == NULL || sep == ssp)
    469       1.1       jtc 					break;	/* failed or matched null */
    470       1.1       jtc 				oldssp = ssp;	/* on to next try */
    471       1.1       jtc 				ssp = sep;
    472       1.1       jtc 			}
    473       1.1       jtc 			if (sep == NULL) {
    474       1.1       jtc 				/* last successful match */
    475       1.1       jtc 				sep = ssp;
    476       1.1       jtc 				ssp = oldssp;
    477       1.1       jtc 			}
    478       1.1       jtc 			assert(sep == rest);	/* must exhaust substring */
    479       1.1       jtc 			assert(slow(m, ssp, sep, ssub, esub) == rest);
    480       1.9  christos #ifdef NDEBUG
    481       1.9  christos 			(void)
    482       1.9  christos #else
    483       1.9  christos 			dp =
    484       1.9  christos #endif
    485       1.9  christos 			    dissect(m, ssp, sep, ssub, esub);
    486       1.1       jtc 			assert(dp == sep);
    487       1.1       jtc 			sp = rest;
    488       1.1       jtc 			break;
    489       1.1       jtc 		case OCH_:
    490       1.1       jtc 			stp = stop;
    491       1.1       jtc 			for (;;) {
    492       1.1       jtc 				/* how long could this one be? */
    493       1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    494       1.1       jtc 				assert(rest != NULL);	/* it did match */
    495       1.1       jtc 				/* could the rest match the rest? */
    496       1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    497       1.1       jtc 				if (tail == stop)
    498       1.1       jtc 					break;		/* yes! */
    499       1.1       jtc 				/* no -- try a shorter match for this one */
    500       1.1       jtc 				stp = rest - 1;
    501       1.1       jtc 				assert(stp >= sp);	/* it did work */
    502       1.1       jtc 			}
    503       1.1       jtc 			ssub = ss + 1;
    504       1.1       jtc 			esub = ss + OPND(m->g->strip[ss]) - 1;
    505       1.1       jtc 			assert(OP(m->g->strip[esub]) == OOR1);
    506       1.1       jtc 			for (;;) {	/* find first matching branch */
    507       1.1       jtc 				if (slow(m, sp, rest, ssub, esub) == rest)
    508       1.1       jtc 					break;	/* it matched all of it */
    509       1.1       jtc 				/* that one missed, try next one */
    510       1.1       jtc 				assert(OP(m->g->strip[esub]) == OOR1);
    511       1.1       jtc 				esub++;
    512       1.1       jtc 				assert(OP(m->g->strip[esub]) == OOR2);
    513       1.1       jtc 				ssub = esub + 1;
    514       1.1       jtc 				esub += OPND(m->g->strip[esub]);
    515       1.1       jtc 				if (OP(m->g->strip[esub]) == OOR2)
    516       1.1       jtc 					esub--;
    517       1.1       jtc 				else
    518       1.1       jtc 					assert(OP(m->g->strip[esub]) == O_CH);
    519       1.1       jtc 			}
    520       1.9  christos #ifdef NDEBUG
    521       1.9  christos 			(void)
    522       1.9  christos #else
    523       1.9  christos 			dp =
    524       1.9  christos #endif
    525       1.9  christos 			    dissect(m, sp, rest, ssub, esub);
    526       1.1       jtc 			assert(dp == rest);
    527       1.1       jtc 			sp = rest;
    528       1.1       jtc 			break;
    529       1.1       jtc 		case O_PLUS:
    530       1.1       jtc 		case O_QUEST:
    531       1.1       jtc 		case OOR1:
    532       1.1       jtc 		case OOR2:
    533       1.1       jtc 		case O_CH:
    534       1.1       jtc 			assert(nope);
    535       1.1       jtc 			break;
    536       1.1       jtc 		case OLPAREN:
    537       1.1       jtc 			i = OPND(m->g->strip[ss]);
    538       1.2       jtc 			assert(0 < i && i <= m->g->nsub);
    539       1.1       jtc 			m->pmatch[i].rm_so = sp - m->offp;
    540       1.1       jtc 			break;
    541       1.1       jtc 		case ORPAREN:
    542       1.1       jtc 			i = OPND(m->g->strip[ss]);
    543       1.2       jtc 			assert(0 < i && i <= m->g->nsub);
    544       1.1       jtc 			m->pmatch[i].rm_eo = sp - m->offp;
    545       1.1       jtc 			break;
    546       1.1       jtc 		default:		/* uh oh */
    547       1.1       jtc 			assert(nope);
    548       1.1       jtc 			break;
    549       1.1       jtc 		}
    550       1.1       jtc 	}
    551       1.1       jtc 
    552       1.1       jtc 	assert(sp == stop);
    553       1.1       jtc 	return(sp);
    554       1.1       jtc }
    555       1.1       jtc 
    556       1.1       jtc /*
    557       1.1       jtc  - backref - figure out what matched what, figuring in back references
    558      1.19      yamt  == static const char *backref(struct match *m, const char *start, \
    559      1.19      yamt  ==	const char *stop, sopno startst, sopno stopst, sopno lev);
    560       1.1       jtc  */
    561      1.19      yamt static const char *		/* == stop (success) or NULL (failure) */
    562      1.21  junyoung backref(
    563      1.21  junyoung     struct match *m,
    564      1.21  junyoung     const char *start,
    565      1.21  junyoung     const char *stop,
    566      1.21  junyoung     sopno startst,
    567      1.21  junyoung     sopno stopst,
    568      1.21  junyoung     sopno lev)			/* PLUS nesting level */
    569       1.1       jtc {
    570       1.8     perry 	int i;
    571       1.8     perry 	sopno ss;	/* start sop of current subRE */
    572      1.19      yamt 	const char *sp;	/* start of string matched by it */
    573       1.8     perry 	sopno ssub;	/* start sop of subsubRE */
    574       1.8     perry 	sopno esub;	/* end sop of subsubRE */
    575      1.19      yamt 	const char *ssp; /* start of string matched by subsubRE */
    576      1.19      yamt 	const char *dp;
    577       1.8     perry 	size_t len;
    578       1.8     perry 	int hard;
    579       1.8     perry 	sop s;
    580       1.8     perry 	regoff_t offsave;
    581       1.8     perry 	cset *cs;
    582       1.1       jtc 
    583      1.12     lukem 	_DIAGASSERT(m != NULL);
    584      1.12     lukem 	_DIAGASSERT(start != NULL);
    585      1.12     lukem 	_DIAGASSERT(stop != NULL);
    586      1.12     lukem 
    587       1.1       jtc 	AT("back", start, stop, startst, stopst);
    588       1.1       jtc 	sp = start;
    589       1.1       jtc 
    590       1.1       jtc 	/* get as far as we can with easy stuff */
    591       1.1       jtc 	hard = 0;
    592       1.1       jtc 	for (ss = startst; !hard && ss < stopst; ss++)
    593       1.1       jtc 		switch (OP(s = m->g->strip[ss])) {
    594       1.1       jtc 		case OCHAR:
    595       1.1       jtc 			if (sp == stop || *sp++ != (char)OPND(s))
    596       1.1       jtc 				return(NULL);
    597       1.1       jtc 			break;
    598       1.1       jtc 		case OANY:
    599       1.1       jtc 			if (sp == stop)
    600       1.1       jtc 				return(NULL);
    601       1.1       jtc 			sp++;
    602       1.1       jtc 			break;
    603       1.1       jtc 		case OANYOF:
    604       1.1       jtc 			cs = &m->g->sets[OPND(s)];
    605       1.1       jtc 			if (sp == stop || !CHIN(cs, *sp++))
    606       1.1       jtc 				return(NULL);
    607       1.1       jtc 			break;
    608       1.1       jtc 		case OBOL:
    609       1.1       jtc 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    610       1.1       jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    611       1.1       jtc 						(m->g->cflags&REG_NEWLINE)) )
    612       1.1       jtc 				{ /* yes */ }
    613       1.1       jtc 			else
    614       1.1       jtc 				return(NULL);
    615       1.1       jtc 			break;
    616       1.1       jtc 		case OEOL:
    617       1.1       jtc 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    618       1.1       jtc 					(sp < m->endp && *sp == '\n' &&
    619       1.1       jtc 						(m->g->cflags&REG_NEWLINE)) )
    620       1.1       jtc 				{ /* yes */ }
    621       1.1       jtc 			else
    622       1.1       jtc 				return(NULL);
    623       1.1       jtc 			break;
    624       1.1       jtc 		case OBOW:
    625      1.17     enami 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    626      1.17     enami 					(sp < m->endp && *(sp-1) == '\n' &&
    627      1.17     enami 						(m->g->cflags&REG_NEWLINE)) ||
    628      1.17     enami 					(sp > m->beginp &&
    629      1.17     enami 							!ISWORD(*(sp-1))) ) &&
    630      1.17     enami 					(sp < m->endp && ISWORD(*sp)) )
    631       1.1       jtc 				{ /* yes */ }
    632       1.1       jtc 			else
    633       1.1       jtc 				return(NULL);
    634       1.1       jtc 			break;
    635       1.1       jtc 		case OEOW:
    636      1.17     enami 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    637      1.17     enami 					(sp < m->endp && *sp == '\n' &&
    638      1.17     enami 						(m->g->cflags&REG_NEWLINE)) ||
    639      1.17     enami 					(sp < m->endp && !ISWORD(*sp)) ) &&
    640      1.17     enami 					(sp > m->beginp && ISWORD(*(sp-1))) )
    641       1.1       jtc 				{ /* yes */ }
    642       1.1       jtc 			else
    643       1.1       jtc 				return(NULL);
    644       1.1       jtc 			break;
    645       1.1       jtc 		case O_QUEST:
    646       1.1       jtc 			break;
    647       1.1       jtc 		case OOR1:	/* matches null but needs to skip */
    648       1.1       jtc 			ss++;
    649       1.1       jtc 			s = m->g->strip[ss];
    650       1.1       jtc 			do {
    651       1.1       jtc 				assert(OP(s) == OOR2);
    652       1.1       jtc 				ss += OPND(s);
    653       1.1       jtc 			} while (OP(s = m->g->strip[ss]) != O_CH);
    654       1.1       jtc 			/* note that the ss++ gets us past the O_CH */
    655       1.1       jtc 			break;
    656       1.1       jtc 		default:	/* have to make a choice */
    657       1.1       jtc 			hard = 1;
    658       1.1       jtc 			break;
    659       1.1       jtc 		}
    660       1.1       jtc 	if (!hard) {		/* that was it! */
    661       1.1       jtc 		if (sp != stop)
    662       1.1       jtc 			return(NULL);
    663       1.1       jtc 		return(sp);
    664       1.1       jtc 	}
    665       1.1       jtc 	ss--;			/* adjust for the for's final increment */
    666       1.1       jtc 
    667       1.1       jtc 	/* the hard stuff */
    668       1.1       jtc 	AT("hard", sp, stop, ss, stopst);
    669       1.1       jtc 	s = m->g->strip[ss];
    670       1.1       jtc 	switch (OP(s)) {
    671       1.1       jtc 	case OBACK_:		/* the vilest depths */
    672       1.1       jtc 		i = OPND(s);
    673       1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    674      1.10  drochner 		if (m->pmatch[i].rm_eo == (regoff_t)-1)
    675       1.1       jtc 			return(NULL);
    676      1.10  drochner 		assert(m->pmatch[i].rm_so != (regoff_t)-1);
    677      1.11  christos 		len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
    678      1.18  christos 		if (len == 0)
    679      1.18  christos 			return(NULL);
    680       1.1       jtc 		assert(stop - m->beginp >= len);
    681       1.1       jtc 		if (sp > stop - len)
    682       1.1       jtc 			return(NULL);	/* not enough left to match */
    683      1.11  christos 		ssp = m->offp + (size_t)m->pmatch[i].rm_so;
    684       1.1       jtc 		if (memcmp(sp, ssp, len) != 0)
    685       1.1       jtc 			return(NULL);
    686       1.1       jtc 		while (m->g->strip[ss] != SOP(O_BACK, i))
    687       1.1       jtc 			ss++;
    688       1.1       jtc 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
    689       1.9  christos 
    690       1.1       jtc 	case OQUEST_:		/* to null or not */
    691       1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    692       1.1       jtc 		if (dp != NULL)
    693       1.1       jtc 			return(dp);	/* not */
    694       1.1       jtc 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
    695       1.9  christos 
    696       1.1       jtc 	case OPLUS_:
    697       1.1       jtc 		assert(m->lastpos != NULL);
    698       1.1       jtc 		assert(lev+1 <= m->g->nplus);
    699       1.1       jtc 		m->lastpos[lev+1] = sp;
    700       1.1       jtc 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
    701       1.9  christos 
    702       1.1       jtc 	case O_PLUS:
    703       1.1       jtc 		if (sp == m->lastpos[lev])	/* last pass matched null */
    704       1.1       jtc 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
    705       1.1       jtc 		/* try another pass */
    706       1.1       jtc 		m->lastpos[lev] = sp;
    707       1.1       jtc 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
    708       1.1       jtc 		if (dp == NULL)
    709       1.9  christos 			dp = backref(m, sp, stop, ss+1, stopst, lev-1);
    710       1.9  christos 		return(dp);
    711       1.9  christos 
    712       1.1       jtc 	case OCH_:		/* find the right one, if any */
    713       1.1       jtc 		ssub = ss + 1;
    714       1.1       jtc 		esub = ss + OPND(s) - 1;
    715       1.1       jtc 		assert(OP(m->g->strip[esub]) == OOR1);
    716       1.1       jtc 		for (;;) {	/* find first matching branch */
    717       1.1       jtc 			dp = backref(m, sp, stop, ssub, esub, lev);
    718       1.1       jtc 			if (dp != NULL)
    719       1.1       jtc 				return(dp);
    720       1.1       jtc 			/* that one missed, try next one */
    721       1.1       jtc 			if (OP(m->g->strip[esub]) == O_CH)
    722       1.1       jtc 				return(NULL);	/* there is none */
    723       1.1       jtc 			esub++;
    724       1.1       jtc 			assert(OP(m->g->strip[esub]) == OOR2);
    725       1.1       jtc 			ssub = esub + 1;
    726       1.1       jtc 			esub += OPND(m->g->strip[esub]);
    727       1.1       jtc 			if (OP(m->g->strip[esub]) == OOR2)
    728       1.1       jtc 				esub--;
    729       1.1       jtc 			else
    730       1.1       jtc 				assert(OP(m->g->strip[esub]) == O_CH);
    731       1.1       jtc 		}
    732       1.9  christos 
    733       1.1       jtc 	case OLPAREN:		/* must undo assignment if rest fails */
    734       1.1       jtc 		i = OPND(s);
    735       1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    736       1.1       jtc 		offsave = m->pmatch[i].rm_so;
    737       1.1       jtc 		m->pmatch[i].rm_so = sp - m->offp;
    738       1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    739       1.1       jtc 		if (dp != NULL)
    740       1.1       jtc 			return(dp);
    741       1.1       jtc 		m->pmatch[i].rm_so = offsave;
    742       1.1       jtc 		return(NULL);
    743       1.9  christos 
    744       1.1       jtc 	case ORPAREN:		/* must undo assignment if rest fails */
    745       1.1       jtc 		i = OPND(s);
    746       1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    747       1.1       jtc 		offsave = m->pmatch[i].rm_eo;
    748       1.1       jtc 		m->pmatch[i].rm_eo = sp - m->offp;
    749       1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    750       1.1       jtc 		if (dp != NULL)
    751       1.1       jtc 			return(dp);
    752       1.1       jtc 		m->pmatch[i].rm_eo = offsave;
    753       1.1       jtc 		return(NULL);
    754       1.9  christos 
    755       1.1       jtc 	default:		/* uh oh */
    756       1.1       jtc 		assert(nope);
    757       1.1       jtc 		break;
    758       1.1       jtc 	}
    759       1.1       jtc 
    760       1.1       jtc 	/* "can't happen" */
    761       1.1       jtc 	assert(nope);
    762       1.1       jtc 	/* NOTREACHED */
    763       1.7  christos 	return NULL;
    764       1.1       jtc }
    765       1.1       jtc 
    766       1.1       jtc /*
    767       1.1       jtc  - fast - step through the string at top speed
    768      1.19      yamt  == static const char *fast(struct match *m, const char *start, \
    769      1.19      yamt  ==	const char *stop, sopno startst, sopno stopst);
    770       1.1       jtc  */
    771      1.19      yamt static const char *		/* where tentative match ended, or NULL */
    772      1.21  junyoung fast(
    773      1.21  junyoung     struct match *m,
    774      1.21  junyoung     const char *start,
    775      1.21  junyoung     const char *stop,
    776      1.21  junyoung     sopno startst,
    777      1.21  junyoung     sopno stopst)
    778       1.1       jtc {
    779       1.8     perry 	states st = m->st;
    780       1.8     perry 	states fresh = m->fresh;
    781       1.8     perry 	states tmp = m->tmp;
    782      1.19      yamt 	const char *p = start;
    783      1.17     enami 	int c = (start == m->beginp) ? OUT : *(start-1);
    784       1.8     perry 	int lastc;	/* previous c */
    785       1.8     perry 	int flagch;
    786  1.23.2.1      yamt 	size_t i;
    787      1.19      yamt 	const char *coldp; /* last p after which no match was underway */
    788       1.1       jtc 
    789      1.12     lukem 	_DIAGASSERT(m != NULL);
    790      1.12     lukem 	_DIAGASSERT(start != NULL);
    791      1.12     lukem 	_DIAGASSERT(stop != NULL);
    792      1.12     lukem 
    793       1.1       jtc 	CLEAR(st);
    794       1.1       jtc 	SET1(st, startst);
    795       1.1       jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    796       1.1       jtc 	ASSIGN(fresh, st);
    797       1.1       jtc 	SP("start", st, *p);
    798       1.1       jtc 	coldp = NULL;
    799       1.1       jtc 	for (;;) {
    800       1.1       jtc 		/* next character */
    801       1.1       jtc 		lastc = c;
    802      1.17     enami 		c = (p == m->endp) ? OUT : *p;
    803       1.1       jtc 		if (EQ(st, fresh))
    804       1.1       jtc 			coldp = p;
    805       1.1       jtc 
    806       1.1       jtc 		/* is there an EOL and/or BOL between lastc and c? */
    807       1.1       jtc 		flagch = '\0';
    808       1.1       jtc 		i = 0;
    809       1.1       jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    810       1.1       jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    811       1.1       jtc 			flagch = BOL;
    812       1.1       jtc 			i = m->g->nbol;
    813       1.1       jtc 		}
    814       1.1       jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    815       1.1       jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    816       1.1       jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    817       1.1       jtc 			i += m->g->neol;
    818       1.1       jtc 		}
    819       1.1       jtc 		if (i != 0) {
    820       1.1       jtc 			for (; i > 0; i--)
    821       1.1       jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    822       1.1       jtc 			SP("boleol", st, c);
    823       1.1       jtc 		}
    824       1.1       jtc 
    825       1.1       jtc 		/* how about a word boundary? */
    826       1.3       jtc 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    827       1.3       jtc 					(c != OUT && ISWORD(c)) ) {
    828       1.1       jtc 			flagch = BOW;
    829       1.1       jtc 		}
    830       1.3       jtc 		if ( (lastc != OUT && ISWORD(lastc)) &&
    831       1.3       jtc 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    832       1.1       jtc 			flagch = EOW;
    833       1.1       jtc 		}
    834       1.1       jtc 		if (flagch == BOW || flagch == EOW) {
    835       1.1       jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    836       1.1       jtc 			SP("boweow", st, c);
    837       1.1       jtc 		}
    838       1.1       jtc 
    839       1.1       jtc 		/* are we done? */
    840       1.1       jtc 		if (ISSET(st, stopst) || p == stop)
    841       1.1       jtc 			break;		/* NOTE BREAK OUT */
    842       1.1       jtc 
    843       1.1       jtc 		/* no, we must deal with this character */
    844       1.1       jtc 		ASSIGN(tmp, st);
    845       1.1       jtc 		ASSIGN(st, fresh);
    846       1.1       jtc 		assert(c != OUT);
    847       1.1       jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    848       1.1       jtc 		SP("aft", st, c);
    849       1.1       jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    850       1.1       jtc 		p++;
    851       1.1       jtc 	}
    852       1.1       jtc 
    853       1.1       jtc 	assert(coldp != NULL);
    854       1.1       jtc 	m->coldp = coldp;
    855       1.1       jtc 	if (ISSET(st, stopst))
    856       1.1       jtc 		return(p+1);
    857       1.1       jtc 	else
    858       1.1       jtc 		return(NULL);
    859       1.1       jtc }
    860       1.1       jtc 
    861       1.1       jtc /*
    862       1.1       jtc  - slow - step through the string more deliberately
    863      1.19      yamt  == static const char *slow(struct match *m, const char *start, \
    864      1.19      yamt  ==	const char *stop, sopno startst, sopno stopst);
    865       1.1       jtc  */
    866      1.19      yamt static const char *			/* where it ended */
    867      1.21  junyoung slow(
    868      1.21  junyoung     struct match *m,
    869      1.21  junyoung     const char *start,
    870      1.21  junyoung     const char *stop,
    871      1.21  junyoung     sopno startst,
    872      1.21  junyoung     sopno stopst)
    873       1.1       jtc {
    874       1.8     perry 	states st = m->st;
    875       1.8     perry 	states empty = m->empty;
    876       1.8     perry 	states tmp = m->tmp;
    877      1.19      yamt 	const char *p = start;
    878      1.17     enami 	int c = (start == m->beginp) ? OUT : *(start-1);
    879       1.8     perry 	int lastc;	/* previous c */
    880       1.8     perry 	int flagch;
    881  1.23.2.1      yamt 	size_t i;
    882      1.19      yamt 	const char *matchp;	/* last p at which a match ended */
    883       1.1       jtc 
    884      1.12     lukem 	_DIAGASSERT(m != NULL);
    885      1.12     lukem 	_DIAGASSERT(start != NULL);
    886      1.12     lukem 	_DIAGASSERT(stop != NULL);
    887      1.12     lukem 
    888       1.1       jtc 	AT("slow", start, stop, startst, stopst);
    889       1.1       jtc 	CLEAR(st);
    890       1.1       jtc 	SET1(st, startst);
    891       1.1       jtc 	SP("sstart", st, *p);
    892       1.1       jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    893       1.1       jtc 	matchp = NULL;
    894       1.1       jtc 	for (;;) {
    895       1.1       jtc 		/* next character */
    896       1.1       jtc 		lastc = c;
    897      1.17     enami 		c = (p == m->endp) ? OUT : *p;
    898       1.1       jtc 
    899       1.1       jtc 		/* is there an EOL and/or BOL between lastc and c? */
    900       1.1       jtc 		flagch = '\0';
    901       1.1       jtc 		i = 0;
    902       1.1       jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    903       1.1       jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    904       1.1       jtc 			flagch = BOL;
    905       1.1       jtc 			i = m->g->nbol;
    906       1.1       jtc 		}
    907       1.1       jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    908       1.1       jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    909       1.1       jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    910       1.1       jtc 			i += m->g->neol;
    911       1.1       jtc 		}
    912       1.1       jtc 		if (i != 0) {
    913       1.1       jtc 			for (; i > 0; i--)
    914       1.1       jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    915       1.1       jtc 			SP("sboleol", st, c);
    916       1.1       jtc 		}
    917       1.1       jtc 
    918       1.1       jtc 		/* how about a word boundary? */
    919       1.3       jtc 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    920       1.3       jtc 					(c != OUT && ISWORD(c)) ) {
    921       1.1       jtc 			flagch = BOW;
    922       1.1       jtc 		}
    923       1.3       jtc 		if ( (lastc != OUT && ISWORD(lastc)) &&
    924       1.3       jtc 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    925       1.1       jtc 			flagch = EOW;
    926       1.1       jtc 		}
    927       1.1       jtc 		if (flagch == BOW || flagch == EOW) {
    928       1.1       jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    929       1.1       jtc 			SP("sboweow", st, c);
    930       1.1       jtc 		}
    931       1.1       jtc 
    932       1.1       jtc 		/* are we done? */
    933       1.1       jtc 		if (ISSET(st, stopst))
    934       1.1       jtc 			matchp = p;
    935       1.1       jtc 		if (EQ(st, empty) || p == stop)
    936       1.1       jtc 			break;		/* NOTE BREAK OUT */
    937       1.1       jtc 
    938       1.1       jtc 		/* no, we must deal with this character */
    939       1.1       jtc 		ASSIGN(tmp, st);
    940       1.1       jtc 		ASSIGN(st, empty);
    941       1.1       jtc 		assert(c != OUT);
    942       1.1       jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    943       1.1       jtc 		SP("saft", st, c);
    944       1.1       jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    945       1.1       jtc 		p++;
    946       1.1       jtc 	}
    947       1.1       jtc 
    948       1.1       jtc 	return(matchp);
    949       1.1       jtc }
    950       1.1       jtc 
    951       1.1       jtc 
    952       1.1       jtc /*
    953       1.1       jtc  - step - map set of states reachable before char to set reachable after
    954       1.8     perry  == static states step(struct re_guts *g, sopno start, sopno stop, \
    955       1.8     perry  ==	states bef, int ch, states aft);
    956       1.1       jtc  == #define	BOL	(OUT+1)
    957       1.1       jtc  == #define	EOL	(BOL+1)
    958       1.1       jtc  == #define	BOLEOL	(BOL+2)
    959       1.1       jtc  == #define	NOTHING	(BOL+3)
    960       1.1       jtc  == #define	BOW	(BOL+4)
    961       1.1       jtc  == #define	EOW	(BOL+5)
    962       1.1       jtc  == #define	CODEMAX	(BOL+5)		// highest code used
    963       1.1       jtc  == #define	NONCHAR(c)	((c) > CHAR_MAX)
    964       1.1       jtc  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    965       1.1       jtc  */
    966       1.1       jtc static states
    967      1.21  junyoung step(
    968      1.21  junyoung     struct re_guts *g,
    969      1.21  junyoung     sopno start,		/* start state within strip */
    970      1.21  junyoung     sopno stop,			/* state after stop state within strip */
    971      1.21  junyoung     states bef,			/* states reachable before */
    972      1.21  junyoung     int ch,			/* character or NONCHAR code */
    973      1.21  junyoung     states aft)			/* states already known reachable after */
    974       1.1       jtc {
    975       1.8     perry 	cset *cs;
    976       1.8     perry 	sop s;
    977       1.8     perry 	sopno pc;
    978       1.8     perry 	onestate here;		/* note, macros know this name */
    979       1.8     perry 	sopno look;
    980       1.8     perry 	int i;
    981       1.1       jtc 
    982      1.12     lukem 	_DIAGASSERT(g != NULL);
    983      1.12     lukem 
    984       1.1       jtc 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
    985       1.1       jtc 		s = g->strip[pc];
    986       1.1       jtc 		switch (OP(s)) {
    987       1.1       jtc 		case OEND:
    988       1.1       jtc 			assert(pc == stop-1);
    989       1.1       jtc 			break;
    990       1.1       jtc 		case OCHAR:
    991       1.1       jtc 			/* only characters can match */
    992       1.1       jtc 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
    993       1.1       jtc 			if (ch == (char)OPND(s))
    994       1.1       jtc 				FWD(aft, bef, 1);
    995       1.1       jtc 			break;
    996       1.1       jtc 		case OBOL:
    997       1.1       jtc 			if (ch == BOL || ch == BOLEOL)
    998       1.1       jtc 				FWD(aft, bef, 1);
    999       1.1       jtc 			break;
   1000       1.1       jtc 		case OEOL:
   1001       1.1       jtc 			if (ch == EOL || ch == BOLEOL)
   1002       1.1       jtc 				FWD(aft, bef, 1);
   1003       1.1       jtc 			break;
   1004       1.1       jtc 		case OBOW:
   1005       1.1       jtc 			if (ch == BOW)
   1006       1.1       jtc 				FWD(aft, bef, 1);
   1007       1.1       jtc 			break;
   1008       1.1       jtc 		case OEOW:
   1009       1.1       jtc 			if (ch == EOW)
   1010       1.1       jtc 				FWD(aft, bef, 1);
   1011       1.1       jtc 			break;
   1012       1.1       jtc 		case OANY:
   1013       1.1       jtc 			if (!NONCHAR(ch))
   1014       1.1       jtc 				FWD(aft, bef, 1);
   1015       1.1       jtc 			break;
   1016       1.1       jtc 		case OANYOF:
   1017       1.1       jtc 			cs = &g->sets[OPND(s)];
   1018       1.1       jtc 			if (!NONCHAR(ch) && CHIN(cs, ch))
   1019       1.1       jtc 				FWD(aft, bef, 1);
   1020       1.1       jtc 			break;
   1021       1.1       jtc 		case OBACK_:		/* ignored here */
   1022       1.1       jtc 		case O_BACK:
   1023       1.1       jtc 			FWD(aft, aft, 1);
   1024       1.1       jtc 			break;
   1025       1.1       jtc 		case OPLUS_:		/* forward, this is just an empty */
   1026       1.1       jtc 			FWD(aft, aft, 1);
   1027       1.1       jtc 			break;
   1028       1.1       jtc 		case O_PLUS:		/* both forward and back */
   1029       1.1       jtc 			FWD(aft, aft, 1);
   1030       1.1       jtc 			i = ISSETBACK(aft, OPND(s));
   1031       1.1       jtc 			BACK(aft, aft, OPND(s));
   1032       1.1       jtc 			if (!i && ISSETBACK(aft, OPND(s))) {
   1033       1.1       jtc 				/* oho, must reconsider loop body */
   1034       1.1       jtc 				pc -= OPND(s) + 1;
   1035       1.1       jtc 				INIT(here, pc);
   1036       1.1       jtc 			}
   1037       1.1       jtc 			break;
   1038       1.1       jtc 		case OQUEST_:		/* two branches, both forward */
   1039       1.1       jtc 			FWD(aft, aft, 1);
   1040       1.1       jtc 			FWD(aft, aft, OPND(s));
   1041       1.1       jtc 			break;
   1042       1.1       jtc 		case O_QUEST:		/* just an empty */
   1043       1.1       jtc 			FWD(aft, aft, 1);
   1044       1.1       jtc 			break;
   1045       1.1       jtc 		case OLPAREN:		/* not significant here */
   1046       1.1       jtc 		case ORPAREN:
   1047       1.1       jtc 			FWD(aft, aft, 1);
   1048       1.1       jtc 			break;
   1049       1.1       jtc 		case OCH_:		/* mark the first two branches */
   1050       1.1       jtc 			FWD(aft, aft, 1);
   1051       1.1       jtc 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
   1052       1.1       jtc 			FWD(aft, aft, OPND(s));
   1053       1.1       jtc 			break;
   1054       1.1       jtc 		case OOR1:		/* done a branch, find the O_CH */
   1055       1.1       jtc 			if (ISSTATEIN(aft, here)) {
   1056       1.1       jtc 				for (look = 1;
   1057       1.1       jtc 						OP(s = g->strip[pc+look]) != O_CH;
   1058       1.1       jtc 						look += OPND(s))
   1059       1.1       jtc 					assert(OP(s) == OOR2);
   1060       1.1       jtc 				FWD(aft, aft, look);
   1061       1.1       jtc 			}
   1062       1.1       jtc 			break;
   1063       1.1       jtc 		case OOR2:		/* propagate OCH_'s marking */
   1064       1.1       jtc 			FWD(aft, aft, 1);
   1065       1.1       jtc 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
   1066       1.1       jtc 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
   1067       1.1       jtc 				FWD(aft, aft, OPND(s));
   1068       1.1       jtc 			}
   1069       1.1       jtc 			break;
   1070       1.1       jtc 		case O_CH:		/* just empty */
   1071       1.1       jtc 			FWD(aft, aft, 1);
   1072       1.1       jtc 			break;
   1073       1.1       jtc 		default:		/* ooooops... */
   1074       1.1       jtc 			assert(nope);
   1075       1.1       jtc 			break;
   1076       1.1       jtc 		}
   1077       1.1       jtc 	}
   1078       1.1       jtc 
   1079       1.1       jtc 	return(aft);
   1080       1.1       jtc }
   1081       1.1       jtc 
   1082       1.1       jtc #ifdef REDEBUG
   1083       1.1       jtc /*
   1084       1.1       jtc  - print - print a set of states
   1085       1.1       jtc  == #ifdef REDEBUG
   1086       1.1       jtc  == static void print(struct match *m, char *caption, states st, \
   1087       1.1       jtc  ==	int ch, FILE *d);
   1088       1.1       jtc  == #endif
   1089       1.1       jtc  */
   1090       1.1       jtc static void
   1091      1.21  junyoung print(
   1092      1.21  junyoung     struct match *m,
   1093      1.21  junyoung     char *caption,
   1094      1.21  junyoung     states st,
   1095      1.21  junyoung     int ch,
   1096      1.21  junyoung     FILE *d)
   1097       1.1       jtc {
   1098       1.8     perry 	struct re_guts *g = m->g;
   1099       1.8     perry 	int i;
   1100       1.8     perry 	int first = 1;
   1101       1.1       jtc 
   1102      1.12     lukem 	_DIAGASSERT(m != NULL);
   1103      1.12     lukem 	_DIAGASSERT(caption != NULL);
   1104      1.12     lukem 
   1105       1.1       jtc 	if (!(m->eflags&REG_TRACE))
   1106       1.1       jtc 		return;
   1107       1.1       jtc 
   1108      1.12     lukem 	_DIAGASSERT(d != NULL);
   1109      1.12     lukem 
   1110       1.1       jtc 	fprintf(d, "%s", caption);
   1111       1.1       jtc 	if (ch != '\0')
   1112       1.1       jtc 		fprintf(d, " %s", pchar(ch));
   1113       1.1       jtc 	for (i = 0; i < g->nstates; i++)
   1114       1.1       jtc 		if (ISSET(st, i)) {
   1115       1.1       jtc 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
   1116       1.1       jtc 			first = 0;
   1117       1.1       jtc 		}
   1118       1.1       jtc 	fprintf(d, "\n");
   1119       1.1       jtc }
   1120       1.1       jtc 
   1121       1.1       jtc /*
   1122       1.1       jtc  - at - print current situation
   1123       1.1       jtc  == #ifdef REDEBUG
   1124       1.1       jtc  == static void at(struct match *m, char *title, char *start, char *stop, \
   1125       1.1       jtc  ==						sopno startst, sopno stopst);
   1126       1.1       jtc  == #endif
   1127       1.1       jtc  */
   1128       1.1       jtc static void
   1129      1.21  junyoung at(
   1130      1.21  junyoung     struct match *m,
   1131      1.21  junyoung     char *title,
   1132      1.21  junyoung     char *start,
   1133      1.21  junyoung     char *stop,
   1134      1.21  junyoung     sopno startst,
   1135      1.21  junyoung     sopno stopst)
   1136       1.1       jtc {
   1137      1.12     lukem 
   1138      1.12     lukem 	_DIAGASSERT(m != NULL);
   1139      1.12     lukem 	_DIAGASSERT(title != NULL);
   1140      1.12     lukem 	_DIAGASSERT(start != NULL);
   1141      1.12     lukem 	_DIAGASSERT(stop != NULL);
   1142      1.12     lukem 
   1143       1.1       jtc 	if (!(m->eflags&REG_TRACE))
   1144       1.1       jtc 		return;
   1145       1.1       jtc 
   1146       1.1       jtc 	printf("%s %s-", title, pchar(*start));
   1147       1.1       jtc 	printf("%s ", pchar(*stop));
   1148       1.1       jtc 	printf("%ld-%ld\n", (long)startst, (long)stopst);
   1149       1.1       jtc }
   1150       1.1       jtc 
   1151       1.1       jtc #ifndef PCHARDONE
   1152       1.1       jtc #define	PCHARDONE	/* never again */
   1153       1.1       jtc /*
   1154       1.1       jtc  - pchar - make a character printable
   1155       1.1       jtc  == #ifdef REDEBUG
   1156       1.1       jtc  == static char *pchar(int ch);
   1157       1.1       jtc  == #endif
   1158       1.1       jtc  *
   1159       1.1       jtc  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
   1160       1.1       jtc  * duplicate here avoids having a debugging-capable regexec.o tied to
   1161       1.1       jtc  * a matching debug.o, and this is convenient.  It all disappears in
   1162       1.1       jtc  * the non-debug compilation anyway, so it doesn't matter much.
   1163       1.1       jtc  */
   1164       1.1       jtc static char *			/* -> representation */
   1165      1.21  junyoung pchar(
   1166      1.21  junyoung     int ch)
   1167       1.1       jtc {
   1168       1.1       jtc 	static char pbuf[10];
   1169       1.1       jtc 
   1170       1.1       jtc 	if (isprint(ch) || ch == ' ')
   1171       1.6       mrg 		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
   1172       1.1       jtc 	else
   1173       1.6       mrg 		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
   1174       1.1       jtc 	return(pbuf);
   1175       1.1       jtc }
   1176       1.1       jtc #endif
   1177       1.1       jtc #endif
   1178       1.1       jtc 
   1179       1.1       jtc #undef	matcher
   1180       1.1       jtc #undef	fast
   1181       1.1       jtc #undef	slow
   1182       1.1       jtc #undef	dissect
   1183       1.1       jtc #undef	backref
   1184       1.1       jtc #undef	step
   1185       1.1       jtc #undef	print
   1186       1.1       jtc #undef	at
   1187       1.1       jtc #undef	match
   1188      1.14  christos #undef	nope
   1189