Home | History | Annotate | Line # | Download | only in regex
engine.c revision 1.12.10.1
      1  1.12.10.1   nathanw /*	$NetBSD: engine.c,v 1.12.10.1 2002/01/28 20:50:44 nathanw Exp $	*/
      2        1.5       cgd 
      3        1.4       cgd /*-
      4        1.4       cgd  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
      5        1.4       cgd  * Copyright (c) 1992, 1993, 1994
      6        1.4       cgd  *	The Regents of the University of California.  All rights reserved.
      7        1.4       cgd  *
      8        1.4       cgd  * This code is derived from software contributed to Berkeley by
      9        1.4       cgd  * Henry Spencer.
     10        1.4       cgd  *
     11        1.4       cgd  * Redistribution and use in source and binary forms, with or without
     12        1.4       cgd  * modification, are permitted provided that the following conditions
     13        1.4       cgd  * are met:
     14        1.4       cgd  * 1. Redistributions of source code must retain the above copyright
     15        1.4       cgd  *    notice, this list of conditions and the following disclaimer.
     16        1.4       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     17        1.4       cgd  *    notice, this list of conditions and the following disclaimer in the
     18        1.4       cgd  *    documentation and/or other materials provided with the distribution.
     19        1.4       cgd  * 3. All advertising materials mentioning features or use of this software
     20        1.4       cgd  *    must display the following acknowledgement:
     21        1.4       cgd  *	This product includes software developed by the University of
     22        1.4       cgd  *	California, Berkeley and its contributors.
     23        1.4       cgd  * 4. Neither the name of the University nor the names of its contributors
     24        1.4       cgd  *    may be used to endorse or promote products derived from this software
     25        1.4       cgd  *    without specific prior written permission.
     26        1.4       cgd  *
     27        1.4       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28        1.4       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29        1.4       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30        1.4       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31        1.4       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32        1.4       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33        1.4       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34        1.4       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35        1.4       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36        1.4       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37        1.4       cgd  * SUCH DAMAGE.
     38        1.4       cgd  *
     39        1.4       cgd  *	@(#)engine.c	8.5 (Berkeley) 3/20/94
     40        1.4       cgd  */
     41        1.4       cgd 
     42        1.1       jtc /*
     43        1.1       jtc  * The matching engine and friends.  This file is #included by regexec.c
     44        1.1       jtc  * after suitable #defines of a variety of macros used herein, so that
     45        1.1       jtc  * different state representations can be used without duplicating masses
     46        1.1       jtc  * of code.
     47        1.1       jtc  */
     48        1.1       jtc 
     49        1.1       jtc #ifdef SNAMES
     50        1.1       jtc #define	matcher	smatcher
     51        1.1       jtc #define	fast	sfast
     52        1.1       jtc #define	slow	sslow
     53        1.1       jtc #define	dissect	sdissect
     54        1.1       jtc #define	backref	sbackref
     55        1.1       jtc #define	step	sstep
     56        1.1       jtc #define	print	sprint
     57        1.1       jtc #define	at	sat
     58        1.1       jtc #define	match	smat
     59        1.1       jtc #endif
     60        1.1       jtc #ifdef LNAMES
     61        1.1       jtc #define	matcher	lmatcher
     62        1.1       jtc #define	fast	lfast
     63        1.1       jtc #define	slow	lslow
     64        1.1       jtc #define	dissect	ldissect
     65        1.1       jtc #define	backref	lbackref
     66        1.1       jtc #define	step	lstep
     67        1.1       jtc #define	print	lprint
     68        1.1       jtc #define	at	lat
     69        1.1       jtc #define	match	lmat
     70        1.1       jtc #endif
     71        1.1       jtc 
     72        1.1       jtc /* another structure passed up and down to avoid zillions of parameters */
     73        1.1       jtc struct match {
     74        1.1       jtc 	struct re_guts *g;
     75        1.1       jtc 	int eflags;
     76        1.1       jtc 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
     77        1.1       jtc 	char *offp;		/* offsets work from here */
     78        1.1       jtc 	char *beginp;		/* start of string -- virtual NUL precedes */
     79        1.1       jtc 	char *endp;		/* end of string -- virtual NUL here */
     80        1.1       jtc 	char *coldp;		/* can be no match starting before here */
     81        1.1       jtc 	char **lastpos;		/* [nplus+1] */
     82        1.1       jtc 	STATEVARS;
     83        1.1       jtc 	states st;		/* current states */
     84        1.1       jtc 	states fresh;		/* states for a fresh start */
     85        1.1       jtc 	states tmp;		/* temporary */
     86        1.1       jtc 	states empty;		/* empty set of states */
     87        1.1       jtc };
     88        1.1       jtc 
     89        1.4       cgd /* ========= begin header generated by ./mkh ========= */
     90        1.4       cgd #ifdef __cplusplus
     91        1.4       cgd extern "C" {
     92        1.4       cgd #endif
     93        1.4       cgd 
     94        1.4       cgd /* === engine.c === */
     95        1.4       cgd static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
     96        1.4       cgd static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
     97        1.4       cgd static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
     98        1.4       cgd static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
     99        1.4       cgd static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
    100        1.4       cgd static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
    101        1.4       cgd #define	BOL	(OUT+1)
    102        1.4       cgd #define	EOL	(BOL+1)
    103        1.4       cgd #define	BOLEOL	(BOL+2)
    104        1.4       cgd #define	NOTHING	(BOL+3)
    105        1.4       cgd #define	BOW	(BOL+4)
    106        1.4       cgd #define	EOW	(BOL+5)
    107        1.4       cgd #define	CODEMAX	(BOL+5)		/* highest code used */
    108        1.4       cgd #define	NONCHAR(c)	((c) > CHAR_MAX)
    109        1.4       cgd #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    110        1.4       cgd #ifdef REDEBUG
    111        1.4       cgd static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
    112        1.4       cgd #endif
    113        1.4       cgd #ifdef REDEBUG
    114        1.4       cgd static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
    115        1.4       cgd #endif
    116        1.4       cgd #ifdef REDEBUG
    117        1.4       cgd static char *pchar __P((int ch));
    118        1.4       cgd #endif
    119        1.4       cgd 
    120        1.4       cgd #ifdef __cplusplus
    121        1.4       cgd }
    122        1.4       cgd #endif
    123        1.4       cgd /* ========= end header generated by ./mkh ========= */
    124        1.1       jtc 
    125        1.1       jtc #ifdef REDEBUG
    126        1.1       jtc #define	SP(t, s, c)	print(m, t, s, c, stdout)
    127        1.1       jtc #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
    128        1.1       jtc #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
    129        1.1       jtc #else
    130        1.1       jtc #define	SP(t, s, c)	/* nothing */
    131        1.1       jtc #define	AT(t, p1, p2, s1, s2)	/* nothing */
    132        1.1       jtc #define	NOTE(s)	/* nothing */
    133        1.1       jtc #endif
    134        1.1       jtc 
    135        1.1       jtc /*
    136        1.1       jtc  - matcher - the actual matching engine
    137        1.8     perry  == static int matcher(struct re_guts *g, char *string, \
    138        1.1       jtc  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
    139        1.1       jtc  */
    140        1.1       jtc static int			/* 0 success, REG_NOMATCH failure */
    141        1.1       jtc matcher(g, string, nmatch, pmatch, eflags)
    142        1.8     perry struct re_guts *g;
    143        1.1       jtc char *string;
    144        1.1       jtc size_t nmatch;
    145        1.1       jtc regmatch_t pmatch[];
    146        1.1       jtc int eflags;
    147        1.1       jtc {
    148        1.8     perry 	char *endp;
    149        1.8     perry 	int i;
    150        1.1       jtc 	struct match mv;
    151        1.8     perry 	struct match *m = &mv;
    152        1.8     perry 	char *dp;
    153        1.8     perry 	const sopno gf = g->firststate+1;	/* +1 for OEND */
    154        1.8     perry 	const sopno gl = g->laststate;
    155        1.1       jtc 	char *start;
    156        1.1       jtc 	char *stop;
    157  1.12.10.1   nathanw 	int error = 0;
    158        1.1       jtc 
    159       1.12     lukem 	_DIAGASSERT(g != NULL);
    160       1.12     lukem 	_DIAGASSERT(string != NULL);
    161       1.12     lukem 	/* pmatch checked below */
    162       1.12     lukem 
    163        1.1       jtc 	/* simplify the situation where possible */
    164        1.1       jtc 	if (g->cflags&REG_NOSUB)
    165        1.1       jtc 		nmatch = 0;
    166        1.1       jtc 	if (eflags&REG_STARTEND) {
    167       1.12     lukem 		_DIAGASSERT(pmatch != NULL);
    168       1.11  christos 		start = string + (size_t)pmatch[0].rm_so;
    169       1.11  christos 		stop = string + (size_t)pmatch[0].rm_eo;
    170        1.1       jtc 	} else {
    171        1.1       jtc 		start = string;
    172        1.1       jtc 		stop = start + strlen(start);
    173        1.1       jtc 	}
    174        1.1       jtc 	if (stop < start)
    175        1.1       jtc 		return(REG_INVARG);
    176        1.1       jtc 
    177        1.1       jtc 	/* prescreening; this does wonders for this rather slow code */
    178        1.1       jtc 	if (g->must != NULL) {
    179        1.1       jtc 		for (dp = start; dp < stop; dp++)
    180        1.1       jtc 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
    181        1.1       jtc 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
    182        1.1       jtc 				break;
    183        1.1       jtc 		if (dp == stop)		/* we didn't find g->must */
    184        1.1       jtc 			return(REG_NOMATCH);
    185        1.1       jtc 	}
    186        1.1       jtc 
    187        1.1       jtc 	/* match struct setup */
    188        1.1       jtc 	m->g = g;
    189        1.1       jtc 	m->eflags = eflags;
    190        1.1       jtc 	m->pmatch = NULL;
    191        1.1       jtc 	m->lastpos = NULL;
    192        1.1       jtc 	m->offp = string;
    193        1.1       jtc 	m->beginp = start;
    194        1.1       jtc 	m->endp = stop;
    195        1.1       jtc 	STATESETUP(m, 4);
    196        1.1       jtc 	SETUP(m->st);
    197        1.1       jtc 	SETUP(m->fresh);
    198        1.1       jtc 	SETUP(m->tmp);
    199        1.1       jtc 	SETUP(m->empty);
    200        1.1       jtc 	CLEAR(m->empty);
    201        1.1       jtc 
    202        1.1       jtc 	/* this loop does only one repetition except for backrefs */
    203        1.1       jtc 	for (;;) {
    204        1.1       jtc 		endp = fast(m, start, stop, gf, gl);
    205        1.1       jtc 		if (endp == NULL) {		/* a miss */
    206  1.12.10.1   nathanw 			error = REG_NOMATCH;
    207  1.12.10.1   nathanw 			goto done;
    208        1.1       jtc 		}
    209        1.1       jtc 		if (nmatch == 0 && !g->backrefs)
    210        1.1       jtc 			break;		/* no further info needed */
    211        1.1       jtc 
    212        1.1       jtc 		/* where? */
    213        1.1       jtc 		assert(m->coldp != NULL);
    214        1.1       jtc 		for (;;) {
    215        1.1       jtc 			NOTE("finding start");
    216        1.1       jtc 			endp = slow(m, m->coldp, stop, gf, gl);
    217        1.1       jtc 			if (endp != NULL)
    218        1.1       jtc 				break;
    219        1.1       jtc 			assert(m->coldp < m->endp);
    220        1.1       jtc 			m->coldp++;
    221        1.1       jtc 		}
    222        1.1       jtc 		if (nmatch == 1 && !g->backrefs)
    223        1.1       jtc 			break;		/* no further info needed */
    224        1.1       jtc 
    225        1.1       jtc 		/* oh my, he wants the subexpressions... */
    226        1.1       jtc 		if (m->pmatch == NULL)
    227        1.1       jtc 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
    228        1.1       jtc 							sizeof(regmatch_t));
    229        1.1       jtc 		if (m->pmatch == NULL) {
    230  1.12.10.1   nathanw 			error = REG_ESPACE;
    231  1.12.10.1   nathanw 			goto done;
    232        1.1       jtc 		}
    233        1.1       jtc 		for (i = 1; i <= m->g->nsub; i++)
    234       1.10  drochner 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
    235        1.1       jtc 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
    236        1.1       jtc 			NOTE("dissecting");
    237        1.1       jtc 			dp = dissect(m, m->coldp, endp, gf, gl);
    238        1.1       jtc 		} else {
    239        1.1       jtc 			if (g->nplus > 0 && m->lastpos == NULL)
    240        1.1       jtc 				m->lastpos = (char **)malloc((g->nplus+1) *
    241        1.1       jtc 							sizeof(char *));
    242        1.1       jtc 			if (g->nplus > 0 && m->lastpos == NULL) {
    243  1.12.10.1   nathanw 				error = REG_ESPACE;
    244  1.12.10.1   nathanw 				goto done;
    245        1.1       jtc 			}
    246        1.1       jtc 			NOTE("backref dissect");
    247        1.1       jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    248        1.1       jtc 		}
    249        1.1       jtc 		if (dp != NULL)
    250        1.1       jtc 			break;
    251        1.1       jtc 
    252        1.1       jtc 		/* uh-oh... we couldn't find a subexpression-level match */
    253        1.1       jtc 		assert(g->backrefs);	/* must be back references doing it */
    254        1.1       jtc 		assert(g->nplus == 0 || m->lastpos != NULL);
    255        1.1       jtc 		for (;;) {
    256        1.1       jtc 			if (dp != NULL || endp <= m->coldp)
    257        1.1       jtc 				break;		/* defeat */
    258        1.1       jtc 			NOTE("backoff");
    259        1.1       jtc 			endp = slow(m, m->coldp, endp-1, gf, gl);
    260        1.1       jtc 			if (endp == NULL)
    261        1.1       jtc 				break;		/* defeat */
    262        1.1       jtc 			/* try it on a shorter possibility */
    263        1.1       jtc #ifndef NDEBUG
    264        1.1       jtc 			for (i = 1; i <= m->g->nsub; i++) {
    265       1.10  drochner 				assert(m->pmatch[i].rm_so == (regoff_t)-1);
    266       1.10  drochner 				assert(m->pmatch[i].rm_eo == (regoff_t)-1);
    267        1.1       jtc 			}
    268        1.1       jtc #endif
    269        1.1       jtc 			NOTE("backoff dissect");
    270        1.1       jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    271        1.1       jtc 		}
    272        1.1       jtc 		assert(dp == NULL || dp == endp);
    273        1.1       jtc 		if (dp != NULL)		/* found a shorter one */
    274        1.1       jtc 			break;
    275        1.1       jtc 
    276        1.1       jtc 		/* despite initial appearances, there is no match here */
    277        1.1       jtc 		NOTE("false alarm");
    278        1.1       jtc 		start = m->coldp + 1;	/* recycle starting later */
    279        1.1       jtc 		assert(start <= stop);
    280        1.1       jtc 	}
    281        1.1       jtc 
    282        1.1       jtc 	/* fill in the details if requested */
    283        1.1       jtc 	if (nmatch > 0) {
    284       1.12     lukem 		_DIAGASSERT(pmatch != NULL);
    285        1.1       jtc 		pmatch[0].rm_so = m->coldp - m->offp;
    286        1.1       jtc 		pmatch[0].rm_eo = endp - m->offp;
    287        1.1       jtc 	}
    288        1.1       jtc 	if (nmatch > 1) {
    289        1.1       jtc 		assert(m->pmatch != NULL);
    290        1.1       jtc 		for (i = 1; i < nmatch; i++)
    291        1.1       jtc 			if (i <= m->g->nsub)
    292        1.1       jtc 				pmatch[i] = m->pmatch[i];
    293        1.1       jtc 			else {
    294       1.10  drochner 				pmatch[i].rm_so = (regoff_t)-1;
    295       1.10  drochner 				pmatch[i].rm_eo = (regoff_t)-1;
    296        1.1       jtc 			}
    297        1.1       jtc 	}
    298        1.1       jtc 
    299  1.12.10.1   nathanw done:
    300  1.12.10.1   nathanw 	if (m->pmatch != NULL) {
    301        1.9  christos 		free(m->pmatch);
    302  1.12.10.1   nathanw 		m->pmatch = NULL;
    303  1.12.10.1   nathanw 	}
    304  1.12.10.1   nathanw 	if (m->lastpos != NULL) {
    305        1.9  christos 		free(m->lastpos);
    306  1.12.10.1   nathanw 		m->lastpos = NULL;
    307  1.12.10.1   nathanw 	}
    308        1.1       jtc 	STATETEARDOWN(m);
    309  1.12.10.1   nathanw 	return error;
    310        1.1       jtc }
    311        1.1       jtc 
    312        1.1       jtc /*
    313        1.1       jtc  - dissect - figure out what matched what, no back references
    314        1.8     perry  == static char *dissect(struct match *m, char *start, \
    315        1.1       jtc  ==	char *stop, sopno startst, sopno stopst);
    316        1.1       jtc  */
    317        1.1       jtc static char *			/* == stop (success) always */
    318        1.1       jtc dissect(m, start, stop, startst, stopst)
    319        1.8     perry struct match *m;
    320        1.1       jtc char *start;
    321        1.1       jtc char *stop;
    322        1.1       jtc sopno startst;
    323        1.1       jtc sopno stopst;
    324        1.1       jtc {
    325        1.8     perry 	int i;
    326        1.8     perry 	sopno ss;	/* start sop of current subRE */
    327        1.8     perry 	sopno es;	/* end sop of current subRE */
    328        1.8     perry 	char *sp;	/* start of string matched by it */
    329        1.8     perry 	char *stp;	/* string matched by it cannot pass here */
    330        1.8     perry 	char *rest;	/* start of rest of string */
    331        1.8     perry 	char *tail;	/* string unmatched by rest of RE */
    332        1.8     perry 	sopno ssub;	/* start sop of subsubRE */
    333        1.8     perry 	sopno esub;	/* end sop of subsubRE */
    334        1.8     perry 	char *ssp;	/* start of string matched by subsubRE */
    335        1.8     perry 	char *sep;	/* end of string matched by subsubRE */
    336        1.8     perry 	char *oldssp;	/* previous ssp */
    337        1.9  christos #ifndef NDEBUG
    338        1.8     perry 	char *dp;
    339        1.9  christos #endif
    340        1.1       jtc 
    341       1.12     lukem 	_DIAGASSERT(m != NULL);
    342       1.12     lukem 	_DIAGASSERT(start != NULL);
    343       1.12     lukem 	_DIAGASSERT(stop != NULL);
    344       1.12     lukem 
    345        1.1       jtc 	AT("diss", start, stop, startst, stopst);
    346        1.1       jtc 	sp = start;
    347        1.1       jtc 	for (ss = startst; ss < stopst; ss = es) {
    348        1.1       jtc 		/* identify end of subRE */
    349        1.1       jtc 		es = ss;
    350        1.1       jtc 		switch (OP(m->g->strip[es])) {
    351        1.1       jtc 		case OPLUS_:
    352        1.1       jtc 		case OQUEST_:
    353        1.1       jtc 			es += OPND(m->g->strip[es]);
    354        1.1       jtc 			break;
    355        1.1       jtc 		case OCH_:
    356        1.1       jtc 			while (OP(m->g->strip[es]) != O_CH)
    357        1.1       jtc 				es += OPND(m->g->strip[es]);
    358        1.1       jtc 			break;
    359        1.1       jtc 		}
    360        1.1       jtc 		es++;
    361        1.1       jtc 
    362        1.1       jtc 		/* figure out what it matched */
    363        1.1       jtc 		switch (OP(m->g->strip[ss])) {
    364        1.1       jtc 		case OEND:
    365        1.1       jtc 			assert(nope);
    366        1.1       jtc 			break;
    367        1.1       jtc 		case OCHAR:
    368        1.1       jtc 			sp++;
    369        1.1       jtc 			break;
    370        1.1       jtc 		case OBOL:
    371        1.1       jtc 		case OEOL:
    372        1.1       jtc 		case OBOW:
    373        1.1       jtc 		case OEOW:
    374        1.1       jtc 			break;
    375        1.1       jtc 		case OANY:
    376        1.1       jtc 		case OANYOF:
    377        1.1       jtc 			sp++;
    378        1.1       jtc 			break;
    379        1.1       jtc 		case OBACK_:
    380        1.1       jtc 		case O_BACK:
    381        1.1       jtc 			assert(nope);
    382        1.1       jtc 			break;
    383        1.1       jtc 		/* cases where length of match is hard to find */
    384        1.1       jtc 		case OQUEST_:
    385        1.1       jtc 			stp = stop;
    386        1.1       jtc 			for (;;) {
    387        1.1       jtc 				/* how long could this one be? */
    388        1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    389        1.1       jtc 				assert(rest != NULL);	/* it did match */
    390        1.1       jtc 				/* could the rest match the rest? */
    391        1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    392        1.1       jtc 				if (tail == stop)
    393        1.1       jtc 					break;		/* yes! */
    394        1.1       jtc 				/* no -- try a shorter match for this one */
    395        1.1       jtc 				stp = rest - 1;
    396        1.1       jtc 				assert(stp >= sp);	/* it did work */
    397        1.1       jtc 			}
    398        1.1       jtc 			ssub = ss + 1;
    399        1.1       jtc 			esub = es - 1;
    400        1.1       jtc 			/* did innards match? */
    401        1.1       jtc 			if (slow(m, sp, rest, ssub, esub) != NULL) {
    402        1.9  christos #ifdef NDEBUG
    403        1.9  christos 				(void)
    404        1.9  christos #else
    405        1.9  christos 				dp =
    406        1.9  christos #endif
    407        1.9  christos 				    dissect(m, sp, rest, ssub, esub);
    408        1.1       jtc 				assert(dp == rest);
    409        1.1       jtc 			} else		/* no */
    410        1.1       jtc 				assert(sp == rest);
    411        1.1       jtc 			sp = rest;
    412        1.1       jtc 			break;
    413        1.1       jtc 		case OPLUS_:
    414        1.1       jtc 			stp = stop;
    415        1.1       jtc 			for (;;) {
    416        1.1       jtc 				/* how long could this one be? */
    417        1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    418        1.1       jtc 				assert(rest != NULL);	/* it did match */
    419        1.1       jtc 				/* could the rest match the rest? */
    420        1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    421        1.1       jtc 				if (tail == stop)
    422        1.1       jtc 					break;		/* yes! */
    423        1.1       jtc 				/* no -- try a shorter match for this one */
    424        1.1       jtc 				stp = rest - 1;
    425        1.1       jtc 				assert(stp >= sp);	/* it did work */
    426        1.1       jtc 			}
    427        1.1       jtc 			ssub = ss + 1;
    428        1.1       jtc 			esub = es - 1;
    429        1.1       jtc 			ssp = sp;
    430        1.1       jtc 			oldssp = ssp;
    431        1.1       jtc 			for (;;) {	/* find last match of innards */
    432        1.1       jtc 				sep = slow(m, ssp, rest, ssub, esub);
    433        1.1       jtc 				if (sep == NULL || sep == ssp)
    434        1.1       jtc 					break;	/* failed or matched null */
    435        1.1       jtc 				oldssp = ssp;	/* on to next try */
    436        1.1       jtc 				ssp = sep;
    437        1.1       jtc 			}
    438        1.1       jtc 			if (sep == NULL) {
    439        1.1       jtc 				/* last successful match */
    440        1.1       jtc 				sep = ssp;
    441        1.1       jtc 				ssp = oldssp;
    442        1.1       jtc 			}
    443        1.1       jtc 			assert(sep == rest);	/* must exhaust substring */
    444        1.1       jtc 			assert(slow(m, ssp, sep, ssub, esub) == rest);
    445        1.9  christos #ifdef NDEBUG
    446        1.9  christos 			(void)
    447        1.9  christos #else
    448        1.9  christos 			dp =
    449        1.9  christos #endif
    450        1.9  christos 			    dissect(m, ssp, sep, ssub, esub);
    451        1.1       jtc 			assert(dp == sep);
    452        1.1       jtc 			sp = rest;
    453        1.1       jtc 			break;
    454        1.1       jtc 		case OCH_:
    455        1.1       jtc 			stp = stop;
    456        1.1       jtc 			for (;;) {
    457        1.1       jtc 				/* how long could this one be? */
    458        1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    459        1.1       jtc 				assert(rest != NULL);	/* it did match */
    460        1.1       jtc 				/* could the rest match the rest? */
    461        1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    462        1.1       jtc 				if (tail == stop)
    463        1.1       jtc 					break;		/* yes! */
    464        1.1       jtc 				/* no -- try a shorter match for this one */
    465        1.1       jtc 				stp = rest - 1;
    466        1.1       jtc 				assert(stp >= sp);	/* it did work */
    467        1.1       jtc 			}
    468        1.1       jtc 			ssub = ss + 1;
    469        1.1       jtc 			esub = ss + OPND(m->g->strip[ss]) - 1;
    470        1.1       jtc 			assert(OP(m->g->strip[esub]) == OOR1);
    471        1.1       jtc 			for (;;) {	/* find first matching branch */
    472        1.1       jtc 				if (slow(m, sp, rest, ssub, esub) == rest)
    473        1.1       jtc 					break;	/* it matched all of it */
    474        1.1       jtc 				/* that one missed, try next one */
    475        1.1       jtc 				assert(OP(m->g->strip[esub]) == OOR1);
    476        1.1       jtc 				esub++;
    477        1.1       jtc 				assert(OP(m->g->strip[esub]) == OOR2);
    478        1.1       jtc 				ssub = esub + 1;
    479        1.1       jtc 				esub += OPND(m->g->strip[esub]);
    480        1.1       jtc 				if (OP(m->g->strip[esub]) == OOR2)
    481        1.1       jtc 					esub--;
    482        1.1       jtc 				else
    483        1.1       jtc 					assert(OP(m->g->strip[esub]) == O_CH);
    484        1.1       jtc 			}
    485        1.9  christos #ifdef NDEBUG
    486        1.9  christos 			(void)
    487        1.9  christos #else
    488        1.9  christos 			dp =
    489        1.9  christos #endif
    490        1.9  christos 			    dissect(m, sp, rest, ssub, esub);
    491        1.1       jtc 			assert(dp == rest);
    492        1.1       jtc 			sp = rest;
    493        1.1       jtc 			break;
    494        1.1       jtc 		case O_PLUS:
    495        1.1       jtc 		case O_QUEST:
    496        1.1       jtc 		case OOR1:
    497        1.1       jtc 		case OOR2:
    498        1.1       jtc 		case O_CH:
    499        1.1       jtc 			assert(nope);
    500        1.1       jtc 			break;
    501        1.1       jtc 		case OLPAREN:
    502        1.1       jtc 			i = OPND(m->g->strip[ss]);
    503        1.2       jtc 			assert(0 < i && i <= m->g->nsub);
    504        1.1       jtc 			m->pmatch[i].rm_so = sp - m->offp;
    505        1.1       jtc 			break;
    506        1.1       jtc 		case ORPAREN:
    507        1.1       jtc 			i = OPND(m->g->strip[ss]);
    508        1.2       jtc 			assert(0 < i && i <= m->g->nsub);
    509        1.1       jtc 			m->pmatch[i].rm_eo = sp - m->offp;
    510        1.1       jtc 			break;
    511        1.1       jtc 		default:		/* uh oh */
    512        1.1       jtc 			assert(nope);
    513        1.1       jtc 			break;
    514        1.1       jtc 		}
    515        1.1       jtc 	}
    516        1.1       jtc 
    517        1.1       jtc 	assert(sp == stop);
    518        1.1       jtc 	return(sp);
    519        1.1       jtc }
    520        1.1       jtc 
    521        1.1       jtc /*
    522        1.1       jtc  - backref - figure out what matched what, figuring in back references
    523        1.8     perry  == static char *backref(struct match *m, char *start, \
    524        1.1       jtc  ==	char *stop, sopno startst, sopno stopst, sopno lev);
    525        1.1       jtc  */
    526        1.1       jtc static char *			/* == stop (success) or NULL (failure) */
    527        1.1       jtc backref(m, start, stop, startst, stopst, lev)
    528        1.8     perry struct match *m;
    529        1.1       jtc char *start;
    530        1.1       jtc char *stop;
    531        1.1       jtc sopno startst;
    532        1.1       jtc sopno stopst;
    533        1.1       jtc sopno lev;			/* PLUS nesting level */
    534        1.1       jtc {
    535        1.8     perry 	int i;
    536        1.8     perry 	sopno ss;	/* start sop of current subRE */
    537        1.8     perry 	char *sp;	/* start of string matched by it */
    538        1.8     perry 	sopno ssub;	/* start sop of subsubRE */
    539        1.8     perry 	sopno esub;	/* end sop of subsubRE */
    540        1.8     perry 	char *ssp;	/* start of string matched by subsubRE */
    541        1.8     perry 	char *dp;
    542        1.8     perry 	size_t len;
    543        1.8     perry 	int hard;
    544        1.8     perry 	sop s;
    545        1.8     perry 	regoff_t offsave;
    546        1.8     perry 	cset *cs;
    547        1.1       jtc 
    548       1.12     lukem 	_DIAGASSERT(m != NULL);
    549       1.12     lukem 	_DIAGASSERT(start != NULL);
    550       1.12     lukem 	_DIAGASSERT(stop != NULL);
    551       1.12     lukem 
    552        1.1       jtc 	AT("back", start, stop, startst, stopst);
    553        1.1       jtc 	sp = start;
    554        1.1       jtc 
    555        1.1       jtc 	/* get as far as we can with easy stuff */
    556        1.1       jtc 	hard = 0;
    557        1.1       jtc 	for (ss = startst; !hard && ss < stopst; ss++)
    558        1.1       jtc 		switch (OP(s = m->g->strip[ss])) {
    559        1.1       jtc 		case OCHAR:
    560        1.1       jtc 			if (sp == stop || *sp++ != (char)OPND(s))
    561        1.1       jtc 				return(NULL);
    562        1.1       jtc 			break;
    563        1.1       jtc 		case OANY:
    564        1.1       jtc 			if (sp == stop)
    565        1.1       jtc 				return(NULL);
    566        1.1       jtc 			sp++;
    567        1.1       jtc 			break;
    568        1.1       jtc 		case OANYOF:
    569        1.1       jtc 			cs = &m->g->sets[OPND(s)];
    570        1.1       jtc 			if (sp == stop || !CHIN(cs, *sp++))
    571        1.1       jtc 				return(NULL);
    572        1.1       jtc 			break;
    573        1.1       jtc 		case OBOL:
    574        1.1       jtc 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    575        1.1       jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    576        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) )
    577        1.1       jtc 				{ /* yes */ }
    578        1.1       jtc 			else
    579        1.1       jtc 				return(NULL);
    580        1.1       jtc 			break;
    581        1.1       jtc 		case OEOL:
    582        1.1       jtc 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    583        1.1       jtc 					(sp < m->endp && *sp == '\n' &&
    584        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) )
    585        1.1       jtc 				{ /* yes */ }
    586        1.1       jtc 			else
    587        1.1       jtc 				return(NULL);
    588        1.1       jtc 			break;
    589        1.1       jtc 		case OBOW:
    590        1.1       jtc 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    591        1.1       jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    592        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) ||
    593        1.1       jtc 					(sp > m->beginp &&
    594        1.3       jtc 							!ISWORD(*(sp-1))) ) &&
    595        1.3       jtc 					(sp < m->endp && ISWORD(*sp)) )
    596        1.1       jtc 				{ /* yes */ }
    597        1.1       jtc 			else
    598        1.1       jtc 				return(NULL);
    599        1.1       jtc 			break;
    600        1.1       jtc 		case OEOW:
    601        1.1       jtc 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    602        1.1       jtc 					(sp < m->endp && *sp == '\n' &&
    603        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) ||
    604        1.3       jtc 					(sp < m->endp && !ISWORD(*sp)) ) &&
    605        1.3       jtc 					(sp > m->beginp && ISWORD(*(sp-1))) )
    606        1.1       jtc 				{ /* yes */ }
    607        1.1       jtc 			else
    608        1.1       jtc 				return(NULL);
    609        1.1       jtc 			break;
    610        1.1       jtc 		case O_QUEST:
    611        1.1       jtc 			break;
    612        1.1       jtc 		case OOR1:	/* matches null but needs to skip */
    613        1.1       jtc 			ss++;
    614        1.1       jtc 			s = m->g->strip[ss];
    615        1.1       jtc 			do {
    616        1.1       jtc 				assert(OP(s) == OOR2);
    617        1.1       jtc 				ss += OPND(s);
    618        1.1       jtc 			} while (OP(s = m->g->strip[ss]) != O_CH);
    619        1.1       jtc 			/* note that the ss++ gets us past the O_CH */
    620        1.1       jtc 			break;
    621        1.1       jtc 		default:	/* have to make a choice */
    622        1.1       jtc 			hard = 1;
    623        1.1       jtc 			break;
    624        1.1       jtc 		}
    625        1.1       jtc 	if (!hard) {		/* that was it! */
    626        1.1       jtc 		if (sp != stop)
    627        1.1       jtc 			return(NULL);
    628        1.1       jtc 		return(sp);
    629        1.1       jtc 	}
    630        1.1       jtc 	ss--;			/* adjust for the for's final increment */
    631        1.1       jtc 
    632        1.1       jtc 	/* the hard stuff */
    633        1.1       jtc 	AT("hard", sp, stop, ss, stopst);
    634        1.1       jtc 	s = m->g->strip[ss];
    635        1.1       jtc 	switch (OP(s)) {
    636        1.1       jtc 	case OBACK_:		/* the vilest depths */
    637        1.1       jtc 		i = OPND(s);
    638        1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    639       1.10  drochner 		if (m->pmatch[i].rm_eo == (regoff_t)-1)
    640        1.1       jtc 			return(NULL);
    641       1.10  drochner 		assert(m->pmatch[i].rm_so != (regoff_t)-1);
    642       1.11  christos 		len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
    643        1.1       jtc 		assert(stop - m->beginp >= len);
    644        1.1       jtc 		if (sp > stop - len)
    645        1.1       jtc 			return(NULL);	/* not enough left to match */
    646       1.11  christos 		ssp = m->offp + (size_t)m->pmatch[i].rm_so;
    647        1.1       jtc 		if (memcmp(sp, ssp, len) != 0)
    648        1.1       jtc 			return(NULL);
    649        1.1       jtc 		while (m->g->strip[ss] != SOP(O_BACK, i))
    650        1.1       jtc 			ss++;
    651        1.1       jtc 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
    652        1.9  christos 
    653        1.1       jtc 	case OQUEST_:		/* to null or not */
    654        1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    655        1.1       jtc 		if (dp != NULL)
    656        1.1       jtc 			return(dp);	/* not */
    657        1.1       jtc 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
    658        1.9  christos 
    659        1.1       jtc 	case OPLUS_:
    660        1.1       jtc 		assert(m->lastpos != NULL);
    661        1.1       jtc 		assert(lev+1 <= m->g->nplus);
    662        1.1       jtc 		m->lastpos[lev+1] = sp;
    663        1.1       jtc 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
    664        1.9  christos 
    665        1.1       jtc 	case O_PLUS:
    666        1.1       jtc 		if (sp == m->lastpos[lev])	/* last pass matched null */
    667        1.1       jtc 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
    668        1.1       jtc 		/* try another pass */
    669        1.1       jtc 		m->lastpos[lev] = sp;
    670        1.1       jtc 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
    671        1.1       jtc 		if (dp == NULL)
    672        1.9  christos 			dp = backref(m, sp, stop, ss+1, stopst, lev-1);
    673        1.9  christos 		return(dp);
    674        1.9  christos 
    675        1.1       jtc 	case OCH_:		/* find the right one, if any */
    676        1.1       jtc 		ssub = ss + 1;
    677        1.1       jtc 		esub = ss + OPND(s) - 1;
    678        1.1       jtc 		assert(OP(m->g->strip[esub]) == OOR1);
    679        1.1       jtc 		for (;;) {	/* find first matching branch */
    680        1.1       jtc 			dp = backref(m, sp, stop, ssub, esub, lev);
    681        1.1       jtc 			if (dp != NULL)
    682        1.1       jtc 				return(dp);
    683        1.1       jtc 			/* that one missed, try next one */
    684        1.1       jtc 			if (OP(m->g->strip[esub]) == O_CH)
    685        1.1       jtc 				return(NULL);	/* there is none */
    686        1.1       jtc 			esub++;
    687        1.1       jtc 			assert(OP(m->g->strip[esub]) == OOR2);
    688        1.1       jtc 			ssub = esub + 1;
    689        1.1       jtc 			esub += OPND(m->g->strip[esub]);
    690        1.1       jtc 			if (OP(m->g->strip[esub]) == OOR2)
    691        1.1       jtc 				esub--;
    692        1.1       jtc 			else
    693        1.1       jtc 				assert(OP(m->g->strip[esub]) == O_CH);
    694        1.1       jtc 		}
    695        1.9  christos 
    696        1.1       jtc 	case OLPAREN:		/* must undo assignment if rest fails */
    697        1.1       jtc 		i = OPND(s);
    698        1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    699        1.1       jtc 		offsave = m->pmatch[i].rm_so;
    700        1.1       jtc 		m->pmatch[i].rm_so = sp - m->offp;
    701        1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    702        1.1       jtc 		if (dp != NULL)
    703        1.1       jtc 			return(dp);
    704        1.1       jtc 		m->pmatch[i].rm_so = offsave;
    705        1.1       jtc 		return(NULL);
    706        1.9  christos 
    707        1.1       jtc 	case ORPAREN:		/* must undo assignment if rest fails */
    708        1.1       jtc 		i = OPND(s);
    709        1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    710        1.1       jtc 		offsave = m->pmatch[i].rm_eo;
    711        1.1       jtc 		m->pmatch[i].rm_eo = sp - m->offp;
    712        1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    713        1.1       jtc 		if (dp != NULL)
    714        1.1       jtc 			return(dp);
    715        1.1       jtc 		m->pmatch[i].rm_eo = offsave;
    716        1.1       jtc 		return(NULL);
    717        1.9  christos 
    718        1.1       jtc 	default:		/* uh oh */
    719        1.1       jtc 		assert(nope);
    720        1.1       jtc 		break;
    721        1.1       jtc 	}
    722        1.1       jtc 
    723        1.1       jtc 	/* "can't happen" */
    724        1.1       jtc 	assert(nope);
    725        1.1       jtc 	/* NOTREACHED */
    726        1.7  christos 	return NULL;
    727        1.1       jtc }
    728        1.1       jtc 
    729        1.1       jtc /*
    730        1.1       jtc  - fast - step through the string at top speed
    731        1.8     perry  == static char *fast(struct match *m, char *start, \
    732        1.1       jtc  ==	char *stop, sopno startst, sopno stopst);
    733        1.1       jtc  */
    734        1.1       jtc static char *			/* where tentative match ended, or NULL */
    735        1.1       jtc fast(m, start, stop, startst, stopst)
    736        1.8     perry struct match *m;
    737        1.1       jtc char *start;
    738        1.1       jtc char *stop;
    739        1.1       jtc sopno startst;
    740        1.1       jtc sopno stopst;
    741        1.1       jtc {
    742        1.8     perry 	states st = m->st;
    743        1.8     perry 	states fresh = m->fresh;
    744        1.8     perry 	states tmp = m->tmp;
    745        1.8     perry 	char *p = start;
    746        1.8     perry 	int c = (start == m->beginp) ? OUT : *(start-1);
    747        1.8     perry 	int lastc;	/* previous c */
    748        1.8     perry 	int flagch;
    749        1.8     perry 	int i;
    750        1.8     perry 	char *coldp;	/* last p after which no match was underway */
    751        1.1       jtc 
    752       1.12     lukem 	_DIAGASSERT(m != NULL);
    753       1.12     lukem 	_DIAGASSERT(start != NULL);
    754       1.12     lukem 	_DIAGASSERT(stop != NULL);
    755       1.12     lukem 
    756        1.1       jtc 	CLEAR(st);
    757        1.1       jtc 	SET1(st, startst);
    758        1.1       jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    759        1.1       jtc 	ASSIGN(fresh, st);
    760        1.1       jtc 	SP("start", st, *p);
    761        1.1       jtc 	coldp = NULL;
    762        1.1       jtc 	for (;;) {
    763        1.1       jtc 		/* next character */
    764        1.1       jtc 		lastc = c;
    765        1.1       jtc 		c = (p == m->endp) ? OUT : *p;
    766        1.1       jtc 		if (EQ(st, fresh))
    767        1.1       jtc 			coldp = p;
    768        1.1       jtc 
    769        1.1       jtc 		/* is there an EOL and/or BOL between lastc and c? */
    770        1.1       jtc 		flagch = '\0';
    771        1.1       jtc 		i = 0;
    772        1.1       jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    773        1.1       jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    774        1.1       jtc 			flagch = BOL;
    775        1.1       jtc 			i = m->g->nbol;
    776        1.1       jtc 		}
    777        1.1       jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    778        1.1       jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    779        1.1       jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    780        1.1       jtc 			i += m->g->neol;
    781        1.1       jtc 		}
    782        1.1       jtc 		if (i != 0) {
    783        1.1       jtc 			for (; i > 0; i--)
    784        1.1       jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    785        1.1       jtc 			SP("boleol", st, c);
    786        1.1       jtc 		}
    787        1.1       jtc 
    788        1.1       jtc 		/* how about a word boundary? */
    789        1.3       jtc 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    790        1.3       jtc 					(c != OUT && ISWORD(c)) ) {
    791        1.1       jtc 			flagch = BOW;
    792        1.1       jtc 		}
    793        1.3       jtc 		if ( (lastc != OUT && ISWORD(lastc)) &&
    794        1.3       jtc 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    795        1.1       jtc 			flagch = EOW;
    796        1.1       jtc 		}
    797        1.1       jtc 		if (flagch == BOW || flagch == EOW) {
    798        1.1       jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    799        1.1       jtc 			SP("boweow", st, c);
    800        1.1       jtc 		}
    801        1.1       jtc 
    802        1.1       jtc 		/* are we done? */
    803        1.1       jtc 		if (ISSET(st, stopst) || p == stop)
    804        1.1       jtc 			break;		/* NOTE BREAK OUT */
    805        1.1       jtc 
    806        1.1       jtc 		/* no, we must deal with this character */
    807        1.1       jtc 		ASSIGN(tmp, st);
    808        1.1       jtc 		ASSIGN(st, fresh);
    809        1.1       jtc 		assert(c != OUT);
    810        1.1       jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    811        1.1       jtc 		SP("aft", st, c);
    812        1.1       jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    813        1.1       jtc 		p++;
    814        1.1       jtc 	}
    815        1.1       jtc 
    816        1.1       jtc 	assert(coldp != NULL);
    817        1.1       jtc 	m->coldp = coldp;
    818        1.1       jtc 	if (ISSET(st, stopst))
    819        1.1       jtc 		return(p+1);
    820        1.1       jtc 	else
    821        1.1       jtc 		return(NULL);
    822        1.1       jtc }
    823        1.1       jtc 
    824        1.1       jtc /*
    825        1.1       jtc  - slow - step through the string more deliberately
    826        1.8     perry  == static char *slow(struct match *m, char *start, \
    827        1.1       jtc  ==	char *stop, sopno startst, sopno stopst);
    828        1.1       jtc  */
    829        1.1       jtc static char *			/* where it ended */
    830        1.1       jtc slow(m, start, stop, startst, stopst)
    831        1.8     perry struct match *m;
    832        1.1       jtc char *start;
    833        1.1       jtc char *stop;
    834        1.1       jtc sopno startst;
    835        1.1       jtc sopno stopst;
    836        1.1       jtc {
    837        1.8     perry 	states st = m->st;
    838        1.8     perry 	states empty = m->empty;
    839        1.8     perry 	states tmp = m->tmp;
    840        1.8     perry 	char *p = start;
    841        1.8     perry 	int c = (start == m->beginp) ? OUT : *(start-1);
    842        1.8     perry 	int lastc;	/* previous c */
    843        1.8     perry 	int flagch;
    844        1.8     perry 	int i;
    845        1.8     perry 	char *matchp;	/* last p at which a match ended */
    846        1.1       jtc 
    847       1.12     lukem 	_DIAGASSERT(m != NULL);
    848       1.12     lukem 	_DIAGASSERT(start != NULL);
    849       1.12     lukem 	_DIAGASSERT(stop != NULL);
    850       1.12     lukem 
    851        1.1       jtc 	AT("slow", start, stop, startst, stopst);
    852        1.1       jtc 	CLEAR(st);
    853        1.1       jtc 	SET1(st, startst);
    854        1.1       jtc 	SP("sstart", st, *p);
    855        1.1       jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    856        1.1       jtc 	matchp = NULL;
    857        1.1       jtc 	for (;;) {
    858        1.1       jtc 		/* next character */
    859        1.1       jtc 		lastc = c;
    860        1.1       jtc 		c = (p == m->endp) ? OUT : *p;
    861        1.1       jtc 
    862        1.1       jtc 		/* is there an EOL and/or BOL between lastc and c? */
    863        1.1       jtc 		flagch = '\0';
    864        1.1       jtc 		i = 0;
    865        1.1       jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    866        1.1       jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    867        1.1       jtc 			flagch = BOL;
    868        1.1       jtc 			i = m->g->nbol;
    869        1.1       jtc 		}
    870        1.1       jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    871        1.1       jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    872        1.1       jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    873        1.1       jtc 			i += m->g->neol;
    874        1.1       jtc 		}
    875        1.1       jtc 		if (i != 0) {
    876        1.1       jtc 			for (; i > 0; i--)
    877        1.1       jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    878        1.1       jtc 			SP("sboleol", st, c);
    879        1.1       jtc 		}
    880        1.1       jtc 
    881        1.1       jtc 		/* how about a word boundary? */
    882        1.3       jtc 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    883        1.3       jtc 					(c != OUT && ISWORD(c)) ) {
    884        1.1       jtc 			flagch = BOW;
    885        1.1       jtc 		}
    886        1.3       jtc 		if ( (lastc != OUT && ISWORD(lastc)) &&
    887        1.3       jtc 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    888        1.1       jtc 			flagch = EOW;
    889        1.1       jtc 		}
    890        1.1       jtc 		if (flagch == BOW || flagch == EOW) {
    891        1.1       jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    892        1.1       jtc 			SP("sboweow", st, c);
    893        1.1       jtc 		}
    894        1.1       jtc 
    895        1.1       jtc 		/* are we done? */
    896        1.1       jtc 		if (ISSET(st, stopst))
    897        1.1       jtc 			matchp = p;
    898        1.1       jtc 		if (EQ(st, empty) || p == stop)
    899        1.1       jtc 			break;		/* NOTE BREAK OUT */
    900        1.1       jtc 
    901        1.1       jtc 		/* no, we must deal with this character */
    902        1.1       jtc 		ASSIGN(tmp, st);
    903        1.1       jtc 		ASSIGN(st, empty);
    904        1.1       jtc 		assert(c != OUT);
    905        1.1       jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    906        1.1       jtc 		SP("saft", st, c);
    907        1.1       jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    908        1.1       jtc 		p++;
    909        1.1       jtc 	}
    910        1.1       jtc 
    911        1.1       jtc 	return(matchp);
    912        1.1       jtc }
    913        1.1       jtc 
    914        1.1       jtc 
    915        1.1       jtc /*
    916        1.1       jtc  - step - map set of states reachable before char to set reachable after
    917        1.8     perry  == static states step(struct re_guts *g, sopno start, sopno stop, \
    918        1.8     perry  ==	states bef, int ch, states aft);
    919        1.1       jtc  == #define	BOL	(OUT+1)
    920        1.1       jtc  == #define	EOL	(BOL+1)
    921        1.1       jtc  == #define	BOLEOL	(BOL+2)
    922        1.1       jtc  == #define	NOTHING	(BOL+3)
    923        1.1       jtc  == #define	BOW	(BOL+4)
    924        1.1       jtc  == #define	EOW	(BOL+5)
    925        1.1       jtc  == #define	CODEMAX	(BOL+5)		// highest code used
    926        1.1       jtc  == #define	NONCHAR(c)	((c) > CHAR_MAX)
    927        1.1       jtc  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    928        1.1       jtc  */
    929        1.1       jtc static states
    930        1.1       jtc step(g, start, stop, bef, ch, aft)
    931        1.8     perry struct re_guts *g;
    932        1.3       jtc sopno start;			/* start state within strip */
    933        1.3       jtc sopno stop;			/* state after stop state within strip */
    934        1.8     perry states bef;		/* states reachable before */
    935        1.1       jtc int ch;				/* character or NONCHAR code */
    936        1.8     perry states aft;		/* states already known reachable after */
    937        1.1       jtc {
    938        1.8     perry 	cset *cs;
    939        1.8     perry 	sop s;
    940        1.8     perry 	sopno pc;
    941        1.8     perry 	onestate here;		/* note, macros know this name */
    942        1.8     perry 	sopno look;
    943        1.8     perry 	int i;
    944        1.1       jtc 
    945       1.12     lukem 	_DIAGASSERT(g != NULL);
    946       1.12     lukem 
    947        1.1       jtc 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
    948        1.1       jtc 		s = g->strip[pc];
    949        1.1       jtc 		switch (OP(s)) {
    950        1.1       jtc 		case OEND:
    951        1.1       jtc 			assert(pc == stop-1);
    952        1.1       jtc 			break;
    953        1.1       jtc 		case OCHAR:
    954        1.1       jtc 			/* only characters can match */
    955        1.1       jtc 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
    956        1.1       jtc 			if (ch == (char)OPND(s))
    957        1.1       jtc 				FWD(aft, bef, 1);
    958        1.1       jtc 			break;
    959        1.1       jtc 		case OBOL:
    960        1.1       jtc 			if (ch == BOL || ch == BOLEOL)
    961        1.1       jtc 				FWD(aft, bef, 1);
    962        1.1       jtc 			break;
    963        1.1       jtc 		case OEOL:
    964        1.1       jtc 			if (ch == EOL || ch == BOLEOL)
    965        1.1       jtc 				FWD(aft, bef, 1);
    966        1.1       jtc 			break;
    967        1.1       jtc 		case OBOW:
    968        1.1       jtc 			if (ch == BOW)
    969        1.1       jtc 				FWD(aft, bef, 1);
    970        1.1       jtc 			break;
    971        1.1       jtc 		case OEOW:
    972        1.1       jtc 			if (ch == EOW)
    973        1.1       jtc 				FWD(aft, bef, 1);
    974        1.1       jtc 			break;
    975        1.1       jtc 		case OANY:
    976        1.1       jtc 			if (!NONCHAR(ch))
    977        1.1       jtc 				FWD(aft, bef, 1);
    978        1.1       jtc 			break;
    979        1.1       jtc 		case OANYOF:
    980        1.1       jtc 			cs = &g->sets[OPND(s)];
    981        1.1       jtc 			if (!NONCHAR(ch) && CHIN(cs, ch))
    982        1.1       jtc 				FWD(aft, bef, 1);
    983        1.1       jtc 			break;
    984        1.1       jtc 		case OBACK_:		/* ignored here */
    985        1.1       jtc 		case O_BACK:
    986        1.1       jtc 			FWD(aft, aft, 1);
    987        1.1       jtc 			break;
    988        1.1       jtc 		case OPLUS_:		/* forward, this is just an empty */
    989        1.1       jtc 			FWD(aft, aft, 1);
    990        1.1       jtc 			break;
    991        1.1       jtc 		case O_PLUS:		/* both forward and back */
    992        1.1       jtc 			FWD(aft, aft, 1);
    993        1.1       jtc 			i = ISSETBACK(aft, OPND(s));
    994        1.1       jtc 			BACK(aft, aft, OPND(s));
    995        1.1       jtc 			if (!i && ISSETBACK(aft, OPND(s))) {
    996        1.1       jtc 				/* oho, must reconsider loop body */
    997        1.1       jtc 				pc -= OPND(s) + 1;
    998        1.1       jtc 				INIT(here, pc);
    999        1.1       jtc 			}
   1000        1.1       jtc 			break;
   1001        1.1       jtc 		case OQUEST_:		/* two branches, both forward */
   1002        1.1       jtc 			FWD(aft, aft, 1);
   1003        1.1       jtc 			FWD(aft, aft, OPND(s));
   1004        1.1       jtc 			break;
   1005        1.1       jtc 		case O_QUEST:		/* just an empty */
   1006        1.1       jtc 			FWD(aft, aft, 1);
   1007        1.1       jtc 			break;
   1008        1.1       jtc 		case OLPAREN:		/* not significant here */
   1009        1.1       jtc 		case ORPAREN:
   1010        1.1       jtc 			FWD(aft, aft, 1);
   1011        1.1       jtc 			break;
   1012        1.1       jtc 		case OCH_:		/* mark the first two branches */
   1013        1.1       jtc 			FWD(aft, aft, 1);
   1014        1.1       jtc 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
   1015        1.1       jtc 			FWD(aft, aft, OPND(s));
   1016        1.1       jtc 			break;
   1017        1.1       jtc 		case OOR1:		/* done a branch, find the O_CH */
   1018        1.1       jtc 			if (ISSTATEIN(aft, here)) {
   1019        1.1       jtc 				for (look = 1;
   1020        1.1       jtc 						OP(s = g->strip[pc+look]) != O_CH;
   1021        1.1       jtc 						look += OPND(s))
   1022        1.1       jtc 					assert(OP(s) == OOR2);
   1023        1.1       jtc 				FWD(aft, aft, look);
   1024        1.1       jtc 			}
   1025        1.1       jtc 			break;
   1026        1.1       jtc 		case OOR2:		/* propagate OCH_'s marking */
   1027        1.1       jtc 			FWD(aft, aft, 1);
   1028        1.1       jtc 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
   1029        1.1       jtc 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
   1030        1.1       jtc 				FWD(aft, aft, OPND(s));
   1031        1.1       jtc 			}
   1032        1.1       jtc 			break;
   1033        1.1       jtc 		case O_CH:		/* just empty */
   1034        1.1       jtc 			FWD(aft, aft, 1);
   1035        1.1       jtc 			break;
   1036        1.1       jtc 		default:		/* ooooops... */
   1037        1.1       jtc 			assert(nope);
   1038        1.1       jtc 			break;
   1039        1.1       jtc 		}
   1040        1.1       jtc 	}
   1041        1.1       jtc 
   1042        1.1       jtc 	return(aft);
   1043        1.1       jtc }
   1044        1.1       jtc 
   1045        1.1       jtc #ifdef REDEBUG
   1046        1.1       jtc /*
   1047        1.1       jtc  - print - print a set of states
   1048        1.1       jtc  == #ifdef REDEBUG
   1049        1.1       jtc  == static void print(struct match *m, char *caption, states st, \
   1050        1.1       jtc  ==	int ch, FILE *d);
   1051        1.1       jtc  == #endif
   1052        1.1       jtc  */
   1053        1.1       jtc static void
   1054        1.1       jtc print(m, caption, st, ch, d)
   1055        1.1       jtc struct match *m;
   1056        1.1       jtc char *caption;
   1057        1.1       jtc states st;
   1058        1.1       jtc int ch;
   1059        1.1       jtc FILE *d;
   1060        1.1       jtc {
   1061        1.8     perry 	struct re_guts *g = m->g;
   1062        1.8     perry 	int i;
   1063        1.8     perry 	int first = 1;
   1064        1.1       jtc 
   1065       1.12     lukem 	_DIAGASSERT(m != NULL);
   1066       1.12     lukem 	_DIAGASSERT(caption != NULL);
   1067       1.12     lukem 
   1068        1.1       jtc 	if (!(m->eflags&REG_TRACE))
   1069        1.1       jtc 		return;
   1070        1.1       jtc 
   1071       1.12     lukem 	_DIAGASSERT(d != NULL);
   1072       1.12     lukem 
   1073        1.1       jtc 	fprintf(d, "%s", caption);
   1074        1.1       jtc 	if (ch != '\0')
   1075        1.1       jtc 		fprintf(d, " %s", pchar(ch));
   1076        1.1       jtc 	for (i = 0; i < g->nstates; i++)
   1077        1.1       jtc 		if (ISSET(st, i)) {
   1078        1.1       jtc 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
   1079        1.1       jtc 			first = 0;
   1080        1.1       jtc 		}
   1081        1.1       jtc 	fprintf(d, "\n");
   1082        1.1       jtc }
   1083        1.1       jtc 
   1084        1.1       jtc /*
   1085        1.1       jtc  - at - print current situation
   1086        1.1       jtc  == #ifdef REDEBUG
   1087        1.1       jtc  == static void at(struct match *m, char *title, char *start, char *stop, \
   1088        1.1       jtc  ==						sopno startst, sopno stopst);
   1089        1.1       jtc  == #endif
   1090        1.1       jtc  */
   1091        1.1       jtc static void
   1092        1.1       jtc at(m, title, start, stop, startst, stopst)
   1093        1.1       jtc struct match *m;
   1094        1.1       jtc char *title;
   1095        1.1       jtc char *start;
   1096        1.1       jtc char *stop;
   1097        1.1       jtc sopno startst;
   1098        1.1       jtc sopno stopst;
   1099        1.1       jtc {
   1100       1.12     lukem 
   1101       1.12     lukem 	_DIAGASSERT(m != NULL);
   1102       1.12     lukem 	_DIAGASSERT(title != NULL);
   1103       1.12     lukem 	_DIAGASSERT(start != NULL);
   1104       1.12     lukem 	_DIAGASSERT(stop != NULL);
   1105       1.12     lukem 
   1106        1.1       jtc 	if (!(m->eflags&REG_TRACE))
   1107        1.1       jtc 		return;
   1108        1.1       jtc 
   1109        1.1       jtc 	printf("%s %s-", title, pchar(*start));
   1110        1.1       jtc 	printf("%s ", pchar(*stop));
   1111        1.1       jtc 	printf("%ld-%ld\n", (long)startst, (long)stopst);
   1112        1.1       jtc }
   1113        1.1       jtc 
   1114        1.1       jtc #ifndef PCHARDONE
   1115        1.1       jtc #define	PCHARDONE	/* never again */
   1116        1.1       jtc /*
   1117        1.1       jtc  - pchar - make a character printable
   1118        1.1       jtc  == #ifdef REDEBUG
   1119        1.1       jtc  == static char *pchar(int ch);
   1120        1.1       jtc  == #endif
   1121        1.1       jtc  *
   1122        1.1       jtc  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
   1123        1.1       jtc  * duplicate here avoids having a debugging-capable regexec.o tied to
   1124        1.1       jtc  * a matching debug.o, and this is convenient.  It all disappears in
   1125        1.1       jtc  * the non-debug compilation anyway, so it doesn't matter much.
   1126        1.1       jtc  */
   1127        1.1       jtc static char *			/* -> representation */
   1128        1.1       jtc pchar(ch)
   1129        1.1       jtc int ch;
   1130        1.1       jtc {
   1131        1.1       jtc 	static char pbuf[10];
   1132        1.1       jtc 
   1133        1.1       jtc 	if (isprint(ch) || ch == ' ')
   1134        1.6       mrg 		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
   1135        1.1       jtc 	else
   1136        1.6       mrg 		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
   1137        1.1       jtc 	return(pbuf);
   1138        1.1       jtc }
   1139        1.1       jtc #endif
   1140        1.1       jtc #endif
   1141        1.1       jtc 
   1142        1.1       jtc #undef	matcher
   1143        1.1       jtc #undef	fast
   1144        1.1       jtc #undef	slow
   1145        1.1       jtc #undef	dissect
   1146        1.1       jtc #undef	backref
   1147        1.1       jtc #undef	step
   1148        1.1       jtc #undef	print
   1149        1.1       jtc #undef	at
   1150        1.1       jtc #undef	match
   1151