Home | History | Annotate | Line # | Download | only in regex
engine.c revision 1.12.10.2
      1  1.12.10.2   nathanw /*	$NetBSD: engine.c,v 1.12.10.2 2002/08/01 03:28:15 nathanw Exp $	*/
      2        1.5       cgd 
      3        1.4       cgd /*-
      4        1.4       cgd  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
      5        1.4       cgd  * Copyright (c) 1992, 1993, 1994
      6        1.4       cgd  *	The Regents of the University of California.  All rights reserved.
      7        1.4       cgd  *
      8        1.4       cgd  * This code is derived from software contributed to Berkeley by
      9        1.4       cgd  * Henry Spencer.
     10        1.4       cgd  *
     11        1.4       cgd  * Redistribution and use in source and binary forms, with or without
     12        1.4       cgd  * modification, are permitted provided that the following conditions
     13        1.4       cgd  * are met:
     14        1.4       cgd  * 1. Redistributions of source code must retain the above copyright
     15        1.4       cgd  *    notice, this list of conditions and the following disclaimer.
     16        1.4       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     17        1.4       cgd  *    notice, this list of conditions and the following disclaimer in the
     18        1.4       cgd  *    documentation and/or other materials provided with the distribution.
     19        1.4       cgd  * 3. All advertising materials mentioning features or use of this software
     20        1.4       cgd  *    must display the following acknowledgement:
     21        1.4       cgd  *	This product includes software developed by the University of
     22        1.4       cgd  *	California, Berkeley and its contributors.
     23        1.4       cgd  * 4. Neither the name of the University nor the names of its contributors
     24        1.4       cgd  *    may be used to endorse or promote products derived from this software
     25        1.4       cgd  *    without specific prior written permission.
     26        1.4       cgd  *
     27        1.4       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28        1.4       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29        1.4       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30        1.4       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31        1.4       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32        1.4       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33        1.4       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34        1.4       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35        1.4       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36        1.4       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37        1.4       cgd  * SUCH DAMAGE.
     38        1.4       cgd  *
     39        1.4       cgd  *	@(#)engine.c	8.5 (Berkeley) 3/20/94
     40        1.4       cgd  */
     41        1.4       cgd 
     42        1.1       jtc /*
     43        1.1       jtc  * The matching engine and friends.  This file is #included by regexec.c
     44        1.1       jtc  * after suitable #defines of a variety of macros used herein, so that
     45        1.1       jtc  * different state representations can be used without duplicating masses
     46        1.1       jtc  * of code.
     47        1.1       jtc  */
     48        1.1       jtc 
     49        1.1       jtc #ifdef SNAMES
     50        1.1       jtc #define	matcher	smatcher
     51        1.1       jtc #define	fast	sfast
     52        1.1       jtc #define	slow	sslow
     53        1.1       jtc #define	dissect	sdissect
     54        1.1       jtc #define	backref	sbackref
     55        1.1       jtc #define	step	sstep
     56        1.1       jtc #define	print	sprint
     57        1.1       jtc #define	at	sat
     58        1.1       jtc #define	match	smat
     59  1.12.10.2   nathanw #define	nope	snope
     60        1.1       jtc #endif
     61        1.1       jtc #ifdef LNAMES
     62        1.1       jtc #define	matcher	lmatcher
     63        1.1       jtc #define	fast	lfast
     64        1.1       jtc #define	slow	lslow
     65        1.1       jtc #define	dissect	ldissect
     66        1.1       jtc #define	backref	lbackref
     67        1.1       jtc #define	step	lstep
     68        1.1       jtc #define	print	lprint
     69        1.1       jtc #define	at	lat
     70        1.1       jtc #define	match	lmat
     71  1.12.10.2   nathanw #define	nope	lnope
     72        1.1       jtc #endif
     73        1.1       jtc 
     74        1.1       jtc /* another structure passed up and down to avoid zillions of parameters */
     75        1.1       jtc struct match {
     76        1.1       jtc 	struct re_guts *g;
     77        1.1       jtc 	int eflags;
     78        1.1       jtc 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
     79        1.1       jtc 	char *offp;		/* offsets work from here */
     80        1.1       jtc 	char *beginp;		/* start of string -- virtual NUL precedes */
     81        1.1       jtc 	char *endp;		/* end of string -- virtual NUL here */
     82        1.1       jtc 	char *coldp;		/* can be no match starting before here */
     83        1.1       jtc 	char **lastpos;		/* [nplus+1] */
     84        1.1       jtc 	STATEVARS;
     85        1.1       jtc 	states st;		/* current states */
     86        1.1       jtc 	states fresh;		/* states for a fresh start */
     87        1.1       jtc 	states tmp;		/* temporary */
     88        1.1       jtc 	states empty;		/* empty set of states */
     89        1.1       jtc };
     90        1.1       jtc 
     91        1.4       cgd /* ========= begin header generated by ./mkh ========= */
     92        1.4       cgd #ifdef __cplusplus
     93        1.4       cgd extern "C" {
     94        1.4       cgd #endif
     95        1.4       cgd 
     96        1.4       cgd /* === engine.c === */
     97        1.4       cgd static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
     98        1.4       cgd static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
     99        1.4       cgd static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
    100        1.4       cgd static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
    101        1.4       cgd static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
    102        1.4       cgd static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
    103        1.4       cgd #define	BOL	(OUT+1)
    104        1.4       cgd #define	EOL	(BOL+1)
    105        1.4       cgd #define	BOLEOL	(BOL+2)
    106        1.4       cgd #define	NOTHING	(BOL+3)
    107        1.4       cgd #define	BOW	(BOL+4)
    108        1.4       cgd #define	EOW	(BOL+5)
    109        1.4       cgd #define	CODEMAX	(BOL+5)		/* highest code used */
    110        1.4       cgd #define	NONCHAR(c)	((c) > CHAR_MAX)
    111        1.4       cgd #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    112        1.4       cgd #ifdef REDEBUG
    113        1.4       cgd static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
    114        1.4       cgd #endif
    115        1.4       cgd #ifdef REDEBUG
    116        1.4       cgd static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
    117        1.4       cgd #endif
    118        1.4       cgd #ifdef REDEBUG
    119        1.4       cgd static char *pchar __P((int ch));
    120        1.4       cgd #endif
    121        1.4       cgd 
    122        1.4       cgd #ifdef __cplusplus
    123        1.4       cgd }
    124        1.4       cgd #endif
    125        1.4       cgd /* ========= end header generated by ./mkh ========= */
    126        1.1       jtc 
    127        1.1       jtc #ifdef REDEBUG
    128        1.1       jtc #define	SP(t, s, c)	print(m, t, s, c, stdout)
    129        1.1       jtc #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
    130        1.1       jtc #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
    131  1.12.10.2   nathanw static int nope = 0;
    132        1.1       jtc #else
    133        1.1       jtc #define	SP(t, s, c)	/* nothing */
    134        1.1       jtc #define	AT(t, p1, p2, s1, s2)	/* nothing */
    135        1.1       jtc #define	NOTE(s)	/* nothing */
    136        1.1       jtc #endif
    137        1.1       jtc 
    138        1.1       jtc /*
    139        1.1       jtc  - matcher - the actual matching engine
    140        1.8     perry  == static int matcher(struct re_guts *g, char *string, \
    141        1.1       jtc  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
    142        1.1       jtc  */
    143        1.1       jtc static int			/* 0 success, REG_NOMATCH failure */
    144        1.1       jtc matcher(g, string, nmatch, pmatch, eflags)
    145        1.8     perry struct re_guts *g;
    146        1.1       jtc char *string;
    147        1.1       jtc size_t nmatch;
    148        1.1       jtc regmatch_t pmatch[];
    149        1.1       jtc int eflags;
    150        1.1       jtc {
    151        1.8     perry 	char *endp;
    152        1.8     perry 	int i;
    153        1.1       jtc 	struct match mv;
    154        1.8     perry 	struct match *m = &mv;
    155        1.8     perry 	char *dp;
    156        1.8     perry 	const sopno gf = g->firststate+1;	/* +1 for OEND */
    157        1.8     perry 	const sopno gl = g->laststate;
    158        1.1       jtc 	char *start;
    159        1.1       jtc 	char *stop;
    160  1.12.10.1   nathanw 	int error = 0;
    161        1.1       jtc 
    162       1.12     lukem 	_DIAGASSERT(g != NULL);
    163       1.12     lukem 	_DIAGASSERT(string != NULL);
    164       1.12     lukem 	/* pmatch checked below */
    165       1.12     lukem 
    166        1.1       jtc 	/* simplify the situation where possible */
    167        1.1       jtc 	if (g->cflags&REG_NOSUB)
    168        1.1       jtc 		nmatch = 0;
    169        1.1       jtc 	if (eflags&REG_STARTEND) {
    170       1.12     lukem 		_DIAGASSERT(pmatch != NULL);
    171       1.11  christos 		start = string + (size_t)pmatch[0].rm_so;
    172       1.11  christos 		stop = string + (size_t)pmatch[0].rm_eo;
    173        1.1       jtc 	} else {
    174        1.1       jtc 		start = string;
    175        1.1       jtc 		stop = start + strlen(start);
    176        1.1       jtc 	}
    177        1.1       jtc 	if (stop < start)
    178        1.1       jtc 		return(REG_INVARG);
    179        1.1       jtc 
    180        1.1       jtc 	/* prescreening; this does wonders for this rather slow code */
    181        1.1       jtc 	if (g->must != NULL) {
    182        1.1       jtc 		for (dp = start; dp < stop; dp++)
    183        1.1       jtc 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
    184        1.1       jtc 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
    185        1.1       jtc 				break;
    186        1.1       jtc 		if (dp == stop)		/* we didn't find g->must */
    187        1.1       jtc 			return(REG_NOMATCH);
    188        1.1       jtc 	}
    189        1.1       jtc 
    190        1.1       jtc 	/* match struct setup */
    191        1.1       jtc 	m->g = g;
    192        1.1       jtc 	m->eflags = eflags;
    193        1.1       jtc 	m->pmatch = NULL;
    194        1.1       jtc 	m->lastpos = NULL;
    195        1.1       jtc 	m->offp = string;
    196        1.1       jtc 	m->beginp = start;
    197        1.1       jtc 	m->endp = stop;
    198        1.1       jtc 	STATESETUP(m, 4);
    199        1.1       jtc 	SETUP(m->st);
    200        1.1       jtc 	SETUP(m->fresh);
    201        1.1       jtc 	SETUP(m->tmp);
    202        1.1       jtc 	SETUP(m->empty);
    203        1.1       jtc 	CLEAR(m->empty);
    204        1.1       jtc 
    205        1.1       jtc 	/* this loop does only one repetition except for backrefs */
    206        1.1       jtc 	for (;;) {
    207        1.1       jtc 		endp = fast(m, start, stop, gf, gl);
    208        1.1       jtc 		if (endp == NULL) {		/* a miss */
    209  1.12.10.1   nathanw 			error = REG_NOMATCH;
    210  1.12.10.1   nathanw 			goto done;
    211        1.1       jtc 		}
    212        1.1       jtc 		if (nmatch == 0 && !g->backrefs)
    213        1.1       jtc 			break;		/* no further info needed */
    214        1.1       jtc 
    215        1.1       jtc 		/* where? */
    216        1.1       jtc 		assert(m->coldp != NULL);
    217        1.1       jtc 		for (;;) {
    218        1.1       jtc 			NOTE("finding start");
    219        1.1       jtc 			endp = slow(m, m->coldp, stop, gf, gl);
    220        1.1       jtc 			if (endp != NULL)
    221        1.1       jtc 				break;
    222        1.1       jtc 			assert(m->coldp < m->endp);
    223        1.1       jtc 			m->coldp++;
    224        1.1       jtc 		}
    225        1.1       jtc 		if (nmatch == 1 && !g->backrefs)
    226        1.1       jtc 			break;		/* no further info needed */
    227        1.1       jtc 
    228        1.1       jtc 		/* oh my, he wants the subexpressions... */
    229        1.1       jtc 		if (m->pmatch == NULL)
    230        1.1       jtc 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
    231        1.1       jtc 							sizeof(regmatch_t));
    232        1.1       jtc 		if (m->pmatch == NULL) {
    233  1.12.10.1   nathanw 			error = REG_ESPACE;
    234  1.12.10.1   nathanw 			goto done;
    235        1.1       jtc 		}
    236        1.1       jtc 		for (i = 1; i <= m->g->nsub; i++)
    237       1.10  drochner 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
    238        1.1       jtc 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
    239        1.1       jtc 			NOTE("dissecting");
    240        1.1       jtc 			dp = dissect(m, m->coldp, endp, gf, gl);
    241        1.1       jtc 		} else {
    242        1.1       jtc 			if (g->nplus > 0 && m->lastpos == NULL)
    243        1.1       jtc 				m->lastpos = (char **)malloc((g->nplus+1) *
    244        1.1       jtc 							sizeof(char *));
    245        1.1       jtc 			if (g->nplus > 0 && m->lastpos == NULL) {
    246  1.12.10.1   nathanw 				error = REG_ESPACE;
    247  1.12.10.1   nathanw 				goto done;
    248        1.1       jtc 			}
    249        1.1       jtc 			NOTE("backref dissect");
    250        1.1       jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    251        1.1       jtc 		}
    252        1.1       jtc 		if (dp != NULL)
    253        1.1       jtc 			break;
    254        1.1       jtc 
    255        1.1       jtc 		/* uh-oh... we couldn't find a subexpression-level match */
    256        1.1       jtc 		assert(g->backrefs);	/* must be back references doing it */
    257        1.1       jtc 		assert(g->nplus == 0 || m->lastpos != NULL);
    258        1.1       jtc 		for (;;) {
    259        1.1       jtc 			if (dp != NULL || endp <= m->coldp)
    260        1.1       jtc 				break;		/* defeat */
    261        1.1       jtc 			NOTE("backoff");
    262        1.1       jtc 			endp = slow(m, m->coldp, endp-1, gf, gl);
    263        1.1       jtc 			if (endp == NULL)
    264        1.1       jtc 				break;		/* defeat */
    265        1.1       jtc 			/* try it on a shorter possibility */
    266        1.1       jtc #ifndef NDEBUG
    267        1.1       jtc 			for (i = 1; i <= m->g->nsub; i++) {
    268       1.10  drochner 				assert(m->pmatch[i].rm_so == (regoff_t)-1);
    269       1.10  drochner 				assert(m->pmatch[i].rm_eo == (regoff_t)-1);
    270        1.1       jtc 			}
    271        1.1       jtc #endif
    272        1.1       jtc 			NOTE("backoff dissect");
    273        1.1       jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    274        1.1       jtc 		}
    275        1.1       jtc 		assert(dp == NULL || dp == endp);
    276        1.1       jtc 		if (dp != NULL)		/* found a shorter one */
    277        1.1       jtc 			break;
    278        1.1       jtc 
    279        1.1       jtc 		/* despite initial appearances, there is no match here */
    280        1.1       jtc 		NOTE("false alarm");
    281        1.1       jtc 		start = m->coldp + 1;	/* recycle starting later */
    282        1.1       jtc 		assert(start <= stop);
    283        1.1       jtc 	}
    284        1.1       jtc 
    285        1.1       jtc 	/* fill in the details if requested */
    286        1.1       jtc 	if (nmatch > 0) {
    287       1.12     lukem 		_DIAGASSERT(pmatch != NULL);
    288        1.1       jtc 		pmatch[0].rm_so = m->coldp - m->offp;
    289        1.1       jtc 		pmatch[0].rm_eo = endp - m->offp;
    290        1.1       jtc 	}
    291        1.1       jtc 	if (nmatch > 1) {
    292        1.1       jtc 		assert(m->pmatch != NULL);
    293        1.1       jtc 		for (i = 1; i < nmatch; i++)
    294        1.1       jtc 			if (i <= m->g->nsub)
    295        1.1       jtc 				pmatch[i] = m->pmatch[i];
    296        1.1       jtc 			else {
    297       1.10  drochner 				pmatch[i].rm_so = (regoff_t)-1;
    298       1.10  drochner 				pmatch[i].rm_eo = (regoff_t)-1;
    299        1.1       jtc 			}
    300        1.1       jtc 	}
    301        1.1       jtc 
    302  1.12.10.1   nathanw done:
    303  1.12.10.1   nathanw 	if (m->pmatch != NULL) {
    304        1.9  christos 		free(m->pmatch);
    305  1.12.10.1   nathanw 		m->pmatch = NULL;
    306  1.12.10.1   nathanw 	}
    307  1.12.10.1   nathanw 	if (m->lastpos != NULL) {
    308        1.9  christos 		free(m->lastpos);
    309  1.12.10.1   nathanw 		m->lastpos = NULL;
    310  1.12.10.1   nathanw 	}
    311        1.1       jtc 	STATETEARDOWN(m);
    312  1.12.10.1   nathanw 	return error;
    313        1.1       jtc }
    314        1.1       jtc 
    315        1.1       jtc /*
    316        1.1       jtc  - dissect - figure out what matched what, no back references
    317        1.8     perry  == static char *dissect(struct match *m, char *start, \
    318        1.1       jtc  ==	char *stop, sopno startst, sopno stopst);
    319        1.1       jtc  */
    320        1.1       jtc static char *			/* == stop (success) always */
    321        1.1       jtc dissect(m, start, stop, startst, stopst)
    322        1.8     perry struct match *m;
    323        1.1       jtc char *start;
    324        1.1       jtc char *stop;
    325        1.1       jtc sopno startst;
    326        1.1       jtc sopno stopst;
    327        1.1       jtc {
    328        1.8     perry 	int i;
    329        1.8     perry 	sopno ss;	/* start sop of current subRE */
    330        1.8     perry 	sopno es;	/* end sop of current subRE */
    331        1.8     perry 	char *sp;	/* start of string matched by it */
    332        1.8     perry 	char *stp;	/* string matched by it cannot pass here */
    333        1.8     perry 	char *rest;	/* start of rest of string */
    334        1.8     perry 	char *tail;	/* string unmatched by rest of RE */
    335        1.8     perry 	sopno ssub;	/* start sop of subsubRE */
    336        1.8     perry 	sopno esub;	/* end sop of subsubRE */
    337        1.8     perry 	char *ssp;	/* start of string matched by subsubRE */
    338        1.8     perry 	char *sep;	/* end of string matched by subsubRE */
    339        1.8     perry 	char *oldssp;	/* previous ssp */
    340        1.9  christos #ifndef NDEBUG
    341        1.8     perry 	char *dp;
    342        1.9  christos #endif
    343        1.1       jtc 
    344       1.12     lukem 	_DIAGASSERT(m != NULL);
    345       1.12     lukem 	_DIAGASSERT(start != NULL);
    346       1.12     lukem 	_DIAGASSERT(stop != NULL);
    347       1.12     lukem 
    348        1.1       jtc 	AT("diss", start, stop, startst, stopst);
    349        1.1       jtc 	sp = start;
    350        1.1       jtc 	for (ss = startst; ss < stopst; ss = es) {
    351        1.1       jtc 		/* identify end of subRE */
    352        1.1       jtc 		es = ss;
    353        1.1       jtc 		switch (OP(m->g->strip[es])) {
    354        1.1       jtc 		case OPLUS_:
    355        1.1       jtc 		case OQUEST_:
    356        1.1       jtc 			es += OPND(m->g->strip[es]);
    357        1.1       jtc 			break;
    358        1.1       jtc 		case OCH_:
    359        1.1       jtc 			while (OP(m->g->strip[es]) != O_CH)
    360        1.1       jtc 				es += OPND(m->g->strip[es]);
    361        1.1       jtc 			break;
    362        1.1       jtc 		}
    363        1.1       jtc 		es++;
    364        1.1       jtc 
    365        1.1       jtc 		/* figure out what it matched */
    366        1.1       jtc 		switch (OP(m->g->strip[ss])) {
    367        1.1       jtc 		case OEND:
    368        1.1       jtc 			assert(nope);
    369        1.1       jtc 			break;
    370        1.1       jtc 		case OCHAR:
    371        1.1       jtc 			sp++;
    372        1.1       jtc 			break;
    373        1.1       jtc 		case OBOL:
    374        1.1       jtc 		case OEOL:
    375        1.1       jtc 		case OBOW:
    376        1.1       jtc 		case OEOW:
    377        1.1       jtc 			break;
    378        1.1       jtc 		case OANY:
    379        1.1       jtc 		case OANYOF:
    380        1.1       jtc 			sp++;
    381        1.1       jtc 			break;
    382        1.1       jtc 		case OBACK_:
    383        1.1       jtc 		case O_BACK:
    384        1.1       jtc 			assert(nope);
    385        1.1       jtc 			break;
    386        1.1       jtc 		/* cases where length of match is hard to find */
    387        1.1       jtc 		case OQUEST_:
    388        1.1       jtc 			stp = stop;
    389        1.1       jtc 			for (;;) {
    390        1.1       jtc 				/* how long could this one be? */
    391        1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    392        1.1       jtc 				assert(rest != NULL);	/* it did match */
    393        1.1       jtc 				/* could the rest match the rest? */
    394        1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    395        1.1       jtc 				if (tail == stop)
    396        1.1       jtc 					break;		/* yes! */
    397        1.1       jtc 				/* no -- try a shorter match for this one */
    398        1.1       jtc 				stp = rest - 1;
    399        1.1       jtc 				assert(stp >= sp);	/* it did work */
    400        1.1       jtc 			}
    401        1.1       jtc 			ssub = ss + 1;
    402        1.1       jtc 			esub = es - 1;
    403        1.1       jtc 			/* did innards match? */
    404        1.1       jtc 			if (slow(m, sp, rest, ssub, esub) != NULL) {
    405        1.9  christos #ifdef NDEBUG
    406        1.9  christos 				(void)
    407        1.9  christos #else
    408        1.9  christos 				dp =
    409        1.9  christos #endif
    410        1.9  christos 				    dissect(m, sp, rest, ssub, esub);
    411        1.1       jtc 				assert(dp == rest);
    412        1.1       jtc 			} else		/* no */
    413        1.1       jtc 				assert(sp == rest);
    414        1.1       jtc 			sp = rest;
    415        1.1       jtc 			break;
    416        1.1       jtc 		case OPLUS_:
    417        1.1       jtc 			stp = stop;
    418        1.1       jtc 			for (;;) {
    419        1.1       jtc 				/* how long could this one be? */
    420        1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    421        1.1       jtc 				assert(rest != NULL);	/* it did match */
    422        1.1       jtc 				/* could the rest match the rest? */
    423        1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    424        1.1       jtc 				if (tail == stop)
    425        1.1       jtc 					break;		/* yes! */
    426        1.1       jtc 				/* no -- try a shorter match for this one */
    427        1.1       jtc 				stp = rest - 1;
    428        1.1       jtc 				assert(stp >= sp);	/* it did work */
    429        1.1       jtc 			}
    430        1.1       jtc 			ssub = ss + 1;
    431        1.1       jtc 			esub = es - 1;
    432        1.1       jtc 			ssp = sp;
    433        1.1       jtc 			oldssp = ssp;
    434        1.1       jtc 			for (;;) {	/* find last match of innards */
    435        1.1       jtc 				sep = slow(m, ssp, rest, ssub, esub);
    436        1.1       jtc 				if (sep == NULL || sep == ssp)
    437        1.1       jtc 					break;	/* failed or matched null */
    438        1.1       jtc 				oldssp = ssp;	/* on to next try */
    439        1.1       jtc 				ssp = sep;
    440        1.1       jtc 			}
    441        1.1       jtc 			if (sep == NULL) {
    442        1.1       jtc 				/* last successful match */
    443        1.1       jtc 				sep = ssp;
    444        1.1       jtc 				ssp = oldssp;
    445        1.1       jtc 			}
    446        1.1       jtc 			assert(sep == rest);	/* must exhaust substring */
    447        1.1       jtc 			assert(slow(m, ssp, sep, ssub, esub) == rest);
    448        1.9  christos #ifdef NDEBUG
    449        1.9  christos 			(void)
    450        1.9  christos #else
    451        1.9  christos 			dp =
    452        1.9  christos #endif
    453        1.9  christos 			    dissect(m, ssp, sep, ssub, esub);
    454        1.1       jtc 			assert(dp == sep);
    455        1.1       jtc 			sp = rest;
    456        1.1       jtc 			break;
    457        1.1       jtc 		case OCH_:
    458        1.1       jtc 			stp = stop;
    459        1.1       jtc 			for (;;) {
    460        1.1       jtc 				/* how long could this one be? */
    461        1.1       jtc 				rest = slow(m, sp, stp, ss, es);
    462        1.1       jtc 				assert(rest != NULL);	/* it did match */
    463        1.1       jtc 				/* could the rest match the rest? */
    464        1.1       jtc 				tail = slow(m, rest, stop, es, stopst);
    465        1.1       jtc 				if (tail == stop)
    466        1.1       jtc 					break;		/* yes! */
    467        1.1       jtc 				/* no -- try a shorter match for this one */
    468        1.1       jtc 				stp = rest - 1;
    469        1.1       jtc 				assert(stp >= sp);	/* it did work */
    470        1.1       jtc 			}
    471        1.1       jtc 			ssub = ss + 1;
    472        1.1       jtc 			esub = ss + OPND(m->g->strip[ss]) - 1;
    473        1.1       jtc 			assert(OP(m->g->strip[esub]) == OOR1);
    474        1.1       jtc 			for (;;) {	/* find first matching branch */
    475        1.1       jtc 				if (slow(m, sp, rest, ssub, esub) == rest)
    476        1.1       jtc 					break;	/* it matched all of it */
    477        1.1       jtc 				/* that one missed, try next one */
    478        1.1       jtc 				assert(OP(m->g->strip[esub]) == OOR1);
    479        1.1       jtc 				esub++;
    480        1.1       jtc 				assert(OP(m->g->strip[esub]) == OOR2);
    481        1.1       jtc 				ssub = esub + 1;
    482        1.1       jtc 				esub += OPND(m->g->strip[esub]);
    483        1.1       jtc 				if (OP(m->g->strip[esub]) == OOR2)
    484        1.1       jtc 					esub--;
    485        1.1       jtc 				else
    486        1.1       jtc 					assert(OP(m->g->strip[esub]) == O_CH);
    487        1.1       jtc 			}
    488        1.9  christos #ifdef NDEBUG
    489        1.9  christos 			(void)
    490        1.9  christos #else
    491        1.9  christos 			dp =
    492        1.9  christos #endif
    493        1.9  christos 			    dissect(m, sp, rest, ssub, esub);
    494        1.1       jtc 			assert(dp == rest);
    495        1.1       jtc 			sp = rest;
    496        1.1       jtc 			break;
    497        1.1       jtc 		case O_PLUS:
    498        1.1       jtc 		case O_QUEST:
    499        1.1       jtc 		case OOR1:
    500        1.1       jtc 		case OOR2:
    501        1.1       jtc 		case O_CH:
    502        1.1       jtc 			assert(nope);
    503        1.1       jtc 			break;
    504        1.1       jtc 		case OLPAREN:
    505        1.1       jtc 			i = OPND(m->g->strip[ss]);
    506        1.2       jtc 			assert(0 < i && i <= m->g->nsub);
    507        1.1       jtc 			m->pmatch[i].rm_so = sp - m->offp;
    508        1.1       jtc 			break;
    509        1.1       jtc 		case ORPAREN:
    510        1.1       jtc 			i = OPND(m->g->strip[ss]);
    511        1.2       jtc 			assert(0 < i && i <= m->g->nsub);
    512        1.1       jtc 			m->pmatch[i].rm_eo = sp - m->offp;
    513        1.1       jtc 			break;
    514        1.1       jtc 		default:		/* uh oh */
    515        1.1       jtc 			assert(nope);
    516        1.1       jtc 			break;
    517        1.1       jtc 		}
    518        1.1       jtc 	}
    519        1.1       jtc 
    520        1.1       jtc 	assert(sp == stop);
    521        1.1       jtc 	return(sp);
    522        1.1       jtc }
    523        1.1       jtc 
    524        1.1       jtc /*
    525        1.1       jtc  - backref - figure out what matched what, figuring in back references
    526        1.8     perry  == static char *backref(struct match *m, char *start, \
    527        1.1       jtc  ==	char *stop, sopno startst, sopno stopst, sopno lev);
    528        1.1       jtc  */
    529        1.1       jtc static char *			/* == stop (success) or NULL (failure) */
    530        1.1       jtc backref(m, start, stop, startst, stopst, lev)
    531        1.8     perry struct match *m;
    532        1.1       jtc char *start;
    533        1.1       jtc char *stop;
    534        1.1       jtc sopno startst;
    535        1.1       jtc sopno stopst;
    536        1.1       jtc sopno lev;			/* PLUS nesting level */
    537        1.1       jtc {
    538        1.8     perry 	int i;
    539        1.8     perry 	sopno ss;	/* start sop of current subRE */
    540        1.8     perry 	char *sp;	/* start of string matched by it */
    541        1.8     perry 	sopno ssub;	/* start sop of subsubRE */
    542        1.8     perry 	sopno esub;	/* end sop of subsubRE */
    543        1.8     perry 	char *ssp;	/* start of string matched by subsubRE */
    544        1.8     perry 	char *dp;
    545        1.8     perry 	size_t len;
    546        1.8     perry 	int hard;
    547        1.8     perry 	sop s;
    548        1.8     perry 	regoff_t offsave;
    549        1.8     perry 	cset *cs;
    550        1.1       jtc 
    551       1.12     lukem 	_DIAGASSERT(m != NULL);
    552       1.12     lukem 	_DIAGASSERT(start != NULL);
    553       1.12     lukem 	_DIAGASSERT(stop != NULL);
    554       1.12     lukem 
    555        1.1       jtc 	AT("back", start, stop, startst, stopst);
    556        1.1       jtc 	sp = start;
    557        1.1       jtc 
    558        1.1       jtc 	/* get as far as we can with easy stuff */
    559        1.1       jtc 	hard = 0;
    560        1.1       jtc 	for (ss = startst; !hard && ss < stopst; ss++)
    561        1.1       jtc 		switch (OP(s = m->g->strip[ss])) {
    562        1.1       jtc 		case OCHAR:
    563        1.1       jtc 			if (sp == stop || *sp++ != (char)OPND(s))
    564        1.1       jtc 				return(NULL);
    565        1.1       jtc 			break;
    566        1.1       jtc 		case OANY:
    567        1.1       jtc 			if (sp == stop)
    568        1.1       jtc 				return(NULL);
    569        1.1       jtc 			sp++;
    570        1.1       jtc 			break;
    571        1.1       jtc 		case OANYOF:
    572        1.1       jtc 			cs = &m->g->sets[OPND(s)];
    573        1.1       jtc 			if (sp == stop || !CHIN(cs, *sp++))
    574        1.1       jtc 				return(NULL);
    575        1.1       jtc 			break;
    576        1.1       jtc 		case OBOL:
    577        1.1       jtc 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    578        1.1       jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    579        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) )
    580        1.1       jtc 				{ /* yes */ }
    581        1.1       jtc 			else
    582        1.1       jtc 				return(NULL);
    583        1.1       jtc 			break;
    584        1.1       jtc 		case OEOL:
    585        1.1       jtc 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    586        1.1       jtc 					(sp < m->endp && *sp == '\n' &&
    587        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) )
    588        1.1       jtc 				{ /* yes */ }
    589        1.1       jtc 			else
    590        1.1       jtc 				return(NULL);
    591        1.1       jtc 			break;
    592        1.1       jtc 		case OBOW:
    593        1.1       jtc 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    594        1.1       jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    595        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) ||
    596        1.1       jtc 					(sp > m->beginp &&
    597        1.3       jtc 							!ISWORD(*(sp-1))) ) &&
    598        1.3       jtc 					(sp < m->endp && ISWORD(*sp)) )
    599        1.1       jtc 				{ /* yes */ }
    600        1.1       jtc 			else
    601        1.1       jtc 				return(NULL);
    602        1.1       jtc 			break;
    603        1.1       jtc 		case OEOW:
    604        1.1       jtc 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    605        1.1       jtc 					(sp < m->endp && *sp == '\n' &&
    606        1.1       jtc 						(m->g->cflags&REG_NEWLINE)) ||
    607        1.3       jtc 					(sp < m->endp && !ISWORD(*sp)) ) &&
    608        1.3       jtc 					(sp > m->beginp && ISWORD(*(sp-1))) )
    609        1.1       jtc 				{ /* yes */ }
    610        1.1       jtc 			else
    611        1.1       jtc 				return(NULL);
    612        1.1       jtc 			break;
    613        1.1       jtc 		case O_QUEST:
    614        1.1       jtc 			break;
    615        1.1       jtc 		case OOR1:	/* matches null but needs to skip */
    616        1.1       jtc 			ss++;
    617        1.1       jtc 			s = m->g->strip[ss];
    618        1.1       jtc 			do {
    619        1.1       jtc 				assert(OP(s) == OOR2);
    620        1.1       jtc 				ss += OPND(s);
    621        1.1       jtc 			} while (OP(s = m->g->strip[ss]) != O_CH);
    622        1.1       jtc 			/* note that the ss++ gets us past the O_CH */
    623        1.1       jtc 			break;
    624        1.1       jtc 		default:	/* have to make a choice */
    625        1.1       jtc 			hard = 1;
    626        1.1       jtc 			break;
    627        1.1       jtc 		}
    628        1.1       jtc 	if (!hard) {		/* that was it! */
    629        1.1       jtc 		if (sp != stop)
    630        1.1       jtc 			return(NULL);
    631        1.1       jtc 		return(sp);
    632        1.1       jtc 	}
    633        1.1       jtc 	ss--;			/* adjust for the for's final increment */
    634        1.1       jtc 
    635        1.1       jtc 	/* the hard stuff */
    636        1.1       jtc 	AT("hard", sp, stop, ss, stopst);
    637        1.1       jtc 	s = m->g->strip[ss];
    638        1.1       jtc 	switch (OP(s)) {
    639        1.1       jtc 	case OBACK_:		/* the vilest depths */
    640        1.1       jtc 		i = OPND(s);
    641        1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    642       1.10  drochner 		if (m->pmatch[i].rm_eo == (regoff_t)-1)
    643        1.1       jtc 			return(NULL);
    644       1.10  drochner 		assert(m->pmatch[i].rm_so != (regoff_t)-1);
    645       1.11  christos 		len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
    646        1.1       jtc 		assert(stop - m->beginp >= len);
    647        1.1       jtc 		if (sp > stop - len)
    648        1.1       jtc 			return(NULL);	/* not enough left to match */
    649       1.11  christos 		ssp = m->offp + (size_t)m->pmatch[i].rm_so;
    650        1.1       jtc 		if (memcmp(sp, ssp, len) != 0)
    651        1.1       jtc 			return(NULL);
    652        1.1       jtc 		while (m->g->strip[ss] != SOP(O_BACK, i))
    653        1.1       jtc 			ss++;
    654        1.1       jtc 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
    655        1.9  christos 
    656        1.1       jtc 	case OQUEST_:		/* to null or not */
    657        1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    658        1.1       jtc 		if (dp != NULL)
    659        1.1       jtc 			return(dp);	/* not */
    660        1.1       jtc 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
    661        1.9  christos 
    662        1.1       jtc 	case OPLUS_:
    663        1.1       jtc 		assert(m->lastpos != NULL);
    664        1.1       jtc 		assert(lev+1 <= m->g->nplus);
    665        1.1       jtc 		m->lastpos[lev+1] = sp;
    666        1.1       jtc 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
    667        1.9  christos 
    668        1.1       jtc 	case O_PLUS:
    669        1.1       jtc 		if (sp == m->lastpos[lev])	/* last pass matched null */
    670        1.1       jtc 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
    671        1.1       jtc 		/* try another pass */
    672        1.1       jtc 		m->lastpos[lev] = sp;
    673        1.1       jtc 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
    674        1.1       jtc 		if (dp == NULL)
    675        1.9  christos 			dp = backref(m, sp, stop, ss+1, stopst, lev-1);
    676        1.9  christos 		return(dp);
    677        1.9  christos 
    678        1.1       jtc 	case OCH_:		/* find the right one, if any */
    679        1.1       jtc 		ssub = ss + 1;
    680        1.1       jtc 		esub = ss + OPND(s) - 1;
    681        1.1       jtc 		assert(OP(m->g->strip[esub]) == OOR1);
    682        1.1       jtc 		for (;;) {	/* find first matching branch */
    683        1.1       jtc 			dp = backref(m, sp, stop, ssub, esub, lev);
    684        1.1       jtc 			if (dp != NULL)
    685        1.1       jtc 				return(dp);
    686        1.1       jtc 			/* that one missed, try next one */
    687        1.1       jtc 			if (OP(m->g->strip[esub]) == O_CH)
    688        1.1       jtc 				return(NULL);	/* there is none */
    689        1.1       jtc 			esub++;
    690        1.1       jtc 			assert(OP(m->g->strip[esub]) == OOR2);
    691        1.1       jtc 			ssub = esub + 1;
    692        1.1       jtc 			esub += OPND(m->g->strip[esub]);
    693        1.1       jtc 			if (OP(m->g->strip[esub]) == OOR2)
    694        1.1       jtc 				esub--;
    695        1.1       jtc 			else
    696        1.1       jtc 				assert(OP(m->g->strip[esub]) == O_CH);
    697        1.1       jtc 		}
    698        1.9  christos 
    699        1.1       jtc 	case OLPAREN:		/* must undo assignment if rest fails */
    700        1.1       jtc 		i = OPND(s);
    701        1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    702        1.1       jtc 		offsave = m->pmatch[i].rm_so;
    703        1.1       jtc 		m->pmatch[i].rm_so = sp - m->offp;
    704        1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    705        1.1       jtc 		if (dp != NULL)
    706        1.1       jtc 			return(dp);
    707        1.1       jtc 		m->pmatch[i].rm_so = offsave;
    708        1.1       jtc 		return(NULL);
    709        1.9  christos 
    710        1.1       jtc 	case ORPAREN:		/* must undo assignment if rest fails */
    711        1.1       jtc 		i = OPND(s);
    712        1.2       jtc 		assert(0 < i && i <= m->g->nsub);
    713        1.1       jtc 		offsave = m->pmatch[i].rm_eo;
    714        1.1       jtc 		m->pmatch[i].rm_eo = sp - m->offp;
    715        1.1       jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    716        1.1       jtc 		if (dp != NULL)
    717        1.1       jtc 			return(dp);
    718        1.1       jtc 		m->pmatch[i].rm_eo = offsave;
    719        1.1       jtc 		return(NULL);
    720        1.9  christos 
    721        1.1       jtc 	default:		/* uh oh */
    722        1.1       jtc 		assert(nope);
    723        1.1       jtc 		break;
    724        1.1       jtc 	}
    725        1.1       jtc 
    726        1.1       jtc 	/* "can't happen" */
    727        1.1       jtc 	assert(nope);
    728        1.1       jtc 	/* NOTREACHED */
    729        1.7  christos 	return NULL;
    730        1.1       jtc }
    731        1.1       jtc 
    732        1.1       jtc /*
    733        1.1       jtc  - fast - step through the string at top speed
    734        1.8     perry  == static char *fast(struct match *m, char *start, \
    735        1.1       jtc  ==	char *stop, sopno startst, sopno stopst);
    736        1.1       jtc  */
    737        1.1       jtc static char *			/* where tentative match ended, or NULL */
    738        1.1       jtc fast(m, start, stop, startst, stopst)
    739        1.8     perry struct match *m;
    740        1.1       jtc char *start;
    741        1.1       jtc char *stop;
    742        1.1       jtc sopno startst;
    743        1.1       jtc sopno stopst;
    744        1.1       jtc {
    745        1.8     perry 	states st = m->st;
    746        1.8     perry 	states fresh = m->fresh;
    747        1.8     perry 	states tmp = m->tmp;
    748        1.8     perry 	char *p = start;
    749        1.8     perry 	int c = (start == m->beginp) ? OUT : *(start-1);
    750        1.8     perry 	int lastc;	/* previous c */
    751        1.8     perry 	int flagch;
    752        1.8     perry 	int i;
    753        1.8     perry 	char *coldp;	/* last p after which no match was underway */
    754        1.1       jtc 
    755       1.12     lukem 	_DIAGASSERT(m != NULL);
    756       1.12     lukem 	_DIAGASSERT(start != NULL);
    757       1.12     lukem 	_DIAGASSERT(stop != NULL);
    758       1.12     lukem 
    759        1.1       jtc 	CLEAR(st);
    760        1.1       jtc 	SET1(st, startst);
    761        1.1       jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    762        1.1       jtc 	ASSIGN(fresh, st);
    763        1.1       jtc 	SP("start", st, *p);
    764        1.1       jtc 	coldp = NULL;
    765        1.1       jtc 	for (;;) {
    766        1.1       jtc 		/* next character */
    767        1.1       jtc 		lastc = c;
    768        1.1       jtc 		c = (p == m->endp) ? OUT : *p;
    769        1.1       jtc 		if (EQ(st, fresh))
    770        1.1       jtc 			coldp = p;
    771        1.1       jtc 
    772        1.1       jtc 		/* is there an EOL and/or BOL between lastc and c? */
    773        1.1       jtc 		flagch = '\0';
    774        1.1       jtc 		i = 0;
    775        1.1       jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    776        1.1       jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    777        1.1       jtc 			flagch = BOL;
    778        1.1       jtc 			i = m->g->nbol;
    779        1.1       jtc 		}
    780        1.1       jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    781        1.1       jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    782        1.1       jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    783        1.1       jtc 			i += m->g->neol;
    784        1.1       jtc 		}
    785        1.1       jtc 		if (i != 0) {
    786        1.1       jtc 			for (; i > 0; i--)
    787        1.1       jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    788        1.1       jtc 			SP("boleol", st, c);
    789        1.1       jtc 		}
    790        1.1       jtc 
    791        1.1       jtc 		/* how about a word boundary? */
    792        1.3       jtc 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    793        1.3       jtc 					(c != OUT && ISWORD(c)) ) {
    794        1.1       jtc 			flagch = BOW;
    795        1.1       jtc 		}
    796        1.3       jtc 		if ( (lastc != OUT && ISWORD(lastc)) &&
    797        1.3       jtc 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    798        1.1       jtc 			flagch = EOW;
    799        1.1       jtc 		}
    800        1.1       jtc 		if (flagch == BOW || flagch == EOW) {
    801        1.1       jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    802        1.1       jtc 			SP("boweow", st, c);
    803        1.1       jtc 		}
    804        1.1       jtc 
    805        1.1       jtc 		/* are we done? */
    806        1.1       jtc 		if (ISSET(st, stopst) || p == stop)
    807        1.1       jtc 			break;		/* NOTE BREAK OUT */
    808        1.1       jtc 
    809        1.1       jtc 		/* no, we must deal with this character */
    810        1.1       jtc 		ASSIGN(tmp, st);
    811        1.1       jtc 		ASSIGN(st, fresh);
    812        1.1       jtc 		assert(c != OUT);
    813        1.1       jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    814        1.1       jtc 		SP("aft", st, c);
    815        1.1       jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    816        1.1       jtc 		p++;
    817        1.1       jtc 	}
    818        1.1       jtc 
    819        1.1       jtc 	assert(coldp != NULL);
    820        1.1       jtc 	m->coldp = coldp;
    821        1.1       jtc 	if (ISSET(st, stopst))
    822        1.1       jtc 		return(p+1);
    823        1.1       jtc 	else
    824        1.1       jtc 		return(NULL);
    825        1.1       jtc }
    826        1.1       jtc 
    827        1.1       jtc /*
    828        1.1       jtc  - slow - step through the string more deliberately
    829        1.8     perry  == static char *slow(struct match *m, char *start, \
    830        1.1       jtc  ==	char *stop, sopno startst, sopno stopst);
    831        1.1       jtc  */
    832        1.1       jtc static char *			/* where it ended */
    833        1.1       jtc slow(m, start, stop, startst, stopst)
    834        1.8     perry struct match *m;
    835        1.1       jtc char *start;
    836        1.1       jtc char *stop;
    837        1.1       jtc sopno startst;
    838        1.1       jtc sopno stopst;
    839        1.1       jtc {
    840        1.8     perry 	states st = m->st;
    841        1.8     perry 	states empty = m->empty;
    842        1.8     perry 	states tmp = m->tmp;
    843        1.8     perry 	char *p = start;
    844        1.8     perry 	int c = (start == m->beginp) ? OUT : *(start-1);
    845        1.8     perry 	int lastc;	/* previous c */
    846        1.8     perry 	int flagch;
    847        1.8     perry 	int i;
    848        1.8     perry 	char *matchp;	/* last p at which a match ended */
    849        1.1       jtc 
    850       1.12     lukem 	_DIAGASSERT(m != NULL);
    851       1.12     lukem 	_DIAGASSERT(start != NULL);
    852       1.12     lukem 	_DIAGASSERT(stop != NULL);
    853       1.12     lukem 
    854        1.1       jtc 	AT("slow", start, stop, startst, stopst);
    855        1.1       jtc 	CLEAR(st);
    856        1.1       jtc 	SET1(st, startst);
    857        1.1       jtc 	SP("sstart", st, *p);
    858        1.1       jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    859        1.1       jtc 	matchp = NULL;
    860        1.1       jtc 	for (;;) {
    861        1.1       jtc 		/* next character */
    862        1.1       jtc 		lastc = c;
    863        1.1       jtc 		c = (p == m->endp) ? OUT : *p;
    864        1.1       jtc 
    865        1.1       jtc 		/* is there an EOL and/or BOL between lastc and c? */
    866        1.1       jtc 		flagch = '\0';
    867        1.1       jtc 		i = 0;
    868        1.1       jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    869        1.1       jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    870        1.1       jtc 			flagch = BOL;
    871        1.1       jtc 			i = m->g->nbol;
    872        1.1       jtc 		}
    873        1.1       jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    874        1.1       jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    875        1.1       jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    876        1.1       jtc 			i += m->g->neol;
    877        1.1       jtc 		}
    878        1.1       jtc 		if (i != 0) {
    879        1.1       jtc 			for (; i > 0; i--)
    880        1.1       jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    881        1.1       jtc 			SP("sboleol", st, c);
    882        1.1       jtc 		}
    883        1.1       jtc 
    884        1.1       jtc 		/* how about a word boundary? */
    885        1.3       jtc 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    886        1.3       jtc 					(c != OUT && ISWORD(c)) ) {
    887        1.1       jtc 			flagch = BOW;
    888        1.1       jtc 		}
    889        1.3       jtc 		if ( (lastc != OUT && ISWORD(lastc)) &&
    890        1.3       jtc 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    891        1.1       jtc 			flagch = EOW;
    892        1.1       jtc 		}
    893        1.1       jtc 		if (flagch == BOW || flagch == EOW) {
    894        1.1       jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    895        1.1       jtc 			SP("sboweow", st, c);
    896        1.1       jtc 		}
    897        1.1       jtc 
    898        1.1       jtc 		/* are we done? */
    899        1.1       jtc 		if (ISSET(st, stopst))
    900        1.1       jtc 			matchp = p;
    901        1.1       jtc 		if (EQ(st, empty) || p == stop)
    902        1.1       jtc 			break;		/* NOTE BREAK OUT */
    903        1.1       jtc 
    904        1.1       jtc 		/* no, we must deal with this character */
    905        1.1       jtc 		ASSIGN(tmp, st);
    906        1.1       jtc 		ASSIGN(st, empty);
    907        1.1       jtc 		assert(c != OUT);
    908        1.1       jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    909        1.1       jtc 		SP("saft", st, c);
    910        1.1       jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    911        1.1       jtc 		p++;
    912        1.1       jtc 	}
    913        1.1       jtc 
    914        1.1       jtc 	return(matchp);
    915        1.1       jtc }
    916        1.1       jtc 
    917        1.1       jtc 
    918        1.1       jtc /*
    919        1.1       jtc  - step - map set of states reachable before char to set reachable after
    920        1.8     perry  == static states step(struct re_guts *g, sopno start, sopno stop, \
    921        1.8     perry  ==	states bef, int ch, states aft);
    922        1.1       jtc  == #define	BOL	(OUT+1)
    923        1.1       jtc  == #define	EOL	(BOL+1)
    924        1.1       jtc  == #define	BOLEOL	(BOL+2)
    925        1.1       jtc  == #define	NOTHING	(BOL+3)
    926        1.1       jtc  == #define	BOW	(BOL+4)
    927        1.1       jtc  == #define	EOW	(BOL+5)
    928        1.1       jtc  == #define	CODEMAX	(BOL+5)		// highest code used
    929        1.1       jtc  == #define	NONCHAR(c)	((c) > CHAR_MAX)
    930        1.1       jtc  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    931        1.1       jtc  */
    932        1.1       jtc static states
    933        1.1       jtc step(g, start, stop, bef, ch, aft)
    934        1.8     perry struct re_guts *g;
    935        1.3       jtc sopno start;			/* start state within strip */
    936        1.3       jtc sopno stop;			/* state after stop state within strip */
    937        1.8     perry states bef;		/* states reachable before */
    938        1.1       jtc int ch;				/* character or NONCHAR code */
    939        1.8     perry states aft;		/* states already known reachable after */
    940        1.1       jtc {
    941        1.8     perry 	cset *cs;
    942        1.8     perry 	sop s;
    943        1.8     perry 	sopno pc;
    944        1.8     perry 	onestate here;		/* note, macros know this name */
    945        1.8     perry 	sopno look;
    946        1.8     perry 	int i;
    947        1.1       jtc 
    948       1.12     lukem 	_DIAGASSERT(g != NULL);
    949       1.12     lukem 
    950        1.1       jtc 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
    951        1.1       jtc 		s = g->strip[pc];
    952        1.1       jtc 		switch (OP(s)) {
    953        1.1       jtc 		case OEND:
    954        1.1       jtc 			assert(pc == stop-1);
    955        1.1       jtc 			break;
    956        1.1       jtc 		case OCHAR:
    957        1.1       jtc 			/* only characters can match */
    958        1.1       jtc 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
    959        1.1       jtc 			if (ch == (char)OPND(s))
    960        1.1       jtc 				FWD(aft, bef, 1);
    961        1.1       jtc 			break;
    962        1.1       jtc 		case OBOL:
    963        1.1       jtc 			if (ch == BOL || ch == BOLEOL)
    964        1.1       jtc 				FWD(aft, bef, 1);
    965        1.1       jtc 			break;
    966        1.1       jtc 		case OEOL:
    967        1.1       jtc 			if (ch == EOL || ch == BOLEOL)
    968        1.1       jtc 				FWD(aft, bef, 1);
    969        1.1       jtc 			break;
    970        1.1       jtc 		case OBOW:
    971        1.1       jtc 			if (ch == BOW)
    972        1.1       jtc 				FWD(aft, bef, 1);
    973        1.1       jtc 			break;
    974        1.1       jtc 		case OEOW:
    975        1.1       jtc 			if (ch == EOW)
    976        1.1       jtc 				FWD(aft, bef, 1);
    977        1.1       jtc 			break;
    978        1.1       jtc 		case OANY:
    979        1.1       jtc 			if (!NONCHAR(ch))
    980        1.1       jtc 				FWD(aft, bef, 1);
    981        1.1       jtc 			break;
    982        1.1       jtc 		case OANYOF:
    983        1.1       jtc 			cs = &g->sets[OPND(s)];
    984        1.1       jtc 			if (!NONCHAR(ch) && CHIN(cs, ch))
    985        1.1       jtc 				FWD(aft, bef, 1);
    986        1.1       jtc 			break;
    987        1.1       jtc 		case OBACK_:		/* ignored here */
    988        1.1       jtc 		case O_BACK:
    989        1.1       jtc 			FWD(aft, aft, 1);
    990        1.1       jtc 			break;
    991        1.1       jtc 		case OPLUS_:		/* forward, this is just an empty */
    992        1.1       jtc 			FWD(aft, aft, 1);
    993        1.1       jtc 			break;
    994        1.1       jtc 		case O_PLUS:		/* both forward and back */
    995        1.1       jtc 			FWD(aft, aft, 1);
    996        1.1       jtc 			i = ISSETBACK(aft, OPND(s));
    997        1.1       jtc 			BACK(aft, aft, OPND(s));
    998        1.1       jtc 			if (!i && ISSETBACK(aft, OPND(s))) {
    999        1.1       jtc 				/* oho, must reconsider loop body */
   1000        1.1       jtc 				pc -= OPND(s) + 1;
   1001        1.1       jtc 				INIT(here, pc);
   1002        1.1       jtc 			}
   1003        1.1       jtc 			break;
   1004        1.1       jtc 		case OQUEST_:		/* two branches, both forward */
   1005        1.1       jtc 			FWD(aft, aft, 1);
   1006        1.1       jtc 			FWD(aft, aft, OPND(s));
   1007        1.1       jtc 			break;
   1008        1.1       jtc 		case O_QUEST:		/* just an empty */
   1009        1.1       jtc 			FWD(aft, aft, 1);
   1010        1.1       jtc 			break;
   1011        1.1       jtc 		case OLPAREN:		/* not significant here */
   1012        1.1       jtc 		case ORPAREN:
   1013        1.1       jtc 			FWD(aft, aft, 1);
   1014        1.1       jtc 			break;
   1015        1.1       jtc 		case OCH_:		/* mark the first two branches */
   1016        1.1       jtc 			FWD(aft, aft, 1);
   1017        1.1       jtc 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
   1018        1.1       jtc 			FWD(aft, aft, OPND(s));
   1019        1.1       jtc 			break;
   1020        1.1       jtc 		case OOR1:		/* done a branch, find the O_CH */
   1021        1.1       jtc 			if (ISSTATEIN(aft, here)) {
   1022        1.1       jtc 				for (look = 1;
   1023        1.1       jtc 						OP(s = g->strip[pc+look]) != O_CH;
   1024        1.1       jtc 						look += OPND(s))
   1025        1.1       jtc 					assert(OP(s) == OOR2);
   1026        1.1       jtc 				FWD(aft, aft, look);
   1027        1.1       jtc 			}
   1028        1.1       jtc 			break;
   1029        1.1       jtc 		case OOR2:		/* propagate OCH_'s marking */
   1030        1.1       jtc 			FWD(aft, aft, 1);
   1031        1.1       jtc 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
   1032        1.1       jtc 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
   1033        1.1       jtc 				FWD(aft, aft, OPND(s));
   1034        1.1       jtc 			}
   1035        1.1       jtc 			break;
   1036        1.1       jtc 		case O_CH:		/* just empty */
   1037        1.1       jtc 			FWD(aft, aft, 1);
   1038        1.1       jtc 			break;
   1039        1.1       jtc 		default:		/* ooooops... */
   1040        1.1       jtc 			assert(nope);
   1041        1.1       jtc 			break;
   1042        1.1       jtc 		}
   1043        1.1       jtc 	}
   1044        1.1       jtc 
   1045        1.1       jtc 	return(aft);
   1046        1.1       jtc }
   1047        1.1       jtc 
   1048        1.1       jtc #ifdef REDEBUG
   1049        1.1       jtc /*
   1050        1.1       jtc  - print - print a set of states
   1051        1.1       jtc  == #ifdef REDEBUG
   1052        1.1       jtc  == static void print(struct match *m, char *caption, states st, \
   1053        1.1       jtc  ==	int ch, FILE *d);
   1054        1.1       jtc  == #endif
   1055        1.1       jtc  */
   1056        1.1       jtc static void
   1057        1.1       jtc print(m, caption, st, ch, d)
   1058        1.1       jtc struct match *m;
   1059        1.1       jtc char *caption;
   1060        1.1       jtc states st;
   1061        1.1       jtc int ch;
   1062        1.1       jtc FILE *d;
   1063        1.1       jtc {
   1064        1.8     perry 	struct re_guts *g = m->g;
   1065        1.8     perry 	int i;
   1066        1.8     perry 	int first = 1;
   1067        1.1       jtc 
   1068       1.12     lukem 	_DIAGASSERT(m != NULL);
   1069       1.12     lukem 	_DIAGASSERT(caption != NULL);
   1070       1.12     lukem 
   1071        1.1       jtc 	if (!(m->eflags&REG_TRACE))
   1072        1.1       jtc 		return;
   1073        1.1       jtc 
   1074       1.12     lukem 	_DIAGASSERT(d != NULL);
   1075       1.12     lukem 
   1076        1.1       jtc 	fprintf(d, "%s", caption);
   1077        1.1       jtc 	if (ch != '\0')
   1078        1.1       jtc 		fprintf(d, " %s", pchar(ch));
   1079        1.1       jtc 	for (i = 0; i < g->nstates; i++)
   1080        1.1       jtc 		if (ISSET(st, i)) {
   1081        1.1       jtc 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
   1082        1.1       jtc 			first = 0;
   1083        1.1       jtc 		}
   1084        1.1       jtc 	fprintf(d, "\n");
   1085        1.1       jtc }
   1086        1.1       jtc 
   1087        1.1       jtc /*
   1088        1.1       jtc  - at - print current situation
   1089        1.1       jtc  == #ifdef REDEBUG
   1090        1.1       jtc  == static void at(struct match *m, char *title, char *start, char *stop, \
   1091        1.1       jtc  ==						sopno startst, sopno stopst);
   1092        1.1       jtc  == #endif
   1093        1.1       jtc  */
   1094        1.1       jtc static void
   1095        1.1       jtc at(m, title, start, stop, startst, stopst)
   1096        1.1       jtc struct match *m;
   1097        1.1       jtc char *title;
   1098        1.1       jtc char *start;
   1099        1.1       jtc char *stop;
   1100        1.1       jtc sopno startst;
   1101        1.1       jtc sopno stopst;
   1102        1.1       jtc {
   1103       1.12     lukem 
   1104       1.12     lukem 	_DIAGASSERT(m != NULL);
   1105       1.12     lukem 	_DIAGASSERT(title != NULL);
   1106       1.12     lukem 	_DIAGASSERT(start != NULL);
   1107       1.12     lukem 	_DIAGASSERT(stop != NULL);
   1108       1.12     lukem 
   1109        1.1       jtc 	if (!(m->eflags&REG_TRACE))
   1110        1.1       jtc 		return;
   1111        1.1       jtc 
   1112        1.1       jtc 	printf("%s %s-", title, pchar(*start));
   1113        1.1       jtc 	printf("%s ", pchar(*stop));
   1114        1.1       jtc 	printf("%ld-%ld\n", (long)startst, (long)stopst);
   1115        1.1       jtc }
   1116        1.1       jtc 
   1117        1.1       jtc #ifndef PCHARDONE
   1118        1.1       jtc #define	PCHARDONE	/* never again */
   1119        1.1       jtc /*
   1120        1.1       jtc  - pchar - make a character printable
   1121        1.1       jtc  == #ifdef REDEBUG
   1122        1.1       jtc  == static char *pchar(int ch);
   1123        1.1       jtc  == #endif
   1124        1.1       jtc  *
   1125        1.1       jtc  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
   1126        1.1       jtc  * duplicate here avoids having a debugging-capable regexec.o tied to
   1127        1.1       jtc  * a matching debug.o, and this is convenient.  It all disappears in
   1128        1.1       jtc  * the non-debug compilation anyway, so it doesn't matter much.
   1129        1.1       jtc  */
   1130        1.1       jtc static char *			/* -> representation */
   1131        1.1       jtc pchar(ch)
   1132        1.1       jtc int ch;
   1133        1.1       jtc {
   1134        1.1       jtc 	static char pbuf[10];
   1135        1.1       jtc 
   1136        1.1       jtc 	if (isprint(ch) || ch == ' ')
   1137        1.6       mrg 		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
   1138        1.1       jtc 	else
   1139        1.6       mrg 		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
   1140        1.1       jtc 	return(pbuf);
   1141        1.1       jtc }
   1142        1.1       jtc #endif
   1143        1.1       jtc #endif
   1144        1.1       jtc 
   1145        1.1       jtc #undef	matcher
   1146        1.1       jtc #undef	fast
   1147        1.1       jtc #undef	slow
   1148        1.1       jtc #undef	dissect
   1149        1.1       jtc #undef	backref
   1150        1.1       jtc #undef	step
   1151        1.1       jtc #undef	print
   1152        1.1       jtc #undef	at
   1153        1.1       jtc #undef	match
   1154  1.12.10.2   nathanw #undef	nope
   1155