Home | History | Annotate | Line # | Download | only in regex
engine.c revision 1.1.1.1
      1  1.1.1.1  cgd /*-
      2  1.1.1.1  cgd  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
      3  1.1.1.1  cgd  * Copyright (c) 1992, 1993, 1994
      4  1.1.1.1  cgd  *	The Regents of the University of California.  All rights reserved.
      5  1.1.1.1  cgd  *
      6  1.1.1.1  cgd  * This code is derived from software contributed to Berkeley by
      7  1.1.1.1  cgd  * Henry Spencer.
      8  1.1.1.1  cgd  *
      9  1.1.1.1  cgd  * Redistribution and use in source and binary forms, with or without
     10  1.1.1.1  cgd  * modification, are permitted provided that the following conditions
     11  1.1.1.1  cgd  * are met:
     12  1.1.1.1  cgd  * 1. Redistributions of source code must retain the above copyright
     13  1.1.1.1  cgd  *    notice, this list of conditions and the following disclaimer.
     14  1.1.1.1  cgd  * 2. Redistributions in binary form must reproduce the above copyright
     15  1.1.1.1  cgd  *    notice, this list of conditions and the following disclaimer in the
     16  1.1.1.1  cgd  *    documentation and/or other materials provided with the distribution.
     17  1.1.1.1  cgd  * 3. All advertising materials mentioning features or use of this software
     18  1.1.1.1  cgd  *    must display the following acknowledgement:
     19  1.1.1.1  cgd  *	This product includes software developed by the University of
     20  1.1.1.1  cgd  *	California, Berkeley and its contributors.
     21  1.1.1.1  cgd  * 4. Neither the name of the University nor the names of its contributors
     22  1.1.1.1  cgd  *    may be used to endorse or promote products derived from this software
     23  1.1.1.1  cgd  *    without specific prior written permission.
     24  1.1.1.1  cgd  *
     25  1.1.1.1  cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     26  1.1.1.1  cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     27  1.1.1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     28  1.1.1.1  cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     29  1.1.1.1  cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     30  1.1.1.1  cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     31  1.1.1.1  cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32  1.1.1.1  cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     33  1.1.1.1  cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     34  1.1.1.1  cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     35  1.1.1.1  cgd  * SUCH DAMAGE.
     36  1.1.1.1  cgd  *
     37  1.1.1.1  cgd  *	@(#)engine.c	8.5 (Berkeley) 3/20/94
     38  1.1.1.1  cgd  */
     39  1.1.1.1  cgd 
     40      1.1  jtc /*
     41      1.1  jtc  * The matching engine and friends.  This file is #included by regexec.c
     42      1.1  jtc  * after suitable #defines of a variety of macros used herein, so that
     43      1.1  jtc  * different state representations can be used without duplicating masses
     44      1.1  jtc  * of code.
     45      1.1  jtc  */
     46      1.1  jtc 
     47      1.1  jtc #ifdef SNAMES
     48      1.1  jtc #define	matcher	smatcher
     49      1.1  jtc #define	fast	sfast
     50      1.1  jtc #define	slow	sslow
     51      1.1  jtc #define	dissect	sdissect
     52      1.1  jtc #define	backref	sbackref
     53      1.1  jtc #define	step	sstep
     54      1.1  jtc #define	print	sprint
     55      1.1  jtc #define	at	sat
     56      1.1  jtc #define	match	smat
     57      1.1  jtc #endif
     58      1.1  jtc #ifdef LNAMES
     59      1.1  jtc #define	matcher	lmatcher
     60      1.1  jtc #define	fast	lfast
     61      1.1  jtc #define	slow	lslow
     62      1.1  jtc #define	dissect	ldissect
     63      1.1  jtc #define	backref	lbackref
     64      1.1  jtc #define	step	lstep
     65      1.1  jtc #define	print	lprint
     66      1.1  jtc #define	at	lat
     67      1.1  jtc #define	match	lmat
     68      1.1  jtc #endif
     69      1.1  jtc 
     70      1.1  jtc /* another structure passed up and down to avoid zillions of parameters */
     71      1.1  jtc struct match {
     72      1.1  jtc 	struct re_guts *g;
     73      1.1  jtc 	int eflags;
     74      1.1  jtc 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
     75      1.1  jtc 	char *offp;		/* offsets work from here */
     76      1.1  jtc 	char *beginp;		/* start of string -- virtual NUL precedes */
     77      1.1  jtc 	char *endp;		/* end of string -- virtual NUL here */
     78      1.1  jtc 	char *coldp;		/* can be no match starting before here */
     79      1.1  jtc 	char **lastpos;		/* [nplus+1] */
     80      1.1  jtc 	STATEVARS;
     81      1.1  jtc 	states st;		/* current states */
     82      1.1  jtc 	states fresh;		/* states for a fresh start */
     83      1.1  jtc 	states tmp;		/* temporary */
     84      1.1  jtc 	states empty;		/* empty set of states */
     85      1.1  jtc };
     86      1.1  jtc 
     87  1.1.1.1  cgd /* ========= begin header generated by ./mkh ========= */
     88  1.1.1.1  cgd #ifdef __cplusplus
     89  1.1.1.1  cgd extern "C" {
     90  1.1.1.1  cgd #endif
     91  1.1.1.1  cgd 
     92  1.1.1.1  cgd /* === engine.c === */
     93  1.1.1.1  cgd static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
     94  1.1.1.1  cgd static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
     95  1.1.1.1  cgd static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
     96  1.1.1.1  cgd static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
     97  1.1.1.1  cgd static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
     98  1.1.1.1  cgd static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
     99  1.1.1.1  cgd #define	BOL	(OUT+1)
    100  1.1.1.1  cgd #define	EOL	(BOL+1)
    101  1.1.1.1  cgd #define	BOLEOL	(BOL+2)
    102  1.1.1.1  cgd #define	NOTHING	(BOL+3)
    103  1.1.1.1  cgd #define	BOW	(BOL+4)
    104  1.1.1.1  cgd #define	EOW	(BOL+5)
    105  1.1.1.1  cgd #define	CODEMAX	(BOL+5)		/* highest code used */
    106  1.1.1.1  cgd #define	NONCHAR(c)	((c) > CHAR_MAX)
    107  1.1.1.1  cgd #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    108  1.1.1.1  cgd #ifdef REDEBUG
    109  1.1.1.1  cgd static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
    110  1.1.1.1  cgd #endif
    111  1.1.1.1  cgd #ifdef REDEBUG
    112  1.1.1.1  cgd static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
    113  1.1.1.1  cgd #endif
    114  1.1.1.1  cgd #ifdef REDEBUG
    115  1.1.1.1  cgd static char *pchar __P((int ch));
    116  1.1.1.1  cgd #endif
    117  1.1.1.1  cgd 
    118  1.1.1.1  cgd #ifdef __cplusplus
    119  1.1.1.1  cgd }
    120  1.1.1.1  cgd #endif
    121  1.1.1.1  cgd /* ========= end header generated by ./mkh ========= */
    122      1.1  jtc 
    123      1.1  jtc #ifdef REDEBUG
    124      1.1  jtc #define	SP(t, s, c)	print(m, t, s, c, stdout)
    125      1.1  jtc #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
    126      1.1  jtc #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
    127      1.1  jtc #else
    128      1.1  jtc #define	SP(t, s, c)	/* nothing */
    129      1.1  jtc #define	AT(t, p1, p2, s1, s2)	/* nothing */
    130      1.1  jtc #define	NOTE(s)	/* nothing */
    131      1.1  jtc #endif
    132      1.1  jtc 
    133      1.1  jtc /*
    134      1.1  jtc  - matcher - the actual matching engine
    135      1.1  jtc  == static int matcher(register struct re_guts *g, char *string, \
    136      1.1  jtc  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
    137      1.1  jtc  */
    138      1.1  jtc static int			/* 0 success, REG_NOMATCH failure */
    139      1.1  jtc matcher(g, string, nmatch, pmatch, eflags)
    140      1.1  jtc register struct re_guts *g;
    141      1.1  jtc char *string;
    142      1.1  jtc size_t nmatch;
    143      1.1  jtc regmatch_t pmatch[];
    144      1.1  jtc int eflags;
    145      1.1  jtc {
    146      1.1  jtc 	register char *endp;
    147      1.1  jtc 	register int i;
    148      1.1  jtc 	struct match mv;
    149      1.1  jtc 	register struct match *m = &mv;
    150      1.1  jtc 	register char *dp;
    151      1.1  jtc 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
    152      1.1  jtc 	const register sopno gl = g->laststate;
    153      1.1  jtc 	char *start;
    154      1.1  jtc 	char *stop;
    155      1.1  jtc 
    156      1.1  jtc 	/* simplify the situation where possible */
    157      1.1  jtc 	if (g->cflags&REG_NOSUB)
    158      1.1  jtc 		nmatch = 0;
    159      1.1  jtc 	if (eflags&REG_STARTEND) {
    160      1.1  jtc 		start = string + pmatch[0].rm_so;
    161      1.1  jtc 		stop = string + pmatch[0].rm_eo;
    162      1.1  jtc 	} else {
    163      1.1  jtc 		start = string;
    164      1.1  jtc 		stop = start + strlen(start);
    165      1.1  jtc 	}
    166      1.1  jtc 	if (stop < start)
    167      1.1  jtc 		return(REG_INVARG);
    168      1.1  jtc 
    169      1.1  jtc 	/* prescreening; this does wonders for this rather slow code */
    170      1.1  jtc 	if (g->must != NULL) {
    171      1.1  jtc 		for (dp = start; dp < stop; dp++)
    172      1.1  jtc 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
    173      1.1  jtc 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
    174      1.1  jtc 				break;
    175      1.1  jtc 		if (dp == stop)		/* we didn't find g->must */
    176      1.1  jtc 			return(REG_NOMATCH);
    177      1.1  jtc 	}
    178      1.1  jtc 
    179      1.1  jtc 	/* match struct setup */
    180      1.1  jtc 	m->g = g;
    181      1.1  jtc 	m->eflags = eflags;
    182      1.1  jtc 	m->pmatch = NULL;
    183      1.1  jtc 	m->lastpos = NULL;
    184      1.1  jtc 	m->offp = string;
    185      1.1  jtc 	m->beginp = start;
    186      1.1  jtc 	m->endp = stop;
    187      1.1  jtc 	STATESETUP(m, 4);
    188      1.1  jtc 	SETUP(m->st);
    189      1.1  jtc 	SETUP(m->fresh);
    190      1.1  jtc 	SETUP(m->tmp);
    191      1.1  jtc 	SETUP(m->empty);
    192      1.1  jtc 	CLEAR(m->empty);
    193      1.1  jtc 
    194      1.1  jtc 	/* this loop does only one repetition except for backrefs */
    195      1.1  jtc 	for (;;) {
    196      1.1  jtc 		endp = fast(m, start, stop, gf, gl);
    197      1.1  jtc 		if (endp == NULL) {		/* a miss */
    198      1.1  jtc 			STATETEARDOWN(m);
    199      1.1  jtc 			return(REG_NOMATCH);
    200      1.1  jtc 		}
    201      1.1  jtc 		if (nmatch == 0 && !g->backrefs)
    202      1.1  jtc 			break;		/* no further info needed */
    203      1.1  jtc 
    204      1.1  jtc 		/* where? */
    205      1.1  jtc 		assert(m->coldp != NULL);
    206      1.1  jtc 		for (;;) {
    207      1.1  jtc 			NOTE("finding start");
    208      1.1  jtc 			endp = slow(m, m->coldp, stop, gf, gl);
    209      1.1  jtc 			if (endp != NULL)
    210      1.1  jtc 				break;
    211      1.1  jtc 			assert(m->coldp < m->endp);
    212      1.1  jtc 			m->coldp++;
    213      1.1  jtc 		}
    214      1.1  jtc 		if (nmatch == 1 && !g->backrefs)
    215      1.1  jtc 			break;		/* no further info needed */
    216      1.1  jtc 
    217      1.1  jtc 		/* oh my, he wants the subexpressions... */
    218      1.1  jtc 		if (m->pmatch == NULL)
    219      1.1  jtc 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
    220      1.1  jtc 							sizeof(regmatch_t));
    221      1.1  jtc 		if (m->pmatch == NULL) {
    222      1.1  jtc 			STATETEARDOWN(m);
    223      1.1  jtc 			return(REG_ESPACE);
    224      1.1  jtc 		}
    225      1.1  jtc 		for (i = 1; i <= m->g->nsub; i++)
    226      1.1  jtc 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
    227      1.1  jtc 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
    228      1.1  jtc 			NOTE("dissecting");
    229      1.1  jtc 			dp = dissect(m, m->coldp, endp, gf, gl);
    230      1.1  jtc 		} else {
    231      1.1  jtc 			if (g->nplus > 0 && m->lastpos == NULL)
    232      1.1  jtc 				m->lastpos = (char **)malloc((g->nplus+1) *
    233      1.1  jtc 							sizeof(char *));
    234      1.1  jtc 			if (g->nplus > 0 && m->lastpos == NULL) {
    235      1.1  jtc 				free(m->pmatch);
    236      1.1  jtc 				STATETEARDOWN(m);
    237      1.1  jtc 				return(REG_ESPACE);
    238      1.1  jtc 			}
    239      1.1  jtc 			NOTE("backref dissect");
    240      1.1  jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    241      1.1  jtc 		}
    242      1.1  jtc 		if (dp != NULL)
    243      1.1  jtc 			break;
    244      1.1  jtc 
    245      1.1  jtc 		/* uh-oh... we couldn't find a subexpression-level match */
    246      1.1  jtc 		assert(g->backrefs);	/* must be back references doing it */
    247      1.1  jtc 		assert(g->nplus == 0 || m->lastpos != NULL);
    248      1.1  jtc 		for (;;) {
    249      1.1  jtc 			if (dp != NULL || endp <= m->coldp)
    250      1.1  jtc 				break;		/* defeat */
    251      1.1  jtc 			NOTE("backoff");
    252      1.1  jtc 			endp = slow(m, m->coldp, endp-1, gf, gl);
    253      1.1  jtc 			if (endp == NULL)
    254      1.1  jtc 				break;		/* defeat */
    255      1.1  jtc 			/* try it on a shorter possibility */
    256      1.1  jtc #ifndef NDEBUG
    257      1.1  jtc 			for (i = 1; i <= m->g->nsub; i++) {
    258      1.1  jtc 				assert(m->pmatch[i].rm_so == -1);
    259      1.1  jtc 				assert(m->pmatch[i].rm_eo == -1);
    260      1.1  jtc 			}
    261      1.1  jtc #endif
    262      1.1  jtc 			NOTE("backoff dissect");
    263      1.1  jtc 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
    264      1.1  jtc 		}
    265      1.1  jtc 		assert(dp == NULL || dp == endp);
    266      1.1  jtc 		if (dp != NULL)		/* found a shorter one */
    267      1.1  jtc 			break;
    268      1.1  jtc 
    269      1.1  jtc 		/* despite initial appearances, there is no match here */
    270      1.1  jtc 		NOTE("false alarm");
    271      1.1  jtc 		start = m->coldp + 1;	/* recycle starting later */
    272      1.1  jtc 		assert(start <= stop);
    273      1.1  jtc 	}
    274      1.1  jtc 
    275      1.1  jtc 	/* fill in the details if requested */
    276      1.1  jtc 	if (nmatch > 0) {
    277      1.1  jtc 		pmatch[0].rm_so = m->coldp - m->offp;
    278      1.1  jtc 		pmatch[0].rm_eo = endp - m->offp;
    279      1.1  jtc 	}
    280      1.1  jtc 	if (nmatch > 1) {
    281      1.1  jtc 		assert(m->pmatch != NULL);
    282      1.1  jtc 		for (i = 1; i < nmatch; i++)
    283      1.1  jtc 			if (i <= m->g->nsub)
    284      1.1  jtc 				pmatch[i] = m->pmatch[i];
    285      1.1  jtc 			else {
    286      1.1  jtc 				pmatch[i].rm_so = -1;
    287      1.1  jtc 				pmatch[i].rm_eo = -1;
    288      1.1  jtc 			}
    289      1.1  jtc 	}
    290      1.1  jtc 
    291      1.1  jtc 	if (m->pmatch != NULL)
    292      1.1  jtc 		free((char *)m->pmatch);
    293      1.1  jtc 	if (m->lastpos != NULL)
    294      1.1  jtc 		free((char *)m->lastpos);
    295      1.1  jtc 	STATETEARDOWN(m);
    296      1.1  jtc 	return(0);
    297      1.1  jtc }
    298      1.1  jtc 
    299      1.1  jtc /*
    300      1.1  jtc  - dissect - figure out what matched what, no back references
    301      1.1  jtc  == static char *dissect(register struct match *m, char *start, \
    302      1.1  jtc  ==	char *stop, sopno startst, sopno stopst);
    303      1.1  jtc  */
    304      1.1  jtc static char *			/* == stop (success) always */
    305      1.1  jtc dissect(m, start, stop, startst, stopst)
    306      1.1  jtc register struct match *m;
    307      1.1  jtc char *start;
    308      1.1  jtc char *stop;
    309      1.1  jtc sopno startst;
    310      1.1  jtc sopno stopst;
    311      1.1  jtc {
    312      1.1  jtc 	register int i;
    313      1.1  jtc 	register sopno ss;	/* start sop of current subRE */
    314      1.1  jtc 	register sopno es;	/* end sop of current subRE */
    315      1.1  jtc 	register char *sp;	/* start of string matched by it */
    316      1.1  jtc 	register char *stp;	/* string matched by it cannot pass here */
    317      1.1  jtc 	register char *rest;	/* start of rest of string */
    318      1.1  jtc 	register char *tail;	/* string unmatched by rest of RE */
    319      1.1  jtc 	register sopno ssub;	/* start sop of subsubRE */
    320      1.1  jtc 	register sopno esub;	/* end sop of subsubRE */
    321      1.1  jtc 	register char *ssp;	/* start of string matched by subsubRE */
    322      1.1  jtc 	register char *sep;	/* end of string matched by subsubRE */
    323      1.1  jtc 	register char *oldssp;	/* previous ssp */
    324      1.1  jtc 	register char *dp;
    325      1.1  jtc 
    326      1.1  jtc 	AT("diss", start, stop, startst, stopst);
    327      1.1  jtc 	sp = start;
    328      1.1  jtc 	for (ss = startst; ss < stopst; ss = es) {
    329      1.1  jtc 		/* identify end of subRE */
    330      1.1  jtc 		es = ss;
    331      1.1  jtc 		switch (OP(m->g->strip[es])) {
    332      1.1  jtc 		case OPLUS_:
    333      1.1  jtc 		case OQUEST_:
    334      1.1  jtc 			es += OPND(m->g->strip[es]);
    335      1.1  jtc 			break;
    336      1.1  jtc 		case OCH_:
    337      1.1  jtc 			while (OP(m->g->strip[es]) != O_CH)
    338      1.1  jtc 				es += OPND(m->g->strip[es]);
    339      1.1  jtc 			break;
    340      1.1  jtc 		}
    341      1.1  jtc 		es++;
    342      1.1  jtc 
    343      1.1  jtc 		/* figure out what it matched */
    344      1.1  jtc 		switch (OP(m->g->strip[ss])) {
    345      1.1  jtc 		case OEND:
    346      1.1  jtc 			assert(nope);
    347      1.1  jtc 			break;
    348      1.1  jtc 		case OCHAR:
    349      1.1  jtc 			sp++;
    350      1.1  jtc 			break;
    351      1.1  jtc 		case OBOL:
    352      1.1  jtc 		case OEOL:
    353      1.1  jtc 		case OBOW:
    354      1.1  jtc 		case OEOW:
    355      1.1  jtc 			break;
    356      1.1  jtc 		case OANY:
    357      1.1  jtc 		case OANYOF:
    358      1.1  jtc 			sp++;
    359      1.1  jtc 			break;
    360      1.1  jtc 		case OBACK_:
    361      1.1  jtc 		case O_BACK:
    362      1.1  jtc 			assert(nope);
    363      1.1  jtc 			break;
    364      1.1  jtc 		/* cases where length of match is hard to find */
    365      1.1  jtc 		case OQUEST_:
    366      1.1  jtc 			stp = stop;
    367      1.1  jtc 			for (;;) {
    368      1.1  jtc 				/* how long could this one be? */
    369      1.1  jtc 				rest = slow(m, sp, stp, ss, es);
    370      1.1  jtc 				assert(rest != NULL);	/* it did match */
    371      1.1  jtc 				/* could the rest match the rest? */
    372      1.1  jtc 				tail = slow(m, rest, stop, es, stopst);
    373      1.1  jtc 				if (tail == stop)
    374      1.1  jtc 					break;		/* yes! */
    375      1.1  jtc 				/* no -- try a shorter match for this one */
    376      1.1  jtc 				stp = rest - 1;
    377      1.1  jtc 				assert(stp >= sp);	/* it did work */
    378      1.1  jtc 			}
    379      1.1  jtc 			ssub = ss + 1;
    380      1.1  jtc 			esub = es - 1;
    381      1.1  jtc 			/* did innards match? */
    382      1.1  jtc 			if (slow(m, sp, rest, ssub, esub) != NULL) {
    383      1.1  jtc 				dp = dissect(m, sp, rest, ssub, esub);
    384      1.1  jtc 				assert(dp == rest);
    385      1.1  jtc 			} else		/* no */
    386      1.1  jtc 				assert(sp == rest);
    387      1.1  jtc 			sp = rest;
    388      1.1  jtc 			break;
    389      1.1  jtc 		case OPLUS_:
    390      1.1  jtc 			stp = stop;
    391      1.1  jtc 			for (;;) {
    392      1.1  jtc 				/* how long could this one be? */
    393      1.1  jtc 				rest = slow(m, sp, stp, ss, es);
    394      1.1  jtc 				assert(rest != NULL);	/* it did match */
    395      1.1  jtc 				/* could the rest match the rest? */
    396      1.1  jtc 				tail = slow(m, rest, stop, es, stopst);
    397      1.1  jtc 				if (tail == stop)
    398      1.1  jtc 					break;		/* yes! */
    399      1.1  jtc 				/* no -- try a shorter match for this one */
    400      1.1  jtc 				stp = rest - 1;
    401      1.1  jtc 				assert(stp >= sp);	/* it did work */
    402      1.1  jtc 			}
    403      1.1  jtc 			ssub = ss + 1;
    404      1.1  jtc 			esub = es - 1;
    405      1.1  jtc 			ssp = sp;
    406      1.1  jtc 			oldssp = ssp;
    407      1.1  jtc 			for (;;) {	/* find last match of innards */
    408      1.1  jtc 				sep = slow(m, ssp, rest, ssub, esub);
    409      1.1  jtc 				if (sep == NULL || sep == ssp)
    410      1.1  jtc 					break;	/* failed or matched null */
    411      1.1  jtc 				oldssp = ssp;	/* on to next try */
    412      1.1  jtc 				ssp = sep;
    413      1.1  jtc 			}
    414      1.1  jtc 			if (sep == NULL) {
    415      1.1  jtc 				/* last successful match */
    416      1.1  jtc 				sep = ssp;
    417      1.1  jtc 				ssp = oldssp;
    418      1.1  jtc 			}
    419      1.1  jtc 			assert(sep == rest);	/* must exhaust substring */
    420      1.1  jtc 			assert(slow(m, ssp, sep, ssub, esub) == rest);
    421      1.1  jtc 			dp = dissect(m, ssp, sep, ssub, esub);
    422      1.1  jtc 			assert(dp == sep);
    423      1.1  jtc 			sp = rest;
    424      1.1  jtc 			break;
    425      1.1  jtc 		case OCH_:
    426      1.1  jtc 			stp = stop;
    427      1.1  jtc 			for (;;) {
    428      1.1  jtc 				/* how long could this one be? */
    429      1.1  jtc 				rest = slow(m, sp, stp, ss, es);
    430      1.1  jtc 				assert(rest != NULL);	/* it did match */
    431      1.1  jtc 				/* could the rest match the rest? */
    432      1.1  jtc 				tail = slow(m, rest, stop, es, stopst);
    433      1.1  jtc 				if (tail == stop)
    434      1.1  jtc 					break;		/* yes! */
    435      1.1  jtc 				/* no -- try a shorter match for this one */
    436      1.1  jtc 				stp = rest - 1;
    437      1.1  jtc 				assert(stp >= sp);	/* it did work */
    438      1.1  jtc 			}
    439      1.1  jtc 			ssub = ss + 1;
    440      1.1  jtc 			esub = ss + OPND(m->g->strip[ss]) - 1;
    441      1.1  jtc 			assert(OP(m->g->strip[esub]) == OOR1);
    442      1.1  jtc 			for (;;) {	/* find first matching branch */
    443      1.1  jtc 				if (slow(m, sp, rest, ssub, esub) == rest)
    444      1.1  jtc 					break;	/* it matched all of it */
    445      1.1  jtc 				/* that one missed, try next one */
    446      1.1  jtc 				assert(OP(m->g->strip[esub]) == OOR1);
    447      1.1  jtc 				esub++;
    448      1.1  jtc 				assert(OP(m->g->strip[esub]) == OOR2);
    449      1.1  jtc 				ssub = esub + 1;
    450      1.1  jtc 				esub += OPND(m->g->strip[esub]);
    451      1.1  jtc 				if (OP(m->g->strip[esub]) == OOR2)
    452      1.1  jtc 					esub--;
    453      1.1  jtc 				else
    454      1.1  jtc 					assert(OP(m->g->strip[esub]) == O_CH);
    455      1.1  jtc 			}
    456      1.1  jtc 			dp = dissect(m, sp, rest, ssub, esub);
    457      1.1  jtc 			assert(dp == rest);
    458      1.1  jtc 			sp = rest;
    459      1.1  jtc 			break;
    460      1.1  jtc 		case O_PLUS:
    461      1.1  jtc 		case O_QUEST:
    462      1.1  jtc 		case OOR1:
    463      1.1  jtc 		case OOR2:
    464      1.1  jtc 		case O_CH:
    465      1.1  jtc 			assert(nope);
    466      1.1  jtc 			break;
    467      1.1  jtc 		case OLPAREN:
    468      1.1  jtc 			i = OPND(m->g->strip[ss]);
    469  1.1.1.1  cgd 			assert(0 < i && i <= m->g->nsub);
    470      1.1  jtc 			m->pmatch[i].rm_so = sp - m->offp;
    471      1.1  jtc 			break;
    472      1.1  jtc 		case ORPAREN:
    473      1.1  jtc 			i = OPND(m->g->strip[ss]);
    474  1.1.1.1  cgd 			assert(0 < i && i <= m->g->nsub);
    475      1.1  jtc 			m->pmatch[i].rm_eo = sp - m->offp;
    476      1.1  jtc 			break;
    477      1.1  jtc 		default:		/* uh oh */
    478      1.1  jtc 			assert(nope);
    479      1.1  jtc 			break;
    480      1.1  jtc 		}
    481      1.1  jtc 	}
    482      1.1  jtc 
    483      1.1  jtc 	assert(sp == stop);
    484      1.1  jtc 	return(sp);
    485      1.1  jtc }
    486      1.1  jtc 
    487      1.1  jtc /*
    488      1.1  jtc  - backref - figure out what matched what, figuring in back references
    489      1.1  jtc  == static char *backref(register struct match *m, char *start, \
    490      1.1  jtc  ==	char *stop, sopno startst, sopno stopst, sopno lev);
    491      1.1  jtc  */
    492      1.1  jtc static char *			/* == stop (success) or NULL (failure) */
    493      1.1  jtc backref(m, start, stop, startst, stopst, lev)
    494      1.1  jtc register struct match *m;
    495      1.1  jtc char *start;
    496      1.1  jtc char *stop;
    497      1.1  jtc sopno startst;
    498      1.1  jtc sopno stopst;
    499      1.1  jtc sopno lev;			/* PLUS nesting level */
    500      1.1  jtc {
    501      1.1  jtc 	register int i;
    502      1.1  jtc 	register sopno ss;	/* start sop of current subRE */
    503      1.1  jtc 	register char *sp;	/* start of string matched by it */
    504      1.1  jtc 	register sopno ssub;	/* start sop of subsubRE */
    505      1.1  jtc 	register sopno esub;	/* end sop of subsubRE */
    506      1.1  jtc 	register char *ssp;	/* start of string matched by subsubRE */
    507      1.1  jtc 	register char *dp;
    508      1.1  jtc 	register size_t len;
    509      1.1  jtc 	register int hard;
    510      1.1  jtc 	register sop s;
    511      1.1  jtc 	register regoff_t offsave;
    512      1.1  jtc 	register cset *cs;
    513      1.1  jtc 
    514      1.1  jtc 	AT("back", start, stop, startst, stopst);
    515      1.1  jtc 	sp = start;
    516      1.1  jtc 
    517      1.1  jtc 	/* get as far as we can with easy stuff */
    518      1.1  jtc 	hard = 0;
    519      1.1  jtc 	for (ss = startst; !hard && ss < stopst; ss++)
    520      1.1  jtc 		switch (OP(s = m->g->strip[ss])) {
    521      1.1  jtc 		case OCHAR:
    522      1.1  jtc 			if (sp == stop || *sp++ != (char)OPND(s))
    523      1.1  jtc 				return(NULL);
    524      1.1  jtc 			break;
    525      1.1  jtc 		case OANY:
    526      1.1  jtc 			if (sp == stop)
    527      1.1  jtc 				return(NULL);
    528      1.1  jtc 			sp++;
    529      1.1  jtc 			break;
    530      1.1  jtc 		case OANYOF:
    531      1.1  jtc 			cs = &m->g->sets[OPND(s)];
    532      1.1  jtc 			if (sp == stop || !CHIN(cs, *sp++))
    533      1.1  jtc 				return(NULL);
    534      1.1  jtc 			break;
    535      1.1  jtc 		case OBOL:
    536      1.1  jtc 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    537      1.1  jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    538      1.1  jtc 						(m->g->cflags&REG_NEWLINE)) )
    539      1.1  jtc 				{ /* yes */ }
    540      1.1  jtc 			else
    541      1.1  jtc 				return(NULL);
    542      1.1  jtc 			break;
    543      1.1  jtc 		case OEOL:
    544      1.1  jtc 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    545      1.1  jtc 					(sp < m->endp && *sp == '\n' &&
    546      1.1  jtc 						(m->g->cflags&REG_NEWLINE)) )
    547      1.1  jtc 				{ /* yes */ }
    548      1.1  jtc 			else
    549      1.1  jtc 				return(NULL);
    550      1.1  jtc 			break;
    551      1.1  jtc 		case OBOW:
    552      1.1  jtc 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
    553      1.1  jtc 					(sp < m->endp && *(sp-1) == '\n' &&
    554      1.1  jtc 						(m->g->cflags&REG_NEWLINE)) ||
    555      1.1  jtc 					(sp > m->beginp &&
    556  1.1.1.1  cgd 							!ISWORD(*(sp-1))) ) &&
    557  1.1.1.1  cgd 					(sp < m->endp && ISWORD(*sp)) )
    558      1.1  jtc 				{ /* yes */ }
    559      1.1  jtc 			else
    560      1.1  jtc 				return(NULL);
    561      1.1  jtc 			break;
    562      1.1  jtc 		case OEOW:
    563      1.1  jtc 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
    564      1.1  jtc 					(sp < m->endp && *sp == '\n' &&
    565      1.1  jtc 						(m->g->cflags&REG_NEWLINE)) ||
    566  1.1.1.1  cgd 					(sp < m->endp && !ISWORD(*sp)) ) &&
    567  1.1.1.1  cgd 					(sp > m->beginp && ISWORD(*(sp-1))) )
    568      1.1  jtc 				{ /* yes */ }
    569      1.1  jtc 			else
    570      1.1  jtc 				return(NULL);
    571      1.1  jtc 			break;
    572      1.1  jtc 		case O_QUEST:
    573      1.1  jtc 			break;
    574      1.1  jtc 		case OOR1:	/* matches null but needs to skip */
    575      1.1  jtc 			ss++;
    576      1.1  jtc 			s = m->g->strip[ss];
    577      1.1  jtc 			do {
    578      1.1  jtc 				assert(OP(s) == OOR2);
    579      1.1  jtc 				ss += OPND(s);
    580      1.1  jtc 			} while (OP(s = m->g->strip[ss]) != O_CH);
    581      1.1  jtc 			/* note that the ss++ gets us past the O_CH */
    582      1.1  jtc 			break;
    583      1.1  jtc 		default:	/* have to make a choice */
    584      1.1  jtc 			hard = 1;
    585      1.1  jtc 			break;
    586      1.1  jtc 		}
    587      1.1  jtc 	if (!hard) {		/* that was it! */
    588      1.1  jtc 		if (sp != stop)
    589      1.1  jtc 			return(NULL);
    590      1.1  jtc 		return(sp);
    591      1.1  jtc 	}
    592      1.1  jtc 	ss--;			/* adjust for the for's final increment */
    593      1.1  jtc 
    594      1.1  jtc 	/* the hard stuff */
    595      1.1  jtc 	AT("hard", sp, stop, ss, stopst);
    596      1.1  jtc 	s = m->g->strip[ss];
    597      1.1  jtc 	switch (OP(s)) {
    598      1.1  jtc 	case OBACK_:		/* the vilest depths */
    599      1.1  jtc 		i = OPND(s);
    600  1.1.1.1  cgd 		assert(0 < i && i <= m->g->nsub);
    601      1.1  jtc 		if (m->pmatch[i].rm_eo == -1)
    602      1.1  jtc 			return(NULL);
    603      1.1  jtc 		assert(m->pmatch[i].rm_so != -1);
    604      1.1  jtc 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
    605      1.1  jtc 		assert(stop - m->beginp >= len);
    606      1.1  jtc 		if (sp > stop - len)
    607      1.1  jtc 			return(NULL);	/* not enough left to match */
    608      1.1  jtc 		ssp = m->offp + m->pmatch[i].rm_so;
    609      1.1  jtc 		if (memcmp(sp, ssp, len) != 0)
    610      1.1  jtc 			return(NULL);
    611      1.1  jtc 		while (m->g->strip[ss] != SOP(O_BACK, i))
    612      1.1  jtc 			ss++;
    613      1.1  jtc 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
    614      1.1  jtc 		break;
    615      1.1  jtc 	case OQUEST_:		/* to null or not */
    616      1.1  jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    617      1.1  jtc 		if (dp != NULL)
    618      1.1  jtc 			return(dp);	/* not */
    619      1.1  jtc 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
    620      1.1  jtc 		break;
    621      1.1  jtc 	case OPLUS_:
    622      1.1  jtc 		assert(m->lastpos != NULL);
    623      1.1  jtc 		assert(lev+1 <= m->g->nplus);
    624      1.1  jtc 		m->lastpos[lev+1] = sp;
    625      1.1  jtc 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
    626      1.1  jtc 		break;
    627      1.1  jtc 	case O_PLUS:
    628      1.1  jtc 		if (sp == m->lastpos[lev])	/* last pass matched null */
    629      1.1  jtc 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
    630      1.1  jtc 		/* try another pass */
    631      1.1  jtc 		m->lastpos[lev] = sp;
    632      1.1  jtc 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
    633      1.1  jtc 		if (dp == NULL)
    634      1.1  jtc 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
    635      1.1  jtc 		else
    636      1.1  jtc 			return(dp);
    637      1.1  jtc 		break;
    638      1.1  jtc 	case OCH_:		/* find the right one, if any */
    639      1.1  jtc 		ssub = ss + 1;
    640      1.1  jtc 		esub = ss + OPND(s) - 1;
    641      1.1  jtc 		assert(OP(m->g->strip[esub]) == OOR1);
    642      1.1  jtc 		for (;;) {	/* find first matching branch */
    643      1.1  jtc 			dp = backref(m, sp, stop, ssub, esub, lev);
    644      1.1  jtc 			if (dp != NULL)
    645      1.1  jtc 				return(dp);
    646      1.1  jtc 			/* that one missed, try next one */
    647      1.1  jtc 			if (OP(m->g->strip[esub]) == O_CH)
    648      1.1  jtc 				return(NULL);	/* there is none */
    649      1.1  jtc 			esub++;
    650      1.1  jtc 			assert(OP(m->g->strip[esub]) == OOR2);
    651      1.1  jtc 			ssub = esub + 1;
    652      1.1  jtc 			esub += OPND(m->g->strip[esub]);
    653      1.1  jtc 			if (OP(m->g->strip[esub]) == OOR2)
    654      1.1  jtc 				esub--;
    655      1.1  jtc 			else
    656      1.1  jtc 				assert(OP(m->g->strip[esub]) == O_CH);
    657      1.1  jtc 		}
    658      1.1  jtc 		break;
    659      1.1  jtc 	case OLPAREN:		/* must undo assignment if rest fails */
    660      1.1  jtc 		i = OPND(s);
    661  1.1.1.1  cgd 		assert(0 < i && i <= m->g->nsub);
    662      1.1  jtc 		offsave = m->pmatch[i].rm_so;
    663      1.1  jtc 		m->pmatch[i].rm_so = sp - m->offp;
    664      1.1  jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    665      1.1  jtc 		if (dp != NULL)
    666      1.1  jtc 			return(dp);
    667      1.1  jtc 		m->pmatch[i].rm_so = offsave;
    668      1.1  jtc 		return(NULL);
    669      1.1  jtc 		break;
    670      1.1  jtc 	case ORPAREN:		/* must undo assignment if rest fails */
    671      1.1  jtc 		i = OPND(s);
    672  1.1.1.1  cgd 		assert(0 < i && i <= m->g->nsub);
    673      1.1  jtc 		offsave = m->pmatch[i].rm_eo;
    674      1.1  jtc 		m->pmatch[i].rm_eo = sp - m->offp;
    675      1.1  jtc 		dp = backref(m, sp, stop, ss+1, stopst, lev);
    676      1.1  jtc 		if (dp != NULL)
    677      1.1  jtc 			return(dp);
    678      1.1  jtc 		m->pmatch[i].rm_eo = offsave;
    679      1.1  jtc 		return(NULL);
    680      1.1  jtc 		break;
    681      1.1  jtc 	default:		/* uh oh */
    682      1.1  jtc 		assert(nope);
    683      1.1  jtc 		break;
    684      1.1  jtc 	}
    685      1.1  jtc 
    686      1.1  jtc 	/* "can't happen" */
    687      1.1  jtc 	assert(nope);
    688      1.1  jtc 	/* NOTREACHED */
    689      1.1  jtc }
    690      1.1  jtc 
    691      1.1  jtc /*
    692      1.1  jtc  - fast - step through the string at top speed
    693      1.1  jtc  == static char *fast(register struct match *m, char *start, \
    694      1.1  jtc  ==	char *stop, sopno startst, sopno stopst);
    695      1.1  jtc  */
    696      1.1  jtc static char *			/* where tentative match ended, or NULL */
    697      1.1  jtc fast(m, start, stop, startst, stopst)
    698      1.1  jtc register struct match *m;
    699      1.1  jtc char *start;
    700      1.1  jtc char *stop;
    701      1.1  jtc sopno startst;
    702      1.1  jtc sopno stopst;
    703      1.1  jtc {
    704      1.1  jtc 	register states st = m->st;
    705      1.1  jtc 	register states fresh = m->fresh;
    706      1.1  jtc 	register states tmp = m->tmp;
    707      1.1  jtc 	register char *p = start;
    708      1.1  jtc 	register int c = (start == m->beginp) ? OUT : *(start-1);
    709      1.1  jtc 	register int lastc;	/* previous c */
    710      1.1  jtc 	register int flagch;
    711      1.1  jtc 	register int i;
    712      1.1  jtc 	register char *coldp;	/* last p after which no match was underway */
    713      1.1  jtc 
    714      1.1  jtc 	CLEAR(st);
    715      1.1  jtc 	SET1(st, startst);
    716      1.1  jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    717      1.1  jtc 	ASSIGN(fresh, st);
    718      1.1  jtc 	SP("start", st, *p);
    719      1.1  jtc 	coldp = NULL;
    720      1.1  jtc 	for (;;) {
    721      1.1  jtc 		/* next character */
    722      1.1  jtc 		lastc = c;
    723      1.1  jtc 		c = (p == m->endp) ? OUT : *p;
    724      1.1  jtc 		if (EQ(st, fresh))
    725      1.1  jtc 			coldp = p;
    726      1.1  jtc 
    727      1.1  jtc 		/* is there an EOL and/or BOL between lastc and c? */
    728      1.1  jtc 		flagch = '\0';
    729      1.1  jtc 		i = 0;
    730      1.1  jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    731      1.1  jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    732      1.1  jtc 			flagch = BOL;
    733      1.1  jtc 			i = m->g->nbol;
    734      1.1  jtc 		}
    735      1.1  jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    736      1.1  jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    737      1.1  jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    738      1.1  jtc 			i += m->g->neol;
    739      1.1  jtc 		}
    740      1.1  jtc 		if (i != 0) {
    741      1.1  jtc 			for (; i > 0; i--)
    742      1.1  jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    743      1.1  jtc 			SP("boleol", st, c);
    744      1.1  jtc 		}
    745      1.1  jtc 
    746      1.1  jtc 		/* how about a word boundary? */
    747  1.1.1.1  cgd 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    748  1.1.1.1  cgd 					(c != OUT && ISWORD(c)) ) {
    749      1.1  jtc 			flagch = BOW;
    750      1.1  jtc 		}
    751  1.1.1.1  cgd 		if ( (lastc != OUT && ISWORD(lastc)) &&
    752  1.1.1.1  cgd 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    753      1.1  jtc 			flagch = EOW;
    754      1.1  jtc 		}
    755      1.1  jtc 		if (flagch == BOW || flagch == EOW) {
    756      1.1  jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    757      1.1  jtc 			SP("boweow", st, c);
    758      1.1  jtc 		}
    759      1.1  jtc 
    760      1.1  jtc 		/* are we done? */
    761      1.1  jtc 		if (ISSET(st, stopst) || p == stop)
    762      1.1  jtc 			break;		/* NOTE BREAK OUT */
    763      1.1  jtc 
    764      1.1  jtc 		/* no, we must deal with this character */
    765      1.1  jtc 		ASSIGN(tmp, st);
    766      1.1  jtc 		ASSIGN(st, fresh);
    767      1.1  jtc 		assert(c != OUT);
    768      1.1  jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    769      1.1  jtc 		SP("aft", st, c);
    770      1.1  jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    771      1.1  jtc 		p++;
    772      1.1  jtc 	}
    773      1.1  jtc 
    774      1.1  jtc 	assert(coldp != NULL);
    775      1.1  jtc 	m->coldp = coldp;
    776      1.1  jtc 	if (ISSET(st, stopst))
    777      1.1  jtc 		return(p+1);
    778      1.1  jtc 	else
    779      1.1  jtc 		return(NULL);
    780      1.1  jtc }
    781      1.1  jtc 
    782      1.1  jtc /*
    783      1.1  jtc  - slow - step through the string more deliberately
    784      1.1  jtc  == static char *slow(register struct match *m, char *start, \
    785      1.1  jtc  ==	char *stop, sopno startst, sopno stopst);
    786      1.1  jtc  */
    787      1.1  jtc static char *			/* where it ended */
    788      1.1  jtc slow(m, start, stop, startst, stopst)
    789      1.1  jtc register struct match *m;
    790      1.1  jtc char *start;
    791      1.1  jtc char *stop;
    792      1.1  jtc sopno startst;
    793      1.1  jtc sopno stopst;
    794      1.1  jtc {
    795      1.1  jtc 	register states st = m->st;
    796      1.1  jtc 	register states empty = m->empty;
    797      1.1  jtc 	register states tmp = m->tmp;
    798      1.1  jtc 	register char *p = start;
    799      1.1  jtc 	register int c = (start == m->beginp) ? OUT : *(start-1);
    800      1.1  jtc 	register int lastc;	/* previous c */
    801      1.1  jtc 	register int flagch;
    802      1.1  jtc 	register int i;
    803      1.1  jtc 	register char *matchp;	/* last p at which a match ended */
    804      1.1  jtc 
    805      1.1  jtc 	AT("slow", start, stop, startst, stopst);
    806      1.1  jtc 	CLEAR(st);
    807      1.1  jtc 	SET1(st, startst);
    808      1.1  jtc 	SP("sstart", st, *p);
    809      1.1  jtc 	st = step(m->g, startst, stopst, st, NOTHING, st);
    810      1.1  jtc 	matchp = NULL;
    811      1.1  jtc 	for (;;) {
    812      1.1  jtc 		/* next character */
    813      1.1  jtc 		lastc = c;
    814      1.1  jtc 		c = (p == m->endp) ? OUT : *p;
    815      1.1  jtc 
    816      1.1  jtc 		/* is there an EOL and/or BOL between lastc and c? */
    817      1.1  jtc 		flagch = '\0';
    818      1.1  jtc 		i = 0;
    819      1.1  jtc 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
    820      1.1  jtc 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
    821      1.1  jtc 			flagch = BOL;
    822      1.1  jtc 			i = m->g->nbol;
    823      1.1  jtc 		}
    824      1.1  jtc 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
    825      1.1  jtc 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
    826      1.1  jtc 			flagch = (flagch == BOL) ? BOLEOL : EOL;
    827      1.1  jtc 			i += m->g->neol;
    828      1.1  jtc 		}
    829      1.1  jtc 		if (i != 0) {
    830      1.1  jtc 			for (; i > 0; i--)
    831      1.1  jtc 				st = step(m->g, startst, stopst, st, flagch, st);
    832      1.1  jtc 			SP("sboleol", st, c);
    833      1.1  jtc 		}
    834      1.1  jtc 
    835      1.1  jtc 		/* how about a word boundary? */
    836  1.1.1.1  cgd 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
    837  1.1.1.1  cgd 					(c != OUT && ISWORD(c)) ) {
    838      1.1  jtc 			flagch = BOW;
    839      1.1  jtc 		}
    840  1.1.1.1  cgd 		if ( (lastc != OUT && ISWORD(lastc)) &&
    841  1.1.1.1  cgd 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
    842      1.1  jtc 			flagch = EOW;
    843      1.1  jtc 		}
    844      1.1  jtc 		if (flagch == BOW || flagch == EOW) {
    845      1.1  jtc 			st = step(m->g, startst, stopst, st, flagch, st);
    846      1.1  jtc 			SP("sboweow", st, c);
    847      1.1  jtc 		}
    848      1.1  jtc 
    849      1.1  jtc 		/* are we done? */
    850      1.1  jtc 		if (ISSET(st, stopst))
    851      1.1  jtc 			matchp = p;
    852      1.1  jtc 		if (EQ(st, empty) || p == stop)
    853      1.1  jtc 			break;		/* NOTE BREAK OUT */
    854      1.1  jtc 
    855      1.1  jtc 		/* no, we must deal with this character */
    856      1.1  jtc 		ASSIGN(tmp, st);
    857      1.1  jtc 		ASSIGN(st, empty);
    858      1.1  jtc 		assert(c != OUT);
    859      1.1  jtc 		st = step(m->g, startst, stopst, tmp, c, st);
    860      1.1  jtc 		SP("saft", st, c);
    861      1.1  jtc 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
    862      1.1  jtc 		p++;
    863      1.1  jtc 	}
    864      1.1  jtc 
    865      1.1  jtc 	return(matchp);
    866      1.1  jtc }
    867      1.1  jtc 
    868      1.1  jtc 
    869      1.1  jtc /*
    870      1.1  jtc  - step - map set of states reachable before char to set reachable after
    871  1.1.1.1  cgd  == static states step(register struct re_guts *g, sopno start, sopno stop, \
    872      1.1  jtc  ==	register states bef, int ch, register states aft);
    873      1.1  jtc  == #define	BOL	(OUT+1)
    874      1.1  jtc  == #define	EOL	(BOL+1)
    875      1.1  jtc  == #define	BOLEOL	(BOL+2)
    876      1.1  jtc  == #define	NOTHING	(BOL+3)
    877      1.1  jtc  == #define	BOW	(BOL+4)
    878      1.1  jtc  == #define	EOW	(BOL+5)
    879      1.1  jtc  == #define	CODEMAX	(BOL+5)		// highest code used
    880      1.1  jtc  == #define	NONCHAR(c)	((c) > CHAR_MAX)
    881      1.1  jtc  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
    882      1.1  jtc  */
    883      1.1  jtc static states
    884      1.1  jtc step(g, start, stop, bef, ch, aft)
    885      1.1  jtc register struct re_guts *g;
    886  1.1.1.1  cgd sopno start;			/* start state within strip */
    887  1.1.1.1  cgd sopno stop;			/* state after stop state within strip */
    888      1.1  jtc register states bef;		/* states reachable before */
    889      1.1  jtc int ch;				/* character or NONCHAR code */
    890      1.1  jtc register states aft;		/* states already known reachable after */
    891      1.1  jtc {
    892      1.1  jtc 	register cset *cs;
    893      1.1  jtc 	register sop s;
    894      1.1  jtc 	register sopno pc;
    895      1.1  jtc 	register onestate here;		/* note, macros know this name */
    896      1.1  jtc 	register sopno look;
    897      1.1  jtc 	register int i;
    898      1.1  jtc 
    899      1.1  jtc 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
    900      1.1  jtc 		s = g->strip[pc];
    901      1.1  jtc 		switch (OP(s)) {
    902      1.1  jtc 		case OEND:
    903      1.1  jtc 			assert(pc == stop-1);
    904      1.1  jtc 			break;
    905      1.1  jtc 		case OCHAR:
    906      1.1  jtc 			/* only characters can match */
    907      1.1  jtc 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
    908      1.1  jtc 			if (ch == (char)OPND(s))
    909      1.1  jtc 				FWD(aft, bef, 1);
    910      1.1  jtc 			break;
    911      1.1  jtc 		case OBOL:
    912      1.1  jtc 			if (ch == BOL || ch == BOLEOL)
    913      1.1  jtc 				FWD(aft, bef, 1);
    914      1.1  jtc 			break;
    915      1.1  jtc 		case OEOL:
    916      1.1  jtc 			if (ch == EOL || ch == BOLEOL)
    917      1.1  jtc 				FWD(aft, bef, 1);
    918      1.1  jtc 			break;
    919      1.1  jtc 		case OBOW:
    920      1.1  jtc 			if (ch == BOW)
    921      1.1  jtc 				FWD(aft, bef, 1);
    922      1.1  jtc 			break;
    923      1.1  jtc 		case OEOW:
    924      1.1  jtc 			if (ch == EOW)
    925      1.1  jtc 				FWD(aft, bef, 1);
    926      1.1  jtc 			break;
    927      1.1  jtc 		case OANY:
    928      1.1  jtc 			if (!NONCHAR(ch))
    929      1.1  jtc 				FWD(aft, bef, 1);
    930      1.1  jtc 			break;
    931      1.1  jtc 		case OANYOF:
    932      1.1  jtc 			cs = &g->sets[OPND(s)];
    933      1.1  jtc 			if (!NONCHAR(ch) && CHIN(cs, ch))
    934      1.1  jtc 				FWD(aft, bef, 1);
    935      1.1  jtc 			break;
    936      1.1  jtc 		case OBACK_:		/* ignored here */
    937      1.1  jtc 		case O_BACK:
    938      1.1  jtc 			FWD(aft, aft, 1);
    939      1.1  jtc 			break;
    940      1.1  jtc 		case OPLUS_:		/* forward, this is just an empty */
    941      1.1  jtc 			FWD(aft, aft, 1);
    942      1.1  jtc 			break;
    943      1.1  jtc 		case O_PLUS:		/* both forward and back */
    944      1.1  jtc 			FWD(aft, aft, 1);
    945      1.1  jtc 			i = ISSETBACK(aft, OPND(s));
    946      1.1  jtc 			BACK(aft, aft, OPND(s));
    947      1.1  jtc 			if (!i && ISSETBACK(aft, OPND(s))) {
    948      1.1  jtc 				/* oho, must reconsider loop body */
    949      1.1  jtc 				pc -= OPND(s) + 1;
    950      1.1  jtc 				INIT(here, pc);
    951      1.1  jtc 			}
    952      1.1  jtc 			break;
    953      1.1  jtc 		case OQUEST_:		/* two branches, both forward */
    954      1.1  jtc 			FWD(aft, aft, 1);
    955      1.1  jtc 			FWD(aft, aft, OPND(s));
    956      1.1  jtc 			break;
    957      1.1  jtc 		case O_QUEST:		/* just an empty */
    958      1.1  jtc 			FWD(aft, aft, 1);
    959      1.1  jtc 			break;
    960      1.1  jtc 		case OLPAREN:		/* not significant here */
    961      1.1  jtc 		case ORPAREN:
    962      1.1  jtc 			FWD(aft, aft, 1);
    963      1.1  jtc 			break;
    964      1.1  jtc 		case OCH_:		/* mark the first two branches */
    965      1.1  jtc 			FWD(aft, aft, 1);
    966      1.1  jtc 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
    967      1.1  jtc 			FWD(aft, aft, OPND(s));
    968      1.1  jtc 			break;
    969      1.1  jtc 		case OOR1:		/* done a branch, find the O_CH */
    970      1.1  jtc 			if (ISSTATEIN(aft, here)) {
    971      1.1  jtc 				for (look = 1;
    972      1.1  jtc 						OP(s = g->strip[pc+look]) != O_CH;
    973      1.1  jtc 						look += OPND(s))
    974      1.1  jtc 					assert(OP(s) == OOR2);
    975      1.1  jtc 				FWD(aft, aft, look);
    976      1.1  jtc 			}
    977      1.1  jtc 			break;
    978      1.1  jtc 		case OOR2:		/* propagate OCH_'s marking */
    979      1.1  jtc 			FWD(aft, aft, 1);
    980      1.1  jtc 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
    981      1.1  jtc 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
    982      1.1  jtc 				FWD(aft, aft, OPND(s));
    983      1.1  jtc 			}
    984      1.1  jtc 			break;
    985      1.1  jtc 		case O_CH:		/* just empty */
    986      1.1  jtc 			FWD(aft, aft, 1);
    987      1.1  jtc 			break;
    988      1.1  jtc 		default:		/* ooooops... */
    989      1.1  jtc 			assert(nope);
    990      1.1  jtc 			break;
    991      1.1  jtc 		}
    992      1.1  jtc 	}
    993      1.1  jtc 
    994      1.1  jtc 	return(aft);
    995      1.1  jtc }
    996      1.1  jtc 
    997      1.1  jtc #ifdef REDEBUG
    998      1.1  jtc /*
    999      1.1  jtc  - print - print a set of states
   1000      1.1  jtc  == #ifdef REDEBUG
   1001      1.1  jtc  == static void print(struct match *m, char *caption, states st, \
   1002      1.1  jtc  ==	int ch, FILE *d);
   1003      1.1  jtc  == #endif
   1004      1.1  jtc  */
   1005      1.1  jtc static void
   1006      1.1  jtc print(m, caption, st, ch, d)
   1007      1.1  jtc struct match *m;
   1008      1.1  jtc char *caption;
   1009      1.1  jtc states st;
   1010      1.1  jtc int ch;
   1011      1.1  jtc FILE *d;
   1012      1.1  jtc {
   1013      1.1  jtc 	register struct re_guts *g = m->g;
   1014      1.1  jtc 	register int i;
   1015      1.1  jtc 	register int first = 1;
   1016      1.1  jtc 
   1017      1.1  jtc 	if (!(m->eflags&REG_TRACE))
   1018      1.1  jtc 		return;
   1019      1.1  jtc 
   1020      1.1  jtc 	fprintf(d, "%s", caption);
   1021      1.1  jtc 	if (ch != '\0')
   1022      1.1  jtc 		fprintf(d, " %s", pchar(ch));
   1023      1.1  jtc 	for (i = 0; i < g->nstates; i++)
   1024      1.1  jtc 		if (ISSET(st, i)) {
   1025      1.1  jtc 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
   1026      1.1  jtc 			first = 0;
   1027      1.1  jtc 		}
   1028      1.1  jtc 	fprintf(d, "\n");
   1029      1.1  jtc }
   1030      1.1  jtc 
   1031      1.1  jtc /*
   1032      1.1  jtc  - at - print current situation
   1033      1.1  jtc  == #ifdef REDEBUG
   1034      1.1  jtc  == static void at(struct match *m, char *title, char *start, char *stop, \
   1035      1.1  jtc  ==						sopno startst, sopno stopst);
   1036      1.1  jtc  == #endif
   1037      1.1  jtc  */
   1038      1.1  jtc static void
   1039      1.1  jtc at(m, title, start, stop, startst, stopst)
   1040      1.1  jtc struct match *m;
   1041      1.1  jtc char *title;
   1042      1.1  jtc char *start;
   1043      1.1  jtc char *stop;
   1044      1.1  jtc sopno startst;
   1045      1.1  jtc sopno stopst;
   1046      1.1  jtc {
   1047      1.1  jtc 	if (!(m->eflags&REG_TRACE))
   1048      1.1  jtc 		return;
   1049      1.1  jtc 
   1050      1.1  jtc 	printf("%s %s-", title, pchar(*start));
   1051      1.1  jtc 	printf("%s ", pchar(*stop));
   1052      1.1  jtc 	printf("%ld-%ld\n", (long)startst, (long)stopst);
   1053      1.1  jtc }
   1054      1.1  jtc 
   1055      1.1  jtc #ifndef PCHARDONE
   1056      1.1  jtc #define	PCHARDONE	/* never again */
   1057      1.1  jtc /*
   1058      1.1  jtc  - pchar - make a character printable
   1059      1.1  jtc  == #ifdef REDEBUG
   1060      1.1  jtc  == static char *pchar(int ch);
   1061      1.1  jtc  == #endif
   1062      1.1  jtc  *
   1063      1.1  jtc  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
   1064      1.1  jtc  * duplicate here avoids having a debugging-capable regexec.o tied to
   1065      1.1  jtc  * a matching debug.o, and this is convenient.  It all disappears in
   1066      1.1  jtc  * the non-debug compilation anyway, so it doesn't matter much.
   1067      1.1  jtc  */
   1068      1.1  jtc static char *			/* -> representation */
   1069      1.1  jtc pchar(ch)
   1070      1.1  jtc int ch;
   1071      1.1  jtc {
   1072      1.1  jtc 	static char pbuf[10];
   1073      1.1  jtc 
   1074      1.1  jtc 	if (isprint(ch) || ch == ' ')
   1075      1.1  jtc 		sprintf(pbuf, "%c", ch);
   1076      1.1  jtc 	else
   1077      1.1  jtc 		sprintf(pbuf, "\\%o", ch);
   1078      1.1  jtc 	return(pbuf);
   1079      1.1  jtc }
   1080      1.1  jtc #endif
   1081      1.1  jtc #endif
   1082      1.1  jtc 
   1083      1.1  jtc #undef	matcher
   1084      1.1  jtc #undef	fast
   1085      1.1  jtc #undef	slow
   1086      1.1  jtc #undef	dissect
   1087      1.1  jtc #undef	backref
   1088      1.1  jtc #undef	step
   1089      1.1  jtc #undef	print
   1090      1.1  jtc #undef	at
   1091      1.1  jtc #undef	match
   1092