engine.c revision 1.20 1 1.20 junyoung /* $NetBSD: engine.c,v 1.20 2007/02/08 05:07:23 junyoung Exp $ */
2 1.5 cgd
3 1.4 cgd /*-
4 1.4 cgd * Copyright (c) 1992, 1993, 1994
5 1.4 cgd * The Regents of the University of California. All rights reserved.
6 1.15 agc *
7 1.15 agc * This code is derived from software contributed to Berkeley by
8 1.15 agc * Henry Spencer.
9 1.15 agc *
10 1.15 agc * Redistribution and use in source and binary forms, with or without
11 1.15 agc * modification, are permitted provided that the following conditions
12 1.15 agc * are met:
13 1.15 agc * 1. Redistributions of source code must retain the above copyright
14 1.15 agc * notice, this list of conditions and the following disclaimer.
15 1.15 agc * 2. Redistributions in binary form must reproduce the above copyright
16 1.15 agc * notice, this list of conditions and the following disclaimer in the
17 1.15 agc * documentation and/or other materials provided with the distribution.
18 1.15 agc * 3. Neither the name of the University nor the names of its contributors
19 1.15 agc * may be used to endorse or promote products derived from this software
20 1.15 agc * without specific prior written permission.
21 1.15 agc *
22 1.15 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 1.15 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 1.15 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 1.15 agc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 1.15 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 1.15 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 1.15 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 1.15 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 1.15 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 1.15 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 1.15 agc * SUCH DAMAGE.
33 1.15 agc *
34 1.15 agc * @(#)engine.c 8.5 (Berkeley) 3/20/94
35 1.15 agc */
36 1.15 agc
37 1.15 agc /*-
38 1.15 agc * Copyright (c) 1992, 1993, 1994 Henry Spencer.
39 1.4 cgd *
40 1.4 cgd * This code is derived from software contributed to Berkeley by
41 1.4 cgd * Henry Spencer.
42 1.4 cgd *
43 1.4 cgd * Redistribution and use in source and binary forms, with or without
44 1.4 cgd * modification, are permitted provided that the following conditions
45 1.4 cgd * are met:
46 1.4 cgd * 1. Redistributions of source code must retain the above copyright
47 1.4 cgd * notice, this list of conditions and the following disclaimer.
48 1.4 cgd * 2. Redistributions in binary form must reproduce the above copyright
49 1.4 cgd * notice, this list of conditions and the following disclaimer in the
50 1.4 cgd * documentation and/or other materials provided with the distribution.
51 1.4 cgd * 3. All advertising materials mentioning features or use of this software
52 1.4 cgd * must display the following acknowledgement:
53 1.4 cgd * This product includes software developed by the University of
54 1.4 cgd * California, Berkeley and its contributors.
55 1.4 cgd * 4. Neither the name of the University nor the names of its contributors
56 1.4 cgd * may be used to endorse or promote products derived from this software
57 1.4 cgd * without specific prior written permission.
58 1.4 cgd *
59 1.4 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 1.4 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 1.4 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 1.4 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 1.4 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 1.4 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 1.4 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 1.4 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 1.4 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 1.4 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 1.4 cgd * SUCH DAMAGE.
70 1.4 cgd *
71 1.4 cgd * @(#)engine.c 8.5 (Berkeley) 3/20/94
72 1.4 cgd */
73 1.4 cgd
74 1.1 jtc /*
75 1.1 jtc * The matching engine and friends. This file is #included by regexec.c
76 1.1 jtc * after suitable #defines of a variety of macros used herein, so that
77 1.1 jtc * different state representations can be used without duplicating masses
78 1.1 jtc * of code.
79 1.1 jtc */
80 1.1 jtc
81 1.1 jtc #ifdef SNAMES
82 1.1 jtc #define matcher smatcher
83 1.1 jtc #define fast sfast
84 1.1 jtc #define slow sslow
85 1.1 jtc #define dissect sdissect
86 1.1 jtc #define backref sbackref
87 1.1 jtc #define step sstep
88 1.1 jtc #define print sprint
89 1.1 jtc #define at sat
90 1.1 jtc #define match smat
91 1.14 christos #define nope snope
92 1.1 jtc #endif
93 1.1 jtc #ifdef LNAMES
94 1.1 jtc #define matcher lmatcher
95 1.1 jtc #define fast lfast
96 1.1 jtc #define slow lslow
97 1.1 jtc #define dissect ldissect
98 1.1 jtc #define backref lbackref
99 1.1 jtc #define step lstep
100 1.1 jtc #define print lprint
101 1.1 jtc #define at lat
102 1.1 jtc #define match lmat
103 1.14 christos #define nope lnope
104 1.1 jtc #endif
105 1.1 jtc
106 1.1 jtc /* another structure passed up and down to avoid zillions of parameters */
107 1.1 jtc struct match {
108 1.1 jtc struct re_guts *g;
109 1.1 jtc int eflags;
110 1.1 jtc regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
111 1.19 yamt const char *offp; /* offsets work from here */
112 1.19 yamt const char *beginp; /* start of string -- virtual NUL precedes */
113 1.19 yamt const char *endp; /* end of string -- virtual NUL here */
114 1.19 yamt const char *coldp; /* can be no match starting before here */
115 1.19 yamt const char **lastpos; /* [nplus+1] */
116 1.1 jtc STATEVARS;
117 1.1 jtc states st; /* current states */
118 1.1 jtc states fresh; /* states for a fresh start */
119 1.1 jtc states tmp; /* temporary */
120 1.1 jtc states empty; /* empty set of states */
121 1.1 jtc };
122 1.1 jtc
123 1.4 cgd /* ========= begin header generated by ./mkh ========= */
124 1.4 cgd #ifdef __cplusplus
125 1.4 cgd extern "C" {
126 1.4 cgd #endif
127 1.4 cgd
128 1.4 cgd /* === engine.c === */
129 1.20 junyoung static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
130 1.20 junyoung static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
131 1.20 junyoung static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev);
132 1.20 junyoung static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
133 1.20 junyoung static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
134 1.20 junyoung static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
135 1.4 cgd #define BOL (OUT+1)
136 1.4 cgd #define EOL (BOL+1)
137 1.4 cgd #define BOLEOL (BOL+2)
138 1.4 cgd #define NOTHING (BOL+3)
139 1.4 cgd #define BOW (BOL+4)
140 1.4 cgd #define EOW (BOL+5)
141 1.4 cgd #define CODEMAX (BOL+5) /* highest code used */
142 1.4 cgd #define NONCHAR(c) ((c) > CHAR_MAX)
143 1.4 cgd #define NNONCHAR (CODEMAX-CHAR_MAX)
144 1.4 cgd #ifdef REDEBUG
145 1.20 junyoung static void print(struct match *m, char *caption, states st, int ch, FILE *d);
146 1.4 cgd #endif
147 1.4 cgd #ifdef REDEBUG
148 1.20 junyoung static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
149 1.4 cgd #endif
150 1.4 cgd #ifdef REDEBUG
151 1.20 junyoung static char *pchar(int ch);
152 1.4 cgd #endif
153 1.4 cgd
154 1.4 cgd #ifdef __cplusplus
155 1.4 cgd }
156 1.4 cgd #endif
157 1.4 cgd /* ========= end header generated by ./mkh ========= */
158 1.1 jtc
159 1.1 jtc #ifdef REDEBUG
160 1.1 jtc #define SP(t, s, c) print(m, t, s, c, stdout)
161 1.1 jtc #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
162 1.1 jtc #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); }
163 1.14 christos static int nope = 0;
164 1.1 jtc #else
165 1.1 jtc #define SP(t, s, c) /* nothing */
166 1.1 jtc #define AT(t, p1, p2, s1, s2) /* nothing */
167 1.1 jtc #define NOTE(s) /* nothing */
168 1.1 jtc #endif
169 1.1 jtc
170 1.1 jtc /*
171 1.1 jtc - matcher - the actual matching engine
172 1.8 perry == static int matcher(struct re_guts *g, char *string, \
173 1.1 jtc == size_t nmatch, regmatch_t pmatch[], int eflags);
174 1.1 jtc */
175 1.1 jtc static int /* 0 success, REG_NOMATCH failure */
176 1.1 jtc matcher(g, string, nmatch, pmatch, eflags)
177 1.8 perry struct re_guts *g;
178 1.19 yamt const char *string;
179 1.1 jtc size_t nmatch;
180 1.1 jtc regmatch_t pmatch[];
181 1.1 jtc int eflags;
182 1.1 jtc {
183 1.19 yamt const char *endp;
184 1.8 perry int i;
185 1.1 jtc struct match mv;
186 1.8 perry struct match *m = &mv;
187 1.19 yamt const char *dp;
188 1.8 perry const sopno gf = g->firststate+1; /* +1 for OEND */
189 1.8 perry const sopno gl = g->laststate;
190 1.19 yamt const char *start;
191 1.19 yamt const char *stop;
192 1.13 christos int error = 0;
193 1.1 jtc
194 1.12 lukem _DIAGASSERT(g != NULL);
195 1.12 lukem _DIAGASSERT(string != NULL);
196 1.12 lukem /* pmatch checked below */
197 1.12 lukem
198 1.1 jtc /* simplify the situation where possible */
199 1.1 jtc if (g->cflags®_NOSUB)
200 1.1 jtc nmatch = 0;
201 1.1 jtc if (eflags®_STARTEND) {
202 1.12 lukem _DIAGASSERT(pmatch != NULL);
203 1.11 christos start = string + (size_t)pmatch[0].rm_so;
204 1.11 christos stop = string + (size_t)pmatch[0].rm_eo;
205 1.1 jtc } else {
206 1.1 jtc start = string;
207 1.1 jtc stop = start + strlen(start);
208 1.1 jtc }
209 1.1 jtc if (stop < start)
210 1.1 jtc return(REG_INVARG);
211 1.1 jtc
212 1.1 jtc /* prescreening; this does wonders for this rather slow code */
213 1.1 jtc if (g->must != NULL) {
214 1.1 jtc for (dp = start; dp < stop; dp++)
215 1.1 jtc if (*dp == g->must[0] && stop - dp >= g->mlen &&
216 1.1 jtc memcmp(dp, g->must, (size_t)g->mlen) == 0)
217 1.1 jtc break;
218 1.1 jtc if (dp == stop) /* we didn't find g->must */
219 1.1 jtc return(REG_NOMATCH);
220 1.1 jtc }
221 1.1 jtc
222 1.1 jtc /* match struct setup */
223 1.1 jtc m->g = g;
224 1.1 jtc m->eflags = eflags;
225 1.1 jtc m->pmatch = NULL;
226 1.1 jtc m->lastpos = NULL;
227 1.1 jtc m->offp = string;
228 1.1 jtc m->beginp = start;
229 1.1 jtc m->endp = stop;
230 1.1 jtc STATESETUP(m, 4);
231 1.1 jtc SETUP(m->st);
232 1.1 jtc SETUP(m->fresh);
233 1.1 jtc SETUP(m->tmp);
234 1.1 jtc SETUP(m->empty);
235 1.1 jtc CLEAR(m->empty);
236 1.1 jtc
237 1.1 jtc /* this loop does only one repetition except for backrefs */
238 1.1 jtc for (;;) {
239 1.1 jtc endp = fast(m, start, stop, gf, gl);
240 1.1 jtc if (endp == NULL) { /* a miss */
241 1.13 christos error = REG_NOMATCH;
242 1.13 christos goto done;
243 1.1 jtc }
244 1.1 jtc if (nmatch == 0 && !g->backrefs)
245 1.1 jtc break; /* no further info needed */
246 1.1 jtc
247 1.1 jtc /* where? */
248 1.1 jtc assert(m->coldp != NULL);
249 1.1 jtc for (;;) {
250 1.1 jtc NOTE("finding start");
251 1.1 jtc endp = slow(m, m->coldp, stop, gf, gl);
252 1.1 jtc if (endp != NULL)
253 1.1 jtc break;
254 1.1 jtc assert(m->coldp < m->endp);
255 1.1 jtc m->coldp++;
256 1.1 jtc }
257 1.1 jtc if (nmatch == 1 && !g->backrefs)
258 1.1 jtc break; /* no further info needed */
259 1.1 jtc
260 1.1 jtc /* oh my, he wants the subexpressions... */
261 1.1 jtc if (m->pmatch == NULL)
262 1.1 jtc m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
263 1.1 jtc sizeof(regmatch_t));
264 1.1 jtc if (m->pmatch == NULL) {
265 1.13 christos error = REG_ESPACE;
266 1.13 christos goto done;
267 1.1 jtc }
268 1.1 jtc for (i = 1; i <= m->g->nsub; i++)
269 1.10 drochner m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
270 1.1 jtc if (!g->backrefs && !(m->eflags®_BACKR)) {
271 1.1 jtc NOTE("dissecting");
272 1.1 jtc dp = dissect(m, m->coldp, endp, gf, gl);
273 1.1 jtc } else {
274 1.1 jtc if (g->nplus > 0 && m->lastpos == NULL)
275 1.19 yamt m->lastpos = malloc((g->nplus+1) *
276 1.19 yamt sizeof(const char *));
277 1.1 jtc if (g->nplus > 0 && m->lastpos == NULL) {
278 1.13 christos error = REG_ESPACE;
279 1.13 christos goto done;
280 1.1 jtc }
281 1.1 jtc NOTE("backref dissect");
282 1.1 jtc dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
283 1.1 jtc }
284 1.1 jtc if (dp != NULL)
285 1.1 jtc break;
286 1.1 jtc
287 1.1 jtc /* uh-oh... we couldn't find a subexpression-level match */
288 1.1 jtc assert(g->backrefs); /* must be back references doing it */
289 1.1 jtc assert(g->nplus == 0 || m->lastpos != NULL);
290 1.1 jtc for (;;) {
291 1.1 jtc if (dp != NULL || endp <= m->coldp)
292 1.1 jtc break; /* defeat */
293 1.1 jtc NOTE("backoff");
294 1.1 jtc endp = slow(m, m->coldp, endp-1, gf, gl);
295 1.1 jtc if (endp == NULL)
296 1.1 jtc break; /* defeat */
297 1.1 jtc /* try it on a shorter possibility */
298 1.1 jtc #ifndef NDEBUG
299 1.1 jtc for (i = 1; i <= m->g->nsub; i++) {
300 1.10 drochner assert(m->pmatch[i].rm_so == (regoff_t)-1);
301 1.10 drochner assert(m->pmatch[i].rm_eo == (regoff_t)-1);
302 1.1 jtc }
303 1.1 jtc #endif
304 1.1 jtc NOTE("backoff dissect");
305 1.1 jtc dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
306 1.1 jtc }
307 1.1 jtc assert(dp == NULL || dp == endp);
308 1.1 jtc if (dp != NULL) /* found a shorter one */
309 1.1 jtc break;
310 1.1 jtc
311 1.1 jtc /* despite initial appearances, there is no match here */
312 1.1 jtc NOTE("false alarm");
313 1.1 jtc start = m->coldp + 1; /* recycle starting later */
314 1.1 jtc assert(start <= stop);
315 1.1 jtc }
316 1.1 jtc
317 1.1 jtc /* fill in the details if requested */
318 1.1 jtc if (nmatch > 0) {
319 1.12 lukem _DIAGASSERT(pmatch != NULL);
320 1.1 jtc pmatch[0].rm_so = m->coldp - m->offp;
321 1.1 jtc pmatch[0].rm_eo = endp - m->offp;
322 1.1 jtc }
323 1.1 jtc if (nmatch > 1) {
324 1.1 jtc assert(m->pmatch != NULL);
325 1.1 jtc for (i = 1; i < nmatch; i++)
326 1.1 jtc if (i <= m->g->nsub)
327 1.1 jtc pmatch[i] = m->pmatch[i];
328 1.1 jtc else {
329 1.10 drochner pmatch[i].rm_so = (regoff_t)-1;
330 1.10 drochner pmatch[i].rm_eo = (regoff_t)-1;
331 1.1 jtc }
332 1.1 jtc }
333 1.1 jtc
334 1.13 christos done:
335 1.13 christos if (m->pmatch != NULL) {
336 1.9 christos free(m->pmatch);
337 1.13 christos m->pmatch = NULL;
338 1.13 christos }
339 1.13 christos if (m->lastpos != NULL) {
340 1.9 christos free(m->lastpos);
341 1.13 christos m->lastpos = NULL;
342 1.13 christos }
343 1.1 jtc STATETEARDOWN(m);
344 1.13 christos return error;
345 1.1 jtc }
346 1.1 jtc
347 1.1 jtc /*
348 1.1 jtc - dissect - figure out what matched what, no back references
349 1.19 yamt == static const char *dissect(struct match *m, const char *start, \
350 1.19 yamt == const char *stop, sopno startst, sopno stopst);
351 1.1 jtc */
352 1.19 yamt static const char * /* == stop (success) always */
353 1.1 jtc dissect(m, start, stop, startst, stopst)
354 1.8 perry struct match *m;
355 1.19 yamt const char *start;
356 1.19 yamt const char *stop;
357 1.1 jtc sopno startst;
358 1.1 jtc sopno stopst;
359 1.1 jtc {
360 1.8 perry int i;
361 1.8 perry sopno ss; /* start sop of current subRE */
362 1.8 perry sopno es; /* end sop of current subRE */
363 1.19 yamt const char *sp; /* start of string matched by it */
364 1.19 yamt const char *stp; /* string matched by it cannot pass here */
365 1.19 yamt const char *rest; /* start of rest of string */
366 1.19 yamt const char *tail; /* string unmatched by rest of RE */
367 1.8 perry sopno ssub; /* start sop of subsubRE */
368 1.8 perry sopno esub; /* end sop of subsubRE */
369 1.19 yamt const char *ssp; /* start of string matched by subsubRE */
370 1.19 yamt const char *sep; /* end of string matched by subsubRE */
371 1.19 yamt const char *oldssp; /* previous ssp */
372 1.9 christos #ifndef NDEBUG
373 1.19 yamt const char *dp;
374 1.9 christos #endif
375 1.1 jtc
376 1.12 lukem _DIAGASSERT(m != NULL);
377 1.12 lukem _DIAGASSERT(start != NULL);
378 1.12 lukem _DIAGASSERT(stop != NULL);
379 1.12 lukem
380 1.1 jtc AT("diss", start, stop, startst, stopst);
381 1.1 jtc sp = start;
382 1.1 jtc for (ss = startst; ss < stopst; ss = es) {
383 1.1 jtc /* identify end of subRE */
384 1.1 jtc es = ss;
385 1.1 jtc switch (OP(m->g->strip[es])) {
386 1.1 jtc case OPLUS_:
387 1.1 jtc case OQUEST_:
388 1.1 jtc es += OPND(m->g->strip[es]);
389 1.1 jtc break;
390 1.1 jtc case OCH_:
391 1.1 jtc while (OP(m->g->strip[es]) != O_CH)
392 1.1 jtc es += OPND(m->g->strip[es]);
393 1.1 jtc break;
394 1.1 jtc }
395 1.1 jtc es++;
396 1.1 jtc
397 1.1 jtc /* figure out what it matched */
398 1.1 jtc switch (OP(m->g->strip[ss])) {
399 1.1 jtc case OEND:
400 1.1 jtc assert(nope);
401 1.1 jtc break;
402 1.1 jtc case OCHAR:
403 1.1 jtc sp++;
404 1.1 jtc break;
405 1.1 jtc case OBOL:
406 1.1 jtc case OEOL:
407 1.1 jtc case OBOW:
408 1.1 jtc case OEOW:
409 1.1 jtc break;
410 1.1 jtc case OANY:
411 1.1 jtc case OANYOF:
412 1.1 jtc sp++;
413 1.1 jtc break;
414 1.1 jtc case OBACK_:
415 1.1 jtc case O_BACK:
416 1.1 jtc assert(nope);
417 1.1 jtc break;
418 1.1 jtc /* cases where length of match is hard to find */
419 1.1 jtc case OQUEST_:
420 1.1 jtc stp = stop;
421 1.1 jtc for (;;) {
422 1.1 jtc /* how long could this one be? */
423 1.1 jtc rest = slow(m, sp, stp, ss, es);
424 1.1 jtc assert(rest != NULL); /* it did match */
425 1.1 jtc /* could the rest match the rest? */
426 1.1 jtc tail = slow(m, rest, stop, es, stopst);
427 1.1 jtc if (tail == stop)
428 1.1 jtc break; /* yes! */
429 1.1 jtc /* no -- try a shorter match for this one */
430 1.1 jtc stp = rest - 1;
431 1.1 jtc assert(stp >= sp); /* it did work */
432 1.1 jtc }
433 1.1 jtc ssub = ss + 1;
434 1.1 jtc esub = es - 1;
435 1.1 jtc /* did innards match? */
436 1.1 jtc if (slow(m, sp, rest, ssub, esub) != NULL) {
437 1.9 christos #ifdef NDEBUG
438 1.9 christos (void)
439 1.9 christos #else
440 1.9 christos dp =
441 1.9 christos #endif
442 1.9 christos dissect(m, sp, rest, ssub, esub);
443 1.1 jtc assert(dp == rest);
444 1.1 jtc } else /* no */
445 1.1 jtc assert(sp == rest);
446 1.1 jtc sp = rest;
447 1.1 jtc break;
448 1.1 jtc case OPLUS_:
449 1.1 jtc stp = stop;
450 1.1 jtc for (;;) {
451 1.1 jtc /* how long could this one be? */
452 1.1 jtc rest = slow(m, sp, stp, ss, es);
453 1.1 jtc assert(rest != NULL); /* it did match */
454 1.1 jtc /* could the rest match the rest? */
455 1.1 jtc tail = slow(m, rest, stop, es, stopst);
456 1.1 jtc if (tail == stop)
457 1.1 jtc break; /* yes! */
458 1.1 jtc /* no -- try a shorter match for this one */
459 1.1 jtc stp = rest - 1;
460 1.1 jtc assert(stp >= sp); /* it did work */
461 1.1 jtc }
462 1.1 jtc ssub = ss + 1;
463 1.1 jtc esub = es - 1;
464 1.1 jtc ssp = sp;
465 1.1 jtc oldssp = ssp;
466 1.1 jtc for (;;) { /* find last match of innards */
467 1.1 jtc sep = slow(m, ssp, rest, ssub, esub);
468 1.1 jtc if (sep == NULL || sep == ssp)
469 1.1 jtc break; /* failed or matched null */
470 1.1 jtc oldssp = ssp; /* on to next try */
471 1.1 jtc ssp = sep;
472 1.1 jtc }
473 1.1 jtc if (sep == NULL) {
474 1.1 jtc /* last successful match */
475 1.1 jtc sep = ssp;
476 1.1 jtc ssp = oldssp;
477 1.1 jtc }
478 1.1 jtc assert(sep == rest); /* must exhaust substring */
479 1.1 jtc assert(slow(m, ssp, sep, ssub, esub) == rest);
480 1.9 christos #ifdef NDEBUG
481 1.9 christos (void)
482 1.9 christos #else
483 1.9 christos dp =
484 1.9 christos #endif
485 1.9 christos dissect(m, ssp, sep, ssub, esub);
486 1.1 jtc assert(dp == sep);
487 1.1 jtc sp = rest;
488 1.1 jtc break;
489 1.1 jtc case OCH_:
490 1.1 jtc stp = stop;
491 1.1 jtc for (;;) {
492 1.1 jtc /* how long could this one be? */
493 1.1 jtc rest = slow(m, sp, stp, ss, es);
494 1.1 jtc assert(rest != NULL); /* it did match */
495 1.1 jtc /* could the rest match the rest? */
496 1.1 jtc tail = slow(m, rest, stop, es, stopst);
497 1.1 jtc if (tail == stop)
498 1.1 jtc break; /* yes! */
499 1.1 jtc /* no -- try a shorter match for this one */
500 1.1 jtc stp = rest - 1;
501 1.1 jtc assert(stp >= sp); /* it did work */
502 1.1 jtc }
503 1.1 jtc ssub = ss + 1;
504 1.1 jtc esub = ss + OPND(m->g->strip[ss]) - 1;
505 1.1 jtc assert(OP(m->g->strip[esub]) == OOR1);
506 1.1 jtc for (;;) { /* find first matching branch */
507 1.1 jtc if (slow(m, sp, rest, ssub, esub) == rest)
508 1.1 jtc break; /* it matched all of it */
509 1.1 jtc /* that one missed, try next one */
510 1.1 jtc assert(OP(m->g->strip[esub]) == OOR1);
511 1.1 jtc esub++;
512 1.1 jtc assert(OP(m->g->strip[esub]) == OOR2);
513 1.1 jtc ssub = esub + 1;
514 1.1 jtc esub += OPND(m->g->strip[esub]);
515 1.1 jtc if (OP(m->g->strip[esub]) == OOR2)
516 1.1 jtc esub--;
517 1.1 jtc else
518 1.1 jtc assert(OP(m->g->strip[esub]) == O_CH);
519 1.1 jtc }
520 1.9 christos #ifdef NDEBUG
521 1.9 christos (void)
522 1.9 christos #else
523 1.9 christos dp =
524 1.9 christos #endif
525 1.9 christos dissect(m, sp, rest, ssub, esub);
526 1.1 jtc assert(dp == rest);
527 1.1 jtc sp = rest;
528 1.1 jtc break;
529 1.1 jtc case O_PLUS:
530 1.1 jtc case O_QUEST:
531 1.1 jtc case OOR1:
532 1.1 jtc case OOR2:
533 1.1 jtc case O_CH:
534 1.1 jtc assert(nope);
535 1.1 jtc break;
536 1.1 jtc case OLPAREN:
537 1.1 jtc i = OPND(m->g->strip[ss]);
538 1.2 jtc assert(0 < i && i <= m->g->nsub);
539 1.1 jtc m->pmatch[i].rm_so = sp - m->offp;
540 1.1 jtc break;
541 1.1 jtc case ORPAREN:
542 1.1 jtc i = OPND(m->g->strip[ss]);
543 1.2 jtc assert(0 < i && i <= m->g->nsub);
544 1.1 jtc m->pmatch[i].rm_eo = sp - m->offp;
545 1.1 jtc break;
546 1.1 jtc default: /* uh oh */
547 1.1 jtc assert(nope);
548 1.1 jtc break;
549 1.1 jtc }
550 1.1 jtc }
551 1.1 jtc
552 1.1 jtc assert(sp == stop);
553 1.1 jtc return(sp);
554 1.1 jtc }
555 1.1 jtc
556 1.1 jtc /*
557 1.1 jtc - backref - figure out what matched what, figuring in back references
558 1.19 yamt == static const char *backref(struct match *m, const char *start, \
559 1.19 yamt == const char *stop, sopno startst, sopno stopst, sopno lev);
560 1.1 jtc */
561 1.19 yamt static const char * /* == stop (success) or NULL (failure) */
562 1.1 jtc backref(m, start, stop, startst, stopst, lev)
563 1.8 perry struct match *m;
564 1.19 yamt const char *start;
565 1.19 yamt const char *stop;
566 1.1 jtc sopno startst;
567 1.1 jtc sopno stopst;
568 1.1 jtc sopno lev; /* PLUS nesting level */
569 1.1 jtc {
570 1.8 perry int i;
571 1.8 perry sopno ss; /* start sop of current subRE */
572 1.19 yamt const char *sp; /* start of string matched by it */
573 1.8 perry sopno ssub; /* start sop of subsubRE */
574 1.8 perry sopno esub; /* end sop of subsubRE */
575 1.19 yamt const char *ssp; /* start of string matched by subsubRE */
576 1.19 yamt const char *dp;
577 1.8 perry size_t len;
578 1.8 perry int hard;
579 1.8 perry sop s;
580 1.8 perry regoff_t offsave;
581 1.8 perry cset *cs;
582 1.1 jtc
583 1.12 lukem _DIAGASSERT(m != NULL);
584 1.12 lukem _DIAGASSERT(start != NULL);
585 1.12 lukem _DIAGASSERT(stop != NULL);
586 1.12 lukem
587 1.1 jtc AT("back", start, stop, startst, stopst);
588 1.1 jtc sp = start;
589 1.1 jtc
590 1.1 jtc /* get as far as we can with easy stuff */
591 1.1 jtc hard = 0;
592 1.1 jtc for (ss = startst; !hard && ss < stopst; ss++)
593 1.1 jtc switch (OP(s = m->g->strip[ss])) {
594 1.1 jtc case OCHAR:
595 1.1 jtc if (sp == stop || *sp++ != (char)OPND(s))
596 1.1 jtc return(NULL);
597 1.1 jtc break;
598 1.1 jtc case OANY:
599 1.1 jtc if (sp == stop)
600 1.1 jtc return(NULL);
601 1.1 jtc sp++;
602 1.1 jtc break;
603 1.1 jtc case OANYOF:
604 1.1 jtc cs = &m->g->sets[OPND(s)];
605 1.1 jtc if (sp == stop || !CHIN(cs, *sp++))
606 1.1 jtc return(NULL);
607 1.1 jtc break;
608 1.1 jtc case OBOL:
609 1.1 jtc if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
610 1.1 jtc (sp < m->endp && *(sp-1) == '\n' &&
611 1.1 jtc (m->g->cflags®_NEWLINE)) )
612 1.1 jtc { /* yes */ }
613 1.1 jtc else
614 1.1 jtc return(NULL);
615 1.1 jtc break;
616 1.1 jtc case OEOL:
617 1.1 jtc if ( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
618 1.1 jtc (sp < m->endp && *sp == '\n' &&
619 1.1 jtc (m->g->cflags®_NEWLINE)) )
620 1.1 jtc { /* yes */ }
621 1.1 jtc else
622 1.1 jtc return(NULL);
623 1.1 jtc break;
624 1.1 jtc case OBOW:
625 1.17 enami if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
626 1.17 enami (sp < m->endp && *(sp-1) == '\n' &&
627 1.17 enami (m->g->cflags®_NEWLINE)) ||
628 1.17 enami (sp > m->beginp &&
629 1.17 enami !ISWORD(*(sp-1))) ) &&
630 1.17 enami (sp < m->endp && ISWORD(*sp)) )
631 1.1 jtc { /* yes */ }
632 1.1 jtc else
633 1.1 jtc return(NULL);
634 1.1 jtc break;
635 1.1 jtc case OEOW:
636 1.17 enami if (( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
637 1.17 enami (sp < m->endp && *sp == '\n' &&
638 1.17 enami (m->g->cflags®_NEWLINE)) ||
639 1.17 enami (sp < m->endp && !ISWORD(*sp)) ) &&
640 1.17 enami (sp > m->beginp && ISWORD(*(sp-1))) )
641 1.1 jtc { /* yes */ }
642 1.1 jtc else
643 1.1 jtc return(NULL);
644 1.1 jtc break;
645 1.1 jtc case O_QUEST:
646 1.1 jtc break;
647 1.1 jtc case OOR1: /* matches null but needs to skip */
648 1.1 jtc ss++;
649 1.1 jtc s = m->g->strip[ss];
650 1.1 jtc do {
651 1.1 jtc assert(OP(s) == OOR2);
652 1.1 jtc ss += OPND(s);
653 1.1 jtc } while (OP(s = m->g->strip[ss]) != O_CH);
654 1.1 jtc /* note that the ss++ gets us past the O_CH */
655 1.1 jtc break;
656 1.1 jtc default: /* have to make a choice */
657 1.1 jtc hard = 1;
658 1.1 jtc break;
659 1.1 jtc }
660 1.1 jtc if (!hard) { /* that was it! */
661 1.1 jtc if (sp != stop)
662 1.1 jtc return(NULL);
663 1.1 jtc return(sp);
664 1.1 jtc }
665 1.1 jtc ss--; /* adjust for the for's final increment */
666 1.1 jtc
667 1.1 jtc /* the hard stuff */
668 1.1 jtc AT("hard", sp, stop, ss, stopst);
669 1.1 jtc s = m->g->strip[ss];
670 1.1 jtc switch (OP(s)) {
671 1.1 jtc case OBACK_: /* the vilest depths */
672 1.1 jtc i = OPND(s);
673 1.2 jtc assert(0 < i && i <= m->g->nsub);
674 1.10 drochner if (m->pmatch[i].rm_eo == (regoff_t)-1)
675 1.1 jtc return(NULL);
676 1.10 drochner assert(m->pmatch[i].rm_so != (regoff_t)-1);
677 1.11 christos len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
678 1.18 christos if (len == 0)
679 1.18 christos return(NULL);
680 1.1 jtc assert(stop - m->beginp >= len);
681 1.1 jtc if (sp > stop - len)
682 1.1 jtc return(NULL); /* not enough left to match */
683 1.11 christos ssp = m->offp + (size_t)m->pmatch[i].rm_so;
684 1.1 jtc if (memcmp(sp, ssp, len) != 0)
685 1.1 jtc return(NULL);
686 1.1 jtc while (m->g->strip[ss] != SOP(O_BACK, i))
687 1.1 jtc ss++;
688 1.1 jtc return(backref(m, sp+len, stop, ss+1, stopst, lev));
689 1.9 christos
690 1.1 jtc case OQUEST_: /* to null or not */
691 1.1 jtc dp = backref(m, sp, stop, ss+1, stopst, lev);
692 1.1 jtc if (dp != NULL)
693 1.1 jtc return(dp); /* not */
694 1.1 jtc return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
695 1.9 christos
696 1.1 jtc case OPLUS_:
697 1.1 jtc assert(m->lastpos != NULL);
698 1.1 jtc assert(lev+1 <= m->g->nplus);
699 1.1 jtc m->lastpos[lev+1] = sp;
700 1.1 jtc return(backref(m, sp, stop, ss+1, stopst, lev+1));
701 1.9 christos
702 1.1 jtc case O_PLUS:
703 1.1 jtc if (sp == m->lastpos[lev]) /* last pass matched null */
704 1.1 jtc return(backref(m, sp, stop, ss+1, stopst, lev-1));
705 1.1 jtc /* try another pass */
706 1.1 jtc m->lastpos[lev] = sp;
707 1.1 jtc dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
708 1.1 jtc if (dp == NULL)
709 1.9 christos dp = backref(m, sp, stop, ss+1, stopst, lev-1);
710 1.9 christos return(dp);
711 1.9 christos
712 1.1 jtc case OCH_: /* find the right one, if any */
713 1.1 jtc ssub = ss + 1;
714 1.1 jtc esub = ss + OPND(s) - 1;
715 1.1 jtc assert(OP(m->g->strip[esub]) == OOR1);
716 1.1 jtc for (;;) { /* find first matching branch */
717 1.1 jtc dp = backref(m, sp, stop, ssub, esub, lev);
718 1.1 jtc if (dp != NULL)
719 1.1 jtc return(dp);
720 1.1 jtc /* that one missed, try next one */
721 1.1 jtc if (OP(m->g->strip[esub]) == O_CH)
722 1.1 jtc return(NULL); /* there is none */
723 1.1 jtc esub++;
724 1.1 jtc assert(OP(m->g->strip[esub]) == OOR2);
725 1.1 jtc ssub = esub + 1;
726 1.1 jtc esub += OPND(m->g->strip[esub]);
727 1.1 jtc if (OP(m->g->strip[esub]) == OOR2)
728 1.1 jtc esub--;
729 1.1 jtc else
730 1.1 jtc assert(OP(m->g->strip[esub]) == O_CH);
731 1.1 jtc }
732 1.9 christos
733 1.1 jtc case OLPAREN: /* must undo assignment if rest fails */
734 1.1 jtc i = OPND(s);
735 1.2 jtc assert(0 < i && i <= m->g->nsub);
736 1.1 jtc offsave = m->pmatch[i].rm_so;
737 1.1 jtc m->pmatch[i].rm_so = sp - m->offp;
738 1.1 jtc dp = backref(m, sp, stop, ss+1, stopst, lev);
739 1.1 jtc if (dp != NULL)
740 1.1 jtc return(dp);
741 1.1 jtc m->pmatch[i].rm_so = offsave;
742 1.1 jtc return(NULL);
743 1.9 christos
744 1.1 jtc case ORPAREN: /* must undo assignment if rest fails */
745 1.1 jtc i = OPND(s);
746 1.2 jtc assert(0 < i && i <= m->g->nsub);
747 1.1 jtc offsave = m->pmatch[i].rm_eo;
748 1.1 jtc m->pmatch[i].rm_eo = sp - m->offp;
749 1.1 jtc dp = backref(m, sp, stop, ss+1, stopst, lev);
750 1.1 jtc if (dp != NULL)
751 1.1 jtc return(dp);
752 1.1 jtc m->pmatch[i].rm_eo = offsave;
753 1.1 jtc return(NULL);
754 1.9 christos
755 1.1 jtc default: /* uh oh */
756 1.1 jtc assert(nope);
757 1.1 jtc break;
758 1.1 jtc }
759 1.1 jtc
760 1.1 jtc /* "can't happen" */
761 1.1 jtc assert(nope);
762 1.1 jtc /* NOTREACHED */
763 1.7 christos return NULL;
764 1.1 jtc }
765 1.1 jtc
766 1.1 jtc /*
767 1.1 jtc - fast - step through the string at top speed
768 1.19 yamt == static const char *fast(struct match *m, const char *start, \
769 1.19 yamt == const char *stop, sopno startst, sopno stopst);
770 1.1 jtc */
771 1.19 yamt static const char * /* where tentative match ended, or NULL */
772 1.1 jtc fast(m, start, stop, startst, stopst)
773 1.8 perry struct match *m;
774 1.19 yamt const char *start;
775 1.19 yamt const char *stop;
776 1.1 jtc sopno startst;
777 1.1 jtc sopno stopst;
778 1.1 jtc {
779 1.8 perry states st = m->st;
780 1.8 perry states fresh = m->fresh;
781 1.8 perry states tmp = m->tmp;
782 1.19 yamt const char *p = start;
783 1.17 enami int c = (start == m->beginp) ? OUT : *(start-1);
784 1.8 perry int lastc; /* previous c */
785 1.8 perry int flagch;
786 1.8 perry int i;
787 1.19 yamt const char *coldp; /* last p after which no match was underway */
788 1.1 jtc
789 1.12 lukem _DIAGASSERT(m != NULL);
790 1.12 lukem _DIAGASSERT(start != NULL);
791 1.12 lukem _DIAGASSERT(stop != NULL);
792 1.12 lukem
793 1.1 jtc CLEAR(st);
794 1.1 jtc SET1(st, startst);
795 1.1 jtc st = step(m->g, startst, stopst, st, NOTHING, st);
796 1.1 jtc ASSIGN(fresh, st);
797 1.1 jtc SP("start", st, *p);
798 1.1 jtc coldp = NULL;
799 1.1 jtc for (;;) {
800 1.1 jtc /* next character */
801 1.1 jtc lastc = c;
802 1.17 enami c = (p == m->endp) ? OUT : *p;
803 1.1 jtc if (EQ(st, fresh))
804 1.1 jtc coldp = p;
805 1.1 jtc
806 1.1 jtc /* is there an EOL and/or BOL between lastc and c? */
807 1.1 jtc flagch = '\0';
808 1.1 jtc i = 0;
809 1.1 jtc if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
810 1.1 jtc (lastc == OUT && !(m->eflags®_NOTBOL)) ) {
811 1.1 jtc flagch = BOL;
812 1.1 jtc i = m->g->nbol;
813 1.1 jtc }
814 1.1 jtc if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
815 1.1 jtc (c == OUT && !(m->eflags®_NOTEOL)) ) {
816 1.1 jtc flagch = (flagch == BOL) ? BOLEOL : EOL;
817 1.1 jtc i += m->g->neol;
818 1.1 jtc }
819 1.1 jtc if (i != 0) {
820 1.1 jtc for (; i > 0; i--)
821 1.1 jtc st = step(m->g, startst, stopst, st, flagch, st);
822 1.1 jtc SP("boleol", st, c);
823 1.1 jtc }
824 1.1 jtc
825 1.1 jtc /* how about a word boundary? */
826 1.3 jtc if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
827 1.3 jtc (c != OUT && ISWORD(c)) ) {
828 1.1 jtc flagch = BOW;
829 1.1 jtc }
830 1.3 jtc if ( (lastc != OUT && ISWORD(lastc)) &&
831 1.3 jtc (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
832 1.1 jtc flagch = EOW;
833 1.1 jtc }
834 1.1 jtc if (flagch == BOW || flagch == EOW) {
835 1.1 jtc st = step(m->g, startst, stopst, st, flagch, st);
836 1.1 jtc SP("boweow", st, c);
837 1.1 jtc }
838 1.1 jtc
839 1.1 jtc /* are we done? */
840 1.1 jtc if (ISSET(st, stopst) || p == stop)
841 1.1 jtc break; /* NOTE BREAK OUT */
842 1.1 jtc
843 1.1 jtc /* no, we must deal with this character */
844 1.1 jtc ASSIGN(tmp, st);
845 1.1 jtc ASSIGN(st, fresh);
846 1.1 jtc assert(c != OUT);
847 1.1 jtc st = step(m->g, startst, stopst, tmp, c, st);
848 1.1 jtc SP("aft", st, c);
849 1.1 jtc assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
850 1.1 jtc p++;
851 1.1 jtc }
852 1.1 jtc
853 1.1 jtc assert(coldp != NULL);
854 1.1 jtc m->coldp = coldp;
855 1.1 jtc if (ISSET(st, stopst))
856 1.1 jtc return(p+1);
857 1.1 jtc else
858 1.1 jtc return(NULL);
859 1.1 jtc }
860 1.1 jtc
861 1.1 jtc /*
862 1.1 jtc - slow - step through the string more deliberately
863 1.19 yamt == static const char *slow(struct match *m, const char *start, \
864 1.19 yamt == const char *stop, sopno startst, sopno stopst);
865 1.1 jtc */
866 1.19 yamt static const char * /* where it ended */
867 1.1 jtc slow(m, start, stop, startst, stopst)
868 1.8 perry struct match *m;
869 1.19 yamt const char *start;
870 1.19 yamt const char *stop;
871 1.1 jtc sopno startst;
872 1.1 jtc sopno stopst;
873 1.1 jtc {
874 1.8 perry states st = m->st;
875 1.8 perry states empty = m->empty;
876 1.8 perry states tmp = m->tmp;
877 1.19 yamt const char *p = start;
878 1.17 enami int c = (start == m->beginp) ? OUT : *(start-1);
879 1.8 perry int lastc; /* previous c */
880 1.8 perry int flagch;
881 1.8 perry int i;
882 1.19 yamt const char *matchp; /* last p at which a match ended */
883 1.1 jtc
884 1.12 lukem _DIAGASSERT(m != NULL);
885 1.12 lukem _DIAGASSERT(start != NULL);
886 1.12 lukem _DIAGASSERT(stop != NULL);
887 1.12 lukem
888 1.1 jtc AT("slow", start, stop, startst, stopst);
889 1.1 jtc CLEAR(st);
890 1.1 jtc SET1(st, startst);
891 1.1 jtc SP("sstart", st, *p);
892 1.1 jtc st = step(m->g, startst, stopst, st, NOTHING, st);
893 1.1 jtc matchp = NULL;
894 1.1 jtc for (;;) {
895 1.1 jtc /* next character */
896 1.1 jtc lastc = c;
897 1.17 enami c = (p == m->endp) ? OUT : *p;
898 1.1 jtc
899 1.1 jtc /* is there an EOL and/or BOL between lastc and c? */
900 1.1 jtc flagch = '\0';
901 1.1 jtc i = 0;
902 1.1 jtc if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
903 1.1 jtc (lastc == OUT && !(m->eflags®_NOTBOL)) ) {
904 1.1 jtc flagch = BOL;
905 1.1 jtc i = m->g->nbol;
906 1.1 jtc }
907 1.1 jtc if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
908 1.1 jtc (c == OUT && !(m->eflags®_NOTEOL)) ) {
909 1.1 jtc flagch = (flagch == BOL) ? BOLEOL : EOL;
910 1.1 jtc i += m->g->neol;
911 1.1 jtc }
912 1.1 jtc if (i != 0) {
913 1.1 jtc for (; i > 0; i--)
914 1.1 jtc st = step(m->g, startst, stopst, st, flagch, st);
915 1.1 jtc SP("sboleol", st, c);
916 1.1 jtc }
917 1.1 jtc
918 1.1 jtc /* how about a word boundary? */
919 1.3 jtc if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
920 1.3 jtc (c != OUT && ISWORD(c)) ) {
921 1.1 jtc flagch = BOW;
922 1.1 jtc }
923 1.3 jtc if ( (lastc != OUT && ISWORD(lastc)) &&
924 1.3 jtc (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
925 1.1 jtc flagch = EOW;
926 1.1 jtc }
927 1.1 jtc if (flagch == BOW || flagch == EOW) {
928 1.1 jtc st = step(m->g, startst, stopst, st, flagch, st);
929 1.1 jtc SP("sboweow", st, c);
930 1.1 jtc }
931 1.1 jtc
932 1.1 jtc /* are we done? */
933 1.1 jtc if (ISSET(st, stopst))
934 1.1 jtc matchp = p;
935 1.1 jtc if (EQ(st, empty) || p == stop)
936 1.1 jtc break; /* NOTE BREAK OUT */
937 1.1 jtc
938 1.1 jtc /* no, we must deal with this character */
939 1.1 jtc ASSIGN(tmp, st);
940 1.1 jtc ASSIGN(st, empty);
941 1.1 jtc assert(c != OUT);
942 1.1 jtc st = step(m->g, startst, stopst, tmp, c, st);
943 1.1 jtc SP("saft", st, c);
944 1.1 jtc assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
945 1.1 jtc p++;
946 1.1 jtc }
947 1.1 jtc
948 1.1 jtc return(matchp);
949 1.1 jtc }
950 1.1 jtc
951 1.1 jtc
952 1.1 jtc /*
953 1.1 jtc - step - map set of states reachable before char to set reachable after
954 1.8 perry == static states step(struct re_guts *g, sopno start, sopno stop, \
955 1.8 perry == states bef, int ch, states aft);
956 1.1 jtc == #define BOL (OUT+1)
957 1.1 jtc == #define EOL (BOL+1)
958 1.1 jtc == #define BOLEOL (BOL+2)
959 1.1 jtc == #define NOTHING (BOL+3)
960 1.1 jtc == #define BOW (BOL+4)
961 1.1 jtc == #define EOW (BOL+5)
962 1.1 jtc == #define CODEMAX (BOL+5) // highest code used
963 1.1 jtc == #define NONCHAR(c) ((c) > CHAR_MAX)
964 1.1 jtc == #define NNONCHAR (CODEMAX-CHAR_MAX)
965 1.1 jtc */
966 1.1 jtc static states
967 1.1 jtc step(g, start, stop, bef, ch, aft)
968 1.8 perry struct re_guts *g;
969 1.3 jtc sopno start; /* start state within strip */
970 1.3 jtc sopno stop; /* state after stop state within strip */
971 1.8 perry states bef; /* states reachable before */
972 1.1 jtc int ch; /* character or NONCHAR code */
973 1.8 perry states aft; /* states already known reachable after */
974 1.1 jtc {
975 1.8 perry cset *cs;
976 1.8 perry sop s;
977 1.8 perry sopno pc;
978 1.8 perry onestate here; /* note, macros know this name */
979 1.8 perry sopno look;
980 1.8 perry int i;
981 1.1 jtc
982 1.12 lukem _DIAGASSERT(g != NULL);
983 1.12 lukem
984 1.1 jtc for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
985 1.1 jtc s = g->strip[pc];
986 1.1 jtc switch (OP(s)) {
987 1.1 jtc case OEND:
988 1.1 jtc assert(pc == stop-1);
989 1.1 jtc break;
990 1.1 jtc case OCHAR:
991 1.1 jtc /* only characters can match */
992 1.1 jtc assert(!NONCHAR(ch) || ch != (char)OPND(s));
993 1.1 jtc if (ch == (char)OPND(s))
994 1.1 jtc FWD(aft, bef, 1);
995 1.1 jtc break;
996 1.1 jtc case OBOL:
997 1.1 jtc if (ch == BOL || ch == BOLEOL)
998 1.1 jtc FWD(aft, bef, 1);
999 1.1 jtc break;
1000 1.1 jtc case OEOL:
1001 1.1 jtc if (ch == EOL || ch == BOLEOL)
1002 1.1 jtc FWD(aft, bef, 1);
1003 1.1 jtc break;
1004 1.1 jtc case OBOW:
1005 1.1 jtc if (ch == BOW)
1006 1.1 jtc FWD(aft, bef, 1);
1007 1.1 jtc break;
1008 1.1 jtc case OEOW:
1009 1.1 jtc if (ch == EOW)
1010 1.1 jtc FWD(aft, bef, 1);
1011 1.1 jtc break;
1012 1.1 jtc case OANY:
1013 1.1 jtc if (!NONCHAR(ch))
1014 1.1 jtc FWD(aft, bef, 1);
1015 1.1 jtc break;
1016 1.1 jtc case OANYOF:
1017 1.1 jtc cs = &g->sets[OPND(s)];
1018 1.1 jtc if (!NONCHAR(ch) && CHIN(cs, ch))
1019 1.1 jtc FWD(aft, bef, 1);
1020 1.1 jtc break;
1021 1.1 jtc case OBACK_: /* ignored here */
1022 1.1 jtc case O_BACK:
1023 1.1 jtc FWD(aft, aft, 1);
1024 1.1 jtc break;
1025 1.1 jtc case OPLUS_: /* forward, this is just an empty */
1026 1.1 jtc FWD(aft, aft, 1);
1027 1.1 jtc break;
1028 1.1 jtc case O_PLUS: /* both forward and back */
1029 1.1 jtc FWD(aft, aft, 1);
1030 1.1 jtc i = ISSETBACK(aft, OPND(s));
1031 1.1 jtc BACK(aft, aft, OPND(s));
1032 1.1 jtc if (!i && ISSETBACK(aft, OPND(s))) {
1033 1.1 jtc /* oho, must reconsider loop body */
1034 1.1 jtc pc -= OPND(s) + 1;
1035 1.1 jtc INIT(here, pc);
1036 1.1 jtc }
1037 1.1 jtc break;
1038 1.1 jtc case OQUEST_: /* two branches, both forward */
1039 1.1 jtc FWD(aft, aft, 1);
1040 1.1 jtc FWD(aft, aft, OPND(s));
1041 1.1 jtc break;
1042 1.1 jtc case O_QUEST: /* just an empty */
1043 1.1 jtc FWD(aft, aft, 1);
1044 1.1 jtc break;
1045 1.1 jtc case OLPAREN: /* not significant here */
1046 1.1 jtc case ORPAREN:
1047 1.1 jtc FWD(aft, aft, 1);
1048 1.1 jtc break;
1049 1.1 jtc case OCH_: /* mark the first two branches */
1050 1.1 jtc FWD(aft, aft, 1);
1051 1.1 jtc assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1052 1.1 jtc FWD(aft, aft, OPND(s));
1053 1.1 jtc break;
1054 1.1 jtc case OOR1: /* done a branch, find the O_CH */
1055 1.1 jtc if (ISSTATEIN(aft, here)) {
1056 1.1 jtc for (look = 1;
1057 1.1 jtc OP(s = g->strip[pc+look]) != O_CH;
1058 1.1 jtc look += OPND(s))
1059 1.1 jtc assert(OP(s) == OOR2);
1060 1.1 jtc FWD(aft, aft, look);
1061 1.1 jtc }
1062 1.1 jtc break;
1063 1.1 jtc case OOR2: /* propagate OCH_'s marking */
1064 1.1 jtc FWD(aft, aft, 1);
1065 1.1 jtc if (OP(g->strip[pc+OPND(s)]) != O_CH) {
1066 1.1 jtc assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1067 1.1 jtc FWD(aft, aft, OPND(s));
1068 1.1 jtc }
1069 1.1 jtc break;
1070 1.1 jtc case O_CH: /* just empty */
1071 1.1 jtc FWD(aft, aft, 1);
1072 1.1 jtc break;
1073 1.1 jtc default: /* ooooops... */
1074 1.1 jtc assert(nope);
1075 1.1 jtc break;
1076 1.1 jtc }
1077 1.1 jtc }
1078 1.1 jtc
1079 1.1 jtc return(aft);
1080 1.1 jtc }
1081 1.1 jtc
1082 1.1 jtc #ifdef REDEBUG
1083 1.1 jtc /*
1084 1.1 jtc - print - print a set of states
1085 1.1 jtc == #ifdef REDEBUG
1086 1.1 jtc == static void print(struct match *m, char *caption, states st, \
1087 1.1 jtc == int ch, FILE *d);
1088 1.1 jtc == #endif
1089 1.1 jtc */
1090 1.1 jtc static void
1091 1.1 jtc print(m, caption, st, ch, d)
1092 1.1 jtc struct match *m;
1093 1.1 jtc char *caption;
1094 1.1 jtc states st;
1095 1.1 jtc int ch;
1096 1.1 jtc FILE *d;
1097 1.1 jtc {
1098 1.8 perry struct re_guts *g = m->g;
1099 1.8 perry int i;
1100 1.8 perry int first = 1;
1101 1.1 jtc
1102 1.12 lukem _DIAGASSERT(m != NULL);
1103 1.12 lukem _DIAGASSERT(caption != NULL);
1104 1.12 lukem
1105 1.1 jtc if (!(m->eflags®_TRACE))
1106 1.1 jtc return;
1107 1.1 jtc
1108 1.12 lukem _DIAGASSERT(d != NULL);
1109 1.12 lukem
1110 1.1 jtc fprintf(d, "%s", caption);
1111 1.1 jtc if (ch != '\0')
1112 1.1 jtc fprintf(d, " %s", pchar(ch));
1113 1.1 jtc for (i = 0; i < g->nstates; i++)
1114 1.1 jtc if (ISSET(st, i)) {
1115 1.1 jtc fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1116 1.1 jtc first = 0;
1117 1.1 jtc }
1118 1.1 jtc fprintf(d, "\n");
1119 1.1 jtc }
1120 1.1 jtc
1121 1.1 jtc /*
1122 1.1 jtc - at - print current situation
1123 1.1 jtc == #ifdef REDEBUG
1124 1.1 jtc == static void at(struct match *m, char *title, char *start, char *stop, \
1125 1.1 jtc == sopno startst, sopno stopst);
1126 1.1 jtc == #endif
1127 1.1 jtc */
1128 1.1 jtc static void
1129 1.1 jtc at(m, title, start, stop, startst, stopst)
1130 1.1 jtc struct match *m;
1131 1.1 jtc char *title;
1132 1.1 jtc char *start;
1133 1.1 jtc char *stop;
1134 1.1 jtc sopno startst;
1135 1.1 jtc sopno stopst;
1136 1.1 jtc {
1137 1.12 lukem
1138 1.12 lukem _DIAGASSERT(m != NULL);
1139 1.12 lukem _DIAGASSERT(title != NULL);
1140 1.12 lukem _DIAGASSERT(start != NULL);
1141 1.12 lukem _DIAGASSERT(stop != NULL);
1142 1.12 lukem
1143 1.1 jtc if (!(m->eflags®_TRACE))
1144 1.1 jtc return;
1145 1.1 jtc
1146 1.1 jtc printf("%s %s-", title, pchar(*start));
1147 1.1 jtc printf("%s ", pchar(*stop));
1148 1.1 jtc printf("%ld-%ld\n", (long)startst, (long)stopst);
1149 1.1 jtc }
1150 1.1 jtc
1151 1.1 jtc #ifndef PCHARDONE
1152 1.1 jtc #define PCHARDONE /* never again */
1153 1.1 jtc /*
1154 1.1 jtc - pchar - make a character printable
1155 1.1 jtc == #ifdef REDEBUG
1156 1.1 jtc == static char *pchar(int ch);
1157 1.1 jtc == #endif
1158 1.1 jtc *
1159 1.1 jtc * Is this identical to regchar() over in debug.c? Well, yes. But a
1160 1.1 jtc * duplicate here avoids having a debugging-capable regexec.o tied to
1161 1.1 jtc * a matching debug.o, and this is convenient. It all disappears in
1162 1.1 jtc * the non-debug compilation anyway, so it doesn't matter much.
1163 1.1 jtc */
1164 1.1 jtc static char * /* -> representation */
1165 1.1 jtc pchar(ch)
1166 1.1 jtc int ch;
1167 1.1 jtc {
1168 1.1 jtc static char pbuf[10];
1169 1.1 jtc
1170 1.1 jtc if (isprint(ch) || ch == ' ')
1171 1.6 mrg (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1172 1.1 jtc else
1173 1.6 mrg (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1174 1.1 jtc return(pbuf);
1175 1.1 jtc }
1176 1.1 jtc #endif
1177 1.1 jtc #endif
1178 1.1 jtc
1179 1.1 jtc #undef matcher
1180 1.1 jtc #undef fast
1181 1.1 jtc #undef slow
1182 1.1 jtc #undef dissect
1183 1.1 jtc #undef backref
1184 1.1 jtc #undef step
1185 1.1 jtc #undef print
1186 1.1 jtc #undef at
1187 1.1 jtc #undef match
1188 1.14 christos #undef nope
1189