Home | History | Annotate | Line # | Download | only in regex
      1  1.49  christos /*	$NetBSD: regcomp.c,v 1.49 2025/01/01 18:19:50 christos Exp $	*/
      2   1.6       cgd 
      3   1.5       cgd /*-
      4  1.39  christos  * SPDX-License-Identifier: BSD-3-Clause
      5  1.39  christos  *
      6  1.39  christos  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
      7   1.5       cgd  * Copyright (c) 1992, 1993, 1994
      8   1.5       cgd  *	The Regents of the University of California.  All rights reserved.
      9   1.5       cgd  *
     10  1.39  christos  * Copyright (c) 2011 The FreeBSD Foundation
     11  1.39  christos  * All rights reserved.
     12  1.39  christos  * Portions of this software were developed by David Chisnall
     13  1.39  christos  * under sponsorship from the FreeBSD Foundation.
     14  1.39  christos  *
     15   1.5       cgd  * This code is derived from software contributed to Berkeley by
     16   1.5       cgd  * Henry Spencer.
     17   1.5       cgd  *
     18   1.5       cgd  * Redistribution and use in source and binary forms, with or without
     19   1.5       cgd  * modification, are permitted provided that the following conditions
     20   1.5       cgd  * are met:
     21   1.5       cgd  * 1. Redistributions of source code must retain the above copyright
     22   1.5       cgd  *    notice, this list of conditions and the following disclaimer.
     23   1.5       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     24   1.5       cgd  *    notice, this list of conditions and the following disclaimer in the
     25   1.5       cgd  *    documentation and/or other materials provided with the distribution.
     26  1.18       agc  * 3. Neither the name of the University nor the names of its contributors
     27  1.18       agc  *    may be used to endorse or promote products derived from this software
     28  1.18       agc  *    without specific prior written permission.
     29  1.18       agc  *
     30  1.18       agc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     31  1.18       agc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     32  1.18       agc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     33  1.18       agc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     34  1.18       agc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     35  1.18       agc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     36  1.18       agc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     37  1.18       agc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     38  1.18       agc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     39  1.18       agc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     40  1.18       agc  * SUCH DAMAGE.
     41  1.18       agc  *
     42  1.18       agc  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
     43  1.18       agc  */
     44  1.18       agc 
     45  1.45  christos #if HAVE_NBTOOL_CONFIG_H
     46  1.45  christos #include "nbtool_config.h"
     47  1.45  christos #endif
     48  1.45  christos 
     49   1.7  christos #include <sys/cdefs.h>
     50   1.6       cgd #if 0
     51   1.5       cgd static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
     52  1.39  christos __FBSDID("$FreeBSD: head/lib/libc/regex/regcomp.c 368359 2020-12-05 03:18:48Z kevans $");
     53   1.6       cgd #endif
     54  1.49  christos __RCSID("$NetBSD: regcomp.c,v 1.49 2025/01/01 18:19:50 christos Exp $");
     55  1.42  christos 
     56  1.42  christos #ifndef LIBHACK
     57  1.39  christos #define REGEX_GNU_EXTENSIONS
     58   1.5       cgd 
     59   1.8       jtc #include "namespace.h"
     60  1.42  christos #endif
     61   1.1       jtc #include <sys/types.h>
     62  1.39  christos #include <stdio.h>
     63  1.39  christos #include <string.h>
     64   1.1       jtc #include <ctype.h>
     65   1.1       jtc #include <limits.h>
     66   1.1       jtc #include <stdlib.h>
     67  1.28  junyoung #include <regex.h>
     68  1.39  christos #include <stdbool.h>
     69   1.8       jtc 
     70  1.44  christos #if defined(__weak_alias) && !defined(LIBHACK)
     71  1.16   mycroft __weak_alias(regcomp,_regcomp)
     72   1.8       jtc #endif
     73   1.1       jtc 
     74  1.39  christos #ifdef REGEX_LIBC_COLLATE
     75  1.39  christos #include "collate.h"
     76  1.39  christos #endif
     77  1.39  christos 
     78   1.1       jtc #include "utils.h"
     79   1.1       jtc #include "regex2.h"
     80   1.1       jtc 
     81   1.1       jtc #include "cname.h"
     82   1.1       jtc 
     83   1.1       jtc /*
     84  1.39  christos  * Branching context, used to keep track of branch state for all of the branch-
     85  1.39  christos  * aware functions. In addition to keeping track of branch positions for the
     86  1.39  christos  * p_branch_* functions, we use this to simplify some clumsiness in BREs for
     87  1.39  christos  * detection of whether ^ is acting as an anchor or being used erroneously and
     88  1.39  christos  * also for whether we're in a sub-expression or not.
     89  1.39  christos  */
     90  1.39  christos struct branchc {
     91  1.39  christos 	sopno start;
     92  1.39  christos 	sopno back;
     93  1.39  christos 	sopno fwd;
     94  1.39  christos 
     95  1.39  christos 	int nbranch;
     96  1.39  christos 	int nchain;
     97  1.39  christos 	bool outer;
     98  1.39  christos 	bool terminate;
     99  1.39  christos };
    100  1.39  christos 
    101  1.39  christos /*
    102   1.1       jtc  * parse structure, passed up and down to avoid global variables and
    103   1.1       jtc  * other clumsinesses
    104   1.1       jtc  */
    105   1.1       jtc struct parse {
    106  1.21      yamt 	const char *next;	/* next character in RE */
    107  1.21      yamt 	const char *end;	/* end of string (-> NUL normally) */
    108   1.1       jtc 	int error;		/* has an error been seen? */
    109  1.39  christos 	int gnuext;
    110   1.1       jtc 	sop *strip;		/* malloced strip */
    111   1.1       jtc 	sopno ssize;		/* malloced strip size (allocated) */
    112   1.1       jtc 	sopno slen;		/* malloced strip length (used) */
    113  1.33  christos 	size_t ncsalloc;	/* number of csets allocated */
    114   1.1       jtc 	struct re_guts *g;
    115   1.1       jtc #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
    116   1.1       jtc 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
    117   1.1       jtc 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
    118  1.39  christos 	bool allowbranch;	/* can this expression branch? */
    119  1.39  christos 	bool bre;		/* convenience; is this a BRE? */
    120  1.39  christos 	int pflags;		/* other parsing flags -- legacy escapes? */
    121  1.39  christos 	bool (*parse_expr)(struct parse *, struct branchc *);
    122  1.39  christos 	void (*pre_parse)(struct parse *, struct branchc *);
    123  1.39  christos 	void (*post_parse)(struct parse *, struct branchc *);
    124   1.1       jtc };
    125   1.1       jtc 
    126  1.39  christos #define PFLAG_LEGACY_ESC	0x00000001
    127  1.39  christos 
    128   1.5       cgd /* ========= begin header generated by ./mkh ========= */
    129   1.5       cgd #ifdef __cplusplus
    130   1.5       cgd extern "C" {
    131   1.5       cgd #endif
    132   1.5       cgd 
    133   1.5       cgd /* === regcomp.c === */
    134  1.39  christos static bool p_ere_exp(struct parse *p, struct branchc *bc);
    135  1.26  junyoung static void p_str(struct parse *p);
    136  1.39  christos static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
    137  1.39  christos static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
    138  1.39  christos static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
    139  1.39  christos static bool p_branch_empty(struct parse *p, struct branchc *bc);
    140  1.39  christos static bool p_branch_do(struct parse *p, struct branchc *bc);
    141  1.39  christos static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
    142  1.39  christos static void p_bre_post_parse(struct parse *p, struct branchc *bc);
    143  1.39  christos static void p_re(struct parse *p, int end1, int end2);
    144  1.39  christos static bool p_simp_re(struct parse *p, struct branchc *bc);
    145  1.26  junyoung static int p_count(struct parse *p);
    146  1.26  junyoung static void p_bracket(struct parse *p);
    147  1.39  christos static int p_range_cmp(wchar_t c1, wchar_t c2);
    148  1.26  junyoung static void p_b_term(struct parse *p, cset *cs);
    149  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    150  1.39  christos static int p_b_pseudoclass(struct parse *p, char c);
    151  1.39  christos #endif
    152  1.26  junyoung static void p_b_cclass(struct parse *p, cset *cs);
    153  1.39  christos static void p_b_cclass_named(struct parse *p, cset *cs, const char[]);
    154  1.26  junyoung static void p_b_eclass(struct parse *p, cset *cs);
    155  1.39  christos static wint_t p_b_symbol(struct parse *p);
    156  1.39  christos static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
    157  1.39  christos static bool may_escape(struct parse *p, const wint_t ch);
    158  1.39  christos static wint_t othercase(wint_t ch);
    159  1.39  christos static void bothcases(struct parse *p, wint_t ch);
    160  1.39  christos static void ordinary(struct parse *p, wint_t ch);
    161  1.26  junyoung static void nonnewline(struct parse *p);
    162  1.39  christos static void repeat(struct parse *p, sopno start, int from, int to);
    163  1.26  junyoung static int seterr(struct parse *p, int e);
    164  1.26  junyoung static cset *allocset(struct parse *p);
    165  1.26  junyoung static void freeset(struct parse *p, cset *cs);
    166  1.39  christos static void CHadd(struct parse *p, cset *cs, wint_t ch);
    167  1.39  christos static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
    168  1.39  christos static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
    169  1.39  christos static wint_t singleton(cset *cs);
    170  1.26  junyoung static sopno dupl(struct parse *p, sopno start, sopno finish);
    171  1.39  christos static void doemit(struct parse *p, sop op, size_t opnd);
    172  1.39  christos static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
    173  1.39  christos static void dofwd(struct parse *p, sopno pos, sop value);
    174  1.30  christos static int enlarge(struct parse *p, sopno size);
    175  1.26  junyoung static void stripsnug(struct parse *p, struct re_guts *g);
    176  1.26  junyoung static void findmust(struct parse *p, struct re_guts *g);
    177  1.39  christos static int altoffset(sop *scan, int offset);
    178  1.39  christos static void computejumps(struct parse *p, struct re_guts *g);
    179  1.39  christos static void computematchjumps(struct parse *p, struct re_guts *g);
    180  1.26  junyoung static sopno pluscount(struct parse *p, struct re_guts *g);
    181  1.39  christos static wint_t wgetnext(struct parse *p);
    182   1.5       cgd 
    183   1.5       cgd #ifdef __cplusplus
    184   1.5       cgd }
    185   1.5       cgd #endif
    186   1.5       cgd /* ========= end header generated by ./mkh ========= */
    187   1.1       jtc 
    188   1.1       jtc static char nuls[10];		/* place to point scanner in event of error */
    189   1.1       jtc 
    190   1.1       jtc /*
    191   1.1       jtc  * macros for use with parse structure
    192   1.1       jtc  * BEWARE:  these know that the parse structure is named `p' !!!
    193   1.1       jtc  */
    194   1.1       jtc #define	PEEK()	(*p->next)
    195   1.1       jtc #define	PEEK2()	(*(p->next+1))
    196   1.1       jtc #define	MORE()	(p->next < p->end)
    197   1.1       jtc #define	MORE2()	(p->next+1 < p->end)
    198   1.1       jtc #define	SEE(c)	(MORE() && PEEK() == (c))
    199   1.1       jtc #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
    200  1.39  christos #define	SEESPEC(a)	(p->bre ? SEETWO('\\', a) : SEE(a))
    201   1.1       jtc #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
    202   1.1       jtc #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
    203  1.39  christos #define	EATSPEC(a)	(p->bre ? EATTWO('\\', a) : EAT(a))
    204   1.1       jtc #define	NEXT()	(p->next++)
    205   1.1       jtc #define	NEXT2()	(p->next += 2)
    206   1.1       jtc #define	NEXTn(n)	(p->next += (n))
    207   1.1       jtc #define	GETNEXT()	(*p->next++)
    208  1.39  christos #define	WGETNEXT()	wgetnext(p)
    209   1.1       jtc #define	SETERROR(e)	seterr(p, (e))
    210  1.39  christos #define	REQUIRE(co, e)	((co) || SETERROR(e))
    211   1.1       jtc #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
    212  1.39  christos #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
    213   1.1       jtc #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
    214  1.40  christos #define	EMIT(op, sopnd)	doemit(p, (op), (sopnd))
    215  1.40  christos #define	INSERT(op, pos)	doinsert(p, (op), HERE()-(pos)+1, pos)
    216  1.12  drochner #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
    217   1.1       jtc #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
    218   1.1       jtc #define	HERE()		(p->slen)
    219   1.1       jtc #define	THERE()		(p->slen - 1)
    220   1.4       jtc #define	THERETHERE()	(p->slen - 2)
    221   1.1       jtc #define	DROP(n)	(p->slen -= (n))
    222   1.1       jtc 
    223  1.39  christos /* Macro used by computejump()/computematchjump() */
    224  1.41  christos #ifndef MIN
    225  1.39  christos #define MIN(a,b)	((a)<(b)?(a):(b))
    226  1.41  christos #endif
    227  1.30  christos 
    228  1.42  christos #ifndef NLS
    229  1.42  christos static const struct {
    230  1.42  christos 	const char *name;
    231  1.42  christos 	int (*func)(int);
    232  1.42  christos } wctypes[] = {
    233  1.42  christos #define ADD(x) { .name = # x, .func = is ## x }
    234  1.42  christos 	ADD(alnum),
    235  1.42  christos 	ADD(alpha),
    236  1.42  christos 	ADD(blank),
    237  1.42  christos 	ADD(cntrl),
    238  1.42  christos 	ADD(digit),
    239  1.42  christos 	ADD(graph),
    240  1.42  christos 	ADD(lower),
    241  1.42  christos 	ADD(print),
    242  1.42  christos 	ADD(punct),
    243  1.42  christos 	ADD(space),
    244  1.42  christos 	ADD(upper),
    245  1.42  christos 	ADD(xdigit),
    246  1.42  christos #undef ADD
    247  1.42  christos };
    248  1.42  christos 
    249  1.42  christos wctype_t
    250  1.42  christos __regex_wctype(const char *str)
    251  1.42  christos {
    252  1.42  christos 	for (size_t i = 0; i < __arraycount(wctypes); i++) {
    253  1.42  christos 		if (strcmp(wctypes[i].name, str) == 0)
    254  1.42  christos 			return (wctype_t)(i + 1);
    255  1.42  christos 	}
    256  1.42  christos 	return (wctype_t)0;
    257  1.42  christos }
    258  1.42  christos 
    259  1.42  christos int
    260  1.42  christos __regex_iswctype(wint_t c, wctype_t ct)
    261  1.42  christos {
    262  1.42  christos 	if (ct == 0)
    263  1.42  christos 		return 0;
    264  1.42  christos 	return (*wctypes[ct - 1].func)(c);
    265  1.42  christos }
    266  1.42  christos #endif
    267  1.42  christos 
    268  1.39  christos static int				/* 0 success, otherwise REG_something */
    269  1.39  christos regcomp_internal(regex_t * __restrict preg,
    270  1.39  christos 	const char * __restrict pattern,
    271  1.39  christos 	int cflags, int pflags)
    272   1.1       jtc {
    273   1.1       jtc 	struct parse pa;
    274   1.9     perry 	struct re_guts *g;
    275   1.9     perry 	struct parse *p = &pa;
    276   1.9     perry 	int i;
    277   1.9     perry 	size_t len;
    278  1.39  christos 	size_t maxlen;
    279   1.3       jtc #ifdef REDEBUG
    280   1.3       jtc #	define	GOODFLAGS(f)	(f)
    281   1.3       jtc #else
    282   1.3       jtc #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
    283   1.3       jtc #endif
    284   1.1       jtc 
    285  1.14     lukem 	_DIAGASSERT(preg != NULL);
    286  1.14     lukem 	_DIAGASSERT(pattern != NULL);
    287  1.14     lukem 
    288   1.3       jtc 	cflags = GOODFLAGS(cflags);
    289   1.1       jtc 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
    290   1.1       jtc 		return(REG_INVARG);
    291   1.1       jtc 
    292   1.1       jtc 	if (cflags&REG_PEND) {
    293   1.1       jtc 		if (preg->re_endp < pattern)
    294   1.1       jtc 			return(REG_INVARG);
    295   1.1       jtc 		len = preg->re_endp - pattern;
    296   1.1       jtc 	} else
    297  1.11  christos 		len = strlen(pattern);
    298   1.1       jtc 
    299   1.1       jtc 	/* do the mallocs early so failure handling is easy */
    300  1.39  christos 	g = malloc(sizeof(*g));
    301   1.1       jtc 	if (g == NULL)
    302   1.1       jtc 		return(REG_ESPACE);
    303  1.39  christos 	/*
    304  1.39  christos 	 * Limit the pattern space to avoid a 32-bit overflow on buffer
    305  1.39  christos 	 * extension.  Also avoid any signed overflow in case of conversion
    306  1.39  christos 	 * so make the real limit based on a 31-bit overflow.
    307  1.39  christos 	 *
    308  1.39  christos 	 * Likely not applicable on 64-bit systems but handle the case
    309  1.39  christos 	 * generically (who are we to stop people from using ~715MB+
    310  1.39  christos 	 * patterns?).
    311  1.39  christos 	 */
    312  1.40  christos 	maxlen = ((size_t)-1 >> 1) / sizeof(*p->strip) * 2 / 3;
    313  1.39  christos 	if (len >= maxlen) {
    314  1.40  christos 		free(g);
    315  1.39  christos 		return(REG_ESPACE);
    316  1.39  christos 	}
    317  1.40  christos 	p->ssize = (sopno)(len / 2 * 3 + 1);	/* ugh */
    318  1.39  christos 	assert(p->ssize >= len);
    319  1.39  christos 
    320  1.39  christos 	p->strip = calloc(p->ssize, sizeof(*p->strip));
    321   1.1       jtc 	p->slen = 0;
    322   1.1       jtc 	if (p->strip == NULL) {
    323  1.40  christos 		free(g);
    324   1.1       jtc 		return(REG_ESPACE);
    325   1.1       jtc 	}
    326   1.1       jtc 
    327   1.1       jtc 	/* set things up */
    328   1.1       jtc 	p->g = g;
    329  1.39  christos 	p->next = pattern;	/* convenience; we do not modify it */
    330   1.1       jtc 	p->end = p->next + len;
    331   1.1       jtc 	p->error = 0;
    332   1.1       jtc 	p->ncsalloc = 0;
    333  1.39  christos 	p->pflags = pflags;
    334   1.1       jtc 	for (i = 0; i < NPAREN; i++) {
    335   1.1       jtc 		p->pbegin[i] = 0;
    336   1.1       jtc 		p->pend[i] = 0;
    337   1.1       jtc 	}
    338  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    339  1.39  christos 	if ((cflags & REG_GNU) == 0) {
    340  1.39  christos 		p->gnuext = false;
    341  1.39  christos 		p->allowbranch = (cflags & REG_EXTENDED) != 0;
    342  1.39  christos 	} else
    343  1.39  christos 		p->gnuext = p->allowbranch = true;
    344  1.39  christos #else
    345  1.39  christos 	p->gnuext = false;
    346  1.39  christos 	p->allowbranch = (cflags & REG_EXTENDED) != 0;
    347  1.39  christos #endif
    348  1.39  christos 	if (cflags & REG_EXTENDED) {
    349  1.39  christos 		p->bre = false;
    350  1.39  christos 		p->parse_expr = p_ere_exp;
    351  1.39  christos 		p->pre_parse = NULL;
    352  1.39  christos 		p->post_parse = NULL;
    353  1.39  christos 	} else {
    354  1.39  christos 		p->bre = true;
    355  1.39  christos 		p->parse_expr = p_simp_re;
    356  1.39  christos 		p->pre_parse = p_bre_pre_parse;
    357  1.39  christos 		p->post_parse = p_bre_post_parse;
    358  1.39  christos 	}
    359   1.1       jtc 	g->sets = NULL;
    360   1.1       jtc 	g->ncsets = 0;
    361   1.1       jtc 	g->cflags = cflags;
    362   1.1       jtc 	g->iflags = 0;
    363   1.1       jtc 	g->nbol = 0;
    364   1.1       jtc 	g->neol = 0;
    365   1.1       jtc 	g->must = NULL;
    366  1.39  christos 	g->moffset = -1;
    367  1.39  christos 	g->charjump = NULL;
    368  1.39  christos 	g->matchjump = NULL;
    369   1.1       jtc 	g->mlen = 0;
    370   1.1       jtc 	g->nsub = 0;
    371   1.1       jtc 	g->backrefs = 0;
    372   1.1       jtc 
    373   1.1       jtc 	/* do it */
    374   1.1       jtc 	EMIT(OEND, 0);
    375   1.1       jtc 	g->firststate = THERE();
    376  1.39  christos 	if (cflags & REG_NOSPEC)
    377   1.1       jtc 		p_str(p);
    378   1.1       jtc 	else
    379  1.39  christos 		p_re(p, OUT, OUT);
    380   1.1       jtc 	EMIT(OEND, 0);
    381   1.1       jtc 	g->laststate = THERE();
    382   1.1       jtc 
    383   1.1       jtc 	/* tidy up loose ends and fill things in */
    384   1.1       jtc 	stripsnug(p, g);
    385   1.1       jtc 	findmust(p, g);
    386  1.39  christos 	/* only use Boyer-Moore algorithm if the pattern is bigger
    387  1.39  christos 	 * than three characters
    388  1.39  christos 	 */
    389  1.39  christos 	if(g->mlen > 3) {
    390  1.39  christos 		computejumps(p, g);
    391  1.39  christos 		computematchjumps(p, g);
    392  1.39  christos 		if(g->matchjump == NULL && g->charjump != NULL) {
    393  1.39  christos 			free(g->charjump);
    394  1.39  christos 			g->charjump = NULL;
    395  1.39  christos 		}
    396  1.39  christos 	}
    397   1.1       jtc 	g->nplus = pluscount(p, g);
    398   1.1       jtc 	g->magic = MAGIC2;
    399   1.1       jtc 	preg->re_nsub = g->nsub;
    400   1.1       jtc 	preg->re_g = g;
    401   1.1       jtc 	preg->re_magic = MAGIC1;
    402   1.1       jtc #ifndef REDEBUG
    403   1.1       jtc 	/* not debugging, so can't rely on the assert() in regexec() */
    404   1.1       jtc 	if (g->iflags&BAD)
    405   1.1       jtc 		SETERROR(REG_ASSERT);
    406   1.1       jtc #endif
    407   1.1       jtc 
    408   1.1       jtc 	/* win or lose, we're done */
    409   1.1       jtc 	if (p->error != 0)	/* lose */
    410   1.1       jtc 		regfree(preg);
    411   1.1       jtc 	return(p->error);
    412   1.1       jtc }
    413   1.1       jtc 
    414   1.1       jtc /*
    415  1.39  christos  - regcomp - interface for parser and compilation
    416  1.39  christos  = extern int regcomp(regex_t *, const char *, int);
    417  1.39  christos  = #define	REG_BASIC	0000
    418  1.39  christos  = #define	REG_EXTENDED	0001
    419  1.39  christos  = #define	REG_ICASE	0002
    420  1.39  christos  = #define	REG_NOSUB	0004
    421  1.39  christos  = #define	REG_NEWLINE	0010
    422  1.39  christos  = #define	REG_NOSPEC	0020
    423  1.39  christos  = #define	REG_PEND	0040
    424  1.39  christos  = #define	REG_DUMP	0200
    425   1.1       jtc  */
    426  1.39  christos int				/* 0 success, otherwise REG_something */
    427  1.39  christos regcomp(regex_t * __restrict preg,
    428  1.39  christos 	const char * __restrict pattern,
    429  1.39  christos 	int cflags)
    430   1.1       jtc {
    431  1.30  christos 
    432  1.39  christos 	return (regcomp_internal(preg, pattern, cflags, 0));
    433   1.1       jtc }
    434   1.1       jtc 
    435   1.1       jtc /*
    436  1.39  christos  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op,
    437  1.39  christos  - return whether we should terminate or not
    438  1.39  christos  == static bool p_ere_exp(struct parse *p);
    439   1.1       jtc  */
    440  1.39  christos static bool
    441  1.39  christos p_ere_exp(struct parse *p, struct branchc *bc)
    442   1.1       jtc {
    443   1.9     perry 	char c;
    444  1.39  christos 	wint_t wc;
    445   1.9     perry 	sopno pos;
    446   1.9     perry 	int count;
    447   1.9     perry 	int count2;
    448  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    449  1.40  christos 	size_t i;
    450  1.39  christos 	int handled;
    451  1.39  christos #endif
    452   1.9     perry 	sopno subno;
    453   1.1       jtc 	int wascaret = 0;
    454   1.1       jtc 
    455  1.14     lukem 	_DIAGASSERT(p != NULL);
    456  1.14     lukem 
    457  1.39  christos 	(void)bc;
    458   1.1       jtc 	assert(MORE());		/* caller should have ensured this */
    459   1.1       jtc 	c = GETNEXT();
    460   1.1       jtc 
    461  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    462  1.39  christos 	handled = 0;
    463  1.39  christos #endif
    464   1.1       jtc 	pos = HERE();
    465   1.1       jtc 	switch (c) {
    466   1.1       jtc 	case '(':
    467  1.39  christos 		(void)REQUIRE(MORE(), REG_EPAREN);
    468   1.1       jtc 		p->g->nsub++;
    469  1.40  christos 		subno = (sopno)p->g->nsub;
    470   1.1       jtc 		if (subno < NPAREN)
    471   1.1       jtc 			p->pbegin[subno] = HERE();
    472   1.1       jtc 		EMIT(OLPAREN, subno);
    473   1.1       jtc 		if (!SEE(')'))
    474  1.39  christos 			p_re(p, ')', IGN);
    475   1.1       jtc 		if (subno < NPAREN) {
    476   1.1       jtc 			p->pend[subno] = HERE();
    477   1.1       jtc 			assert(p->pend[subno] != 0);
    478   1.1       jtc 		}
    479   1.1       jtc 		EMIT(ORPAREN, subno);
    480  1.39  christos 		(void)MUSTEAT(')', REG_EPAREN);
    481   1.1       jtc 		break;
    482   1.1       jtc #ifndef POSIX_MISTAKE
    483   1.1       jtc 	case ')':		/* happens only if no current unmatched ( */
    484   1.1       jtc 		/*
    485   1.1       jtc 		 * You may ask, why the ifndef?  Because I didn't notice
    486   1.1       jtc 		 * this until slightly too late for 1003.2, and none of the
    487   1.1       jtc 		 * other 1003.2 regular-expression reviewers noticed it at
    488   1.1       jtc 		 * all.  So an unmatched ) is legal POSIX, at least until
    489   1.1       jtc 		 * we can get it fixed.
    490   1.1       jtc 		 */
    491   1.1       jtc 		SETERROR(REG_EPAREN);
    492   1.1       jtc 		break;
    493   1.1       jtc #endif
    494   1.1       jtc 	case '^':
    495   1.1       jtc 		EMIT(OBOL, 0);
    496   1.1       jtc 		p->g->iflags |= USEBOL;
    497   1.1       jtc 		p->g->nbol++;
    498   1.1       jtc 		wascaret = 1;
    499   1.1       jtc 		break;
    500   1.1       jtc 	case '$':
    501   1.1       jtc 		EMIT(OEOL, 0);
    502   1.1       jtc 		p->g->iflags |= USEEOL;
    503   1.1       jtc 		p->g->neol++;
    504   1.1       jtc 		break;
    505   1.1       jtc 	case '|':
    506   1.1       jtc 		SETERROR(REG_EMPTY);
    507   1.1       jtc 		break;
    508   1.1       jtc 	case '*':
    509   1.1       jtc 	case '+':
    510   1.1       jtc 	case '?':
    511  1.39  christos 	case '{':
    512   1.1       jtc 		SETERROR(REG_BADRPT);
    513   1.1       jtc 		break;
    514   1.1       jtc 	case '.':
    515   1.1       jtc 		if (p->g->cflags&REG_NEWLINE)
    516   1.1       jtc 			nonnewline(p);
    517   1.1       jtc 		else
    518   1.1       jtc 			EMIT(OANY, 0);
    519   1.1       jtc 		break;
    520   1.1       jtc 	case '[':
    521   1.1       jtc 		p_bracket(p);
    522   1.1       jtc 		break;
    523   1.1       jtc 	case '\\':
    524  1.39  christos 		(void)REQUIRE(MORE(), REG_EESCAPE);
    525  1.39  christos 		wc = WGETNEXT();
    526  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    527  1.39  christos 		if (p->gnuext) {
    528  1.39  christos 			handled = 1;
    529  1.39  christos 			switch (wc) {
    530  1.39  christos 			case '`':
    531  1.39  christos 				EMIT(OBOS, 0);
    532  1.39  christos 				break;
    533  1.39  christos 			case '\'':
    534  1.39  christos 				EMIT(OEOS, 0);
    535  1.39  christos 				break;
    536  1.39  christos 			case 'B':
    537  1.39  christos 				EMIT(ONWBND, 0);
    538  1.39  christos 				break;
    539  1.39  christos 			case 'b':
    540  1.39  christos 				EMIT(OWBND, 0);
    541  1.39  christos 				break;
    542  1.39  christos 			case 'W':
    543  1.39  christos 			case 'w':
    544  1.39  christos 			case 'S':
    545  1.39  christos 			case 's':
    546  1.39  christos 				p_b_pseudoclass(p, wc);
    547  1.39  christos 				break;
    548  1.46  christos 			case 'a':
    549  1.46  christos 				ordinary(p, '\a');
    550  1.46  christos 				break;
    551  1.46  christos 			case 'e':
    552  1.46  christos 				ordinary(p, '\e');
    553  1.46  christos 				break;
    554  1.46  christos 			case 'f':
    555  1.46  christos 				ordinary(p, '\f');
    556  1.46  christos 				break;
    557  1.46  christos 			case 'n':
    558  1.46  christos 				ordinary(p, '\n');
    559  1.46  christos 				break;
    560  1.46  christos 			case 'r':
    561  1.46  christos 				ordinary(p, '\r');
    562  1.46  christos 				break;
    563  1.46  christos 			case 't':
    564  1.46  christos 				ordinary(p, '\t');
    565  1.46  christos 				break;
    566  1.46  christos 			case 'v':
    567  1.46  christos 				ordinary(p, '\v');
    568  1.46  christos 				break;
    569  1.39  christos 			case '1':
    570  1.39  christos 			case '2':
    571  1.39  christos 			case '3':
    572  1.39  christos 			case '4':
    573  1.39  christos 			case '5':
    574  1.39  christos 			case '6':
    575  1.39  christos 			case '7':
    576  1.39  christos 			case '8':
    577  1.39  christos 			case '9':
    578  1.39  christos 				i = wc - '0';
    579  1.39  christos 				assert(i < NPAREN);
    580  1.39  christos 				if (p->pend[i] != 0) {
    581  1.39  christos 					assert(i <= p->g->nsub);
    582  1.39  christos 					EMIT(OBACK_, i);
    583  1.39  christos 					assert(p->pbegin[i] != 0);
    584  1.39  christos 					assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
    585  1.39  christos 					assert(OP(p->strip[p->pend[i]]) == ORPAREN);
    586  1.39  christos 					(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
    587  1.39  christos 					EMIT(O_BACK, i);
    588  1.39  christos 				} else
    589  1.39  christos 					SETERROR(REG_ESUBREG);
    590  1.39  christos 				p->g->backrefs = 1;
    591  1.39  christos 				break;
    592  1.39  christos 			default:
    593  1.39  christos 				handled = 0;
    594  1.39  christos 			}
    595  1.39  christos 			/* Don't proceed to the POSIX bits if we've already handled it */
    596  1.39  christos 			if (handled)
    597  1.39  christos 				break;
    598  1.39  christos 		}
    599  1.39  christos #endif
    600  1.39  christos 		switch (wc) {
    601  1.39  christos 		case '<':
    602  1.39  christos 			EMIT(OBOW, 0);
    603  1.39  christos 			break;
    604  1.39  christos 		case '>':
    605  1.39  christos 			EMIT(OEOW, 0);
    606  1.39  christos 			break;
    607  1.39  christos 		default:
    608  1.39  christos 			if (may_escape(p, wc))
    609  1.39  christos 				ordinary(p, wc);
    610  1.39  christos 			else
    611  1.39  christos 				SETERROR(REG_EESCAPE);
    612  1.39  christos 			break;
    613  1.39  christos 		}
    614   1.1       jtc 		break;
    615   1.1       jtc 	default:
    616  1.38  christos 		if (p->error != 0)
    617  1.39  christos 			return (false);
    618  1.39  christos 		p->next--;
    619  1.39  christos 		wc = WGETNEXT();
    620  1.39  christos 		ordinary(p, wc);
    621   1.1       jtc 		break;
    622   1.1       jtc 	}
    623   1.1       jtc 
    624   1.1       jtc 	if (!MORE())
    625  1.39  christos 		return (false);
    626   1.1       jtc 	c = PEEK();
    627   1.1       jtc 	/* we call { a repetition if followed by a digit */
    628  1.39  christos 	if (!( c == '*' || c == '+' || c == '?' || c == '{'))
    629  1.39  christos 		return (false);		/* no repetition, we're done */
    630  1.39  christos 	else if (c == '{')
    631  1.39  christos 		(void)REQUIRE(MORE2() && \
    632  1.39  christos 		    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
    633   1.1       jtc 	NEXT();
    634   1.1       jtc 
    635  1.39  christos 	(void)REQUIRE(!wascaret, REG_BADRPT);
    636   1.1       jtc 	switch (c) {
    637   1.1       jtc 	case '*':	/* implemented as +? */
    638   1.4       jtc 		/* this case does not require the (y|) trick, noKLUDGE */
    639   1.1       jtc 		INSERT(OPLUS_, pos);
    640   1.1       jtc 		ASTERN(O_PLUS, pos);
    641   1.1       jtc 		INSERT(OQUEST_, pos);
    642   1.1       jtc 		ASTERN(O_QUEST, pos);
    643   1.1       jtc 		break;
    644   1.1       jtc 	case '+':
    645   1.1       jtc 		INSERT(OPLUS_, pos);
    646   1.1       jtc 		ASTERN(O_PLUS, pos);
    647   1.1       jtc 		break;
    648   1.1       jtc 	case '?':
    649   1.4       jtc 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
    650   1.4       jtc 		INSERT(OCH_, pos);		/* offset slightly wrong */
    651   1.4       jtc 		ASTERN(OOR1, pos);		/* this one's right */
    652   1.4       jtc 		AHEAD(pos);			/* fix the OCH_ */
    653   1.4       jtc 		EMIT(OOR2, 0);			/* offset very wrong... */
    654   1.4       jtc 		AHEAD(THERE());			/* ...so fix it */
    655   1.4       jtc 		ASTERN(O_CH, THERETHERE());
    656   1.1       jtc 		break;
    657   1.1       jtc 	case '{':
    658   1.1       jtc 		count = p_count(p);
    659   1.1       jtc 		if (EAT(',')) {
    660  1.39  christos 			if (isdigit((uch)PEEK())) {
    661   1.1       jtc 				count2 = p_count(p);
    662  1.39  christos 				(void)REQUIRE(count <= count2, REG_BADBR);
    663   1.1       jtc 			} else		/* single number with comma */
    664   1.1       jtc 				count2 = INFINITY;
    665   1.1       jtc 		} else		/* just a single number */
    666   1.1       jtc 			count2 = count;
    667  1.39  christos 		repeat(p, pos, count, count2);
    668   1.1       jtc 		if (!EAT('}')) {	/* error heuristics */
    669   1.1       jtc 			while (MORE() && PEEK() != '}')
    670   1.1       jtc 				NEXT();
    671  1.39  christos 			(void)REQUIRE(MORE(), REG_EBRACE);
    672   1.1       jtc 			SETERROR(REG_BADBR);
    673   1.1       jtc 		}
    674   1.1       jtc 		break;
    675   1.1       jtc 	}
    676   1.1       jtc 
    677   1.1       jtc 	if (!MORE())
    678  1.39  christos 		return (false);
    679   1.1       jtc 	c = PEEK();
    680   1.1       jtc 	if (!( c == '*' || c == '+' || c == '?' ||
    681  1.39  christos 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
    682  1.39  christos 		return (false);
    683   1.1       jtc 	SETERROR(REG_BADRPT);
    684  1.39  christos 	return (false);
    685   1.1       jtc }
    686   1.1       jtc 
    687   1.1       jtc /*
    688   1.1       jtc  - p_str - string (no metacharacters) "parser"
    689   1.9     perry  == static void p_str(struct parse *p);
    690   1.1       jtc  */
    691   1.1       jtc static void
    692  1.39  christos p_str(struct parse *p)
    693   1.1       jtc {
    694  1.39  christos 	(void)REQUIRE(MORE(), REG_EMPTY);
    695  1.39  christos 	while (MORE())
    696  1.39  christos 		ordinary(p, WGETNEXT());
    697  1.39  christos }
    698  1.14     lukem 
    699  1.39  christos /*
    700  1.39  christos  * Eat consecutive branch delimiters for the kind of expression that we are
    701  1.39  christos  * parsing, return the number of delimiters that we ate.
    702  1.39  christos  */
    703  1.39  christos static int
    704  1.39  christos p_branch_eat_delim(struct parse *p, struct branchc *bc)
    705  1.39  christos {
    706  1.39  christos 	int nskip;
    707  1.14     lukem 
    708  1.39  christos 	(void)bc;
    709  1.39  christos 	nskip = 0;
    710  1.39  christos 	while (EATSPEC('|'))
    711  1.39  christos 		++nskip;
    712  1.39  christos 	return (nskip);
    713   1.1       jtc }
    714   1.1       jtc 
    715   1.1       jtc /*
    716  1.39  christos  * Insert necessary branch book-keeping operations. This emits a
    717  1.39  christos  * bogus 'next' offset, since we still have more to parse
    718   1.1       jtc  */
    719   1.1       jtc static void
    720  1.39  christos p_branch_ins_offset(struct parse *p, struct branchc *bc)
    721   1.9     perry {
    722   1.1       jtc 
    723  1.39  christos 	if (bc->nbranch == 0) {
    724  1.39  christos 		INSERT(OCH_, bc->start);	/* offset is wrong */
    725  1.39  christos 		bc->fwd = bc->start;
    726  1.39  christos 		bc->back = bc->start;
    727  1.39  christos 	}
    728  1.39  christos 
    729  1.39  christos 	ASTERN(OOR1, bc->back);
    730  1.39  christos 	bc->back = THERE();
    731  1.39  christos 	AHEAD(bc->fwd);			/* fix previous offset */
    732  1.39  christos 	bc->fwd = HERE();
    733  1.39  christos 	EMIT(OOR2, 0);			/* offset is very wrong */
    734  1.39  christos 	++bc->nbranch;
    735  1.39  christos }
    736  1.39  christos 
    737  1.39  christos /*
    738  1.39  christos  * Fix the offset of the tail branch, if we actually had any branches.
    739  1.39  christos  * This is to correct the bogus placeholder offset that we use.
    740  1.39  christos  */
    741  1.39  christos static void
    742  1.39  christos p_branch_fix_tail(struct parse *p, struct branchc *bc)
    743  1.39  christos {
    744  1.14     lukem 
    745  1.39  christos 	/* Fix bogus offset at the tail if we actually have branches */
    746  1.39  christos 	if (bc->nbranch > 0) {
    747  1.39  christos 		AHEAD(bc->fwd);
    748  1.39  christos 		ASTERN(O_CH, bc->back);
    749  1.30  christos 	}
    750  1.39  christos }
    751  1.39  christos 
    752  1.39  christos /*
    753  1.39  christos  * Signal to the parser that an empty branch has been encountered; this will,
    754  1.39  christos  * in the future, be used to allow for more permissive behavior with empty
    755  1.39  christos  * branches. The return value should indicate whether parsing may continue
    756  1.39  christos  * or not.
    757  1.39  christos  */
    758  1.39  christos static bool
    759  1.39  christos p_branch_empty(struct parse *p, struct branchc *bc)
    760  1.39  christos {
    761  1.39  christos 
    762  1.39  christos 	(void)bc;
    763  1.39  christos 	SETERROR(REG_EMPTY);
    764  1.39  christos 	return (false);
    765  1.39  christos }
    766  1.39  christos 
    767  1.39  christos /*
    768  1.39  christos  * Take care of any branching requirements. This includes inserting the
    769  1.39  christos  * appropriate branching instructions as well as eating all of the branch
    770  1.39  christos  * delimiters until we either run out of pattern or need to parse more pattern.
    771  1.39  christos  */
    772  1.39  christos static bool
    773  1.39  christos p_branch_do(struct parse *p, struct branchc *bc)
    774  1.39  christos {
    775  1.39  christos 	int ate = 0;
    776  1.39  christos 
    777  1.39  christos 	ate = p_branch_eat_delim(p, bc);
    778  1.39  christos 	if (ate == 0)
    779  1.39  christos 		return (false);
    780  1.39  christos 	else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
    781  1.39  christos 		/*
    782  1.39  christos 		 * Halt parsing only if we have an empty branch and p_branch_empty
    783  1.39  christos 		 * indicates that we must not continue. In the future, this will not
    784  1.39  christos 		 * necessarily be an error.
    785  1.39  christos 		 */
    786  1.39  christos 		return (false);
    787  1.39  christos 	p_branch_ins_offset(p, bc);
    788  1.39  christos 
    789  1.39  christos 	return (true);
    790  1.39  christos }
    791  1.30  christos 
    792  1.39  christos static void
    793  1.39  christos p_bre_pre_parse(struct parse *p, struct branchc *bc)
    794  1.39  christos {
    795  1.14     lukem 
    796  1.40  christos 	(void)bc;
    797  1.39  christos 	/*
    798  1.39  christos 	 * Does not move cleanly into expression parser because of
    799  1.39  christos 	 * ordinary interpration of * at the beginning position of
    800  1.39  christos 	 * an expression.
    801  1.39  christos 	 */
    802   1.1       jtc 	if (EAT('^')) {
    803   1.1       jtc 		EMIT(OBOL, 0);
    804   1.1       jtc 		p->g->iflags |= USEBOL;
    805   1.1       jtc 		p->g->nbol++;
    806   1.1       jtc 	}
    807  1.39  christos }
    808  1.39  christos 
    809  1.39  christos static void
    810  1.39  christos p_bre_post_parse(struct parse *p, struct branchc *bc)
    811  1.39  christos {
    812  1.39  christos 
    813  1.39  christos 	/* Expression is terminating due to EOL token */
    814  1.39  christos 	if (bc->terminate) {
    815   1.1       jtc 		DROP(1);
    816   1.1       jtc 		EMIT(OEOL, 0);
    817   1.1       jtc 		p->g->iflags |= USEEOL;
    818   1.1       jtc 		p->g->neol++;
    819   1.1       jtc 	}
    820  1.39  christos }
    821   1.1       jtc 
    822  1.39  christos /*
    823  1.39  christos  - p_re - Top level parser, concatenation and BRE anchoring
    824  1.39  christos  == static void p_re(struct parse *p, int end1, int end2);
    825  1.39  christos  * Giving end1 as OUT essentially eliminates the end1/end2 check.
    826  1.39  christos  *
    827  1.39  christos  * This implementation is a bit of a kludge, in that a trailing $ is first
    828  1.39  christos  * taken as an ordinary character and then revised to be an anchor.
    829  1.39  christos  * The amount of lookahead needed to avoid this kludge is excessive.
    830  1.39  christos  */
    831  1.39  christos static void
    832  1.39  christos p_re(struct parse *p,
    833  1.39  christos 	int end1,	/* first terminating character */
    834  1.39  christos 	int end2)	/* second terminating character; ignored for EREs */
    835  1.39  christos {
    836  1.39  christos 	struct branchc bc;
    837  1.39  christos 
    838  1.39  christos 	bc.nbranch = 0;
    839  1.39  christos 	if (end1 == OUT && end2 == OUT)
    840  1.39  christos 		bc.outer = true;
    841  1.39  christos 	else
    842  1.39  christos 		bc.outer = false;
    843  1.39  christos #define	SEEEND()	(!p->bre ? SEE(end1) : SEETWO(end1, end2))
    844  1.39  christos 	for (;;) {
    845  1.39  christos 		bc.start = HERE();
    846  1.39  christos 		bc.nchain = 0;
    847  1.39  christos 		bc.terminate = false;
    848  1.39  christos 		if (p->pre_parse != NULL)
    849  1.39  christos 			p->pre_parse(p, &bc);
    850  1.39  christos 		while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) {
    851  1.39  christos 			bc.terminate = p->parse_expr(p, &bc);
    852  1.39  christos 			++bc.nchain;
    853  1.39  christos 		}
    854  1.39  christos 		if (p->post_parse != NULL)
    855  1.39  christos 			p->post_parse(p, &bc);
    856  1.39  christos 		(void) REQUIRE(p->gnuext || HERE() != bc.start, REG_EMPTY);
    857  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    858  1.39  christos 		if (p->gnuext && HERE() == bc.start && !p_branch_empty(p, &bc))
    859  1.39  christos 			break;
    860  1.39  christos #endif
    861  1.39  christos 		if (!p->allowbranch)
    862  1.39  christos 			break;
    863  1.39  christos 		/*
    864  1.39  christos 		 * p_branch_do's return value indicates whether we should
    865  1.39  christos 		 * continue parsing or not. This is both for correctness and
    866  1.39  christos 		 * a slight optimization, because it will check if we've
    867  1.39  christos 		 * encountered an empty branch or the end of the string
    868  1.39  christos 		 * immediately following a branch delimiter.
    869  1.39  christos 		 */
    870  1.39  christos 		if (!p_branch_do(p, &bc))
    871  1.39  christos 			break;
    872  1.39  christos 	}
    873  1.39  christos #undef SEE_END
    874  1.39  christos 	if (p->allowbranch)
    875  1.39  christos 		p_branch_fix_tail(p, &bc);
    876  1.39  christos 	assert(!MORE() || SEE(end1));
    877   1.1       jtc }
    878   1.1       jtc 
    879   1.1       jtc /*
    880   1.1       jtc  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
    881  1.39  christos  == static bool p_simp_re(struct parse *p, struct branchc *bc);
    882   1.1       jtc  */
    883  1.39  christos static bool			/* was the simple RE an unbackslashed $? */
    884  1.39  christos p_simp_re(struct parse *p, struct branchc *bc)
    885   1.1       jtc {
    886   1.9     perry 	int c;
    887  1.39  christos 	int cc;			/* convenient/control character */
    888   1.9     perry 	int count;
    889   1.9     perry 	int count2;
    890  1.39  christos 	sopno pos;
    891  1.39  christos 	bool handled;
    892  1.40  christos 	size_t i;
    893  1.39  christos 	wint_t wc;
    894   1.9     perry 	sopno subno;
    895   1.1       jtc #	define	BACKSL	(1<<CHAR_BIT)
    896   1.1       jtc 
    897  1.39  christos 	pos = HERE();		/* repetition op, if any, covers from here */
    898  1.39  christos 	handled = false;
    899   1.1       jtc 
    900   1.1       jtc 	assert(MORE());		/* caller should have ensured this */
    901  1.48  christos 	c = (uch)GETNEXT();
    902   1.1       jtc 	if (c == '\\') {
    903  1.39  christos 		(void)REQUIRE(MORE(), REG_EESCAPE);
    904  1.48  christos 		cc = (uch)GETNEXT();
    905  1.39  christos 		c = BACKSL | cc;
    906  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    907  1.39  christos 		if (p->gnuext) {
    908  1.39  christos 			handled = true;
    909  1.39  christos 			switch (c) {
    910  1.39  christos 			case BACKSL|'`':
    911  1.39  christos 				EMIT(OBOS, 0);
    912  1.39  christos 				break;
    913  1.39  christos 			case BACKSL|'\'':
    914  1.39  christos 				EMIT(OEOS, 0);
    915  1.39  christos 				break;
    916  1.39  christos 			case BACKSL|'B':
    917  1.39  christos 				EMIT(ONWBND, 0);
    918  1.39  christos 				break;
    919  1.39  christos 			case BACKSL|'b':
    920  1.39  christos 				EMIT(OWBND, 0);
    921  1.39  christos 				break;
    922  1.39  christos 			case BACKSL|'W':
    923  1.39  christos 			case BACKSL|'w':
    924  1.39  christos 			case BACKSL|'S':
    925  1.39  christos 			case BACKSL|'s':
    926  1.39  christos 				p_b_pseudoclass(p, cc);
    927  1.39  christos 				break;
    928  1.46  christos 			case BACKSL|'a':
    929  1.46  christos 				ordinary(p, '\a');
    930  1.46  christos 				break;
    931  1.46  christos 			case BACKSL|'e':
    932  1.46  christos 				ordinary(p, '\e');
    933  1.46  christos 				break;
    934  1.46  christos 			case BACKSL|'f':
    935  1.46  christos 				ordinary(p, '\f');
    936  1.46  christos 				break;
    937  1.46  christos 			case BACKSL|'n':
    938  1.46  christos 				ordinary(p, '\n');
    939  1.46  christos 				break;
    940  1.46  christos 			case BACKSL|'r':
    941  1.46  christos 				ordinary(p, '\r');
    942  1.46  christos 				break;
    943  1.46  christos 			case BACKSL|'t':
    944  1.46  christos 				ordinary(p, '\t');
    945  1.46  christos 				break;
    946  1.46  christos 			case BACKSL|'v':
    947  1.46  christos 				ordinary(p, '\v');
    948  1.46  christos 				break;
    949  1.39  christos 			default:
    950  1.39  christos 				handled = false;
    951  1.39  christos 			}
    952  1.39  christos 		}
    953  1.39  christos #endif
    954   1.1       jtc 	}
    955  1.39  christos 	if (!handled) {
    956  1.39  christos 		switch (c) {
    957  1.39  christos 		case '.':
    958  1.39  christos 			if (p->g->cflags&REG_NEWLINE)
    959  1.39  christos 				nonnewline(p);
    960  1.39  christos 			else
    961  1.39  christos 				EMIT(OANY, 0);
    962  1.39  christos 			break;
    963  1.39  christos 		case '[':
    964  1.39  christos 			p_bracket(p);
    965  1.39  christos 			break;
    966  1.39  christos 		case BACKSL|'<':
    967  1.39  christos 			EMIT(OBOW, 0);
    968  1.39  christos 			break;
    969  1.39  christos 		case BACKSL|'>':
    970  1.39  christos 			EMIT(OEOW, 0);
    971  1.39  christos 			break;
    972  1.39  christos 		case BACKSL|'{':
    973  1.39  christos 			SETERROR(REG_BADRPT);
    974  1.39  christos 			break;
    975  1.39  christos 		case BACKSL|'(':
    976  1.39  christos 			p->g->nsub++;
    977  1.40  christos 			subno = (sopno)p->g->nsub;
    978  1.39  christos 			if (subno < NPAREN)
    979  1.39  christos 				p->pbegin[subno] = HERE();
    980  1.39  christos 			EMIT(OLPAREN, subno);
    981  1.39  christos 			/* the MORE here is an error heuristic */
    982  1.39  christos 			if (MORE() && !SEETWO('\\', ')'))
    983  1.39  christos 				p_re(p, '\\', ')');
    984  1.39  christos 			if (subno < NPAREN) {
    985  1.39  christos 				p->pend[subno] = HERE();
    986  1.39  christos 				assert(p->pend[subno] != 0);
    987  1.39  christos 			}
    988  1.39  christos 			EMIT(ORPAREN, subno);
    989  1.39  christos 			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
    990  1.39  christos 			break;
    991  1.39  christos 		case BACKSL|')':	/* should not get here -- must be user */
    992  1.39  christos 			SETERROR(REG_EPAREN);
    993  1.39  christos 			break;
    994  1.39  christos 		case BACKSL|'1':
    995  1.39  christos 		case BACKSL|'2':
    996  1.39  christos 		case BACKSL|'3':
    997  1.39  christos 		case BACKSL|'4':
    998  1.39  christos 		case BACKSL|'5':
    999  1.39  christos 		case BACKSL|'6':
   1000  1.39  christos 		case BACKSL|'7':
   1001  1.39  christos 		case BACKSL|'8':
   1002  1.39  christos 		case BACKSL|'9':
   1003  1.39  christos 			i = (c&~BACKSL) - '0';
   1004  1.39  christos 			assert(i < NPAREN);
   1005  1.39  christos 			if (p->pend[i] != 0) {
   1006  1.39  christos 				assert(i <= p->g->nsub);
   1007  1.39  christos 				EMIT(OBACK_, i);
   1008  1.39  christos 				assert(p->pbegin[i] != 0);
   1009  1.39  christos 				assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
   1010  1.39  christos 				assert(OP(p->strip[p->pend[i]]) == ORPAREN);
   1011  1.39  christos 				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
   1012  1.39  christos 				EMIT(O_BACK, i);
   1013  1.39  christos 			} else
   1014  1.39  christos 				SETERROR(REG_ESUBREG);
   1015  1.39  christos 			p->g->backrefs = 1;
   1016  1.39  christos 			break;
   1017  1.39  christos 		case '*':
   1018  1.39  christos 			/*
   1019  1.39  christos 			 * Ordinary if used as the first character beyond BOL anchor of
   1020  1.39  christos 			 * a (sub-)expression, counts as a bad repetition operator if it
   1021  1.39  christos 			 * appears otherwise.
   1022  1.39  christos 			 */
   1023  1.39  christos 			(void)REQUIRE(bc->nchain == 0, REG_BADRPT);
   1024  1.39  christos 			/* FALLTHROUGH */
   1025  1.39  christos 		default:
   1026  1.39  christos 			if (p->error != 0)
   1027  1.39  christos 				return (false);	/* Definitely not $... */
   1028  1.39  christos 			p->next--;
   1029  1.39  christos 			wc = WGETNEXT();
   1030  1.39  christos 			if ((c & BACKSL) == 0 || may_escape(p, wc))
   1031  1.39  christos 				ordinary(p, wc);
   1032  1.39  christos 			else
   1033  1.39  christos 				SETERROR(REG_EESCAPE);
   1034  1.39  christos 			break;
   1035   1.1       jtc 		}
   1036   1.1       jtc 	}
   1037   1.1       jtc 
   1038   1.1       jtc 	if (EAT('*')) {		/* implemented as +? */
   1039   1.4       jtc 		/* this case does not require the (y|) trick, noKLUDGE */
   1040   1.1       jtc 		INSERT(OPLUS_, pos);
   1041   1.1       jtc 		ASTERN(O_PLUS, pos);
   1042   1.1       jtc 		INSERT(OQUEST_, pos);
   1043   1.1       jtc 		ASTERN(O_QUEST, pos);
   1044  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
   1045  1.39  christos 	} else if (p->gnuext && EATTWO('\\', '?')) {
   1046  1.39  christos 		INSERT(OQUEST_, pos);
   1047  1.39  christos 		ASTERN(O_QUEST, pos);
   1048  1.39  christos 	} else if (p->gnuext && EATTWO('\\', '+')) {
   1049  1.39  christos 		INSERT(OPLUS_, pos);
   1050  1.39  christos 		ASTERN(O_PLUS, pos);
   1051  1.39  christos #endif
   1052   1.1       jtc 	} else if (EATTWO('\\', '{')) {
   1053   1.1       jtc 		count = p_count(p);
   1054   1.1       jtc 		if (EAT(',')) {
   1055  1.39  christos 			if (MORE() && isdigit((uch)PEEK())) {
   1056   1.1       jtc 				count2 = p_count(p);
   1057  1.39  christos 				(void)REQUIRE(count <= count2, REG_BADBR);
   1058   1.1       jtc 			} else		/* single number with comma */
   1059   1.1       jtc 				count2 = INFINITY;
   1060   1.1       jtc 		} else		/* just a single number */
   1061   1.1       jtc 			count2 = count;
   1062  1.39  christos 		repeat(p, pos, count, count2);
   1063   1.1       jtc 		if (!EATTWO('\\', '}')) {	/* error heuristics */
   1064   1.1       jtc 			while (MORE() && !SEETWO('\\', '}'))
   1065   1.1       jtc 				NEXT();
   1066  1.39  christos 			(void)REQUIRE(MORE(), REG_EBRACE);
   1067   1.1       jtc 			SETERROR(REG_BADBR);
   1068   1.1       jtc 		}
   1069  1.39  christos 	} else if (c == '$')     /* $ (but not \$) ends it */
   1070  1.39  christos 		return (true);
   1071   1.1       jtc 
   1072  1.39  christos 	return (false);
   1073   1.1       jtc }
   1074   1.1       jtc 
   1075   1.1       jtc /*
   1076   1.1       jtc  - p_count - parse a repetition count
   1077   1.9     perry  == static int p_count(struct parse *p);
   1078   1.1       jtc  */
   1079   1.1       jtc static int			/* the value */
   1080  1.39  christos p_count(struct parse *p)
   1081   1.1       jtc {
   1082   1.9     perry 	int count = 0;
   1083   1.9     perry 	int ndigits = 0;
   1084   1.1       jtc 
   1085  1.39  christos 	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
   1086  1.48  christos 		count = count*10 + ((uch)GETNEXT() - '0');
   1087   1.1       jtc 		ndigits++;
   1088   1.1       jtc 	}
   1089   1.1       jtc 
   1090  1.39  christos 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
   1091   1.1       jtc 	return(count);
   1092   1.1       jtc }
   1093   1.1       jtc 
   1094   1.1       jtc /*
   1095   1.1       jtc  - p_bracket - parse a bracketed character list
   1096   1.9     perry  == static void p_bracket(struct parse *p);
   1097   1.1       jtc  */
   1098   1.1       jtc static void
   1099  1.39  christos p_bracket(struct parse *p)
   1100   1.1       jtc {
   1101  1.14     lukem 	cset *cs;
   1102  1.39  christos 	wint_t ch;
   1103  1.14     lukem 
   1104   1.1       jtc 	/* Dept of Truly Sickening Special-Case Kludges */
   1105  1.39  christos 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
   1106   1.1       jtc 		EMIT(OBOW, 0);
   1107   1.1       jtc 		NEXTn(6);
   1108   1.1       jtc 		return;
   1109   1.1       jtc 	}
   1110  1.39  christos 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
   1111   1.1       jtc 		EMIT(OEOW, 0);
   1112   1.1       jtc 		NEXTn(6);
   1113   1.1       jtc 		return;
   1114   1.1       jtc 	}
   1115   1.1       jtc 
   1116  1.39  christos 	if ((cs = allocset(p)) == NULL)
   1117  1.39  christos 		return;
   1118  1.39  christos 
   1119  1.39  christos 	if (p->g->cflags&REG_ICASE)
   1120  1.39  christos 		cs->icase = 1;
   1121   1.1       jtc 	if (EAT('^'))
   1122  1.39  christos 		cs->invert = 1;
   1123   1.1       jtc 	if (EAT(']'))
   1124  1.39  christos 		CHadd(p, cs, ']');
   1125   1.1       jtc 	else if (EAT('-'))
   1126  1.39  christos 		CHadd(p, cs, '-');
   1127   1.1       jtc 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
   1128   1.1       jtc 		p_b_term(p, cs);
   1129   1.1       jtc 	if (EAT('-'))
   1130  1.39  christos 		CHadd(p, cs, '-');
   1131  1.39  christos 	(void)MUSTEAT(']', REG_EBRACK);
   1132   1.1       jtc 
   1133   1.1       jtc 	if (p->error != 0)	/* don't mess things up further */
   1134   1.1       jtc 		return;
   1135   1.1       jtc 
   1136  1.39  christos 	if (cs->invert && p->g->cflags&REG_NEWLINE)
   1137  1.39  christos 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
   1138   1.1       jtc 
   1139  1.39  christos 	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
   1140  1.39  christos 		ordinary(p, ch);
   1141  1.39  christos 		freeset(p, cs);
   1142  1.39  christos 	} else
   1143  1.40  christos 		EMIT(OANYOF, (size_t)(cs - p->g->sets));
   1144  1.39  christos }
   1145   1.1       jtc 
   1146  1.39  christos static int
   1147  1.39  christos p_range_cmp(wchar_t c1, wchar_t c2)
   1148  1.39  christos {
   1149  1.39  christos #ifdef REGEX_LIBC_COLLATE
   1150  1.39  christos 	return __wcollate_range_cmp(c1, c2);
   1151  1.42  christos #elif defined(NLS)
   1152  1.39  christos 	/* Copied from libc/collate __wcollate_range_cmp */
   1153  1.39  christos 	wchar_t s1[2], s2[2];
   1154   1.1       jtc 
   1155  1.39  christos 	s1[0] = c1;
   1156  1.39  christos 	s1[1] = L'\0';
   1157  1.39  christos 	s2[0] = c2;
   1158  1.39  christos 	s2[1] = L'\0';
   1159  1.42  christos 	return wcscoll(s1, s2);
   1160  1.42  christos #else
   1161  1.42  christos 	char s1[2], s2[2];
   1162  1.42  christos 
   1163  1.42  christos 	s1[0] = (char)c1;
   1164  1.42  christos 	s1[1] = '\0';
   1165  1.42  christos 	s2[0] = (char)c2;
   1166  1.42  christos 	s2[1] = '\0';
   1167  1.42  christos 	return strcoll(s1, s2);
   1168  1.39  christos #endif
   1169   1.1       jtc }
   1170   1.1       jtc 
   1171   1.1       jtc /*
   1172   1.1       jtc  - p_b_term - parse one term of a bracketed character list
   1173   1.9     perry  == static void p_b_term(struct parse *p, cset *cs);
   1174   1.1       jtc  */
   1175   1.1       jtc static void
   1176  1.39  christos p_b_term(struct parse *p, cset *cs)
   1177   1.1       jtc {
   1178   1.9     perry 	char c;
   1179  1.39  christos 	wint_t start, finish;
   1180  1.39  christos 	wint_t i;
   1181  1.39  christos #ifdef REGEX_LIBC_COLLATE
   1182  1.39  christos 	struct xlocale_collate *table =
   1183  1.39  christos 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
   1184  1.39  christos #endif
   1185   1.1       jtc 
   1186  1.14     lukem 	_DIAGASSERT(p != NULL);
   1187  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1188  1.14     lukem 
   1189   1.1       jtc 	/* classify what we've got */
   1190   1.1       jtc 	switch ((MORE()) ? PEEK() : '\0') {
   1191   1.1       jtc 	case '[':
   1192   1.1       jtc 		c = (MORE2()) ? PEEK2() : '\0';
   1193   1.1       jtc 		break;
   1194   1.1       jtc 	case '-':
   1195   1.1       jtc 		SETERROR(REG_ERANGE);
   1196   1.1       jtc 		return;			/* NOTE RETURN */
   1197   1.1       jtc 	default:
   1198   1.1       jtc 		c = '\0';
   1199   1.1       jtc 		break;
   1200   1.1       jtc 	}
   1201   1.1       jtc 
   1202   1.1       jtc 	switch (c) {
   1203   1.1       jtc 	case ':':		/* character class */
   1204   1.1       jtc 		NEXT2();
   1205  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1206   1.1       jtc 		c = PEEK();
   1207  1.39  christos 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
   1208   1.1       jtc 		p_b_cclass(p, cs);
   1209  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1210  1.39  christos 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
   1211   1.1       jtc 		break;
   1212   1.1       jtc 	case '=':		/* equivalence class */
   1213   1.1       jtc 		NEXT2();
   1214  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1215   1.1       jtc 		c = PEEK();
   1216  1.39  christos 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
   1217   1.1       jtc 		p_b_eclass(p, cs);
   1218  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1219  1.39  christos 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
   1220   1.1       jtc 		break;
   1221   1.1       jtc 	default:		/* symbol, ordinary character, or range */
   1222   1.1       jtc 		start = p_b_symbol(p);
   1223   1.1       jtc 		if (SEE('-') && MORE2() && PEEK2() != ']') {
   1224   1.1       jtc 			/* range */
   1225   1.1       jtc 			NEXT();
   1226   1.1       jtc 			if (EAT('-'))
   1227   1.1       jtc 				finish = '-';
   1228   1.1       jtc 			else
   1229   1.1       jtc 				finish = p_b_symbol(p);
   1230   1.1       jtc 		} else
   1231   1.1       jtc 			finish = start;
   1232  1.39  christos 		if (start == finish)
   1233  1.39  christos 			CHadd(p, cs, start);
   1234  1.39  christos 		else {
   1235  1.39  christos #ifdef REGEX_LIBC_COLLATE
   1236  1.39  christos 			if (table->__collate_load_error || MB_CUR_MAX > 1) {
   1237  1.39  christos #else
   1238  1.39  christos 			if (MB_CUR_MAX > 1) {
   1239  1.39  christos #endif
   1240  1.39  christos 				(void)REQUIRE(start <= finish, REG_ERANGE);
   1241  1.39  christos 				CHaddrange(p, cs, start, finish);
   1242  1.39  christos 			} else {
   1243  1.39  christos 				(void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE);
   1244  1.39  christos 				for (i = 0; i <= UCHAR_MAX; i++) {
   1245  1.39  christos 					if (p_range_cmp(start, i) <= 0 &&
   1246  1.39  christos 					    p_range_cmp(i, finish) <= 0 )
   1247  1.39  christos 						CHadd(p, cs, i);
   1248  1.39  christos 				}
   1249  1.39  christos 			}
   1250  1.39  christos 		}
   1251  1.39  christos 		break;
   1252  1.39  christos 	}
   1253  1.39  christos }
   1254  1.39  christos 
   1255  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
   1256  1.39  christos /*
   1257  1.39  christos  - p_b_pseudoclass - parse a pseudo-class (\w, \W, \s, \S)
   1258  1.39  christos  == static int p_b_pseudoclass(struct parse *p, char c)
   1259  1.39  christos  */
   1260  1.39  christos static int
   1261  1.39  christos p_b_pseudoclass(struct parse *p, char c) {
   1262  1.39  christos 	cset *cs;
   1263  1.39  christos 
   1264  1.39  christos 	if ((cs = allocset(p)) == NULL)
   1265  1.39  christos 		return(0);
   1266  1.39  christos 
   1267  1.39  christos 	if (p->g->cflags&REG_ICASE)
   1268  1.39  christos 		cs->icase = 1;
   1269  1.39  christos 
   1270  1.39  christos 	switch (c) {
   1271  1.39  christos 	case 'W':
   1272  1.39  christos 		cs->invert = 1;
   1273  1.39  christos 		/* FALLTHROUGH */
   1274  1.39  christos 	case 'w':
   1275  1.39  christos 		p_b_cclass_named(p, cs, "alnum");
   1276   1.1       jtc 		break;
   1277  1.39  christos 	case 'S':
   1278  1.39  christos 		cs->invert = 1;
   1279  1.39  christos 		/* FALLTHROUGH */
   1280  1.39  christos 	case 's':
   1281  1.39  christos 		p_b_cclass_named(p, cs, "space");
   1282  1.39  christos 		break;
   1283  1.39  christos 	default:
   1284  1.39  christos 		return(0);
   1285   1.1       jtc 	}
   1286  1.39  christos 
   1287  1.40  christos 	EMIT(OANYOF, (size_t)(cs - p->g->sets));
   1288  1.39  christos 	return(1);
   1289   1.1       jtc }
   1290  1.39  christos #endif
   1291   1.1       jtc 
   1292   1.1       jtc /*
   1293   1.1       jtc  - p_b_cclass - parse a character-class name and deal with it
   1294   1.9     perry  == static void p_b_cclass(struct parse *p, cset *cs);
   1295   1.1       jtc  */
   1296   1.1       jtc static void
   1297  1.39  christos p_b_cclass(struct parse *p, cset *cs)
   1298   1.1       jtc {
   1299  1.39  christos 	const char *sp = p->next;
   1300   1.9     perry 	size_t len;
   1301  1.39  christos 	char clname[16];
   1302   1.1       jtc 
   1303  1.39  christos 	while (MORE() && isalpha((uch)PEEK()))
   1304   1.1       jtc 		NEXT();
   1305   1.1       jtc 	len = p->next - sp;
   1306  1.39  christos 	if (len >= sizeof(clname) - 1) {
   1307   1.1       jtc 		SETERROR(REG_ECTYPE);
   1308   1.1       jtc 		return;
   1309   1.1       jtc 	}
   1310  1.39  christos 	memcpy(clname, sp, len);
   1311  1.39  christos 	clname[len] = '\0';
   1312  1.39  christos 
   1313  1.39  christos 	p_b_cclass_named(p, cs, clname);
   1314  1.39  christos }
   1315  1.42  christos 
   1316  1.39  christos /*
   1317  1.39  christos  - p_b_cclass_named - deal with a named character class
   1318  1.39  christos  == static void p_b_cclass_named(struct parse *p, cset *cs, const char []);
   1319  1.39  christos  */
   1320  1.39  christos static void
   1321  1.39  christos p_b_cclass_named(struct parse *p, cset *cs, const char clname[]) {
   1322  1.39  christos 	wctype_t wct;
   1323   1.1       jtc 
   1324  1.39  christos 	if ((wct = wctype(clname)) == 0) {
   1325  1.39  christos 		SETERROR(REG_ECTYPE);
   1326  1.39  christos 		return;
   1327  1.39  christos 	}
   1328  1.39  christos 	CHaddtype(p, cs, wct);
   1329   1.1       jtc }
   1330   1.1       jtc 
   1331   1.1       jtc /*
   1332   1.1       jtc  - p_b_eclass - parse an equivalence-class name and deal with it
   1333   1.9     perry  == static void p_b_eclass(struct parse *p, cset *cs);
   1334   1.1       jtc  *
   1335   1.1       jtc  * This implementation is incomplete. xxx
   1336   1.1       jtc  */
   1337   1.1       jtc static void
   1338  1.39  christos p_b_eclass(struct parse *p, cset *cs)
   1339   1.1       jtc {
   1340  1.39  christos 	wint_t c;
   1341   1.1       jtc 
   1342  1.14     lukem 	_DIAGASSERT(p != NULL);
   1343  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1344  1.14     lukem 
   1345   1.1       jtc 	c = p_b_coll_elem(p, '=');
   1346  1.39  christos 	CHadd(p, cs, c);
   1347   1.1       jtc }
   1348   1.1       jtc 
   1349   1.1       jtc /*
   1350   1.1       jtc  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
   1351  1.39  christos  == static wint_t p_b_symbol(struct parse *p);
   1352   1.1       jtc  */
   1353  1.39  christos static wint_t			/* value of symbol */
   1354  1.39  christos p_b_symbol(struct parse *p)
   1355   1.1       jtc {
   1356  1.39  christos 	wint_t value;
   1357   1.1       jtc 
   1358  1.14     lukem 	_DIAGASSERT(p != NULL);
   1359  1.14     lukem 
   1360  1.39  christos 	(void)REQUIRE(MORE(), REG_EBRACK);
   1361   1.1       jtc 	if (!EATTWO('[', '.'))
   1362  1.39  christos 		return(WGETNEXT());
   1363   1.1       jtc 
   1364   1.1       jtc 	/* collating symbol */
   1365   1.1       jtc 	value = p_b_coll_elem(p, '.');
   1366  1.39  christos 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
   1367   1.1       jtc 	return(value);
   1368   1.1       jtc }
   1369   1.1       jtc 
   1370   1.1       jtc /*
   1371   1.1       jtc  - p_b_coll_elem - parse a collating-element name and look it up
   1372  1.39  christos  == static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
   1373   1.1       jtc  */
   1374  1.39  christos static wint_t			/* value of collating element */
   1375  1.39  christos p_b_coll_elem(struct parse *p,
   1376  1.39  christos 	wint_t endc)		/* name ended by endc,']' */
   1377  1.39  christos {
   1378  1.39  christos 	const char *sp = p->next;
   1379  1.39  christos 	struct cname *cp;
   1380  1.42  christos 	size_t len;
   1381   1.1       jtc 
   1382  1.14     lukem 	_DIAGASSERT(p != NULL);
   1383  1.14     lukem 
   1384   1.1       jtc 	while (MORE() && !SEETWO(endc, ']'))
   1385   1.1       jtc 		NEXT();
   1386   1.1       jtc 	if (!MORE()) {
   1387   1.1       jtc 		SETERROR(REG_EBRACK);
   1388   1.1       jtc 		return(0);
   1389   1.1       jtc 	}
   1390   1.1       jtc 	len = p->next - sp;
   1391   1.1       jtc 	for (cp = cnames; cp->name != NULL; cp++)
   1392  1.37  christos 		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
   1393   1.1       jtc 			return(cp->code);	/* known name */
   1394  1.42  christos #ifdef NLS
   1395  1.42  christos 	mbstate_t mbs;
   1396  1.42  christos 	wchar_t wc;
   1397  1.42  christos 	size_t clen;
   1398  1.42  christos 
   1399  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   1400  1.39  christos 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
   1401  1.39  christos 		return (wc);			/* single character */
   1402  1.39  christos 	else if (clen == (size_t)-1 || clen == (size_t)-2)
   1403  1.39  christos 		SETERROR(REG_ILLSEQ);
   1404  1.39  christos 	else
   1405  1.39  christos 		SETERROR(REG_ECOLLATE);		/* neither */
   1406   1.1       jtc 	return(0);
   1407  1.42  christos #else
   1408  1.42  christos 	if (len == 1)
   1409  1.42  christos 		return *sp;    /* single character */
   1410  1.42  christos 	SETERROR(REG_ECOLLATE);                 /* neither */
   1411  1.42  christos 	return 0;
   1412  1.42  christos #endif
   1413   1.1       jtc }
   1414   1.1       jtc 
   1415   1.1       jtc /*
   1416  1.39  christos  - may_escape - determine whether 'ch' is escape-able in the current context
   1417  1.39  christos  == static int may_escape(struct parse *p, const wint_t ch)
   1418  1.39  christos  */
   1419  1.39  christos static bool
   1420  1.39  christos may_escape(struct parse *p, const wint_t ch)
   1421  1.39  christos {
   1422  1.39  christos 
   1423  1.39  christos 	if ((p->pflags & PFLAG_LEGACY_ESC) != 0)
   1424  1.39  christos 		return (true);
   1425  1.48  christos 	if (iswalpha(ch) || ch == '\'' || ch == '`')
   1426  1.39  christos 		return (false);
   1427  1.39  christos 	return (true);
   1428  1.39  christos #ifdef NOTYET
   1429  1.39  christos 	/*
   1430  1.39  christos 	 * Build a whitelist of characters that may be escaped to produce an
   1431  1.39  christos 	 * ordinary in the current context. This assumes that these have not
   1432  1.39  christos 	 * been otherwise interpreted as a special character. Escaping an
   1433  1.39  christos 	 * ordinary character yields undefined results according to
   1434  1.39  christos 	 * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
   1435  1.39  christos 	 * advantage of this and use escaped ordinary characters to provide
   1436  1.39  christos 	 * special meaning, e.g. \b, \B, \w, \W, \s, \S.
   1437  1.39  christos 	 */
   1438  1.39  christos 	switch(ch) {
   1439  1.39  christos 	case '|':
   1440  1.39  christos 	case '+':
   1441  1.39  christos 	case '?':
   1442  1.39  christos 		/* The above characters may not be escaped in BREs */
   1443  1.39  christos 		if (!(p->g->cflags&REG_EXTENDED))
   1444  1.39  christos 			return (false);
   1445  1.39  christos 		/* Fallthrough */
   1446  1.39  christos 	case '(':
   1447  1.39  christos 	case ')':
   1448  1.39  christos 	case '{':
   1449  1.39  christos 	case '}':
   1450  1.39  christos 	case '.':
   1451  1.39  christos 	case '[':
   1452  1.39  christos 	case ']':
   1453  1.39  christos 	case '\\':
   1454  1.39  christos 	case '*':
   1455  1.39  christos 	case '^':
   1456  1.39  christos 	case '$':
   1457  1.39  christos 		return (true);
   1458  1.39  christos 	default:
   1459  1.39  christos 		return (false);
   1460  1.39  christos 	}
   1461  1.39  christos #endif
   1462  1.39  christos }
   1463  1.39  christos 
   1464  1.39  christos /*
   1465   1.1       jtc  - othercase - return the case counterpart of an alphabetic
   1466  1.39  christos  == static wint_t othercase(wint_t ch);
   1467   1.1       jtc  */
   1468  1.39  christos static wint_t			/* if no counterpart, return ch */
   1469  1.39  christos othercase(wint_t ch)
   1470  1.39  christos {
   1471  1.39  christos 	assert(iswalpha(ch));
   1472  1.39  christos 	if (iswupper(ch))
   1473  1.39  christos 		return(towlower(ch));
   1474  1.39  christos 	else if (iswlower(ch))
   1475  1.39  christos 		return(towupper(ch));
   1476   1.1       jtc 	else			/* peculiar, but could happen */
   1477   1.1       jtc 		return(ch);
   1478   1.1       jtc }
   1479   1.1       jtc 
   1480   1.1       jtc /*
   1481   1.1       jtc  - bothcases - emit a dualcase version of a two-case character
   1482  1.39  christos  == static void bothcases(struct parse *p, wint_t ch);
   1483   1.1       jtc  *
   1484   1.1       jtc  * Boy, is this implementation ever a kludge...
   1485   1.1       jtc  */
   1486   1.1       jtc static void
   1487  1.39  christos bothcases(struct parse *p, wint_t ch)
   1488   1.1       jtc {
   1489  1.39  christos 	const char *oldnext = p->next;
   1490  1.39  christos 	const char *oldend = p->end;
   1491  1.39  christos 	char bracket[3 + MB_LEN_MAX];
   1492  1.39  christos 	size_t n;
   1493   1.1       jtc 
   1494  1.14     lukem 	_DIAGASSERT(p != NULL);
   1495  1.14     lukem 
   1496   1.1       jtc 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
   1497   1.1       jtc 	p->next = bracket;
   1498  1.42  christos #ifdef NLS
   1499  1.42  christos 	mbstate_t mbs;
   1500  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   1501  1.39  christos 	n = wcrtomb(bracket, ch, &mbs);
   1502  1.39  christos 	assert(n != (size_t)-1);
   1503  1.42  christos #else
   1504  1.42  christos 	n = 0;
   1505  1.42  christos 	bracket[n++] = ch;
   1506  1.42  christos #endif
   1507  1.39  christos 	bracket[n] = ']';
   1508  1.39  christos 	bracket[n + 1] = '\0';
   1509  1.39  christos 	p->end = bracket+n+1;
   1510   1.1       jtc 	p_bracket(p);
   1511  1.39  christos 	assert(p->next == p->end);
   1512   1.1       jtc 	p->next = oldnext;
   1513   1.1       jtc 	p->end = oldend;
   1514   1.1       jtc }
   1515   1.1       jtc 
   1516   1.1       jtc /*
   1517   1.1       jtc  - ordinary - emit an ordinary character
   1518  1.39  christos  == static void ordinary(struct parse *p, wint_t ch);
   1519   1.1       jtc  */
   1520   1.1       jtc static void
   1521  1.39  christos ordinary(struct parse *p, wint_t ch)
   1522   1.1       jtc {
   1523  1.39  christos 	cset *cs;
   1524   1.1       jtc 
   1525  1.14     lukem 	_DIAGASSERT(p != NULL);
   1526  1.14     lukem 
   1527  1.39  christos 	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
   1528  1.39  christos 		bothcases(p, ch);
   1529  1.40  christos 	else if ((wint_t)(ch & OPDMASK) == ch)
   1530  1.40  christos 		EMIT(OCHAR, (size_t)ch);
   1531   1.1       jtc 	else {
   1532  1.39  christos 		/*
   1533  1.39  christos 		 * Kludge: character is too big to fit into an OCHAR operand.
   1534  1.39  christos 		 * Emit a singleton set.
   1535  1.39  christos 		 */
   1536  1.39  christos 		if ((cs = allocset(p)) == NULL)
   1537  1.39  christos 			return;
   1538  1.39  christos 		CHadd(p, cs, ch);
   1539  1.40  christos 		EMIT(OANYOF, (size_t)(cs - p->g->sets));
   1540   1.1       jtc 	}
   1541   1.1       jtc }
   1542   1.1       jtc 
   1543   1.1       jtc /*
   1544   1.1       jtc  - nonnewline - emit REG_NEWLINE version of OANY
   1545   1.9     perry  == static void nonnewline(struct parse *p);
   1546   1.1       jtc  *
   1547   1.1       jtc  * Boy, is this implementation ever a kludge...
   1548   1.1       jtc  */
   1549   1.1       jtc static void
   1550  1.39  christos nonnewline(struct parse *p)
   1551   1.1       jtc {
   1552  1.39  christos 	const char *oldnext = p->next;
   1553  1.39  christos 	const char *oldend = p->end;
   1554   1.1       jtc 	char bracket[4];
   1555   1.1       jtc 
   1556  1.14     lukem 	_DIAGASSERT(p != NULL);
   1557  1.14     lukem 
   1558   1.1       jtc 	p->next = bracket;
   1559   1.1       jtc 	p->end = bracket+3;
   1560   1.1       jtc 	bracket[0] = '^';
   1561   1.1       jtc 	bracket[1] = '\n';
   1562   1.1       jtc 	bracket[2] = ']';
   1563   1.1       jtc 	bracket[3] = '\0';
   1564   1.1       jtc 	p_bracket(p);
   1565   1.1       jtc 	assert(p->next == bracket+3);
   1566   1.1       jtc 	p->next = oldnext;
   1567   1.1       jtc 	p->end = oldend;
   1568   1.1       jtc }
   1569   1.1       jtc 
   1570   1.1       jtc /*
   1571   1.1       jtc  - repeat - generate code for a bounded repetition, recursively if needed
   1572  1.39  christos  == static void repeat(struct parse *p, sopno start, int from, int to);
   1573   1.1       jtc  */
   1574   1.1       jtc static void
   1575  1.39  christos repeat(struct parse *p,
   1576  1.39  christos 	sopno start,		/* operand from here to end of strip */
   1577  1.39  christos 	int from,		/* repeated from this number */
   1578  1.39  christos 	int to)			/* to this number of times (maybe INFINITY) */
   1579   1.1       jtc {
   1580  1.39  christos 	sopno finish = HERE();
   1581   1.1       jtc #	define	N	2
   1582   1.1       jtc #	define	INF	3
   1583   1.1       jtc #	define	REP(f, t)	((f)*8 + (t))
   1584   1.1       jtc #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
   1585   1.9     perry 	sopno copy;
   1586   1.1       jtc 
   1587  1.14     lukem 	_DIAGASSERT(p != NULL);
   1588  1.14     lukem 
   1589  1.39  christos 	if (p->error != 0)	/* head off possible runaway recursion */
   1590  1.30  christos 		return;
   1591  1.30  christos 
   1592   1.1       jtc 	assert(from <= to);
   1593   1.1       jtc 
   1594   1.1       jtc 	switch (REP(MAP(from), MAP(to))) {
   1595   1.1       jtc 	case REP(0, 0):			/* must be user doing this */
   1596   1.1       jtc 		DROP(finish-start);	/* drop the operand */
   1597   1.1       jtc 		break;
   1598   1.1       jtc 	case REP(0, 1):			/* as x{1,1}? */
   1599   1.1       jtc 	case REP(0, N):			/* as x{1,n}? */
   1600   1.1       jtc 	case REP(0, INF):		/* as x{1,}? */
   1601   1.4       jtc 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
   1602   1.4       jtc 		INSERT(OCH_, start);		/* offset is wrong... */
   1603  1.39  christos 		repeat(p, start+1, 1, to);
   1604   1.4       jtc 		ASTERN(OOR1, start);
   1605   1.1       jtc 		AHEAD(start);			/* ... fix it */
   1606   1.4       jtc 		EMIT(OOR2, 0);
   1607   1.4       jtc 		AHEAD(THERE());
   1608   1.4       jtc 		ASTERN(O_CH, THERETHERE());
   1609   1.1       jtc 		break;
   1610   1.1       jtc 	case REP(1, 1):			/* trivial case */
   1611   1.1       jtc 		/* done */
   1612   1.1       jtc 		break;
   1613   1.1       jtc 	case REP(1, N):			/* as x?x{1,n-1} */
   1614   1.4       jtc 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
   1615   1.4       jtc 		INSERT(OCH_, start);
   1616   1.4       jtc 		ASTERN(OOR1, start);
   1617   1.4       jtc 		AHEAD(start);
   1618   1.4       jtc 		EMIT(OOR2, 0);			/* offset very wrong... */
   1619   1.4       jtc 		AHEAD(THERE());			/* ...so fix it */
   1620   1.4       jtc 		ASTERN(O_CH, THERETHERE());
   1621   1.1       jtc 		copy = dupl(p, start+1, finish+1);
   1622   1.4       jtc 		assert(copy == finish+4);
   1623  1.39  christos 		repeat(p, copy, 1, to-1);
   1624   1.1       jtc 		break;
   1625   1.1       jtc 	case REP(1, INF):		/* as x+ */
   1626   1.1       jtc 		INSERT(OPLUS_, start);
   1627   1.1       jtc 		ASTERN(O_PLUS, start);
   1628   1.1       jtc 		break;
   1629   1.1       jtc 	case REP(N, N):			/* as xx{m-1,n-1} */
   1630   1.1       jtc 		copy = dupl(p, start, finish);
   1631  1.39  christos 		repeat(p, copy, from-1, to-1);
   1632   1.1       jtc 		break;
   1633   1.1       jtc 	case REP(N, INF):		/* as xx{n-1,INF} */
   1634   1.1       jtc 		copy = dupl(p, start, finish);
   1635  1.39  christos 		repeat(p, copy, from-1, to);
   1636   1.1       jtc 		break;
   1637   1.1       jtc 	default:			/* "can't happen" */
   1638   1.1       jtc 		SETERROR(REG_ASSERT);	/* just in case */
   1639   1.1       jtc 		break;
   1640   1.1       jtc 	}
   1641   1.1       jtc }
   1642   1.1       jtc 
   1643   1.1       jtc /*
   1644  1.39  christos  - wgetnext - helper function for WGETNEXT() macro. Gets the next wide
   1645  1.39  christos  - character from the parse struct, signals a REG_ILLSEQ error if the
   1646  1.39  christos  - character can't be converted. Returns the number of bytes consumed.
   1647  1.39  christos  */
   1648  1.39  christos static wint_t
   1649  1.39  christos wgetnext(struct parse *p)
   1650  1.39  christos {
   1651  1.42  christos #ifdef NLS
   1652  1.39  christos 	mbstate_t mbs;
   1653  1.39  christos 	wchar_t wc;
   1654  1.39  christos 	size_t n;
   1655  1.39  christos 
   1656  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   1657  1.40  christos 	n = mbrtowc(&wc, p->next, (size_t)(p->end - p->next), &mbs);
   1658  1.39  christos 	if (n == (size_t)-1 || n == (size_t)-2) {
   1659  1.39  christos 		SETERROR(REG_ILLSEQ);
   1660  1.39  christos 		return (0);
   1661  1.39  christos 	}
   1662  1.39  christos 	if (n == 0)
   1663  1.39  christos 		n = 1;
   1664  1.39  christos 	p->next += n;
   1665  1.42  christos 	return wc;
   1666  1.42  christos #else
   1667  1.42  christos 	return *p->next++;
   1668  1.42  christos #endif
   1669  1.39  christos }
   1670  1.39  christos 
   1671  1.39  christos /*
   1672   1.1       jtc  - seterr - set an error condition
   1673   1.9     perry  == static int seterr(struct parse *p, int e);
   1674   1.1       jtc  */
   1675   1.1       jtc static int			/* useless but makes type checking happy */
   1676  1.39  christos seterr(struct parse *p, int e)
   1677   1.1       jtc {
   1678  1.14     lukem 
   1679  1.14     lukem 	_DIAGASSERT(p != NULL);
   1680  1.14     lukem 
   1681   1.1       jtc 	if (p->error == 0)	/* keep earliest error condition */
   1682   1.1       jtc 		p->error = e;
   1683   1.1       jtc 	p->next = nuls;		/* try to bring things to a halt */
   1684   1.1       jtc 	p->end = nuls;
   1685   1.1       jtc 	return(0);		/* make the return value well-defined */
   1686   1.1       jtc }
   1687   1.1       jtc 
   1688   1.1       jtc /*
   1689   1.1       jtc  - allocset - allocate a set of characters for []
   1690   1.9     perry  == static cset *allocset(struct parse *p);
   1691   1.1       jtc  */
   1692   1.1       jtc static cset *
   1693  1.39  christos allocset(struct parse *p)
   1694   1.1       jtc {
   1695  1.39  christos 	cset *cs, *ncs;
   1696   1.1       jtc 
   1697  1.14     lukem 	_DIAGASSERT(p != NULL);
   1698  1.14     lukem 
   1699  1.39  christos 	ncs = reallocarray(p->g->sets, p->g->ncsets + 1, sizeof(*ncs));
   1700  1.39  christos 	if (ncs == NULL) {
   1701  1.39  christos 		SETERROR(REG_ESPACE);
   1702  1.39  christos 		return (NULL);
   1703   1.1       jtc 	}
   1704  1.39  christos 	p->g->sets = ncs;
   1705  1.39  christos 	cs = &p->g->sets[p->g->ncsets++];
   1706  1.39  christos 	memset(cs, 0, sizeof(*cs));
   1707   1.1       jtc 
   1708   1.1       jtc 	return(cs);
   1709   1.1       jtc }
   1710   1.1       jtc 
   1711   1.1       jtc /*
   1712   1.1       jtc  - freeset - free a now-unused set
   1713   1.9     perry  == static void freeset(struct parse *p, cset *cs);
   1714   1.1       jtc  */
   1715   1.1       jtc static void
   1716  1.39  christos freeset(struct parse *p, cset *cs)
   1717   1.1       jtc {
   1718  1.14     lukem 	cset *top;
   1719  1.14     lukem 
   1720  1.14     lukem 	_DIAGASSERT(p != NULL);
   1721  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1722  1.14     lukem 
   1723  1.14     lukem 	top = &p->g->sets[p->g->ncsets];
   1724   1.1       jtc 
   1725  1.39  christos 	free(cs->wides);
   1726  1.39  christos 	free(cs->ranges);
   1727  1.39  christos 	free(cs->types);
   1728  1.39  christos 	memset(cs, 0, sizeof(*cs));
   1729   1.1       jtc 	if (cs == top-1)	/* recover only the easy case */
   1730   1.1       jtc 		p->g->ncsets--;
   1731   1.1       jtc }
   1732   1.1       jtc 
   1733   1.1       jtc /*
   1734  1.39  christos  - singleton - Determine whether a set contains only one character,
   1735  1.39  christos  - returning it if so, otherwise returning OUT.
   1736   1.1       jtc  */
   1737  1.39  christos static wint_t
   1738  1.39  christos singleton(cset *cs)
   1739   1.1       jtc {
   1740  1.39  christos 	wint_t i, s, n;
   1741   1.1       jtc 
   1742  1.39  christos 	for (i = n = 0; i < NC; i++)
   1743  1.39  christos 		if (CHIN(cs, i)) {
   1744   1.1       jtc 			n++;
   1745  1.39  christos 			s = i;
   1746  1.39  christos 		}
   1747  1.39  christos 	if (n == 1)
   1748  1.39  christos 		return (s);
   1749  1.39  christos 	if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
   1750  1.39  christos 	    cs->icase == 0)
   1751  1.39  christos 		return (cs->wides[0]);
   1752  1.39  christos 	/* Don't bother handling the other cases. */
   1753  1.39  christos 	return (OUT);
   1754   1.1       jtc }
   1755   1.1       jtc 
   1756   1.1       jtc /*
   1757  1.39  christos  - CHadd - add character to character set.
   1758   1.1       jtc  */
   1759   1.1       jtc static void
   1760  1.39  christos CHadd(struct parse *p, cset *cs, wint_t ch)
   1761   1.1       jtc {
   1762  1.39  christos 	wint_t nch, *newwides;
   1763  1.14     lukem 
   1764  1.14     lukem 	_DIAGASSERT(p != NULL);
   1765  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1766  1.14     lukem 
   1767  1.49  christos 	if ((unsigned)ch < NC)
   1768  1.40  christos 		cs->bmp[(unsigned)ch >> 3] |= 1 << (ch & 7);
   1769  1.39  christos 	else {
   1770  1.39  christos 		newwides = reallocarray(cs->wides, cs->nwides + 1,
   1771  1.39  christos 		    sizeof(*cs->wides));
   1772  1.39  christos 		if (newwides == NULL) {
   1773  1.39  christos 			SETERROR(REG_ESPACE);
   1774  1.39  christos 			return;
   1775  1.39  christos 		}
   1776  1.39  christos 		cs->wides = newwides;
   1777  1.39  christos 		cs->wides[cs->nwides++] = ch;
   1778  1.39  christos 	}
   1779  1.39  christos 	if (cs->icase) {
   1780  1.49  christos 		if ((unsigned)(nch = towlower(ch)) < NC)
   1781  1.40  christos 			cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7);
   1782  1.49  christos 		if ((unsigned)(nch = towupper(ch)) < NC)
   1783  1.40  christos 			cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7);
   1784   1.1       jtc 	}
   1785   1.1       jtc }
   1786   1.1       jtc 
   1787   1.1       jtc /*
   1788  1.39  christos  - CHaddrange - add all characters in the range [min,max] to a character set.
   1789   1.1       jtc  */
   1790   1.1       jtc static void
   1791  1.39  christos CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
   1792   1.1       jtc {
   1793  1.39  christos 	crange *newranges;
   1794  1.14     lukem 
   1795  1.39  christos 	_DIAGASSERT(p != NULL);
   1796  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1797  1.14     lukem 
   1798  1.39  christos 	for (; min < NC && min <= max; min++)
   1799  1.39  christos 		CHadd(p, cs, min);
   1800  1.39  christos 	if (min >= max)
   1801  1.39  christos 		return;
   1802  1.39  christos 	newranges = reallocarray(cs->ranges, cs->nranges + 1,
   1803  1.39  christos 	    sizeof(*cs->ranges));
   1804  1.39  christos 	if (newranges == NULL) {
   1805  1.39  christos 		SETERROR(REG_ESPACE);
   1806   1.1       jtc 		return;
   1807   1.1       jtc 	}
   1808  1.39  christos 	cs->ranges = newranges;
   1809  1.39  christos 	cs->ranges[cs->nranges].min = min;
   1810  1.39  christos 	cs->ranges[cs->nranges].max = max;
   1811  1.39  christos 	cs->nranges++;
   1812   1.1       jtc }
   1813   1.1       jtc 
   1814   1.1       jtc /*
   1815  1.39  christos  - CHaddtype - add all characters of a certain type to a character set.
   1816   1.1       jtc  */
   1817   1.1       jtc static void
   1818  1.39  christos CHaddtype(struct parse *p, cset *cs, wctype_t wct)
   1819   1.1       jtc {
   1820  1.39  christos 	wint_t i;
   1821  1.39  christos 	wctype_t *newtypes;
   1822  1.14     lukem 
   1823  1.14     lukem 	_DIAGASSERT(p != NULL);
   1824  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1825  1.14     lukem 
   1826  1.39  christos 	for (i = 0; i < NC; i++)
   1827  1.39  christos 		if (iswctype(i, wct))
   1828  1.39  christos 			CHadd(p, cs, i);
   1829  1.39  christos 	newtypes = reallocarray(cs->types, cs->ntypes + 1,
   1830  1.39  christos 	    sizeof(*cs->types));
   1831  1.39  christos 	if (newtypes == NULL) {
   1832  1.39  christos 		SETERROR(REG_ESPACE);
   1833   1.1       jtc 		return;
   1834  1.39  christos 	}
   1835  1.39  christos 	cs->types = newtypes;
   1836  1.39  christos 	cs->types[cs->ntypes++] = wct;
   1837   1.1       jtc }
   1838   1.1       jtc 
   1839   1.1       jtc /*
   1840   1.1       jtc  - dupl - emit a duplicate of a bunch of sops
   1841   1.9     perry  == static sopno dupl(struct parse *p, sopno start, sopno finish);
   1842   1.1       jtc  */
   1843   1.1       jtc static sopno			/* start of duplicate */
   1844  1.39  christos dupl(struct parse *p,
   1845  1.39  christos 	sopno start,		/* from here */
   1846  1.39  christos 	sopno finish)		/* to this less one */
   1847   1.1       jtc {
   1848  1.39  christos 	sopno ret = HERE();
   1849   1.9     perry 	sopno len = finish - start;
   1850   1.1       jtc 
   1851  1.14     lukem 	_DIAGASSERT(p != NULL);
   1852  1.14     lukem 
   1853   1.1       jtc 	assert(finish >= start);
   1854   1.1       jtc 	if (len == 0)
   1855   1.1       jtc 		return(ret);
   1856  1.39  christos 	if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
   1857  1.39  christos 		return(ret);
   1858  1.40  christos 	(void) memcpy(p->strip + p->slen,
   1859  1.40  christos 	    p->strip + start, len * sizeof(*p->strip));
   1860   1.1       jtc 	p->slen += len;
   1861   1.1       jtc 	return(ret);
   1862   1.1       jtc }
   1863   1.1       jtc 
   1864   1.1       jtc /*
   1865   1.1       jtc  - doemit - emit a strip operator
   1866   1.9     perry  == static void doemit(struct parse *p, sop op, size_t opnd);
   1867   1.1       jtc  *
   1868   1.1       jtc  * It might seem better to implement this as a macro with a function as
   1869   1.1       jtc  * hard-case backup, but it's just too big and messy unless there are
   1870   1.1       jtc  * some changes to the data structures.  Maybe later.
   1871   1.1       jtc  */
   1872   1.1       jtc static void
   1873  1.39  christos doemit(struct parse *p, sop op, size_t opnd)
   1874   1.1       jtc {
   1875   1.1       jtc 	/* avoid making error situations worse */
   1876   1.1       jtc 	if (p->error != 0)
   1877   1.1       jtc 		return;
   1878   1.1       jtc 
   1879  1.39  christos 	_DIAGASSERT(p != NULL);
   1880  1.39  christos 
   1881   1.1       jtc 	/* deal with oversize operands ("can't happen", more or less) */
   1882   1.1       jtc 	assert(opnd < 1<<OPSHIFT);
   1883   1.1       jtc 
   1884   1.1       jtc 	/* deal with undersized strip */
   1885   1.1       jtc 	if (p->slen >= p->ssize)
   1886  1.30  christos 		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
   1887  1.30  christos 			return;
   1888   1.1       jtc 
   1889   1.1       jtc 	/* finally, it's all reduced to the easy case */
   1890  1.40  christos 	p->strip[p->slen++] = (sopno)SOP(op, opnd);
   1891   1.1       jtc }
   1892   1.1       jtc 
   1893   1.1       jtc /*
   1894   1.1       jtc  - doinsert - insert a sop into the strip
   1895   1.9     perry  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
   1896   1.1       jtc  */
   1897   1.1       jtc static void
   1898  1.39  christos doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
   1899   1.1       jtc {
   1900   1.9     perry 	sopno sn;
   1901   1.9     perry 	sop s;
   1902   1.9     perry 	int i;
   1903   1.1       jtc 
   1904  1.14     lukem 	_DIAGASSERT(p != NULL);
   1905  1.14     lukem 
   1906   1.1       jtc 	/* avoid making error situations worse */
   1907   1.1       jtc 	if (p->error != 0)
   1908   1.1       jtc 		return;
   1909   1.1       jtc 
   1910   1.1       jtc 	sn = HERE();
   1911   1.1       jtc 	EMIT(op, opnd);		/* do checks, ensure space */
   1912   1.1       jtc 	assert(HERE() == sn+1);
   1913   1.1       jtc 	s = p->strip[sn];
   1914   1.1       jtc 
   1915   1.1       jtc 	/* adjust paren pointers */
   1916   1.1       jtc 	assert(pos > 0);
   1917   1.1       jtc 	for (i = 1; i < NPAREN; i++) {
   1918   1.1       jtc 		if (p->pbegin[i] >= pos) {
   1919   1.1       jtc 			p->pbegin[i]++;
   1920   1.1       jtc 		}
   1921   1.1       jtc 		if (p->pend[i] >= pos) {
   1922   1.1       jtc 			p->pend[i]++;
   1923   1.1       jtc 		}
   1924   1.1       jtc 	}
   1925   1.1       jtc 
   1926  1.40  christos 	memmove(&p->strip[pos+1], &p->strip[pos],
   1927  1.40  christos 	    (HERE()-pos-1)*sizeof(*p->strip));
   1928   1.1       jtc 	p->strip[pos] = s;
   1929   1.1       jtc }
   1930   1.1       jtc 
   1931   1.1       jtc /*
   1932   1.1       jtc  - dofwd - complete a forward reference
   1933   1.9     perry  == static void dofwd(struct parse *p, sopno pos, sop value);
   1934   1.1       jtc  */
   1935   1.1       jtc static void
   1936  1.39  christos dofwd(struct parse *p, sopno pos, sop value)
   1937   1.1       jtc {
   1938  1.14     lukem 
   1939  1.14     lukem 	_DIAGASSERT(p != NULL);
   1940  1.14     lukem 
   1941   1.1       jtc 	/* avoid making error situations worse */
   1942   1.1       jtc 	if (p->error != 0)
   1943   1.1       jtc 		return;
   1944   1.1       jtc 
   1945   1.1       jtc 	assert(value < 1<<OPSHIFT);
   1946  1.39  christos 	p->strip[pos] = OP(p->strip[pos]) | value;
   1947   1.1       jtc }
   1948   1.1       jtc 
   1949   1.1       jtc /*
   1950   1.1       jtc  - enlarge - enlarge the strip
   1951  1.39  christos  == static int enlarge(struct parse *p, sopno size);
   1952   1.1       jtc  */
   1953  1.30  christos static int
   1954  1.35     joerg enlarge(struct parse *p, sopno size)
   1955   1.1       jtc {
   1956  1.39  christos 	sop *sp;
   1957  1.39  christos 
   1958  1.14     lukem 	_DIAGASSERT(p != NULL);
   1959  1.14     lukem 
   1960   1.1       jtc 	if (p->ssize >= size)
   1961  1.30  christos 		return 1;
   1962   1.1       jtc 
   1963  1.40  christos 	sp = reallocarray(p->strip, size, sizeof(*p->strip));
   1964  1.39  christos 	if (sp == NULL) {
   1965   1.1       jtc 		SETERROR(REG_ESPACE);
   1966  1.30  christos 		return 0;
   1967   1.1       jtc 	}
   1968  1.39  christos 	p->strip = sp;
   1969  1.35     joerg 	p->ssize = size;
   1970  1.30  christos 	return 1;
   1971   1.1       jtc }
   1972   1.1       jtc 
   1973   1.1       jtc /*
   1974   1.1       jtc  - stripsnug - compact the strip
   1975   1.9     perry  == static void stripsnug(struct parse *p, struct re_guts *g);
   1976   1.1       jtc  */
   1977   1.1       jtc static void
   1978  1.39  christos stripsnug(struct parse *p, struct re_guts *g)
   1979   1.1       jtc {
   1980  1.14     lukem 
   1981  1.14     lukem 	_DIAGASSERT(p != NULL);
   1982  1.14     lukem 	_DIAGASSERT(g != NULL);
   1983  1.14     lukem 
   1984   1.1       jtc 	g->nstates = p->slen;
   1985  1.40  christos 	g->strip = reallocarray(p->strip, p->slen, sizeof(*p->strip));
   1986  1.39  christos 	if (g->strip == NULL) {
   1987  1.39  christos 		SETERROR(REG_ESPACE);
   1988  1.39  christos 		g->strip = p->strip;
   1989  1.39  christos 	}
   1990   1.1       jtc }
   1991   1.1       jtc 
   1992   1.1       jtc /*
   1993   1.1       jtc  - findmust - fill in must and mlen with longest mandatory literal string
   1994   1.9     perry  == static void findmust(struct parse *p, struct re_guts *g);
   1995   1.1       jtc  *
   1996   1.1       jtc  * This algorithm could do fancy things like analyzing the operands of |
   1997   1.1       jtc  * for common subsequences.  Someday.  This code is simple and finds most
   1998   1.1       jtc  * of the interesting cases.
   1999   1.1       jtc  *
   2000   1.1       jtc  * Note that must and mlen got initialized during setup.
   2001   1.1       jtc  */
   2002   1.1       jtc static void
   2003  1.39  christos findmust(struct parse *p, struct re_guts *g)
   2004   1.1       jtc {
   2005   1.9     perry 	sop *scan;
   2006   1.7  christos 	sop *start = NULL;
   2007   1.9     perry 	sop *newstart = NULL;
   2008   1.9     perry 	sopno newlen;
   2009   1.9     perry 	sop s;
   2010   1.9     perry 	char *cp;
   2011  1.39  christos 	int offset;
   2012  1.39  christos 	mbstate_t mbs;
   2013   1.1       jtc 
   2014  1.14     lukem 	_DIAGASSERT(p != NULL);
   2015  1.14     lukem 	_DIAGASSERT(g != NULL);
   2016  1.14     lukem 
   2017   1.1       jtc 	/* avoid making error situations worse */
   2018   1.1       jtc 	if (p->error != 0)
   2019   1.1       jtc 		return;
   2020   1.1       jtc 
   2021  1.39  christos #ifdef notyet
   2022  1.39  christos 	/*
   2023  1.39  christos 	 * It's not generally safe to do a ``char'' substring search on
   2024  1.39  christos 	 * multibyte character strings, but it's safe for at least
   2025  1.39  christos 	 * UTF-8 (see RFC 3629).
   2026  1.39  christos 	 */
   2027  1.39  christos 	if (MB_CUR_MAX > 1 &&
   2028  1.39  christos 	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
   2029  1.39  christos 		return;
   2030  1.39  christos #endif
   2031  1.39  christos 
   2032   1.1       jtc 	/* find the longest OCHAR sequence in strip */
   2033   1.1       jtc 	newlen = 0;
   2034  1.39  christos 	offset = 0;
   2035  1.39  christos 	g->moffset = 0;
   2036   1.1       jtc 	scan = g->strip + 1;
   2037   1.1       jtc 	do {
   2038   1.1       jtc 		s = *scan++;
   2039   1.1       jtc 		switch (OP(s)) {
   2040   1.1       jtc 		case OCHAR:		/* sequence member */
   2041  1.39  christos 			if (newlen == 0) {		/* new sequence */
   2042  1.39  christos 				memset(&mbs, 0, sizeof(mbs));
   2043   1.1       jtc 				newstart = scan - 1;
   2044  1.39  christos 			}
   2045  1.42  christos #ifdef NLS
   2046  1.42  christos 			char buf[MB_LEN_MAX];
   2047  1.42  christos 			size_t clen = wcrtomb(buf, (int)OPND(s), &mbs);
   2048  1.39  christos 			if (clen == (size_t)-1)
   2049  1.39  christos 				goto toohard;
   2050  1.40  christos 			newlen += (sopno)clen;
   2051  1.42  christos #else
   2052  1.42  christos 			newlen++;
   2053  1.42  christos #endif
   2054   1.1       jtc 			break;
   2055   1.1       jtc 		case OPLUS_:		/* things that don't break one */
   2056   1.1       jtc 		case OLPAREN:
   2057   1.1       jtc 		case ORPAREN:
   2058   1.1       jtc 			break;
   2059   1.1       jtc 		case OQUEST_:		/* things that must be skipped */
   2060   1.1       jtc 		case OCH_:
   2061  1.39  christos 			offset = altoffset(scan, offset);
   2062   1.1       jtc 			scan--;
   2063   1.1       jtc 			do {
   2064   1.1       jtc 				scan += OPND(s);
   2065   1.1       jtc 				s = *scan;
   2066   1.1       jtc 				/* assert() interferes w debug printouts */
   2067  1.40  christos 				if (OP(s) != O_QUEST &&
   2068  1.40  christos 				    OP(s) != O_CH && OP(s) != OOR2) {
   2069   1.1       jtc 					g->iflags |= BAD;
   2070   1.1       jtc 					return;
   2071   1.1       jtc 				}
   2072  1.40  christos 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
   2073  1.11  christos 			/* FALLTHROUGH */
   2074  1.39  christos 		case OBOW:		/* things that break a sequence */
   2075  1.39  christos 		case OEOW:
   2076  1.39  christos 		case OBOL:
   2077  1.39  christos 		case OEOL:
   2078  1.39  christos 		case OBOS:
   2079  1.39  christos 		case OEOS:
   2080  1.39  christos 		case OWBND:
   2081  1.39  christos 		case ONWBND:
   2082  1.39  christos 		case O_QUEST:
   2083  1.39  christos 		case O_CH:
   2084  1.39  christos 		case OEND:
   2085  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2086   1.1       jtc 				start = newstart;
   2087   1.1       jtc 				g->mlen = newlen;
   2088  1.39  christos 				if (offset > -1) {
   2089  1.39  christos 					g->moffset += offset;
   2090  1.39  christos 					offset = newlen;
   2091  1.39  christos 				} else
   2092  1.39  christos 					g->moffset = offset;
   2093  1.39  christos 			} else {
   2094  1.39  christos 				if (offset > -1)
   2095  1.39  christos 					offset += newlen;
   2096   1.1       jtc 			}
   2097   1.1       jtc 			newlen = 0;
   2098   1.1       jtc 			break;
   2099  1.39  christos 		case OANY:
   2100  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2101  1.39  christos 				start = newstart;
   2102  1.39  christos 				g->mlen = newlen;
   2103  1.39  christos 				if (offset > -1) {
   2104  1.39  christos 					g->moffset += offset;
   2105  1.39  christos 					offset = newlen;
   2106  1.39  christos 				} else
   2107  1.39  christos 					g->moffset = offset;
   2108  1.39  christos 			} else {
   2109  1.39  christos 				if (offset > -1)
   2110  1.39  christos 					offset += newlen;
   2111  1.39  christos 			}
   2112  1.39  christos 			if (offset > -1)
   2113  1.39  christos 				offset++;
   2114  1.39  christos 			newlen = 0;
   2115  1.39  christos 			break;
   2116  1.39  christos 		case OANYOF:		/* may or may not invalidate offset */
   2117  1.39  christos 			/* First, everything as OANY */
   2118  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2119  1.39  christos 				start = newstart;
   2120  1.39  christos 				g->mlen = newlen;
   2121  1.39  christos 				if (offset > -1) {
   2122  1.39  christos 					g->moffset += offset;
   2123  1.39  christos 					offset = newlen;
   2124  1.39  christos 				} else
   2125  1.39  christos 					g->moffset = offset;
   2126  1.39  christos 			} else {
   2127  1.39  christos 				if (offset > -1)
   2128  1.39  christos 					offset += newlen;
   2129  1.39  christos 			}
   2130  1.39  christos 			if (offset > -1)
   2131  1.39  christos 				offset++;
   2132  1.39  christos 			newlen = 0;
   2133  1.39  christos 			break;
   2134  1.42  christos #ifdef NLS
   2135  1.40  christos 		toohard:/*FALLTHROUGH*/
   2136  1.42  christos #endif
   2137  1.39  christos 		default:
   2138  1.39  christos 			/* Anything here makes it impossible or too hard
   2139  1.39  christos 			 * to calculate the offset -- so we give up;
   2140  1.39  christos 			 * save the last known good offset, in case the
   2141  1.39  christos 			 * must sequence doesn't occur later.
   2142  1.39  christos 			 */
   2143  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2144  1.39  christos 				start = newstart;
   2145  1.39  christos 				g->mlen = newlen;
   2146  1.39  christos 				if (offset > -1)
   2147  1.39  christos 					g->moffset += offset;
   2148  1.39  christos 				else
   2149  1.39  christos 					g->moffset = offset;
   2150  1.39  christos 			}
   2151  1.39  christos 			offset = -1;
   2152  1.39  christos 			newlen = 0;
   2153  1.39  christos 			break;
   2154   1.1       jtc 		}
   2155   1.1       jtc 	} while (OP(s) != OEND);
   2156   1.1       jtc 
   2157  1.39  christos 	if (g->mlen == 0) {		/* there isn't one */
   2158  1.39  christos 		g->moffset = -1;
   2159   1.1       jtc 		return;
   2160  1.39  christos 	}
   2161   1.1       jtc 
   2162   1.1       jtc 	/* turn it into a character string */
   2163   1.1       jtc 	g->must = malloc((size_t)g->mlen + 1);
   2164   1.1       jtc 	if (g->must == NULL) {		/* argh; just forget it */
   2165   1.1       jtc 		g->mlen = 0;
   2166  1.39  christos 		g->moffset = -1;
   2167   1.1       jtc 		return;
   2168   1.1       jtc 	}
   2169   1.1       jtc 	cp = g->must;
   2170   1.1       jtc 	scan = start;
   2171  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   2172  1.39  christos 	while (cp < g->must + g->mlen) {
   2173   1.1       jtc 		while (OP(s = *scan++) != OCHAR)
   2174   1.1       jtc 			continue;
   2175  1.42  christos #ifdef NLS
   2176  1.43  christos 		size_t clen = wcrtomb(cp, (int)OPND(s), &mbs);
   2177  1.39  christos 		assert(clen != (size_t)-1);
   2178  1.39  christos 		cp += clen;
   2179  1.42  christos #else
   2180  1.42  christos 		*cp++ = OPND(s);
   2181  1.42  christos #endif
   2182   1.1       jtc 	}
   2183   1.2       jtc 	assert(cp == g->must + g->mlen);
   2184   1.1       jtc 	*cp++ = '\0';		/* just on general principles */
   2185   1.1       jtc }
   2186   1.1       jtc 
   2187   1.1       jtc /*
   2188  1.39  christos  - altoffset - choose biggest offset among multiple choices
   2189  1.39  christos  == static int altoffset(sop *scan, int offset);
   2190  1.39  christos  *
   2191  1.39  christos  * Compute, recursively if necessary, the largest offset among multiple
   2192  1.39  christos  * re paths.
   2193  1.39  christos  */
   2194  1.39  christos static int
   2195  1.39  christos altoffset(sop *scan, int offset)
   2196  1.39  christos {
   2197  1.39  christos 	int largest;
   2198  1.39  christos 	int try;
   2199  1.39  christos 	sop s;
   2200  1.39  christos 
   2201  1.39  christos 	_DIAGASSERT(scan != NULL);
   2202  1.39  christos 
   2203  1.39  christos 	/* If we gave up already on offsets, return */
   2204  1.39  christos 	if (offset == -1)
   2205  1.39  christos 		return -1;
   2206  1.39  christos 
   2207  1.39  christos 	largest = 0;
   2208  1.39  christos 	try = 0;
   2209  1.39  christos 	s = *scan++;
   2210  1.40  christos 	while (OP(s) != O_QUEST && OP(s) != O_CH) {
   2211  1.39  christos 		switch (OP(s)) {
   2212  1.39  christos 		case OOR1:
   2213  1.39  christos 			if (try > largest)
   2214  1.39  christos 				largest = try;
   2215  1.39  christos 			try = 0;
   2216  1.39  christos 			break;
   2217  1.39  christos 		case OQUEST_:
   2218  1.39  christos 		case OCH_:
   2219  1.39  christos 			try = altoffset(scan, try);
   2220  1.39  christos 			if (try == -1)
   2221  1.39  christos 				return -1;
   2222  1.39  christos 			scan--;
   2223  1.39  christos 			do {
   2224  1.39  christos 				scan += OPND(s);
   2225  1.39  christos 				s = *scan;
   2226  1.40  christos 				if (OP(s) != O_QUEST &&
   2227  1.40  christos 				    OP(s) != O_CH && OP(s) != OOR2)
   2228  1.39  christos 					return -1;
   2229  1.40  christos 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
   2230  1.39  christos 			/* We must skip to the next position, or we'll
   2231  1.39  christos 			 * leave altoffset() too early.
   2232  1.39  christos 			 */
   2233  1.39  christos 			scan++;
   2234  1.39  christos 			break;
   2235  1.39  christos 		case OANYOF:
   2236  1.39  christos 		case OCHAR:
   2237  1.39  christos 		case OANY:
   2238  1.39  christos 			try++;
   2239  1.40  christos 			/*FALLTHROUGH*/
   2240  1.39  christos 		case OBOW:
   2241  1.39  christos 		case OEOW:
   2242  1.39  christos 		case OWBND:
   2243  1.39  christos 		case ONWBND:
   2244  1.39  christos 		case OLPAREN:
   2245  1.39  christos 		case ORPAREN:
   2246  1.39  christos 		case OOR2:
   2247  1.39  christos 			break;
   2248  1.39  christos 		default:
   2249  1.39  christos 			try = -1;
   2250  1.39  christos 			break;
   2251  1.39  christos 		}
   2252  1.39  christos 		if (try == -1)
   2253  1.39  christos 			return -1;
   2254  1.39  christos 		s = *scan++;
   2255  1.39  christos 	}
   2256  1.39  christos 
   2257  1.39  christos 	if (try > largest)
   2258  1.39  christos 		largest = try;
   2259  1.39  christos 
   2260  1.39  christos 	return largest+offset;
   2261  1.39  christos }
   2262  1.39  christos 
   2263  1.39  christos /*
   2264  1.39  christos  - computejumps - compute char jumps for BM scan
   2265  1.39  christos  == static void computejumps(struct parse *p, struct re_guts *g);
   2266  1.39  christos  *
   2267  1.39  christos  * This algorithm assumes g->must exists and is has size greater than
   2268  1.39  christos  * zero. It's based on the algorithm found on Computer Algorithms by
   2269  1.39  christos  * Sara Baase.
   2270  1.39  christos  *
   2271  1.39  christos  * A char jump is the number of characters one needs to jump based on
   2272  1.39  christos  * the value of the character from the text that was mismatched.
   2273  1.39  christos  */
   2274  1.39  christos static void
   2275  1.39  christos computejumps(struct parse *p, struct re_guts *g)
   2276  1.39  christos {
   2277  1.39  christos 	int ch;
   2278  1.39  christos 	size_t mindex;
   2279  1.39  christos 
   2280  1.39  christos 	_DIAGASSERT(p != NULL);
   2281  1.39  christos 	_DIAGASSERT(g != NULL);
   2282  1.39  christos 
   2283  1.39  christos 	/* Avoid making errors worse */
   2284  1.39  christos 	if (p->error != 0)
   2285  1.39  christos 		return;
   2286  1.39  christos 
   2287  1.39  christos 	g->charjump = calloc((NC_MAX + 1), sizeof(*g->charjump));
   2288  1.39  christos 	if (g->charjump == NULL)	/* Not a fatal error */
   2289  1.39  christos 		return;
   2290  1.39  christos 	/* Adjust for signed chars, if necessary */
   2291  1.39  christos 	g->charjump = &g->charjump[-(CHAR_MIN)];
   2292  1.39  christos 
   2293  1.39  christos 	/* If the character does not exist in the pattern, the jump
   2294  1.39  christos 	 * is equal to the number of characters in the pattern.
   2295  1.39  christos 	 */
   2296  1.39  christos 	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
   2297  1.39  christos 		g->charjump[ch] = g->mlen;
   2298  1.39  christos 
   2299  1.39  christos 	/* If the character does exist, compute the jump that would
   2300  1.39  christos 	 * take us to the last character in the pattern equal to it
   2301  1.39  christos 	 * (notice that we match right to left, so that last character
   2302  1.39  christos 	 * is the first one that would be matched).
   2303  1.39  christos 	 */
   2304  1.39  christos 	for (mindex = 0; mindex < g->mlen; mindex++)
   2305  1.39  christos 		g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
   2306  1.39  christos }
   2307  1.39  christos 
   2308  1.39  christos /*
   2309  1.39  christos  - computematchjumps - compute match jumps for BM scan
   2310  1.39  christos  == static void computematchjumps(struct parse *p, struct re_guts *g);
   2311  1.39  christos  *
   2312  1.39  christos  * This algorithm assumes g->must exists and is has size greater than
   2313  1.39  christos  * zero. It's based on the algorithm found on Computer Algorithms by
   2314  1.39  christos  * Sara Baase.
   2315  1.39  christos  *
   2316  1.39  christos  * A match jump is the number of characters one needs to advance based
   2317  1.39  christos  * on the already-matched suffix.
   2318  1.39  christos  * Notice that all values here are minus (g->mlen-1), because of the way
   2319  1.39  christos  * the search algorithm works.
   2320  1.39  christos  */
   2321  1.39  christos static void
   2322  1.39  christos computematchjumps(struct parse *p, struct re_guts *g)
   2323  1.39  christos {
   2324  1.39  christos 	size_t mindex;		/* General "must" iterator */
   2325  1.39  christos 	size_t suffix;		/* Keeps track of matching suffix */
   2326  1.39  christos 	size_t ssuffix;		/* Keeps track of suffixes' suffix */
   2327  1.39  christos 	size_t* pmatches;	/* pmatches[k] points to the next i
   2328  1.39  christos 				 * such that i+1...mlen is a substring
   2329  1.39  christos 				 * of k+1...k+mlen-i-1
   2330  1.39  christos 				 */
   2331  1.39  christos 
   2332  1.39  christos 	_DIAGASSERT(p != NULL);
   2333  1.39  christos 	_DIAGASSERT(g != NULL);
   2334  1.39  christos 
   2335  1.39  christos 	/* Avoid making errors worse */
   2336  1.39  christos 	if (p->error != 0)
   2337  1.39  christos 		return;
   2338  1.39  christos 
   2339  1.39  christos 	pmatches = calloc(g->mlen, sizeof(*pmatches));
   2340  1.39  christos 	if (pmatches == NULL) {
   2341  1.39  christos 		g->matchjump = NULL;
   2342  1.39  christos 		return;
   2343  1.39  christos 	}
   2344  1.39  christos 
   2345  1.39  christos 	g->matchjump = calloc(g->mlen, sizeof(*g->matchjump));
   2346  1.39  christos 	if (g->matchjump == NULL) {	/* Not a fatal error */
   2347  1.39  christos 		free(pmatches);
   2348  1.39  christos 		return;
   2349  1.39  christos 	}
   2350  1.39  christos 
   2351  1.39  christos 	/* Set maximum possible jump for each character in the pattern */
   2352  1.39  christos 	for (mindex = 0; mindex < g->mlen; mindex++)
   2353  1.39  christos 		g->matchjump[mindex] = 2 * g->mlen - mindex - 1;
   2354  1.39  christos 
   2355  1.39  christos 	/* Compute pmatches[] */
   2356  1.39  christos 	for (suffix = mindex = g->mlen; mindex-- > 0; suffix--) {
   2357  1.39  christos 		pmatches[mindex] = suffix;
   2358  1.39  christos 
   2359  1.39  christos 		/* If a mismatch is found, interrupting the substring,
   2360  1.39  christos 		 * compute the matchjump for that position. If no
   2361  1.39  christos 		 * mismatch is found, then a text substring mismatched
   2362  1.39  christos 		 * against the suffix will also mismatch against the
   2363  1.39  christos 		 * substring.
   2364  1.39  christos 		 */
   2365  1.39  christos 		while (suffix < g->mlen
   2366  1.39  christos 		    && g->must[mindex] != g->must[suffix]) {
   2367  1.39  christos 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
   2368  1.39  christos 			    g->mlen - mindex - 1);
   2369  1.39  christos 			suffix = pmatches[suffix];
   2370  1.39  christos 		}
   2371  1.39  christos 	}
   2372  1.39  christos 
   2373  1.39  christos 	/* Compute the matchjump up to the last substring found to jump
   2374  1.39  christos 	 * to the beginning of the largest must pattern prefix matching
   2375  1.39  christos 	 * it's own suffix.
   2376  1.39  christos 	 */
   2377  1.39  christos 	for (mindex = 0; mindex <= suffix; mindex++)
   2378  1.39  christos 		g->matchjump[mindex] = MIN(g->matchjump[mindex],
   2379  1.39  christos 		    g->mlen + suffix - mindex);
   2380  1.39  christos 
   2381  1.39  christos         ssuffix = pmatches[suffix];
   2382  1.39  christos         while (suffix < g->mlen) {
   2383  1.39  christos                 while (suffix <= ssuffix && suffix < g->mlen) {
   2384  1.39  christos                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
   2385  1.39  christos 			    g->mlen + ssuffix - suffix);
   2386  1.39  christos                         suffix++;
   2387  1.39  christos                 }
   2388  1.39  christos 		if (suffix < g->mlen)
   2389  1.39  christos                 	ssuffix = pmatches[ssuffix];
   2390  1.39  christos         }
   2391  1.39  christos 
   2392  1.39  christos 	free(pmatches);
   2393  1.39  christos }
   2394  1.39  christos 
   2395  1.39  christos /*
   2396   1.1       jtc  - pluscount - count + nesting
   2397   1.9     perry  == static sopno pluscount(struct parse *p, struct re_guts *g);
   2398   1.1       jtc  */
   2399   1.1       jtc static sopno			/* nesting depth */
   2400  1.39  christos pluscount(struct parse *p, struct re_guts *g)
   2401   1.1       jtc {
   2402   1.9     perry 	sop *scan;
   2403   1.9     perry 	sop s;
   2404   1.9     perry 	sopno plusnest = 0;
   2405   1.9     perry 	sopno maxnest = 0;
   2406  1.14     lukem 
   2407  1.14     lukem 	_DIAGASSERT(p != NULL);
   2408  1.14     lukem 	_DIAGASSERT(g != NULL);
   2409   1.1       jtc 
   2410   1.1       jtc 	if (p->error != 0)
   2411   1.1       jtc 		return(0);	/* there may not be an OEND */
   2412   1.1       jtc 
   2413   1.1       jtc 	scan = g->strip + 1;
   2414   1.1       jtc 	do {
   2415   1.1       jtc 		s = *scan++;
   2416   1.1       jtc 		switch (OP(s)) {
   2417   1.1       jtc 		case OPLUS_:
   2418   1.1       jtc 			plusnest++;
   2419   1.1       jtc 			break;
   2420   1.1       jtc 		case O_PLUS:
   2421   1.1       jtc 			if (plusnest > maxnest)
   2422   1.1       jtc 				maxnest = plusnest;
   2423   1.1       jtc 			plusnest--;
   2424   1.1       jtc 			break;
   2425   1.1       jtc 		}
   2426   1.1       jtc 	} while (OP(s) != OEND);
   2427   1.1       jtc 	if (plusnest != 0)
   2428   1.1       jtc 		g->iflags |= BAD;
   2429   1.1       jtc 	return(maxnest);
   2430   1.1       jtc }
   2431