Home | History | Annotate | Line # | Download | only in regex
regcomp.c revision 1.46
      1  1.46  christos /*	$NetBSD: regcomp.c,v 1.46 2021/03/11 15:00:29 christos Exp $	*/
      2   1.6       cgd 
      3   1.5       cgd /*-
      4  1.39  christos  * SPDX-License-Identifier: BSD-3-Clause
      5  1.39  christos  *
      6  1.39  christos  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
      7   1.5       cgd  * Copyright (c) 1992, 1993, 1994
      8   1.5       cgd  *	The Regents of the University of California.  All rights reserved.
      9   1.5       cgd  *
     10  1.39  christos  * Copyright (c) 2011 The FreeBSD Foundation
     11  1.39  christos  * All rights reserved.
     12  1.39  christos  * Portions of this software were developed by David Chisnall
     13  1.39  christos  * under sponsorship from the FreeBSD Foundation.
     14  1.39  christos  *
     15   1.5       cgd  * This code is derived from software contributed to Berkeley by
     16   1.5       cgd  * Henry Spencer.
     17   1.5       cgd  *
     18   1.5       cgd  * Redistribution and use in source and binary forms, with or without
     19   1.5       cgd  * modification, are permitted provided that the following conditions
     20   1.5       cgd  * are met:
     21   1.5       cgd  * 1. Redistributions of source code must retain the above copyright
     22   1.5       cgd  *    notice, this list of conditions and the following disclaimer.
     23   1.5       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     24   1.5       cgd  *    notice, this list of conditions and the following disclaimer in the
     25   1.5       cgd  *    documentation and/or other materials provided with the distribution.
     26  1.18       agc  * 3. Neither the name of the University nor the names of its contributors
     27  1.18       agc  *    may be used to endorse or promote products derived from this software
     28  1.18       agc  *    without specific prior written permission.
     29  1.18       agc  *
     30  1.18       agc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     31  1.18       agc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     32  1.18       agc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     33  1.18       agc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     34  1.18       agc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     35  1.18       agc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     36  1.18       agc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     37  1.18       agc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     38  1.18       agc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     39  1.18       agc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     40  1.18       agc  * SUCH DAMAGE.
     41  1.18       agc  *
     42  1.18       agc  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
     43  1.18       agc  */
     44  1.18       agc 
     45  1.45  christos #if HAVE_NBTOOL_CONFIG_H
     46  1.45  christos #include "nbtool_config.h"
     47  1.45  christos #endif
     48  1.45  christos 
     49   1.7  christos #include <sys/cdefs.h>
     50   1.6       cgd #if 0
     51   1.5       cgd static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
     52  1.39  christos __FBSDID("$FreeBSD: head/lib/libc/regex/regcomp.c 368359 2020-12-05 03:18:48Z kevans $");
     53   1.6       cgd #endif
     54  1.46  christos __RCSID("$NetBSD: regcomp.c,v 1.46 2021/03/11 15:00:29 christos Exp $");
     55  1.39  christos 
     56  1.39  christos #define _OPENBSD_SOURCE
     57  1.42  christos 
     58  1.42  christos #ifndef LIBHACK
     59  1.39  christos #define REGEX_GNU_EXTENSIONS
     60   1.5       cgd 
     61   1.8       jtc #include "namespace.h"
     62  1.42  christos #endif
     63   1.1       jtc #include <sys/types.h>
     64  1.39  christos #include <stdio.h>
     65  1.39  christos #include <string.h>
     66   1.1       jtc #include <ctype.h>
     67   1.1       jtc #include <limits.h>
     68   1.1       jtc #include <stdlib.h>
     69  1.28  junyoung #include <regex.h>
     70  1.39  christos #include <stdbool.h>
     71   1.8       jtc 
     72  1.44  christos #if defined(__weak_alias) && !defined(LIBHACK)
     73  1.16   mycroft __weak_alias(regcomp,_regcomp)
     74   1.8       jtc #endif
     75   1.1       jtc 
     76  1.39  christos #ifdef REGEX_LIBC_COLLATE
     77  1.39  christos #include "collate.h"
     78  1.39  christos #endif
     79  1.39  christos 
     80   1.1       jtc #include "utils.h"
     81   1.1       jtc #include "regex2.h"
     82   1.1       jtc 
     83   1.1       jtc #include "cname.h"
     84   1.1       jtc 
     85   1.1       jtc /*
     86  1.39  christos  * Branching context, used to keep track of branch state for all of the branch-
     87  1.39  christos  * aware functions. In addition to keeping track of branch positions for the
     88  1.39  christos  * p_branch_* functions, we use this to simplify some clumsiness in BREs for
     89  1.39  christos  * detection of whether ^ is acting as an anchor or being used erroneously and
     90  1.39  christos  * also for whether we're in a sub-expression or not.
     91  1.39  christos  */
     92  1.39  christos struct branchc {
     93  1.39  christos 	sopno start;
     94  1.39  christos 	sopno back;
     95  1.39  christos 	sopno fwd;
     96  1.39  christos 
     97  1.39  christos 	int nbranch;
     98  1.39  christos 	int nchain;
     99  1.39  christos 	bool outer;
    100  1.39  christos 	bool terminate;
    101  1.39  christos };
    102  1.39  christos 
    103  1.39  christos /*
    104   1.1       jtc  * parse structure, passed up and down to avoid global variables and
    105   1.1       jtc  * other clumsinesses
    106   1.1       jtc  */
    107   1.1       jtc struct parse {
    108  1.21      yamt 	const char *next;	/* next character in RE */
    109  1.21      yamt 	const char *end;	/* end of string (-> NUL normally) */
    110   1.1       jtc 	int error;		/* has an error been seen? */
    111  1.39  christos 	int gnuext;
    112   1.1       jtc 	sop *strip;		/* malloced strip */
    113   1.1       jtc 	sopno ssize;		/* malloced strip size (allocated) */
    114   1.1       jtc 	sopno slen;		/* malloced strip length (used) */
    115  1.33  christos 	size_t ncsalloc;	/* number of csets allocated */
    116   1.1       jtc 	struct re_guts *g;
    117   1.1       jtc #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
    118   1.1       jtc 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
    119   1.1       jtc 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
    120  1.39  christos 	bool allowbranch;	/* can this expression branch? */
    121  1.39  christos 	bool bre;		/* convenience; is this a BRE? */
    122  1.39  christos 	int pflags;		/* other parsing flags -- legacy escapes? */
    123  1.39  christos 	bool (*parse_expr)(struct parse *, struct branchc *);
    124  1.39  christos 	void (*pre_parse)(struct parse *, struct branchc *);
    125  1.39  christos 	void (*post_parse)(struct parse *, struct branchc *);
    126   1.1       jtc };
    127   1.1       jtc 
    128  1.39  christos #define PFLAG_LEGACY_ESC	0x00000001
    129  1.39  christos 
    130   1.5       cgd /* ========= begin header generated by ./mkh ========= */
    131   1.5       cgd #ifdef __cplusplus
    132   1.5       cgd extern "C" {
    133   1.5       cgd #endif
    134   1.5       cgd 
    135   1.5       cgd /* === regcomp.c === */
    136  1.39  christos static bool p_ere_exp(struct parse *p, struct branchc *bc);
    137  1.26  junyoung static void p_str(struct parse *p);
    138  1.39  christos static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
    139  1.39  christos static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
    140  1.39  christos static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
    141  1.39  christos static bool p_branch_empty(struct parse *p, struct branchc *bc);
    142  1.39  christos static bool p_branch_do(struct parse *p, struct branchc *bc);
    143  1.39  christos static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
    144  1.39  christos static void p_bre_post_parse(struct parse *p, struct branchc *bc);
    145  1.39  christos static void p_re(struct parse *p, int end1, int end2);
    146  1.39  christos static bool p_simp_re(struct parse *p, struct branchc *bc);
    147  1.26  junyoung static int p_count(struct parse *p);
    148  1.26  junyoung static void p_bracket(struct parse *p);
    149  1.39  christos static int p_range_cmp(wchar_t c1, wchar_t c2);
    150  1.26  junyoung static void p_b_term(struct parse *p, cset *cs);
    151  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    152  1.39  christos static int p_b_pseudoclass(struct parse *p, char c);
    153  1.39  christos #endif
    154  1.26  junyoung static void p_b_cclass(struct parse *p, cset *cs);
    155  1.39  christos static void p_b_cclass_named(struct parse *p, cset *cs, const char[]);
    156  1.26  junyoung static void p_b_eclass(struct parse *p, cset *cs);
    157  1.39  christos static wint_t p_b_symbol(struct parse *p);
    158  1.39  christos static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
    159  1.39  christos static bool may_escape(struct parse *p, const wint_t ch);
    160  1.39  christos static wint_t othercase(wint_t ch);
    161  1.39  christos static void bothcases(struct parse *p, wint_t ch);
    162  1.39  christos static void ordinary(struct parse *p, wint_t ch);
    163  1.26  junyoung static void nonnewline(struct parse *p);
    164  1.39  christos static void repeat(struct parse *p, sopno start, int from, int to);
    165  1.26  junyoung static int seterr(struct parse *p, int e);
    166  1.26  junyoung static cset *allocset(struct parse *p);
    167  1.26  junyoung static void freeset(struct parse *p, cset *cs);
    168  1.39  christos static void CHadd(struct parse *p, cset *cs, wint_t ch);
    169  1.39  christos static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
    170  1.39  christos static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
    171  1.39  christos static wint_t singleton(cset *cs);
    172  1.26  junyoung static sopno dupl(struct parse *p, sopno start, sopno finish);
    173  1.39  christos static void doemit(struct parse *p, sop op, size_t opnd);
    174  1.39  christos static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
    175  1.39  christos static void dofwd(struct parse *p, sopno pos, sop value);
    176  1.30  christos static int enlarge(struct parse *p, sopno size);
    177  1.26  junyoung static void stripsnug(struct parse *p, struct re_guts *g);
    178  1.26  junyoung static void findmust(struct parse *p, struct re_guts *g);
    179  1.39  christos static int altoffset(sop *scan, int offset);
    180  1.39  christos static void computejumps(struct parse *p, struct re_guts *g);
    181  1.39  christos static void computematchjumps(struct parse *p, struct re_guts *g);
    182  1.26  junyoung static sopno pluscount(struct parse *p, struct re_guts *g);
    183  1.39  christos static wint_t wgetnext(struct parse *p);
    184   1.5       cgd 
    185   1.5       cgd #ifdef __cplusplus
    186   1.5       cgd }
    187   1.5       cgd #endif
    188   1.5       cgd /* ========= end header generated by ./mkh ========= */
    189   1.1       jtc 
    190   1.1       jtc static char nuls[10];		/* place to point scanner in event of error */
    191   1.1       jtc 
    192   1.1       jtc /*
    193   1.1       jtc  * macros for use with parse structure
    194   1.1       jtc  * BEWARE:  these know that the parse structure is named `p' !!!
    195   1.1       jtc  */
    196   1.1       jtc #define	PEEK()	(*p->next)
    197   1.1       jtc #define	PEEK2()	(*(p->next+1))
    198   1.1       jtc #define	MORE()	(p->next < p->end)
    199   1.1       jtc #define	MORE2()	(p->next+1 < p->end)
    200   1.1       jtc #define	SEE(c)	(MORE() && PEEK() == (c))
    201   1.1       jtc #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
    202  1.39  christos #define	SEESPEC(a)	(p->bre ? SEETWO('\\', a) : SEE(a))
    203   1.1       jtc #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
    204   1.1       jtc #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
    205  1.39  christos #define	EATSPEC(a)	(p->bre ? EATTWO('\\', a) : EAT(a))
    206   1.1       jtc #define	NEXT()	(p->next++)
    207   1.1       jtc #define	NEXT2()	(p->next += 2)
    208   1.1       jtc #define	NEXTn(n)	(p->next += (n))
    209   1.1       jtc #define	GETNEXT()	(*p->next++)
    210  1.39  christos #define	WGETNEXT()	wgetnext(p)
    211   1.1       jtc #define	SETERROR(e)	seterr(p, (e))
    212  1.39  christos #define	REQUIRE(co, e)	((co) || SETERROR(e))
    213   1.1       jtc #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
    214  1.39  christos #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
    215   1.1       jtc #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
    216  1.40  christos #define	EMIT(op, sopnd)	doemit(p, (op), (sopnd))
    217  1.40  christos #define	INSERT(op, pos)	doinsert(p, (op), HERE()-(pos)+1, pos)
    218  1.12  drochner #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
    219   1.1       jtc #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
    220   1.1       jtc #define	HERE()		(p->slen)
    221   1.1       jtc #define	THERE()		(p->slen - 1)
    222   1.4       jtc #define	THERETHERE()	(p->slen - 2)
    223   1.1       jtc #define	DROP(n)	(p->slen -= (n))
    224   1.1       jtc 
    225  1.39  christos /* Macro used by computejump()/computematchjump() */
    226  1.41  christos #ifndef MIN
    227  1.39  christos #define MIN(a,b)	((a)<(b)?(a):(b))
    228  1.41  christos #endif
    229  1.30  christos 
    230  1.42  christos #ifndef NLS
    231  1.42  christos static const struct {
    232  1.42  christos 	const char *name;
    233  1.42  christos 	int (*func)(int);
    234  1.42  christos } wctypes[] = {
    235  1.42  christos #define ADD(x) { .name = # x, .func = is ## x }
    236  1.42  christos 	ADD(alnum),
    237  1.42  christos 	ADD(alpha),
    238  1.42  christos 	ADD(blank),
    239  1.42  christos 	ADD(cntrl),
    240  1.42  christos 	ADD(digit),
    241  1.42  christos 	ADD(graph),
    242  1.42  christos 	ADD(lower),
    243  1.42  christos 	ADD(print),
    244  1.42  christos 	ADD(punct),
    245  1.42  christos 	ADD(space),
    246  1.42  christos 	ADD(upper),
    247  1.42  christos 	ADD(xdigit),
    248  1.42  christos #undef ADD
    249  1.42  christos };
    250  1.42  christos 
    251  1.42  christos wctype_t
    252  1.42  christos __regex_wctype(const char *str)
    253  1.42  christos {
    254  1.42  christos 	for (size_t i = 0; i < __arraycount(wctypes); i++) {
    255  1.42  christos 		if (strcmp(wctypes[i].name, str) == 0)
    256  1.42  christos 			return (wctype_t)(i + 1);
    257  1.42  christos 	}
    258  1.42  christos 	return (wctype_t)0;
    259  1.42  christos }
    260  1.42  christos 
    261  1.42  christos int
    262  1.42  christos __regex_iswctype(wint_t c, wctype_t ct)
    263  1.42  christos {
    264  1.42  christos 	if (ct == 0)
    265  1.42  christos 		return 0;
    266  1.42  christos 	return (*wctypes[ct - 1].func)(c);
    267  1.42  christos }
    268  1.42  christos #endif
    269  1.42  christos 
    270  1.39  christos static int				/* 0 success, otherwise REG_something */
    271  1.39  christos regcomp_internal(regex_t * __restrict preg,
    272  1.39  christos 	const char * __restrict pattern,
    273  1.39  christos 	int cflags, int pflags)
    274   1.1       jtc {
    275   1.1       jtc 	struct parse pa;
    276   1.9     perry 	struct re_guts *g;
    277   1.9     perry 	struct parse *p = &pa;
    278   1.9     perry 	int i;
    279   1.9     perry 	size_t len;
    280  1.39  christos 	size_t maxlen;
    281   1.3       jtc #ifdef REDEBUG
    282   1.3       jtc #	define	GOODFLAGS(f)	(f)
    283   1.3       jtc #else
    284   1.3       jtc #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
    285   1.3       jtc #endif
    286   1.1       jtc 
    287  1.14     lukem 	_DIAGASSERT(preg != NULL);
    288  1.14     lukem 	_DIAGASSERT(pattern != NULL);
    289  1.14     lukem 
    290   1.3       jtc 	cflags = GOODFLAGS(cflags);
    291   1.1       jtc 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
    292   1.1       jtc 		return(REG_INVARG);
    293   1.1       jtc 
    294   1.1       jtc 	if (cflags&REG_PEND) {
    295   1.1       jtc 		if (preg->re_endp < pattern)
    296   1.1       jtc 			return(REG_INVARG);
    297   1.1       jtc 		len = preg->re_endp - pattern;
    298   1.1       jtc 	} else
    299  1.11  christos 		len = strlen(pattern);
    300   1.1       jtc 
    301   1.1       jtc 	/* do the mallocs early so failure handling is easy */
    302  1.39  christos 	g = malloc(sizeof(*g));
    303   1.1       jtc 	if (g == NULL)
    304   1.1       jtc 		return(REG_ESPACE);
    305  1.39  christos 	/*
    306  1.39  christos 	 * Limit the pattern space to avoid a 32-bit overflow on buffer
    307  1.39  christos 	 * extension.  Also avoid any signed overflow in case of conversion
    308  1.39  christos 	 * so make the real limit based on a 31-bit overflow.
    309  1.39  christos 	 *
    310  1.39  christos 	 * Likely not applicable on 64-bit systems but handle the case
    311  1.39  christos 	 * generically (who are we to stop people from using ~715MB+
    312  1.39  christos 	 * patterns?).
    313  1.39  christos 	 */
    314  1.40  christos 	maxlen = ((size_t)-1 >> 1) / sizeof(*p->strip) * 2 / 3;
    315  1.39  christos 	if (len >= maxlen) {
    316  1.40  christos 		free(g);
    317  1.39  christos 		return(REG_ESPACE);
    318  1.39  christos 	}
    319  1.40  christos 	p->ssize = (sopno)(len / 2 * 3 + 1);	/* ugh */
    320  1.39  christos 	assert(p->ssize >= len);
    321  1.39  christos 
    322  1.39  christos 	p->strip = calloc(p->ssize, sizeof(*p->strip));
    323   1.1       jtc 	p->slen = 0;
    324   1.1       jtc 	if (p->strip == NULL) {
    325  1.40  christos 		free(g);
    326   1.1       jtc 		return(REG_ESPACE);
    327   1.1       jtc 	}
    328   1.1       jtc 
    329   1.1       jtc 	/* set things up */
    330   1.1       jtc 	p->g = g;
    331  1.39  christos 	p->next = pattern;	/* convenience; we do not modify it */
    332   1.1       jtc 	p->end = p->next + len;
    333   1.1       jtc 	p->error = 0;
    334   1.1       jtc 	p->ncsalloc = 0;
    335  1.39  christos 	p->pflags = pflags;
    336   1.1       jtc 	for (i = 0; i < NPAREN; i++) {
    337   1.1       jtc 		p->pbegin[i] = 0;
    338   1.1       jtc 		p->pend[i] = 0;
    339   1.1       jtc 	}
    340  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    341  1.39  christos 	if ((cflags & REG_GNU) == 0) {
    342  1.39  christos 		p->gnuext = false;
    343  1.39  christos 		p->allowbranch = (cflags & REG_EXTENDED) != 0;
    344  1.39  christos 	} else
    345  1.39  christos 		p->gnuext = p->allowbranch = true;
    346  1.39  christos #else
    347  1.39  christos 	p->gnuext = false;
    348  1.39  christos 	p->allowbranch = (cflags & REG_EXTENDED) != 0;
    349  1.39  christos #endif
    350  1.39  christos 	if (cflags & REG_EXTENDED) {
    351  1.39  christos 		p->bre = false;
    352  1.39  christos 		p->parse_expr = p_ere_exp;
    353  1.39  christos 		p->pre_parse = NULL;
    354  1.39  christos 		p->post_parse = NULL;
    355  1.39  christos 	} else {
    356  1.39  christos 		p->bre = true;
    357  1.39  christos 		p->parse_expr = p_simp_re;
    358  1.39  christos 		p->pre_parse = p_bre_pre_parse;
    359  1.39  christos 		p->post_parse = p_bre_post_parse;
    360  1.39  christos 	}
    361   1.1       jtc 	g->sets = NULL;
    362   1.1       jtc 	g->ncsets = 0;
    363   1.1       jtc 	g->cflags = cflags;
    364   1.1       jtc 	g->iflags = 0;
    365   1.1       jtc 	g->nbol = 0;
    366   1.1       jtc 	g->neol = 0;
    367   1.1       jtc 	g->must = NULL;
    368  1.39  christos 	g->moffset = -1;
    369  1.39  christos 	g->charjump = NULL;
    370  1.39  christos 	g->matchjump = NULL;
    371   1.1       jtc 	g->mlen = 0;
    372   1.1       jtc 	g->nsub = 0;
    373   1.1       jtc 	g->backrefs = 0;
    374   1.1       jtc 
    375   1.1       jtc 	/* do it */
    376   1.1       jtc 	EMIT(OEND, 0);
    377   1.1       jtc 	g->firststate = THERE();
    378  1.39  christos 	if (cflags & REG_NOSPEC)
    379   1.1       jtc 		p_str(p);
    380   1.1       jtc 	else
    381  1.39  christos 		p_re(p, OUT, OUT);
    382   1.1       jtc 	EMIT(OEND, 0);
    383   1.1       jtc 	g->laststate = THERE();
    384   1.1       jtc 
    385   1.1       jtc 	/* tidy up loose ends and fill things in */
    386   1.1       jtc 	stripsnug(p, g);
    387   1.1       jtc 	findmust(p, g);
    388  1.39  christos 	/* only use Boyer-Moore algorithm if the pattern is bigger
    389  1.39  christos 	 * than three characters
    390  1.39  christos 	 */
    391  1.39  christos 	if(g->mlen > 3) {
    392  1.39  christos 		computejumps(p, g);
    393  1.39  christos 		computematchjumps(p, g);
    394  1.39  christos 		if(g->matchjump == NULL && g->charjump != NULL) {
    395  1.39  christos 			free(g->charjump);
    396  1.39  christos 			g->charjump = NULL;
    397  1.39  christos 		}
    398  1.39  christos 	}
    399   1.1       jtc 	g->nplus = pluscount(p, g);
    400   1.1       jtc 	g->magic = MAGIC2;
    401   1.1       jtc 	preg->re_nsub = g->nsub;
    402   1.1       jtc 	preg->re_g = g;
    403   1.1       jtc 	preg->re_magic = MAGIC1;
    404   1.1       jtc #ifndef REDEBUG
    405   1.1       jtc 	/* not debugging, so can't rely on the assert() in regexec() */
    406   1.1       jtc 	if (g->iflags&BAD)
    407   1.1       jtc 		SETERROR(REG_ASSERT);
    408   1.1       jtc #endif
    409   1.1       jtc 
    410   1.1       jtc 	/* win or lose, we're done */
    411   1.1       jtc 	if (p->error != 0)	/* lose */
    412   1.1       jtc 		regfree(preg);
    413   1.1       jtc 	return(p->error);
    414   1.1       jtc }
    415   1.1       jtc 
    416   1.1       jtc /*
    417  1.39  christos  - regcomp - interface for parser and compilation
    418  1.39  christos  = extern int regcomp(regex_t *, const char *, int);
    419  1.39  christos  = #define	REG_BASIC	0000
    420  1.39  christos  = #define	REG_EXTENDED	0001
    421  1.39  christos  = #define	REG_ICASE	0002
    422  1.39  christos  = #define	REG_NOSUB	0004
    423  1.39  christos  = #define	REG_NEWLINE	0010
    424  1.39  christos  = #define	REG_NOSPEC	0020
    425  1.39  christos  = #define	REG_PEND	0040
    426  1.39  christos  = #define	REG_DUMP	0200
    427   1.1       jtc  */
    428  1.39  christos int				/* 0 success, otherwise REG_something */
    429  1.39  christos regcomp(regex_t * __restrict preg,
    430  1.39  christos 	const char * __restrict pattern,
    431  1.39  christos 	int cflags)
    432   1.1       jtc {
    433  1.30  christos 
    434  1.39  christos 	return (regcomp_internal(preg, pattern, cflags, 0));
    435   1.1       jtc }
    436   1.1       jtc 
    437   1.1       jtc /*
    438  1.39  christos  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op,
    439  1.39  christos  - return whether we should terminate or not
    440  1.39  christos  == static bool p_ere_exp(struct parse *p);
    441   1.1       jtc  */
    442  1.39  christos static bool
    443  1.39  christos p_ere_exp(struct parse *p, struct branchc *bc)
    444   1.1       jtc {
    445   1.9     perry 	char c;
    446  1.39  christos 	wint_t wc;
    447   1.9     perry 	sopno pos;
    448   1.9     perry 	int count;
    449   1.9     perry 	int count2;
    450  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    451  1.40  christos 	size_t i;
    452  1.39  christos 	int handled;
    453  1.39  christos #endif
    454   1.9     perry 	sopno subno;
    455   1.1       jtc 	int wascaret = 0;
    456   1.1       jtc 
    457  1.14     lukem 	_DIAGASSERT(p != NULL);
    458  1.14     lukem 
    459  1.39  christos 	(void)bc;
    460   1.1       jtc 	assert(MORE());		/* caller should have ensured this */
    461   1.1       jtc 	c = GETNEXT();
    462   1.1       jtc 
    463  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    464  1.39  christos 	handled = 0;
    465  1.39  christos #endif
    466   1.1       jtc 	pos = HERE();
    467   1.1       jtc 	switch (c) {
    468   1.1       jtc 	case '(':
    469  1.39  christos 		(void)REQUIRE(MORE(), REG_EPAREN);
    470   1.1       jtc 		p->g->nsub++;
    471  1.40  christos 		subno = (sopno)p->g->nsub;
    472   1.1       jtc 		if (subno < NPAREN)
    473   1.1       jtc 			p->pbegin[subno] = HERE();
    474   1.1       jtc 		EMIT(OLPAREN, subno);
    475   1.1       jtc 		if (!SEE(')'))
    476  1.39  christos 			p_re(p, ')', IGN);
    477   1.1       jtc 		if (subno < NPAREN) {
    478   1.1       jtc 			p->pend[subno] = HERE();
    479   1.1       jtc 			assert(p->pend[subno] != 0);
    480   1.1       jtc 		}
    481   1.1       jtc 		EMIT(ORPAREN, subno);
    482  1.39  christos 		(void)MUSTEAT(')', REG_EPAREN);
    483   1.1       jtc 		break;
    484   1.1       jtc #ifndef POSIX_MISTAKE
    485   1.1       jtc 	case ')':		/* happens only if no current unmatched ( */
    486   1.1       jtc 		/*
    487   1.1       jtc 		 * You may ask, why the ifndef?  Because I didn't notice
    488   1.1       jtc 		 * this until slightly too late for 1003.2, and none of the
    489   1.1       jtc 		 * other 1003.2 regular-expression reviewers noticed it at
    490   1.1       jtc 		 * all.  So an unmatched ) is legal POSIX, at least until
    491   1.1       jtc 		 * we can get it fixed.
    492   1.1       jtc 		 */
    493   1.1       jtc 		SETERROR(REG_EPAREN);
    494   1.1       jtc 		break;
    495   1.1       jtc #endif
    496   1.1       jtc 	case '^':
    497   1.1       jtc 		EMIT(OBOL, 0);
    498   1.1       jtc 		p->g->iflags |= USEBOL;
    499   1.1       jtc 		p->g->nbol++;
    500   1.1       jtc 		wascaret = 1;
    501   1.1       jtc 		break;
    502   1.1       jtc 	case '$':
    503   1.1       jtc 		EMIT(OEOL, 0);
    504   1.1       jtc 		p->g->iflags |= USEEOL;
    505   1.1       jtc 		p->g->neol++;
    506   1.1       jtc 		break;
    507   1.1       jtc 	case '|':
    508   1.1       jtc 		SETERROR(REG_EMPTY);
    509   1.1       jtc 		break;
    510   1.1       jtc 	case '*':
    511   1.1       jtc 	case '+':
    512   1.1       jtc 	case '?':
    513  1.39  christos 	case '{':
    514   1.1       jtc 		SETERROR(REG_BADRPT);
    515   1.1       jtc 		break;
    516   1.1       jtc 	case '.':
    517   1.1       jtc 		if (p->g->cflags&REG_NEWLINE)
    518   1.1       jtc 			nonnewline(p);
    519   1.1       jtc 		else
    520   1.1       jtc 			EMIT(OANY, 0);
    521   1.1       jtc 		break;
    522   1.1       jtc 	case '[':
    523   1.1       jtc 		p_bracket(p);
    524   1.1       jtc 		break;
    525   1.1       jtc 	case '\\':
    526  1.39  christos 		(void)REQUIRE(MORE(), REG_EESCAPE);
    527  1.39  christos 		wc = WGETNEXT();
    528  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    529  1.39  christos 		if (p->gnuext) {
    530  1.39  christos 			handled = 1;
    531  1.39  christos 			switch (wc) {
    532  1.39  christos 			case '`':
    533  1.39  christos 				EMIT(OBOS, 0);
    534  1.39  christos 				break;
    535  1.39  christos 			case '\'':
    536  1.39  christos 				EMIT(OEOS, 0);
    537  1.39  christos 				break;
    538  1.39  christos 			case 'B':
    539  1.39  christos 				EMIT(ONWBND, 0);
    540  1.39  christos 				break;
    541  1.39  christos 			case 'b':
    542  1.39  christos 				EMIT(OWBND, 0);
    543  1.39  christos 				break;
    544  1.39  christos 			case 'W':
    545  1.39  christos 			case 'w':
    546  1.39  christos 			case 'S':
    547  1.39  christos 			case 's':
    548  1.39  christos 				p_b_pseudoclass(p, wc);
    549  1.39  christos 				break;
    550  1.46  christos 			case 'a':
    551  1.46  christos 				ordinary(p, '\a');
    552  1.46  christos 				break;
    553  1.46  christos 			case 'e':
    554  1.46  christos 				ordinary(p, '\e');
    555  1.46  christos 				break;
    556  1.46  christos 			case 'f':
    557  1.46  christos 				ordinary(p, '\f');
    558  1.46  christos 				break;
    559  1.46  christos 			case 'n':
    560  1.46  christos 				ordinary(p, '\n');
    561  1.46  christos 				break;
    562  1.46  christos 			case 'r':
    563  1.46  christos 				ordinary(p, '\r');
    564  1.46  christos 				break;
    565  1.46  christos 			case 't':
    566  1.46  christos 				ordinary(p, '\t');
    567  1.46  christos 				break;
    568  1.46  christos 			case 'v':
    569  1.46  christos 				ordinary(p, '\v');
    570  1.46  christos 				break;
    571  1.39  christos 			case '1':
    572  1.39  christos 			case '2':
    573  1.39  christos 			case '3':
    574  1.39  christos 			case '4':
    575  1.39  christos 			case '5':
    576  1.39  christos 			case '6':
    577  1.39  christos 			case '7':
    578  1.39  christos 			case '8':
    579  1.39  christos 			case '9':
    580  1.39  christos 				i = wc - '0';
    581  1.39  christos 				assert(i < NPAREN);
    582  1.39  christos 				if (p->pend[i] != 0) {
    583  1.39  christos 					assert(i <= p->g->nsub);
    584  1.39  christos 					EMIT(OBACK_, i);
    585  1.39  christos 					assert(p->pbegin[i] != 0);
    586  1.39  christos 					assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
    587  1.39  christos 					assert(OP(p->strip[p->pend[i]]) == ORPAREN);
    588  1.39  christos 					(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
    589  1.39  christos 					EMIT(O_BACK, i);
    590  1.39  christos 				} else
    591  1.39  christos 					SETERROR(REG_ESUBREG);
    592  1.39  christos 				p->g->backrefs = 1;
    593  1.39  christos 				break;
    594  1.39  christos 			default:
    595  1.39  christos 				handled = 0;
    596  1.39  christos 			}
    597  1.39  christos 			/* Don't proceed to the POSIX bits if we've already handled it */
    598  1.39  christos 			if (handled)
    599  1.39  christos 				break;
    600  1.39  christos 		}
    601  1.39  christos #endif
    602  1.39  christos 		switch (wc) {
    603  1.39  christos 		case '<':
    604  1.39  christos 			EMIT(OBOW, 0);
    605  1.39  christos 			break;
    606  1.39  christos 		case '>':
    607  1.39  christos 			EMIT(OEOW, 0);
    608  1.39  christos 			break;
    609  1.39  christos 		default:
    610  1.39  christos 			if (may_escape(p, wc))
    611  1.39  christos 				ordinary(p, wc);
    612  1.39  christos 			else
    613  1.39  christos 				SETERROR(REG_EESCAPE);
    614  1.39  christos 			break;
    615  1.39  christos 		}
    616   1.1       jtc 		break;
    617   1.1       jtc 	default:
    618  1.38  christos 		if (p->error != 0)
    619  1.39  christos 			return (false);
    620  1.39  christos 		p->next--;
    621  1.39  christos 		wc = WGETNEXT();
    622  1.39  christos 		ordinary(p, wc);
    623   1.1       jtc 		break;
    624   1.1       jtc 	}
    625   1.1       jtc 
    626   1.1       jtc 	if (!MORE())
    627  1.39  christos 		return (false);
    628   1.1       jtc 	c = PEEK();
    629   1.1       jtc 	/* we call { a repetition if followed by a digit */
    630  1.39  christos 	if (!( c == '*' || c == '+' || c == '?' || c == '{'))
    631  1.39  christos 		return (false);		/* no repetition, we're done */
    632  1.39  christos 	else if (c == '{')
    633  1.39  christos 		(void)REQUIRE(MORE2() && \
    634  1.39  christos 		    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
    635   1.1       jtc 	NEXT();
    636   1.1       jtc 
    637  1.39  christos 	(void)REQUIRE(!wascaret, REG_BADRPT);
    638   1.1       jtc 	switch (c) {
    639   1.1       jtc 	case '*':	/* implemented as +? */
    640   1.4       jtc 		/* this case does not require the (y|) trick, noKLUDGE */
    641   1.1       jtc 		INSERT(OPLUS_, pos);
    642   1.1       jtc 		ASTERN(O_PLUS, pos);
    643   1.1       jtc 		INSERT(OQUEST_, pos);
    644   1.1       jtc 		ASTERN(O_QUEST, pos);
    645   1.1       jtc 		break;
    646   1.1       jtc 	case '+':
    647   1.1       jtc 		INSERT(OPLUS_, pos);
    648   1.1       jtc 		ASTERN(O_PLUS, pos);
    649   1.1       jtc 		break;
    650   1.1       jtc 	case '?':
    651   1.4       jtc 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
    652   1.4       jtc 		INSERT(OCH_, pos);		/* offset slightly wrong */
    653   1.4       jtc 		ASTERN(OOR1, pos);		/* this one's right */
    654   1.4       jtc 		AHEAD(pos);			/* fix the OCH_ */
    655   1.4       jtc 		EMIT(OOR2, 0);			/* offset very wrong... */
    656   1.4       jtc 		AHEAD(THERE());			/* ...so fix it */
    657   1.4       jtc 		ASTERN(O_CH, THERETHERE());
    658   1.1       jtc 		break;
    659   1.1       jtc 	case '{':
    660   1.1       jtc 		count = p_count(p);
    661   1.1       jtc 		if (EAT(',')) {
    662  1.39  christos 			if (isdigit((uch)PEEK())) {
    663   1.1       jtc 				count2 = p_count(p);
    664  1.39  christos 				(void)REQUIRE(count <= count2, REG_BADBR);
    665   1.1       jtc 			} else		/* single number with comma */
    666   1.1       jtc 				count2 = INFINITY;
    667   1.1       jtc 		} else		/* just a single number */
    668   1.1       jtc 			count2 = count;
    669  1.39  christos 		repeat(p, pos, count, count2);
    670   1.1       jtc 		if (!EAT('}')) {	/* error heuristics */
    671   1.1       jtc 			while (MORE() && PEEK() != '}')
    672   1.1       jtc 				NEXT();
    673  1.39  christos 			(void)REQUIRE(MORE(), REG_EBRACE);
    674   1.1       jtc 			SETERROR(REG_BADBR);
    675   1.1       jtc 		}
    676   1.1       jtc 		break;
    677   1.1       jtc 	}
    678   1.1       jtc 
    679   1.1       jtc 	if (!MORE())
    680  1.39  christos 		return (false);
    681   1.1       jtc 	c = PEEK();
    682   1.1       jtc 	if (!( c == '*' || c == '+' || c == '?' ||
    683  1.39  christos 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
    684  1.39  christos 		return (false);
    685   1.1       jtc 	SETERROR(REG_BADRPT);
    686  1.39  christos 	return (false);
    687   1.1       jtc }
    688   1.1       jtc 
    689   1.1       jtc /*
    690   1.1       jtc  - p_str - string (no metacharacters) "parser"
    691   1.9     perry  == static void p_str(struct parse *p);
    692   1.1       jtc  */
    693   1.1       jtc static void
    694  1.39  christos p_str(struct parse *p)
    695   1.1       jtc {
    696  1.39  christos 	(void)REQUIRE(MORE(), REG_EMPTY);
    697  1.39  christos 	while (MORE())
    698  1.39  christos 		ordinary(p, WGETNEXT());
    699  1.39  christos }
    700  1.14     lukem 
    701  1.39  christos /*
    702  1.39  christos  * Eat consecutive branch delimiters for the kind of expression that we are
    703  1.39  christos  * parsing, return the number of delimiters that we ate.
    704  1.39  christos  */
    705  1.39  christos static int
    706  1.39  christos p_branch_eat_delim(struct parse *p, struct branchc *bc)
    707  1.39  christos {
    708  1.39  christos 	int nskip;
    709  1.14     lukem 
    710  1.39  christos 	(void)bc;
    711  1.39  christos 	nskip = 0;
    712  1.39  christos 	while (EATSPEC('|'))
    713  1.39  christos 		++nskip;
    714  1.39  christos 	return (nskip);
    715   1.1       jtc }
    716   1.1       jtc 
    717   1.1       jtc /*
    718  1.39  christos  * Insert necessary branch book-keeping operations. This emits a
    719  1.39  christos  * bogus 'next' offset, since we still have more to parse
    720   1.1       jtc  */
    721   1.1       jtc static void
    722  1.39  christos p_branch_ins_offset(struct parse *p, struct branchc *bc)
    723   1.9     perry {
    724   1.1       jtc 
    725  1.39  christos 	if (bc->nbranch == 0) {
    726  1.39  christos 		INSERT(OCH_, bc->start);	/* offset is wrong */
    727  1.39  christos 		bc->fwd = bc->start;
    728  1.39  christos 		bc->back = bc->start;
    729  1.39  christos 	}
    730  1.39  christos 
    731  1.39  christos 	ASTERN(OOR1, bc->back);
    732  1.39  christos 	bc->back = THERE();
    733  1.39  christos 	AHEAD(bc->fwd);			/* fix previous offset */
    734  1.39  christos 	bc->fwd = HERE();
    735  1.39  christos 	EMIT(OOR2, 0);			/* offset is very wrong */
    736  1.39  christos 	++bc->nbranch;
    737  1.39  christos }
    738  1.39  christos 
    739  1.39  christos /*
    740  1.39  christos  * Fix the offset of the tail branch, if we actually had any branches.
    741  1.39  christos  * This is to correct the bogus placeholder offset that we use.
    742  1.39  christos  */
    743  1.39  christos static void
    744  1.39  christos p_branch_fix_tail(struct parse *p, struct branchc *bc)
    745  1.39  christos {
    746  1.14     lukem 
    747  1.39  christos 	/* Fix bogus offset at the tail if we actually have branches */
    748  1.39  christos 	if (bc->nbranch > 0) {
    749  1.39  christos 		AHEAD(bc->fwd);
    750  1.39  christos 		ASTERN(O_CH, bc->back);
    751  1.30  christos 	}
    752  1.39  christos }
    753  1.39  christos 
    754  1.39  christos /*
    755  1.39  christos  * Signal to the parser that an empty branch has been encountered; this will,
    756  1.39  christos  * in the future, be used to allow for more permissive behavior with empty
    757  1.39  christos  * branches. The return value should indicate whether parsing may continue
    758  1.39  christos  * or not.
    759  1.39  christos  */
    760  1.39  christos static bool
    761  1.39  christos p_branch_empty(struct parse *p, struct branchc *bc)
    762  1.39  christos {
    763  1.39  christos 
    764  1.39  christos 	(void)bc;
    765  1.39  christos 	SETERROR(REG_EMPTY);
    766  1.39  christos 	return (false);
    767  1.39  christos }
    768  1.39  christos 
    769  1.39  christos /*
    770  1.39  christos  * Take care of any branching requirements. This includes inserting the
    771  1.39  christos  * appropriate branching instructions as well as eating all of the branch
    772  1.39  christos  * delimiters until we either run out of pattern or need to parse more pattern.
    773  1.39  christos  */
    774  1.39  christos static bool
    775  1.39  christos p_branch_do(struct parse *p, struct branchc *bc)
    776  1.39  christos {
    777  1.39  christos 	int ate = 0;
    778  1.39  christos 
    779  1.39  christos 	ate = p_branch_eat_delim(p, bc);
    780  1.39  christos 	if (ate == 0)
    781  1.39  christos 		return (false);
    782  1.39  christos 	else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
    783  1.39  christos 		/*
    784  1.39  christos 		 * Halt parsing only if we have an empty branch and p_branch_empty
    785  1.39  christos 		 * indicates that we must not continue. In the future, this will not
    786  1.39  christos 		 * necessarily be an error.
    787  1.39  christos 		 */
    788  1.39  christos 		return (false);
    789  1.39  christos 	p_branch_ins_offset(p, bc);
    790  1.39  christos 
    791  1.39  christos 	return (true);
    792  1.39  christos }
    793  1.30  christos 
    794  1.39  christos static void
    795  1.39  christos p_bre_pre_parse(struct parse *p, struct branchc *bc)
    796  1.39  christos {
    797  1.14     lukem 
    798  1.40  christos 	(void)bc;
    799  1.39  christos 	/*
    800  1.39  christos 	 * Does not move cleanly into expression parser because of
    801  1.39  christos 	 * ordinary interpration of * at the beginning position of
    802  1.39  christos 	 * an expression.
    803  1.39  christos 	 */
    804   1.1       jtc 	if (EAT('^')) {
    805   1.1       jtc 		EMIT(OBOL, 0);
    806   1.1       jtc 		p->g->iflags |= USEBOL;
    807   1.1       jtc 		p->g->nbol++;
    808   1.1       jtc 	}
    809  1.39  christos }
    810  1.39  christos 
    811  1.39  christos static void
    812  1.39  christos p_bre_post_parse(struct parse *p, struct branchc *bc)
    813  1.39  christos {
    814  1.39  christos 
    815  1.39  christos 	/* Expression is terminating due to EOL token */
    816  1.39  christos 	if (bc->terminate) {
    817   1.1       jtc 		DROP(1);
    818   1.1       jtc 		EMIT(OEOL, 0);
    819   1.1       jtc 		p->g->iflags |= USEEOL;
    820   1.1       jtc 		p->g->neol++;
    821   1.1       jtc 	}
    822  1.39  christos }
    823   1.1       jtc 
    824  1.39  christos /*
    825  1.39  christos  - p_re - Top level parser, concatenation and BRE anchoring
    826  1.39  christos  == static void p_re(struct parse *p, int end1, int end2);
    827  1.39  christos  * Giving end1 as OUT essentially eliminates the end1/end2 check.
    828  1.39  christos  *
    829  1.39  christos  * This implementation is a bit of a kludge, in that a trailing $ is first
    830  1.39  christos  * taken as an ordinary character and then revised to be an anchor.
    831  1.39  christos  * The amount of lookahead needed to avoid this kludge is excessive.
    832  1.39  christos  */
    833  1.39  christos static void
    834  1.39  christos p_re(struct parse *p,
    835  1.39  christos 	int end1,	/* first terminating character */
    836  1.39  christos 	int end2)	/* second terminating character; ignored for EREs */
    837  1.39  christos {
    838  1.39  christos 	struct branchc bc;
    839  1.39  christos 
    840  1.39  christos 	bc.nbranch = 0;
    841  1.39  christos 	if (end1 == OUT && end2 == OUT)
    842  1.39  christos 		bc.outer = true;
    843  1.39  christos 	else
    844  1.39  christos 		bc.outer = false;
    845  1.39  christos #define	SEEEND()	(!p->bre ? SEE(end1) : SEETWO(end1, end2))
    846  1.39  christos 	for (;;) {
    847  1.39  christos 		bc.start = HERE();
    848  1.39  christos 		bc.nchain = 0;
    849  1.39  christos 		bc.terminate = false;
    850  1.39  christos 		if (p->pre_parse != NULL)
    851  1.39  christos 			p->pre_parse(p, &bc);
    852  1.39  christos 		while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) {
    853  1.39  christos 			bc.terminate = p->parse_expr(p, &bc);
    854  1.39  christos 			++bc.nchain;
    855  1.39  christos 		}
    856  1.39  christos 		if (p->post_parse != NULL)
    857  1.39  christos 			p->post_parse(p, &bc);
    858  1.39  christos 		(void) REQUIRE(p->gnuext || HERE() != bc.start, REG_EMPTY);
    859  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    860  1.39  christos 		if (p->gnuext && HERE() == bc.start && !p_branch_empty(p, &bc))
    861  1.39  christos 			break;
    862  1.39  christos #endif
    863  1.39  christos 		if (!p->allowbranch)
    864  1.39  christos 			break;
    865  1.39  christos 		/*
    866  1.39  christos 		 * p_branch_do's return value indicates whether we should
    867  1.39  christos 		 * continue parsing or not. This is both for correctness and
    868  1.39  christos 		 * a slight optimization, because it will check if we've
    869  1.39  christos 		 * encountered an empty branch or the end of the string
    870  1.39  christos 		 * immediately following a branch delimiter.
    871  1.39  christos 		 */
    872  1.39  christos 		if (!p_branch_do(p, &bc))
    873  1.39  christos 			break;
    874  1.39  christos 	}
    875  1.39  christos #undef SEE_END
    876  1.39  christos 	if (p->allowbranch)
    877  1.39  christos 		p_branch_fix_tail(p, &bc);
    878  1.39  christos 	assert(!MORE() || SEE(end1));
    879   1.1       jtc }
    880   1.1       jtc 
    881   1.1       jtc /*
    882   1.1       jtc  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
    883  1.39  christos  == static bool p_simp_re(struct parse *p, struct branchc *bc);
    884   1.1       jtc  */
    885  1.39  christos static bool			/* was the simple RE an unbackslashed $? */
    886  1.39  christos p_simp_re(struct parse *p, struct branchc *bc)
    887   1.1       jtc {
    888   1.9     perry 	int c;
    889  1.39  christos 	int cc;			/* convenient/control character */
    890   1.9     perry 	int count;
    891   1.9     perry 	int count2;
    892  1.39  christos 	sopno pos;
    893  1.39  christos 	bool handled;
    894  1.40  christos 	size_t i;
    895  1.39  christos 	wint_t wc;
    896   1.9     perry 	sopno subno;
    897   1.1       jtc #	define	BACKSL	(1<<CHAR_BIT)
    898   1.1       jtc 
    899  1.39  christos 	pos = HERE();		/* repetition op, if any, covers from here */
    900  1.39  christos 	handled = false;
    901   1.1       jtc 
    902   1.1       jtc 	assert(MORE());		/* caller should have ensured this */
    903   1.1       jtc 	c = GETNEXT();
    904   1.1       jtc 	if (c == '\\') {
    905  1.39  christos 		(void)REQUIRE(MORE(), REG_EESCAPE);
    906  1.39  christos 		cc = GETNEXT();
    907  1.39  christos 		c = BACKSL | cc;
    908  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
    909  1.39  christos 		if (p->gnuext) {
    910  1.39  christos 			handled = true;
    911  1.39  christos 			switch (c) {
    912  1.39  christos 			case BACKSL|'`':
    913  1.39  christos 				EMIT(OBOS, 0);
    914  1.39  christos 				break;
    915  1.39  christos 			case BACKSL|'\'':
    916  1.39  christos 				EMIT(OEOS, 0);
    917  1.39  christos 				break;
    918  1.39  christos 			case BACKSL|'B':
    919  1.39  christos 				EMIT(ONWBND, 0);
    920  1.39  christos 				break;
    921  1.39  christos 			case BACKSL|'b':
    922  1.39  christos 				EMIT(OWBND, 0);
    923  1.39  christos 				break;
    924  1.39  christos 			case BACKSL|'W':
    925  1.39  christos 			case BACKSL|'w':
    926  1.39  christos 			case BACKSL|'S':
    927  1.39  christos 			case BACKSL|'s':
    928  1.39  christos 				p_b_pseudoclass(p, cc);
    929  1.39  christos 				break;
    930  1.46  christos 			case BACKSL|'a':
    931  1.46  christos 				ordinary(p, '\a');
    932  1.46  christos 				break;
    933  1.46  christos 			case BACKSL|'e':
    934  1.46  christos 				ordinary(p, '\e');
    935  1.46  christos 				break;
    936  1.46  christos 			case BACKSL|'f':
    937  1.46  christos 				ordinary(p, '\f');
    938  1.46  christos 				break;
    939  1.46  christos 			case BACKSL|'n':
    940  1.46  christos 				ordinary(p, '\n');
    941  1.46  christos 				break;
    942  1.46  christos 			case BACKSL|'r':
    943  1.46  christos 				ordinary(p, '\r');
    944  1.46  christos 				break;
    945  1.46  christos 			case BACKSL|'t':
    946  1.46  christos 				ordinary(p, '\t');
    947  1.46  christos 				break;
    948  1.46  christos 			case BACKSL|'v':
    949  1.46  christos 				ordinary(p, '\v');
    950  1.46  christos 				break;
    951  1.39  christos 			default:
    952  1.39  christos 				handled = false;
    953  1.39  christos 			}
    954  1.39  christos 		}
    955  1.39  christos #endif
    956   1.1       jtc 	}
    957  1.39  christos 	if (!handled) {
    958  1.39  christos 		switch (c) {
    959  1.39  christos 		case '.':
    960  1.39  christos 			if (p->g->cflags&REG_NEWLINE)
    961  1.39  christos 				nonnewline(p);
    962  1.39  christos 			else
    963  1.39  christos 				EMIT(OANY, 0);
    964  1.39  christos 			break;
    965  1.39  christos 		case '[':
    966  1.39  christos 			p_bracket(p);
    967  1.39  christos 			break;
    968  1.39  christos 		case BACKSL|'<':
    969  1.39  christos 			EMIT(OBOW, 0);
    970  1.39  christos 			break;
    971  1.39  christos 		case BACKSL|'>':
    972  1.39  christos 			EMIT(OEOW, 0);
    973  1.39  christos 			break;
    974  1.39  christos 		case BACKSL|'{':
    975  1.39  christos 			SETERROR(REG_BADRPT);
    976  1.39  christos 			break;
    977  1.39  christos 		case BACKSL|'(':
    978  1.39  christos 			p->g->nsub++;
    979  1.40  christos 			subno = (sopno)p->g->nsub;
    980  1.39  christos 			if (subno < NPAREN)
    981  1.39  christos 				p->pbegin[subno] = HERE();
    982  1.39  christos 			EMIT(OLPAREN, subno);
    983  1.39  christos 			/* the MORE here is an error heuristic */
    984  1.39  christos 			if (MORE() && !SEETWO('\\', ')'))
    985  1.39  christos 				p_re(p, '\\', ')');
    986  1.39  christos 			if (subno < NPAREN) {
    987  1.39  christos 				p->pend[subno] = HERE();
    988  1.39  christos 				assert(p->pend[subno] != 0);
    989  1.39  christos 			}
    990  1.39  christos 			EMIT(ORPAREN, subno);
    991  1.39  christos 			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
    992  1.39  christos 			break;
    993  1.39  christos 		case BACKSL|')':	/* should not get here -- must be user */
    994  1.39  christos 			SETERROR(REG_EPAREN);
    995  1.39  christos 			break;
    996  1.39  christos 		case BACKSL|'1':
    997  1.39  christos 		case BACKSL|'2':
    998  1.39  christos 		case BACKSL|'3':
    999  1.39  christos 		case BACKSL|'4':
   1000  1.39  christos 		case BACKSL|'5':
   1001  1.39  christos 		case BACKSL|'6':
   1002  1.39  christos 		case BACKSL|'7':
   1003  1.39  christos 		case BACKSL|'8':
   1004  1.39  christos 		case BACKSL|'9':
   1005  1.39  christos 			i = (c&~BACKSL) - '0';
   1006  1.39  christos 			assert(i < NPAREN);
   1007  1.39  christos 			if (p->pend[i] != 0) {
   1008  1.39  christos 				assert(i <= p->g->nsub);
   1009  1.39  christos 				EMIT(OBACK_, i);
   1010  1.39  christos 				assert(p->pbegin[i] != 0);
   1011  1.39  christos 				assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
   1012  1.39  christos 				assert(OP(p->strip[p->pend[i]]) == ORPAREN);
   1013  1.39  christos 				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
   1014  1.39  christos 				EMIT(O_BACK, i);
   1015  1.39  christos 			} else
   1016  1.39  christos 				SETERROR(REG_ESUBREG);
   1017  1.39  christos 			p->g->backrefs = 1;
   1018  1.39  christos 			break;
   1019  1.39  christos 		case '*':
   1020  1.39  christos 			/*
   1021  1.39  christos 			 * Ordinary if used as the first character beyond BOL anchor of
   1022  1.39  christos 			 * a (sub-)expression, counts as a bad repetition operator if it
   1023  1.39  christos 			 * appears otherwise.
   1024  1.39  christos 			 */
   1025  1.39  christos 			(void)REQUIRE(bc->nchain == 0, REG_BADRPT);
   1026  1.39  christos 			/* FALLTHROUGH */
   1027  1.39  christos 		default:
   1028  1.39  christos 			if (p->error != 0)
   1029  1.39  christos 				return (false);	/* Definitely not $... */
   1030  1.39  christos 			p->next--;
   1031  1.39  christos 			wc = WGETNEXT();
   1032  1.39  christos 			if ((c & BACKSL) == 0 || may_escape(p, wc))
   1033  1.39  christos 				ordinary(p, wc);
   1034  1.39  christos 			else
   1035  1.39  christos 				SETERROR(REG_EESCAPE);
   1036  1.39  christos 			break;
   1037   1.1       jtc 		}
   1038   1.1       jtc 	}
   1039   1.1       jtc 
   1040   1.1       jtc 	if (EAT('*')) {		/* implemented as +? */
   1041   1.4       jtc 		/* this case does not require the (y|) trick, noKLUDGE */
   1042   1.1       jtc 		INSERT(OPLUS_, pos);
   1043   1.1       jtc 		ASTERN(O_PLUS, pos);
   1044   1.1       jtc 		INSERT(OQUEST_, pos);
   1045   1.1       jtc 		ASTERN(O_QUEST, pos);
   1046  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
   1047  1.39  christos 	} else if (p->gnuext && EATTWO('\\', '?')) {
   1048  1.39  christos 		INSERT(OQUEST_, pos);
   1049  1.39  christos 		ASTERN(O_QUEST, pos);
   1050  1.39  christos 	} else if (p->gnuext && EATTWO('\\', '+')) {
   1051  1.39  christos 		INSERT(OPLUS_, pos);
   1052  1.39  christos 		ASTERN(O_PLUS, pos);
   1053  1.39  christos #endif
   1054   1.1       jtc 	} else if (EATTWO('\\', '{')) {
   1055   1.1       jtc 		count = p_count(p);
   1056   1.1       jtc 		if (EAT(',')) {
   1057  1.39  christos 			if (MORE() && isdigit((uch)PEEK())) {
   1058   1.1       jtc 				count2 = p_count(p);
   1059  1.39  christos 				(void)REQUIRE(count <= count2, REG_BADBR);
   1060   1.1       jtc 			} else		/* single number with comma */
   1061   1.1       jtc 				count2 = INFINITY;
   1062   1.1       jtc 		} else		/* just a single number */
   1063   1.1       jtc 			count2 = count;
   1064  1.39  christos 		repeat(p, pos, count, count2);
   1065   1.1       jtc 		if (!EATTWO('\\', '}')) {	/* error heuristics */
   1066   1.1       jtc 			while (MORE() && !SEETWO('\\', '}'))
   1067   1.1       jtc 				NEXT();
   1068  1.39  christos 			(void)REQUIRE(MORE(), REG_EBRACE);
   1069   1.1       jtc 			SETERROR(REG_BADBR);
   1070   1.1       jtc 		}
   1071  1.39  christos 	} else if (c == '$')     /* $ (but not \$) ends it */
   1072  1.39  christos 		return (true);
   1073   1.1       jtc 
   1074  1.39  christos 	return (false);
   1075   1.1       jtc }
   1076   1.1       jtc 
   1077   1.1       jtc /*
   1078   1.1       jtc  - p_count - parse a repetition count
   1079   1.9     perry  == static int p_count(struct parse *p);
   1080   1.1       jtc  */
   1081   1.1       jtc static int			/* the value */
   1082  1.39  christos p_count(struct parse *p)
   1083   1.1       jtc {
   1084   1.9     perry 	int count = 0;
   1085   1.9     perry 	int ndigits = 0;
   1086   1.1       jtc 
   1087  1.39  christos 	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
   1088   1.1       jtc 		count = count*10 + (GETNEXT() - '0');
   1089   1.1       jtc 		ndigits++;
   1090   1.1       jtc 	}
   1091   1.1       jtc 
   1092  1.39  christos 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
   1093   1.1       jtc 	return(count);
   1094   1.1       jtc }
   1095   1.1       jtc 
   1096   1.1       jtc /*
   1097   1.1       jtc  - p_bracket - parse a bracketed character list
   1098   1.9     perry  == static void p_bracket(struct parse *p);
   1099   1.1       jtc  */
   1100   1.1       jtc static void
   1101  1.39  christos p_bracket(struct parse *p)
   1102   1.1       jtc {
   1103  1.14     lukem 	cset *cs;
   1104  1.39  christos 	wint_t ch;
   1105  1.14     lukem 
   1106   1.1       jtc 	/* Dept of Truly Sickening Special-Case Kludges */
   1107  1.39  christos 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
   1108   1.1       jtc 		EMIT(OBOW, 0);
   1109   1.1       jtc 		NEXTn(6);
   1110   1.1       jtc 		return;
   1111   1.1       jtc 	}
   1112  1.39  christos 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
   1113   1.1       jtc 		EMIT(OEOW, 0);
   1114   1.1       jtc 		NEXTn(6);
   1115   1.1       jtc 		return;
   1116   1.1       jtc 	}
   1117   1.1       jtc 
   1118  1.39  christos 	if ((cs = allocset(p)) == NULL)
   1119  1.39  christos 		return;
   1120  1.39  christos 
   1121  1.39  christos 	if (p->g->cflags&REG_ICASE)
   1122  1.39  christos 		cs->icase = 1;
   1123   1.1       jtc 	if (EAT('^'))
   1124  1.39  christos 		cs->invert = 1;
   1125   1.1       jtc 	if (EAT(']'))
   1126  1.39  christos 		CHadd(p, cs, ']');
   1127   1.1       jtc 	else if (EAT('-'))
   1128  1.39  christos 		CHadd(p, cs, '-');
   1129   1.1       jtc 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
   1130   1.1       jtc 		p_b_term(p, cs);
   1131   1.1       jtc 	if (EAT('-'))
   1132  1.39  christos 		CHadd(p, cs, '-');
   1133  1.39  christos 	(void)MUSTEAT(']', REG_EBRACK);
   1134   1.1       jtc 
   1135   1.1       jtc 	if (p->error != 0)	/* don't mess things up further */
   1136   1.1       jtc 		return;
   1137   1.1       jtc 
   1138  1.39  christos 	if (cs->invert && p->g->cflags&REG_NEWLINE)
   1139  1.39  christos 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
   1140   1.1       jtc 
   1141  1.39  christos 	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
   1142  1.39  christos 		ordinary(p, ch);
   1143  1.39  christos 		freeset(p, cs);
   1144  1.39  christos 	} else
   1145  1.40  christos 		EMIT(OANYOF, (size_t)(cs - p->g->sets));
   1146  1.39  christos }
   1147   1.1       jtc 
   1148  1.39  christos static int
   1149  1.39  christos p_range_cmp(wchar_t c1, wchar_t c2)
   1150  1.39  christos {
   1151  1.39  christos #ifdef REGEX_LIBC_COLLATE
   1152  1.39  christos 	return __wcollate_range_cmp(c1, c2);
   1153  1.42  christos #elif defined(NLS)
   1154  1.39  christos 	/* Copied from libc/collate __wcollate_range_cmp */
   1155  1.39  christos 	wchar_t s1[2], s2[2];
   1156   1.1       jtc 
   1157  1.39  christos 	s1[0] = c1;
   1158  1.39  christos 	s1[1] = L'\0';
   1159  1.39  christos 	s2[0] = c2;
   1160  1.39  christos 	s2[1] = L'\0';
   1161  1.42  christos 	return wcscoll(s1, s2);
   1162  1.42  christos #else
   1163  1.42  christos 	char s1[2], s2[2];
   1164  1.42  christos 
   1165  1.42  christos 	s1[0] = (char)c1;
   1166  1.42  christos 	s1[1] = '\0';
   1167  1.42  christos 	s2[0] = (char)c2;
   1168  1.42  christos 	s2[1] = '\0';
   1169  1.42  christos 	return strcoll(s1, s2);
   1170  1.39  christos #endif
   1171   1.1       jtc }
   1172   1.1       jtc 
   1173   1.1       jtc /*
   1174   1.1       jtc  - p_b_term - parse one term of a bracketed character list
   1175   1.9     perry  == static void p_b_term(struct parse *p, cset *cs);
   1176   1.1       jtc  */
   1177   1.1       jtc static void
   1178  1.39  christos p_b_term(struct parse *p, cset *cs)
   1179   1.1       jtc {
   1180   1.9     perry 	char c;
   1181  1.39  christos 	wint_t start, finish;
   1182  1.39  christos 	wint_t i;
   1183  1.39  christos #ifdef REGEX_LIBC_COLLATE
   1184  1.39  christos 	struct xlocale_collate *table =
   1185  1.39  christos 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
   1186  1.39  christos #endif
   1187   1.1       jtc 
   1188  1.14     lukem 	_DIAGASSERT(p != NULL);
   1189  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1190  1.14     lukem 
   1191   1.1       jtc 	/* classify what we've got */
   1192   1.1       jtc 	switch ((MORE()) ? PEEK() : '\0') {
   1193   1.1       jtc 	case '[':
   1194   1.1       jtc 		c = (MORE2()) ? PEEK2() : '\0';
   1195   1.1       jtc 		break;
   1196   1.1       jtc 	case '-':
   1197   1.1       jtc 		SETERROR(REG_ERANGE);
   1198   1.1       jtc 		return;			/* NOTE RETURN */
   1199   1.1       jtc 	default:
   1200   1.1       jtc 		c = '\0';
   1201   1.1       jtc 		break;
   1202   1.1       jtc 	}
   1203   1.1       jtc 
   1204   1.1       jtc 	switch (c) {
   1205   1.1       jtc 	case ':':		/* character class */
   1206   1.1       jtc 		NEXT2();
   1207  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1208   1.1       jtc 		c = PEEK();
   1209  1.39  christos 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
   1210   1.1       jtc 		p_b_cclass(p, cs);
   1211  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1212  1.39  christos 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
   1213   1.1       jtc 		break;
   1214   1.1       jtc 	case '=':		/* equivalence class */
   1215   1.1       jtc 		NEXT2();
   1216  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1217   1.1       jtc 		c = PEEK();
   1218  1.39  christos 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
   1219   1.1       jtc 		p_b_eclass(p, cs);
   1220  1.39  christos 		(void)REQUIRE(MORE(), REG_EBRACK);
   1221  1.39  christos 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
   1222   1.1       jtc 		break;
   1223   1.1       jtc 	default:		/* symbol, ordinary character, or range */
   1224   1.1       jtc 		start = p_b_symbol(p);
   1225   1.1       jtc 		if (SEE('-') && MORE2() && PEEK2() != ']') {
   1226   1.1       jtc 			/* range */
   1227   1.1       jtc 			NEXT();
   1228   1.1       jtc 			if (EAT('-'))
   1229   1.1       jtc 				finish = '-';
   1230   1.1       jtc 			else
   1231   1.1       jtc 				finish = p_b_symbol(p);
   1232   1.1       jtc 		} else
   1233   1.1       jtc 			finish = start;
   1234  1.39  christos 		if (start == finish)
   1235  1.39  christos 			CHadd(p, cs, start);
   1236  1.39  christos 		else {
   1237  1.39  christos #ifdef REGEX_LIBC_COLLATE
   1238  1.39  christos 			if (table->__collate_load_error || MB_CUR_MAX > 1) {
   1239  1.39  christos #else
   1240  1.39  christos 			if (MB_CUR_MAX > 1) {
   1241  1.39  christos #endif
   1242  1.39  christos 				(void)REQUIRE(start <= finish, REG_ERANGE);
   1243  1.39  christos 				CHaddrange(p, cs, start, finish);
   1244  1.39  christos 			} else {
   1245  1.39  christos 				(void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE);
   1246  1.39  christos 				for (i = 0; i <= UCHAR_MAX; i++) {
   1247  1.39  christos 					if (p_range_cmp(start, i) <= 0 &&
   1248  1.39  christos 					    p_range_cmp(i, finish) <= 0 )
   1249  1.39  christos 						CHadd(p, cs, i);
   1250  1.39  christos 				}
   1251  1.39  christos 			}
   1252  1.39  christos 		}
   1253  1.39  christos 		break;
   1254  1.39  christos 	}
   1255  1.39  christos }
   1256  1.39  christos 
   1257  1.39  christos #ifdef REGEX_GNU_EXTENSIONS
   1258  1.39  christos /*
   1259  1.39  christos  - p_b_pseudoclass - parse a pseudo-class (\w, \W, \s, \S)
   1260  1.39  christos  == static int p_b_pseudoclass(struct parse *p, char c)
   1261  1.39  christos  */
   1262  1.39  christos static int
   1263  1.39  christos p_b_pseudoclass(struct parse *p, char c) {
   1264  1.39  christos 	cset *cs;
   1265  1.39  christos 
   1266  1.39  christos 	if ((cs = allocset(p)) == NULL)
   1267  1.39  christos 		return(0);
   1268  1.39  christos 
   1269  1.39  christos 	if (p->g->cflags&REG_ICASE)
   1270  1.39  christos 		cs->icase = 1;
   1271  1.39  christos 
   1272  1.39  christos 	switch (c) {
   1273  1.39  christos 	case 'W':
   1274  1.39  christos 		cs->invert = 1;
   1275  1.39  christos 		/* FALLTHROUGH */
   1276  1.39  christos 	case 'w':
   1277  1.39  christos 		p_b_cclass_named(p, cs, "alnum");
   1278   1.1       jtc 		break;
   1279  1.39  christos 	case 'S':
   1280  1.39  christos 		cs->invert = 1;
   1281  1.39  christos 		/* FALLTHROUGH */
   1282  1.39  christos 	case 's':
   1283  1.39  christos 		p_b_cclass_named(p, cs, "space");
   1284  1.39  christos 		break;
   1285  1.39  christos 	default:
   1286  1.39  christos 		return(0);
   1287   1.1       jtc 	}
   1288  1.39  christos 
   1289  1.40  christos 	EMIT(OANYOF, (size_t)(cs - p->g->sets));
   1290  1.39  christos 	return(1);
   1291   1.1       jtc }
   1292  1.39  christos #endif
   1293   1.1       jtc 
   1294   1.1       jtc /*
   1295   1.1       jtc  - p_b_cclass - parse a character-class name and deal with it
   1296   1.9     perry  == static void p_b_cclass(struct parse *p, cset *cs);
   1297   1.1       jtc  */
   1298   1.1       jtc static void
   1299  1.39  christos p_b_cclass(struct parse *p, cset *cs)
   1300   1.1       jtc {
   1301  1.39  christos 	const char *sp = p->next;
   1302   1.9     perry 	size_t len;
   1303  1.39  christos 	char clname[16];
   1304   1.1       jtc 
   1305  1.39  christos 	while (MORE() && isalpha((uch)PEEK()))
   1306   1.1       jtc 		NEXT();
   1307   1.1       jtc 	len = p->next - sp;
   1308  1.39  christos 	if (len >= sizeof(clname) - 1) {
   1309   1.1       jtc 		SETERROR(REG_ECTYPE);
   1310   1.1       jtc 		return;
   1311   1.1       jtc 	}
   1312  1.39  christos 	memcpy(clname, sp, len);
   1313  1.39  christos 	clname[len] = '\0';
   1314  1.39  christos 
   1315  1.39  christos 	p_b_cclass_named(p, cs, clname);
   1316  1.39  christos }
   1317  1.42  christos 
   1318  1.39  christos /*
   1319  1.39  christos  - p_b_cclass_named - deal with a named character class
   1320  1.39  christos  == static void p_b_cclass_named(struct parse *p, cset *cs, const char []);
   1321  1.39  christos  */
   1322  1.39  christos static void
   1323  1.39  christos p_b_cclass_named(struct parse *p, cset *cs, const char clname[]) {
   1324  1.39  christos 	wctype_t wct;
   1325   1.1       jtc 
   1326  1.39  christos 	if ((wct = wctype(clname)) == 0) {
   1327  1.39  christos 		SETERROR(REG_ECTYPE);
   1328  1.39  christos 		return;
   1329  1.39  christos 	}
   1330  1.39  christos 	CHaddtype(p, cs, wct);
   1331   1.1       jtc }
   1332   1.1       jtc 
   1333   1.1       jtc /*
   1334   1.1       jtc  - p_b_eclass - parse an equivalence-class name and deal with it
   1335   1.9     perry  == static void p_b_eclass(struct parse *p, cset *cs);
   1336   1.1       jtc  *
   1337   1.1       jtc  * This implementation is incomplete. xxx
   1338   1.1       jtc  */
   1339   1.1       jtc static void
   1340  1.39  christos p_b_eclass(struct parse *p, cset *cs)
   1341   1.1       jtc {
   1342  1.39  christos 	wint_t c;
   1343   1.1       jtc 
   1344  1.14     lukem 	_DIAGASSERT(p != NULL);
   1345  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1346  1.14     lukem 
   1347   1.1       jtc 	c = p_b_coll_elem(p, '=');
   1348  1.39  christos 	CHadd(p, cs, c);
   1349   1.1       jtc }
   1350   1.1       jtc 
   1351   1.1       jtc /*
   1352   1.1       jtc  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
   1353  1.39  christos  == static wint_t p_b_symbol(struct parse *p);
   1354   1.1       jtc  */
   1355  1.39  christos static wint_t			/* value of symbol */
   1356  1.39  christos p_b_symbol(struct parse *p)
   1357   1.1       jtc {
   1358  1.39  christos 	wint_t value;
   1359   1.1       jtc 
   1360  1.14     lukem 	_DIAGASSERT(p != NULL);
   1361  1.14     lukem 
   1362  1.39  christos 	(void)REQUIRE(MORE(), REG_EBRACK);
   1363   1.1       jtc 	if (!EATTWO('[', '.'))
   1364  1.39  christos 		return(WGETNEXT());
   1365   1.1       jtc 
   1366   1.1       jtc 	/* collating symbol */
   1367   1.1       jtc 	value = p_b_coll_elem(p, '.');
   1368  1.39  christos 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
   1369   1.1       jtc 	return(value);
   1370   1.1       jtc }
   1371   1.1       jtc 
   1372   1.1       jtc /*
   1373   1.1       jtc  - p_b_coll_elem - parse a collating-element name and look it up
   1374  1.39  christos  == static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
   1375   1.1       jtc  */
   1376  1.39  christos static wint_t			/* value of collating element */
   1377  1.39  christos p_b_coll_elem(struct parse *p,
   1378  1.39  christos 	wint_t endc)		/* name ended by endc,']' */
   1379  1.39  christos {
   1380  1.39  christos 	const char *sp = p->next;
   1381  1.39  christos 	struct cname *cp;
   1382  1.42  christos 	size_t len;
   1383   1.1       jtc 
   1384  1.14     lukem 	_DIAGASSERT(p != NULL);
   1385  1.14     lukem 
   1386   1.1       jtc 	while (MORE() && !SEETWO(endc, ']'))
   1387   1.1       jtc 		NEXT();
   1388   1.1       jtc 	if (!MORE()) {
   1389   1.1       jtc 		SETERROR(REG_EBRACK);
   1390   1.1       jtc 		return(0);
   1391   1.1       jtc 	}
   1392   1.1       jtc 	len = p->next - sp;
   1393   1.1       jtc 	for (cp = cnames; cp->name != NULL; cp++)
   1394  1.37  christos 		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
   1395   1.1       jtc 			return(cp->code);	/* known name */
   1396  1.42  christos #ifdef NLS
   1397  1.42  christos 	mbstate_t mbs;
   1398  1.42  christos 	wchar_t wc;
   1399  1.42  christos 	size_t clen;
   1400  1.42  christos 
   1401  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   1402  1.39  christos 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
   1403  1.39  christos 		return (wc);			/* single character */
   1404  1.39  christos 	else if (clen == (size_t)-1 || clen == (size_t)-2)
   1405  1.39  christos 		SETERROR(REG_ILLSEQ);
   1406  1.39  christos 	else
   1407  1.39  christos 		SETERROR(REG_ECOLLATE);		/* neither */
   1408   1.1       jtc 	return(0);
   1409  1.42  christos #else
   1410  1.42  christos 	if (len == 1)
   1411  1.42  christos 		return *sp;    /* single character */
   1412  1.42  christos 	SETERROR(REG_ECOLLATE);                 /* neither */
   1413  1.42  christos 	return 0;
   1414  1.42  christos #endif
   1415   1.1       jtc }
   1416   1.1       jtc 
   1417   1.1       jtc /*
   1418  1.39  christos  - may_escape - determine whether 'ch' is escape-able in the current context
   1419  1.39  christos  == static int may_escape(struct parse *p, const wint_t ch)
   1420  1.39  christos  */
   1421  1.39  christos static bool
   1422  1.39  christos may_escape(struct parse *p, const wint_t ch)
   1423  1.39  christos {
   1424  1.39  christos 
   1425  1.39  christos 	if ((p->pflags & PFLAG_LEGACY_ESC) != 0)
   1426  1.39  christos 		return (true);
   1427  1.39  christos 	if (isalpha(ch) || ch == '\'' || ch == '`')
   1428  1.39  christos 		return (false);
   1429  1.39  christos 	return (true);
   1430  1.39  christos #ifdef NOTYET
   1431  1.39  christos 	/*
   1432  1.39  christos 	 * Build a whitelist of characters that may be escaped to produce an
   1433  1.39  christos 	 * ordinary in the current context. This assumes that these have not
   1434  1.39  christos 	 * been otherwise interpreted as a special character. Escaping an
   1435  1.39  christos 	 * ordinary character yields undefined results according to
   1436  1.39  christos 	 * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
   1437  1.39  christos 	 * advantage of this and use escaped ordinary characters to provide
   1438  1.39  christos 	 * special meaning, e.g. \b, \B, \w, \W, \s, \S.
   1439  1.39  christos 	 */
   1440  1.39  christos 	switch(ch) {
   1441  1.39  christos 	case '|':
   1442  1.39  christos 	case '+':
   1443  1.39  christos 	case '?':
   1444  1.39  christos 		/* The above characters may not be escaped in BREs */
   1445  1.39  christos 		if (!(p->g->cflags&REG_EXTENDED))
   1446  1.39  christos 			return (false);
   1447  1.39  christos 		/* Fallthrough */
   1448  1.39  christos 	case '(':
   1449  1.39  christos 	case ')':
   1450  1.39  christos 	case '{':
   1451  1.39  christos 	case '}':
   1452  1.39  christos 	case '.':
   1453  1.39  christos 	case '[':
   1454  1.39  christos 	case ']':
   1455  1.39  christos 	case '\\':
   1456  1.39  christos 	case '*':
   1457  1.39  christos 	case '^':
   1458  1.39  christos 	case '$':
   1459  1.39  christos 		return (true);
   1460  1.39  christos 	default:
   1461  1.39  christos 		return (false);
   1462  1.39  christos 	}
   1463  1.39  christos #endif
   1464  1.39  christos }
   1465  1.39  christos 
   1466  1.39  christos /*
   1467   1.1       jtc  - othercase - return the case counterpart of an alphabetic
   1468  1.39  christos  == static wint_t othercase(wint_t ch);
   1469   1.1       jtc  */
   1470  1.39  christos static wint_t			/* if no counterpart, return ch */
   1471  1.39  christos othercase(wint_t ch)
   1472  1.39  christos {
   1473  1.39  christos 	assert(iswalpha(ch));
   1474  1.39  christos 	if (iswupper(ch))
   1475  1.39  christos 		return(towlower(ch));
   1476  1.39  christos 	else if (iswlower(ch))
   1477  1.39  christos 		return(towupper(ch));
   1478   1.1       jtc 	else			/* peculiar, but could happen */
   1479   1.1       jtc 		return(ch);
   1480   1.1       jtc }
   1481   1.1       jtc 
   1482   1.1       jtc /*
   1483   1.1       jtc  - bothcases - emit a dualcase version of a two-case character
   1484  1.39  christos  == static void bothcases(struct parse *p, wint_t ch);
   1485   1.1       jtc  *
   1486   1.1       jtc  * Boy, is this implementation ever a kludge...
   1487   1.1       jtc  */
   1488   1.1       jtc static void
   1489  1.39  christos bothcases(struct parse *p, wint_t ch)
   1490   1.1       jtc {
   1491  1.39  christos 	const char *oldnext = p->next;
   1492  1.39  christos 	const char *oldend = p->end;
   1493  1.39  christos 	char bracket[3 + MB_LEN_MAX];
   1494  1.39  christos 	size_t n;
   1495   1.1       jtc 
   1496  1.14     lukem 	_DIAGASSERT(p != NULL);
   1497  1.14     lukem 
   1498   1.1       jtc 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
   1499   1.1       jtc 	p->next = bracket;
   1500  1.42  christos #ifdef NLS
   1501  1.42  christos 	mbstate_t mbs;
   1502  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   1503  1.39  christos 	n = wcrtomb(bracket, ch, &mbs);
   1504  1.39  christos 	assert(n != (size_t)-1);
   1505  1.42  christos #else
   1506  1.42  christos 	n = 0;
   1507  1.42  christos 	bracket[n++] = ch;
   1508  1.42  christos #endif
   1509  1.39  christos 	bracket[n] = ']';
   1510  1.39  christos 	bracket[n + 1] = '\0';
   1511  1.39  christos 	p->end = bracket+n+1;
   1512   1.1       jtc 	p_bracket(p);
   1513  1.39  christos 	assert(p->next == p->end);
   1514   1.1       jtc 	p->next = oldnext;
   1515   1.1       jtc 	p->end = oldend;
   1516   1.1       jtc }
   1517   1.1       jtc 
   1518   1.1       jtc /*
   1519   1.1       jtc  - ordinary - emit an ordinary character
   1520  1.39  christos  == static void ordinary(struct parse *p, wint_t ch);
   1521   1.1       jtc  */
   1522   1.1       jtc static void
   1523  1.39  christos ordinary(struct parse *p, wint_t ch)
   1524   1.1       jtc {
   1525  1.39  christos 	cset *cs;
   1526   1.1       jtc 
   1527  1.14     lukem 	_DIAGASSERT(p != NULL);
   1528  1.14     lukem 
   1529  1.39  christos 	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
   1530  1.39  christos 		bothcases(p, ch);
   1531  1.40  christos 	else if ((wint_t)(ch & OPDMASK) == ch)
   1532  1.40  christos 		EMIT(OCHAR, (size_t)ch);
   1533   1.1       jtc 	else {
   1534  1.39  christos 		/*
   1535  1.39  christos 		 * Kludge: character is too big to fit into an OCHAR operand.
   1536  1.39  christos 		 * Emit a singleton set.
   1537  1.39  christos 		 */
   1538  1.39  christos 		if ((cs = allocset(p)) == NULL)
   1539  1.39  christos 			return;
   1540  1.39  christos 		CHadd(p, cs, ch);
   1541  1.40  christos 		EMIT(OANYOF, (size_t)(cs - p->g->sets));
   1542   1.1       jtc 	}
   1543   1.1       jtc }
   1544   1.1       jtc 
   1545   1.1       jtc /*
   1546   1.1       jtc  - nonnewline - emit REG_NEWLINE version of OANY
   1547   1.9     perry  == static void nonnewline(struct parse *p);
   1548   1.1       jtc  *
   1549   1.1       jtc  * Boy, is this implementation ever a kludge...
   1550   1.1       jtc  */
   1551   1.1       jtc static void
   1552  1.39  christos nonnewline(struct parse *p)
   1553   1.1       jtc {
   1554  1.39  christos 	const char *oldnext = p->next;
   1555  1.39  christos 	const char *oldend = p->end;
   1556   1.1       jtc 	char bracket[4];
   1557   1.1       jtc 
   1558  1.14     lukem 	_DIAGASSERT(p != NULL);
   1559  1.14     lukem 
   1560   1.1       jtc 	p->next = bracket;
   1561   1.1       jtc 	p->end = bracket+3;
   1562   1.1       jtc 	bracket[0] = '^';
   1563   1.1       jtc 	bracket[1] = '\n';
   1564   1.1       jtc 	bracket[2] = ']';
   1565   1.1       jtc 	bracket[3] = '\0';
   1566   1.1       jtc 	p_bracket(p);
   1567   1.1       jtc 	assert(p->next == bracket+3);
   1568   1.1       jtc 	p->next = oldnext;
   1569   1.1       jtc 	p->end = oldend;
   1570   1.1       jtc }
   1571   1.1       jtc 
   1572   1.1       jtc /*
   1573   1.1       jtc  - repeat - generate code for a bounded repetition, recursively if needed
   1574  1.39  christos  == static void repeat(struct parse *p, sopno start, int from, int to);
   1575   1.1       jtc  */
   1576   1.1       jtc static void
   1577  1.39  christos repeat(struct parse *p,
   1578  1.39  christos 	sopno start,		/* operand from here to end of strip */
   1579  1.39  christos 	int from,		/* repeated from this number */
   1580  1.39  christos 	int to)			/* to this number of times (maybe INFINITY) */
   1581   1.1       jtc {
   1582  1.39  christos 	sopno finish = HERE();
   1583   1.1       jtc #	define	N	2
   1584   1.1       jtc #	define	INF	3
   1585   1.1       jtc #	define	REP(f, t)	((f)*8 + (t))
   1586   1.1       jtc #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
   1587   1.9     perry 	sopno copy;
   1588   1.1       jtc 
   1589  1.14     lukem 	_DIAGASSERT(p != NULL);
   1590  1.14     lukem 
   1591  1.39  christos 	if (p->error != 0)	/* head off possible runaway recursion */
   1592  1.30  christos 		return;
   1593  1.30  christos 
   1594   1.1       jtc 	assert(from <= to);
   1595   1.1       jtc 
   1596   1.1       jtc 	switch (REP(MAP(from), MAP(to))) {
   1597   1.1       jtc 	case REP(0, 0):			/* must be user doing this */
   1598   1.1       jtc 		DROP(finish-start);	/* drop the operand */
   1599   1.1       jtc 		break;
   1600   1.1       jtc 	case REP(0, 1):			/* as x{1,1}? */
   1601   1.1       jtc 	case REP(0, N):			/* as x{1,n}? */
   1602   1.1       jtc 	case REP(0, INF):		/* as x{1,}? */
   1603   1.4       jtc 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
   1604   1.4       jtc 		INSERT(OCH_, start);		/* offset is wrong... */
   1605  1.39  christos 		repeat(p, start+1, 1, to);
   1606   1.4       jtc 		ASTERN(OOR1, start);
   1607   1.1       jtc 		AHEAD(start);			/* ... fix it */
   1608   1.4       jtc 		EMIT(OOR2, 0);
   1609   1.4       jtc 		AHEAD(THERE());
   1610   1.4       jtc 		ASTERN(O_CH, THERETHERE());
   1611   1.1       jtc 		break;
   1612   1.1       jtc 	case REP(1, 1):			/* trivial case */
   1613   1.1       jtc 		/* done */
   1614   1.1       jtc 		break;
   1615   1.1       jtc 	case REP(1, N):			/* as x?x{1,n-1} */
   1616   1.4       jtc 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
   1617   1.4       jtc 		INSERT(OCH_, start);
   1618   1.4       jtc 		ASTERN(OOR1, start);
   1619   1.4       jtc 		AHEAD(start);
   1620   1.4       jtc 		EMIT(OOR2, 0);			/* offset very wrong... */
   1621   1.4       jtc 		AHEAD(THERE());			/* ...so fix it */
   1622   1.4       jtc 		ASTERN(O_CH, THERETHERE());
   1623   1.1       jtc 		copy = dupl(p, start+1, finish+1);
   1624   1.4       jtc 		assert(copy == finish+4);
   1625  1.39  christos 		repeat(p, copy, 1, to-1);
   1626   1.1       jtc 		break;
   1627   1.1       jtc 	case REP(1, INF):		/* as x+ */
   1628   1.1       jtc 		INSERT(OPLUS_, start);
   1629   1.1       jtc 		ASTERN(O_PLUS, start);
   1630   1.1       jtc 		break;
   1631   1.1       jtc 	case REP(N, N):			/* as xx{m-1,n-1} */
   1632   1.1       jtc 		copy = dupl(p, start, finish);
   1633  1.39  christos 		repeat(p, copy, from-1, to-1);
   1634   1.1       jtc 		break;
   1635   1.1       jtc 	case REP(N, INF):		/* as xx{n-1,INF} */
   1636   1.1       jtc 		copy = dupl(p, start, finish);
   1637  1.39  christos 		repeat(p, copy, from-1, to);
   1638   1.1       jtc 		break;
   1639   1.1       jtc 	default:			/* "can't happen" */
   1640   1.1       jtc 		SETERROR(REG_ASSERT);	/* just in case */
   1641   1.1       jtc 		break;
   1642   1.1       jtc 	}
   1643   1.1       jtc }
   1644   1.1       jtc 
   1645   1.1       jtc /*
   1646  1.39  christos  - wgetnext - helper function for WGETNEXT() macro. Gets the next wide
   1647  1.39  christos  - character from the parse struct, signals a REG_ILLSEQ error if the
   1648  1.39  christos  - character can't be converted. Returns the number of bytes consumed.
   1649  1.39  christos  */
   1650  1.39  christos static wint_t
   1651  1.39  christos wgetnext(struct parse *p)
   1652  1.39  christos {
   1653  1.42  christos #ifdef NLS
   1654  1.39  christos 	mbstate_t mbs;
   1655  1.39  christos 	wchar_t wc;
   1656  1.39  christos 	size_t n;
   1657  1.39  christos 
   1658  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   1659  1.40  christos 	n = mbrtowc(&wc, p->next, (size_t)(p->end - p->next), &mbs);
   1660  1.39  christos 	if (n == (size_t)-1 || n == (size_t)-2) {
   1661  1.39  christos 		SETERROR(REG_ILLSEQ);
   1662  1.39  christos 		return (0);
   1663  1.39  christos 	}
   1664  1.39  christos 	if (n == 0)
   1665  1.39  christos 		n = 1;
   1666  1.39  christos 	p->next += n;
   1667  1.42  christos 	return wc;
   1668  1.42  christos #else
   1669  1.42  christos 	return *p->next++;
   1670  1.42  christos #endif
   1671  1.39  christos }
   1672  1.39  christos 
   1673  1.39  christos /*
   1674   1.1       jtc  - seterr - set an error condition
   1675   1.9     perry  == static int seterr(struct parse *p, int e);
   1676   1.1       jtc  */
   1677   1.1       jtc static int			/* useless but makes type checking happy */
   1678  1.39  christos seterr(struct parse *p, int e)
   1679   1.1       jtc {
   1680  1.14     lukem 
   1681  1.14     lukem 	_DIAGASSERT(p != NULL);
   1682  1.14     lukem 
   1683   1.1       jtc 	if (p->error == 0)	/* keep earliest error condition */
   1684   1.1       jtc 		p->error = e;
   1685   1.1       jtc 	p->next = nuls;		/* try to bring things to a halt */
   1686   1.1       jtc 	p->end = nuls;
   1687   1.1       jtc 	return(0);		/* make the return value well-defined */
   1688   1.1       jtc }
   1689   1.1       jtc 
   1690   1.1       jtc /*
   1691   1.1       jtc  - allocset - allocate a set of characters for []
   1692   1.9     perry  == static cset *allocset(struct parse *p);
   1693   1.1       jtc  */
   1694   1.1       jtc static cset *
   1695  1.39  christos allocset(struct parse *p)
   1696   1.1       jtc {
   1697  1.39  christos 	cset *cs, *ncs;
   1698   1.1       jtc 
   1699  1.14     lukem 	_DIAGASSERT(p != NULL);
   1700  1.14     lukem 
   1701  1.39  christos 	ncs = reallocarray(p->g->sets, p->g->ncsets + 1, sizeof(*ncs));
   1702  1.39  christos 	if (ncs == NULL) {
   1703  1.39  christos 		SETERROR(REG_ESPACE);
   1704  1.39  christos 		return (NULL);
   1705   1.1       jtc 	}
   1706  1.39  christos 	p->g->sets = ncs;
   1707  1.39  christos 	cs = &p->g->sets[p->g->ncsets++];
   1708  1.39  christos 	memset(cs, 0, sizeof(*cs));
   1709   1.1       jtc 
   1710   1.1       jtc 	return(cs);
   1711   1.1       jtc }
   1712   1.1       jtc 
   1713   1.1       jtc /*
   1714   1.1       jtc  - freeset - free a now-unused set
   1715   1.9     perry  == static void freeset(struct parse *p, cset *cs);
   1716   1.1       jtc  */
   1717   1.1       jtc static void
   1718  1.39  christos freeset(struct parse *p, cset *cs)
   1719   1.1       jtc {
   1720  1.14     lukem 	cset *top;
   1721  1.14     lukem 
   1722  1.14     lukem 	_DIAGASSERT(p != NULL);
   1723  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1724  1.14     lukem 
   1725  1.14     lukem 	top = &p->g->sets[p->g->ncsets];
   1726   1.1       jtc 
   1727  1.39  christos 	free(cs->wides);
   1728  1.39  christos 	free(cs->ranges);
   1729  1.39  christos 	free(cs->types);
   1730  1.39  christos 	memset(cs, 0, sizeof(*cs));
   1731   1.1       jtc 	if (cs == top-1)	/* recover only the easy case */
   1732   1.1       jtc 		p->g->ncsets--;
   1733   1.1       jtc }
   1734   1.1       jtc 
   1735   1.1       jtc /*
   1736  1.39  christos  - singleton - Determine whether a set contains only one character,
   1737  1.39  christos  - returning it if so, otherwise returning OUT.
   1738   1.1       jtc  */
   1739  1.39  christos static wint_t
   1740  1.39  christos singleton(cset *cs)
   1741   1.1       jtc {
   1742  1.39  christos 	wint_t i, s, n;
   1743   1.1       jtc 
   1744  1.39  christos 	for (i = n = 0; i < NC; i++)
   1745  1.39  christos 		if (CHIN(cs, i)) {
   1746   1.1       jtc 			n++;
   1747  1.39  christos 			s = i;
   1748  1.39  christos 		}
   1749  1.39  christos 	if (n == 1)
   1750  1.39  christos 		return (s);
   1751  1.39  christos 	if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
   1752  1.39  christos 	    cs->icase == 0)
   1753  1.39  christos 		return (cs->wides[0]);
   1754  1.39  christos 	/* Don't bother handling the other cases. */
   1755  1.39  christos 	return (OUT);
   1756   1.1       jtc }
   1757   1.1       jtc 
   1758   1.1       jtc /*
   1759  1.39  christos  - CHadd - add character to character set.
   1760   1.1       jtc  */
   1761   1.1       jtc static void
   1762  1.39  christos CHadd(struct parse *p, cset *cs, wint_t ch)
   1763   1.1       jtc {
   1764  1.39  christos 	wint_t nch, *newwides;
   1765  1.14     lukem 
   1766  1.14     lukem 	_DIAGASSERT(p != NULL);
   1767  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1768  1.14     lukem 
   1769  1.39  christos 	assert(ch >= 0);
   1770  1.39  christos 	if (ch < NC)
   1771  1.40  christos 		cs->bmp[(unsigned)ch >> 3] |= 1 << (ch & 7);
   1772  1.39  christos 	else {
   1773  1.39  christos 		newwides = reallocarray(cs->wides, cs->nwides + 1,
   1774  1.39  christos 		    sizeof(*cs->wides));
   1775  1.39  christos 		if (newwides == NULL) {
   1776  1.39  christos 			SETERROR(REG_ESPACE);
   1777  1.39  christos 			return;
   1778  1.39  christos 		}
   1779  1.39  christos 		cs->wides = newwides;
   1780  1.39  christos 		cs->wides[cs->nwides++] = ch;
   1781  1.39  christos 	}
   1782  1.39  christos 	if (cs->icase) {
   1783  1.39  christos 		if ((nch = towlower(ch)) < NC)
   1784  1.40  christos 			cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7);
   1785  1.39  christos 		if ((nch = towupper(ch)) < NC)
   1786  1.40  christos 			cs->bmp[(unsigned)nch >> 3] |= 1 << (nch & 7);
   1787   1.1       jtc 	}
   1788   1.1       jtc }
   1789   1.1       jtc 
   1790   1.1       jtc /*
   1791  1.39  christos  - CHaddrange - add all characters in the range [min,max] to a character set.
   1792   1.1       jtc  */
   1793   1.1       jtc static void
   1794  1.39  christos CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
   1795   1.1       jtc {
   1796  1.39  christos 	crange *newranges;
   1797  1.14     lukem 
   1798  1.39  christos 	_DIAGASSERT(p != NULL);
   1799  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1800  1.14     lukem 
   1801  1.39  christos 	for (; min < NC && min <= max; min++)
   1802  1.39  christos 		CHadd(p, cs, min);
   1803  1.39  christos 	if (min >= max)
   1804  1.39  christos 		return;
   1805  1.39  christos 	newranges = reallocarray(cs->ranges, cs->nranges + 1,
   1806  1.39  christos 	    sizeof(*cs->ranges));
   1807  1.39  christos 	if (newranges == NULL) {
   1808  1.39  christos 		SETERROR(REG_ESPACE);
   1809   1.1       jtc 		return;
   1810   1.1       jtc 	}
   1811  1.39  christos 	cs->ranges = newranges;
   1812  1.39  christos 	cs->ranges[cs->nranges].min = min;
   1813  1.39  christos 	cs->ranges[cs->nranges].max = max;
   1814  1.39  christos 	cs->nranges++;
   1815   1.1       jtc }
   1816   1.1       jtc 
   1817   1.1       jtc /*
   1818  1.39  christos  - CHaddtype - add all characters of a certain type to a character set.
   1819   1.1       jtc  */
   1820   1.1       jtc static void
   1821  1.39  christos CHaddtype(struct parse *p, cset *cs, wctype_t wct)
   1822   1.1       jtc {
   1823  1.39  christos 	wint_t i;
   1824  1.39  christos 	wctype_t *newtypes;
   1825  1.14     lukem 
   1826  1.14     lukem 	_DIAGASSERT(p != NULL);
   1827  1.14     lukem 	_DIAGASSERT(cs != NULL);
   1828  1.14     lukem 
   1829  1.39  christos 	for (i = 0; i < NC; i++)
   1830  1.39  christos 		if (iswctype(i, wct))
   1831  1.39  christos 			CHadd(p, cs, i);
   1832  1.39  christos 	newtypes = reallocarray(cs->types, cs->ntypes + 1,
   1833  1.39  christos 	    sizeof(*cs->types));
   1834  1.39  christos 	if (newtypes == NULL) {
   1835  1.39  christos 		SETERROR(REG_ESPACE);
   1836   1.1       jtc 		return;
   1837  1.39  christos 	}
   1838  1.39  christos 	cs->types = newtypes;
   1839  1.39  christos 	cs->types[cs->ntypes++] = wct;
   1840   1.1       jtc }
   1841   1.1       jtc 
   1842   1.1       jtc /*
   1843   1.1       jtc  - dupl - emit a duplicate of a bunch of sops
   1844   1.9     perry  == static sopno dupl(struct parse *p, sopno start, sopno finish);
   1845   1.1       jtc  */
   1846   1.1       jtc static sopno			/* start of duplicate */
   1847  1.39  christos dupl(struct parse *p,
   1848  1.39  christos 	sopno start,		/* from here */
   1849  1.39  christos 	sopno finish)		/* to this less one */
   1850   1.1       jtc {
   1851  1.39  christos 	sopno ret = HERE();
   1852   1.9     perry 	sopno len = finish - start;
   1853   1.1       jtc 
   1854  1.14     lukem 	_DIAGASSERT(p != NULL);
   1855  1.14     lukem 
   1856   1.1       jtc 	assert(finish >= start);
   1857   1.1       jtc 	if (len == 0)
   1858   1.1       jtc 		return(ret);
   1859  1.39  christos 	if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
   1860  1.39  christos 		return(ret);
   1861  1.40  christos 	(void) memcpy(p->strip + p->slen,
   1862  1.40  christos 	    p->strip + start, len * sizeof(*p->strip));
   1863   1.1       jtc 	p->slen += len;
   1864   1.1       jtc 	return(ret);
   1865   1.1       jtc }
   1866   1.1       jtc 
   1867   1.1       jtc /*
   1868   1.1       jtc  - doemit - emit a strip operator
   1869   1.9     perry  == static void doemit(struct parse *p, sop op, size_t opnd);
   1870   1.1       jtc  *
   1871   1.1       jtc  * It might seem better to implement this as a macro with a function as
   1872   1.1       jtc  * hard-case backup, but it's just too big and messy unless there are
   1873   1.1       jtc  * some changes to the data structures.  Maybe later.
   1874   1.1       jtc  */
   1875   1.1       jtc static void
   1876  1.39  christos doemit(struct parse *p, sop op, size_t opnd)
   1877   1.1       jtc {
   1878   1.1       jtc 	/* avoid making error situations worse */
   1879   1.1       jtc 	if (p->error != 0)
   1880   1.1       jtc 		return;
   1881   1.1       jtc 
   1882  1.39  christos 	_DIAGASSERT(p != NULL);
   1883  1.39  christos 
   1884   1.1       jtc 	/* deal with oversize operands ("can't happen", more or less) */
   1885   1.1       jtc 	assert(opnd < 1<<OPSHIFT);
   1886   1.1       jtc 
   1887   1.1       jtc 	/* deal with undersized strip */
   1888   1.1       jtc 	if (p->slen >= p->ssize)
   1889  1.30  christos 		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
   1890  1.30  christos 			return;
   1891   1.1       jtc 
   1892   1.1       jtc 	/* finally, it's all reduced to the easy case */
   1893  1.40  christos 	p->strip[p->slen++] = (sopno)SOP(op, opnd);
   1894   1.1       jtc }
   1895   1.1       jtc 
   1896   1.1       jtc /*
   1897   1.1       jtc  - doinsert - insert a sop into the strip
   1898   1.9     perry  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
   1899   1.1       jtc  */
   1900   1.1       jtc static void
   1901  1.39  christos doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
   1902   1.1       jtc {
   1903   1.9     perry 	sopno sn;
   1904   1.9     perry 	sop s;
   1905   1.9     perry 	int i;
   1906   1.1       jtc 
   1907  1.14     lukem 	_DIAGASSERT(p != NULL);
   1908  1.14     lukem 
   1909   1.1       jtc 	/* avoid making error situations worse */
   1910   1.1       jtc 	if (p->error != 0)
   1911   1.1       jtc 		return;
   1912   1.1       jtc 
   1913   1.1       jtc 	sn = HERE();
   1914   1.1       jtc 	EMIT(op, opnd);		/* do checks, ensure space */
   1915   1.1       jtc 	assert(HERE() == sn+1);
   1916   1.1       jtc 	s = p->strip[sn];
   1917   1.1       jtc 
   1918   1.1       jtc 	/* adjust paren pointers */
   1919   1.1       jtc 	assert(pos > 0);
   1920   1.1       jtc 	for (i = 1; i < NPAREN; i++) {
   1921   1.1       jtc 		if (p->pbegin[i] >= pos) {
   1922   1.1       jtc 			p->pbegin[i]++;
   1923   1.1       jtc 		}
   1924   1.1       jtc 		if (p->pend[i] >= pos) {
   1925   1.1       jtc 			p->pend[i]++;
   1926   1.1       jtc 		}
   1927   1.1       jtc 	}
   1928   1.1       jtc 
   1929  1.40  christos 	memmove(&p->strip[pos+1], &p->strip[pos],
   1930  1.40  christos 	    (HERE()-pos-1)*sizeof(*p->strip));
   1931   1.1       jtc 	p->strip[pos] = s;
   1932   1.1       jtc }
   1933   1.1       jtc 
   1934   1.1       jtc /*
   1935   1.1       jtc  - dofwd - complete a forward reference
   1936   1.9     perry  == static void dofwd(struct parse *p, sopno pos, sop value);
   1937   1.1       jtc  */
   1938   1.1       jtc static void
   1939  1.39  christos dofwd(struct parse *p, sopno pos, sop value)
   1940   1.1       jtc {
   1941  1.14     lukem 
   1942  1.14     lukem 	_DIAGASSERT(p != NULL);
   1943  1.14     lukem 
   1944   1.1       jtc 	/* avoid making error situations worse */
   1945   1.1       jtc 	if (p->error != 0)
   1946   1.1       jtc 		return;
   1947   1.1       jtc 
   1948   1.1       jtc 	assert(value < 1<<OPSHIFT);
   1949  1.39  christos 	p->strip[pos] = OP(p->strip[pos]) | value;
   1950   1.1       jtc }
   1951   1.1       jtc 
   1952   1.1       jtc /*
   1953   1.1       jtc  - enlarge - enlarge the strip
   1954  1.39  christos  == static int enlarge(struct parse *p, sopno size);
   1955   1.1       jtc  */
   1956  1.30  christos static int
   1957  1.35     joerg enlarge(struct parse *p, sopno size)
   1958   1.1       jtc {
   1959  1.39  christos 	sop *sp;
   1960  1.39  christos 
   1961  1.14     lukem 	_DIAGASSERT(p != NULL);
   1962  1.14     lukem 
   1963   1.1       jtc 	if (p->ssize >= size)
   1964  1.30  christos 		return 1;
   1965   1.1       jtc 
   1966  1.40  christos 	sp = reallocarray(p->strip, size, sizeof(*p->strip));
   1967  1.39  christos 	if (sp == NULL) {
   1968   1.1       jtc 		SETERROR(REG_ESPACE);
   1969  1.30  christos 		return 0;
   1970   1.1       jtc 	}
   1971  1.39  christos 	p->strip = sp;
   1972  1.35     joerg 	p->ssize = size;
   1973  1.30  christos 	return 1;
   1974   1.1       jtc }
   1975   1.1       jtc 
   1976   1.1       jtc /*
   1977   1.1       jtc  - stripsnug - compact the strip
   1978   1.9     perry  == static void stripsnug(struct parse *p, struct re_guts *g);
   1979   1.1       jtc  */
   1980   1.1       jtc static void
   1981  1.39  christos stripsnug(struct parse *p, struct re_guts *g)
   1982   1.1       jtc {
   1983  1.14     lukem 
   1984  1.14     lukem 	_DIAGASSERT(p != NULL);
   1985  1.14     lukem 	_DIAGASSERT(g != NULL);
   1986  1.14     lukem 
   1987   1.1       jtc 	g->nstates = p->slen;
   1988  1.40  christos 	g->strip = reallocarray(p->strip, p->slen, sizeof(*p->strip));
   1989  1.39  christos 	if (g->strip == NULL) {
   1990  1.39  christos 		SETERROR(REG_ESPACE);
   1991  1.39  christos 		g->strip = p->strip;
   1992  1.39  christos 	}
   1993   1.1       jtc }
   1994   1.1       jtc 
   1995   1.1       jtc /*
   1996   1.1       jtc  - findmust - fill in must and mlen with longest mandatory literal string
   1997   1.9     perry  == static void findmust(struct parse *p, struct re_guts *g);
   1998   1.1       jtc  *
   1999   1.1       jtc  * This algorithm could do fancy things like analyzing the operands of |
   2000   1.1       jtc  * for common subsequences.  Someday.  This code is simple and finds most
   2001   1.1       jtc  * of the interesting cases.
   2002   1.1       jtc  *
   2003   1.1       jtc  * Note that must and mlen got initialized during setup.
   2004   1.1       jtc  */
   2005   1.1       jtc static void
   2006  1.39  christos findmust(struct parse *p, struct re_guts *g)
   2007   1.1       jtc {
   2008   1.9     perry 	sop *scan;
   2009   1.7  christos 	sop *start = NULL;
   2010   1.9     perry 	sop *newstart = NULL;
   2011   1.9     perry 	sopno newlen;
   2012   1.9     perry 	sop s;
   2013   1.9     perry 	char *cp;
   2014  1.39  christos 	int offset;
   2015  1.39  christos 	mbstate_t mbs;
   2016   1.1       jtc 
   2017  1.14     lukem 	_DIAGASSERT(p != NULL);
   2018  1.14     lukem 	_DIAGASSERT(g != NULL);
   2019  1.14     lukem 
   2020   1.1       jtc 	/* avoid making error situations worse */
   2021   1.1       jtc 	if (p->error != 0)
   2022   1.1       jtc 		return;
   2023   1.1       jtc 
   2024  1.39  christos #ifdef notyet
   2025  1.39  christos 	/*
   2026  1.39  christos 	 * It's not generally safe to do a ``char'' substring search on
   2027  1.39  christos 	 * multibyte character strings, but it's safe for at least
   2028  1.39  christos 	 * UTF-8 (see RFC 3629).
   2029  1.39  christos 	 */
   2030  1.39  christos 	if (MB_CUR_MAX > 1 &&
   2031  1.39  christos 	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
   2032  1.39  christos 		return;
   2033  1.39  christos #endif
   2034  1.39  christos 
   2035   1.1       jtc 	/* find the longest OCHAR sequence in strip */
   2036   1.1       jtc 	newlen = 0;
   2037  1.39  christos 	offset = 0;
   2038  1.39  christos 	g->moffset = 0;
   2039   1.1       jtc 	scan = g->strip + 1;
   2040   1.1       jtc 	do {
   2041   1.1       jtc 		s = *scan++;
   2042   1.1       jtc 		switch (OP(s)) {
   2043   1.1       jtc 		case OCHAR:		/* sequence member */
   2044  1.39  christos 			if (newlen == 0) {		/* new sequence */
   2045  1.39  christos 				memset(&mbs, 0, sizeof(mbs));
   2046   1.1       jtc 				newstart = scan - 1;
   2047  1.39  christos 			}
   2048  1.42  christos #ifdef NLS
   2049  1.42  christos 			char buf[MB_LEN_MAX];
   2050  1.42  christos 			size_t clen = wcrtomb(buf, (int)OPND(s), &mbs);
   2051  1.39  christos 			if (clen == (size_t)-1)
   2052  1.39  christos 				goto toohard;
   2053  1.40  christos 			newlen += (sopno)clen;
   2054  1.42  christos #else
   2055  1.42  christos 			newlen++;
   2056  1.42  christos #endif
   2057   1.1       jtc 			break;
   2058   1.1       jtc 		case OPLUS_:		/* things that don't break one */
   2059   1.1       jtc 		case OLPAREN:
   2060   1.1       jtc 		case ORPAREN:
   2061   1.1       jtc 			break;
   2062   1.1       jtc 		case OQUEST_:		/* things that must be skipped */
   2063   1.1       jtc 		case OCH_:
   2064  1.39  christos 			offset = altoffset(scan, offset);
   2065   1.1       jtc 			scan--;
   2066   1.1       jtc 			do {
   2067   1.1       jtc 				scan += OPND(s);
   2068   1.1       jtc 				s = *scan;
   2069   1.1       jtc 				/* assert() interferes w debug printouts */
   2070  1.40  christos 				if (OP(s) != O_QUEST &&
   2071  1.40  christos 				    OP(s) != O_CH && OP(s) != OOR2) {
   2072   1.1       jtc 					g->iflags |= BAD;
   2073   1.1       jtc 					return;
   2074   1.1       jtc 				}
   2075  1.40  christos 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
   2076  1.11  christos 			/* FALLTHROUGH */
   2077  1.39  christos 		case OBOW:		/* things that break a sequence */
   2078  1.39  christos 		case OEOW:
   2079  1.39  christos 		case OBOL:
   2080  1.39  christos 		case OEOL:
   2081  1.39  christos 		case OBOS:
   2082  1.39  christos 		case OEOS:
   2083  1.39  christos 		case OWBND:
   2084  1.39  christos 		case ONWBND:
   2085  1.39  christos 		case O_QUEST:
   2086  1.39  christos 		case O_CH:
   2087  1.39  christos 		case OEND:
   2088  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2089   1.1       jtc 				start = newstart;
   2090   1.1       jtc 				g->mlen = newlen;
   2091  1.39  christos 				if (offset > -1) {
   2092  1.39  christos 					g->moffset += offset;
   2093  1.39  christos 					offset = newlen;
   2094  1.39  christos 				} else
   2095  1.39  christos 					g->moffset = offset;
   2096  1.39  christos 			} else {
   2097  1.39  christos 				if (offset > -1)
   2098  1.39  christos 					offset += newlen;
   2099   1.1       jtc 			}
   2100   1.1       jtc 			newlen = 0;
   2101   1.1       jtc 			break;
   2102  1.39  christos 		case OANY:
   2103  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2104  1.39  christos 				start = newstart;
   2105  1.39  christos 				g->mlen = newlen;
   2106  1.39  christos 				if (offset > -1) {
   2107  1.39  christos 					g->moffset += offset;
   2108  1.39  christos 					offset = newlen;
   2109  1.39  christos 				} else
   2110  1.39  christos 					g->moffset = offset;
   2111  1.39  christos 			} else {
   2112  1.39  christos 				if (offset > -1)
   2113  1.39  christos 					offset += newlen;
   2114  1.39  christos 			}
   2115  1.39  christos 			if (offset > -1)
   2116  1.39  christos 				offset++;
   2117  1.39  christos 			newlen = 0;
   2118  1.39  christos 			break;
   2119  1.39  christos 		case OANYOF:		/* may or may not invalidate offset */
   2120  1.39  christos 			/* First, everything as OANY */
   2121  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2122  1.39  christos 				start = newstart;
   2123  1.39  christos 				g->mlen = newlen;
   2124  1.39  christos 				if (offset > -1) {
   2125  1.39  christos 					g->moffset += offset;
   2126  1.39  christos 					offset = newlen;
   2127  1.39  christos 				} else
   2128  1.39  christos 					g->moffset = offset;
   2129  1.39  christos 			} else {
   2130  1.39  christos 				if (offset > -1)
   2131  1.39  christos 					offset += newlen;
   2132  1.39  christos 			}
   2133  1.39  christos 			if (offset > -1)
   2134  1.39  christos 				offset++;
   2135  1.39  christos 			newlen = 0;
   2136  1.39  christos 			break;
   2137  1.42  christos #ifdef NLS
   2138  1.40  christos 		toohard:/*FALLTHROUGH*/
   2139  1.42  christos #endif
   2140  1.39  christos 		default:
   2141  1.39  christos 			/* Anything here makes it impossible or too hard
   2142  1.39  christos 			 * to calculate the offset -- so we give up;
   2143  1.39  christos 			 * save the last known good offset, in case the
   2144  1.39  christos 			 * must sequence doesn't occur later.
   2145  1.39  christos 			 */
   2146  1.39  christos 			if (newlen > (sopno)g->mlen) {		/* ends one */
   2147  1.39  christos 				start = newstart;
   2148  1.39  christos 				g->mlen = newlen;
   2149  1.39  christos 				if (offset > -1)
   2150  1.39  christos 					g->moffset += offset;
   2151  1.39  christos 				else
   2152  1.39  christos 					g->moffset = offset;
   2153  1.39  christos 			}
   2154  1.39  christos 			offset = -1;
   2155  1.39  christos 			newlen = 0;
   2156  1.39  christos 			break;
   2157   1.1       jtc 		}
   2158   1.1       jtc 	} while (OP(s) != OEND);
   2159   1.1       jtc 
   2160  1.39  christos 	if (g->mlen == 0) {		/* there isn't one */
   2161  1.39  christos 		g->moffset = -1;
   2162   1.1       jtc 		return;
   2163  1.39  christos 	}
   2164   1.1       jtc 
   2165   1.1       jtc 	/* turn it into a character string */
   2166   1.1       jtc 	g->must = malloc((size_t)g->mlen + 1);
   2167   1.1       jtc 	if (g->must == NULL) {		/* argh; just forget it */
   2168   1.1       jtc 		g->mlen = 0;
   2169  1.39  christos 		g->moffset = -1;
   2170   1.1       jtc 		return;
   2171   1.1       jtc 	}
   2172   1.1       jtc 	cp = g->must;
   2173   1.1       jtc 	scan = start;
   2174  1.39  christos 	memset(&mbs, 0, sizeof(mbs));
   2175  1.39  christos 	while (cp < g->must + g->mlen) {
   2176   1.1       jtc 		while (OP(s = *scan++) != OCHAR)
   2177   1.1       jtc 			continue;
   2178  1.42  christos #ifdef NLS
   2179  1.43  christos 		size_t clen = wcrtomb(cp, (int)OPND(s), &mbs);
   2180  1.39  christos 		assert(clen != (size_t)-1);
   2181  1.39  christos 		cp += clen;
   2182  1.42  christos #else
   2183  1.42  christos 		*cp++ = OPND(s);
   2184  1.42  christos #endif
   2185   1.1       jtc 	}
   2186   1.2       jtc 	assert(cp == g->must + g->mlen);
   2187   1.1       jtc 	*cp++ = '\0';		/* just on general principles */
   2188   1.1       jtc }
   2189   1.1       jtc 
   2190   1.1       jtc /*
   2191  1.39  christos  - altoffset - choose biggest offset among multiple choices
   2192  1.39  christos  == static int altoffset(sop *scan, int offset);
   2193  1.39  christos  *
   2194  1.39  christos  * Compute, recursively if necessary, the largest offset among multiple
   2195  1.39  christos  * re paths.
   2196  1.39  christos  */
   2197  1.39  christos static int
   2198  1.39  christos altoffset(sop *scan, int offset)
   2199  1.39  christos {
   2200  1.39  christos 	int largest;
   2201  1.39  christos 	int try;
   2202  1.39  christos 	sop s;
   2203  1.39  christos 
   2204  1.39  christos 	_DIAGASSERT(scan != NULL);
   2205  1.39  christos 
   2206  1.39  christos 	/* If we gave up already on offsets, return */
   2207  1.39  christos 	if (offset == -1)
   2208  1.39  christos 		return -1;
   2209  1.39  christos 
   2210  1.39  christos 	largest = 0;
   2211  1.39  christos 	try = 0;
   2212  1.39  christos 	s = *scan++;
   2213  1.40  christos 	while (OP(s) != O_QUEST && OP(s) != O_CH) {
   2214  1.39  christos 		switch (OP(s)) {
   2215  1.39  christos 		case OOR1:
   2216  1.39  christos 			if (try > largest)
   2217  1.39  christos 				largest = try;
   2218  1.39  christos 			try = 0;
   2219  1.39  christos 			break;
   2220  1.39  christos 		case OQUEST_:
   2221  1.39  christos 		case OCH_:
   2222  1.39  christos 			try = altoffset(scan, try);
   2223  1.39  christos 			if (try == -1)
   2224  1.39  christos 				return -1;
   2225  1.39  christos 			scan--;
   2226  1.39  christos 			do {
   2227  1.39  christos 				scan += OPND(s);
   2228  1.39  christos 				s = *scan;
   2229  1.40  christos 				if (OP(s) != O_QUEST &&
   2230  1.40  christos 				    OP(s) != O_CH && OP(s) != OOR2)
   2231  1.39  christos 					return -1;
   2232  1.40  christos 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
   2233  1.39  christos 			/* We must skip to the next position, or we'll
   2234  1.39  christos 			 * leave altoffset() too early.
   2235  1.39  christos 			 */
   2236  1.39  christos 			scan++;
   2237  1.39  christos 			break;
   2238  1.39  christos 		case OANYOF:
   2239  1.39  christos 		case OCHAR:
   2240  1.39  christos 		case OANY:
   2241  1.39  christos 			try++;
   2242  1.40  christos 			/*FALLTHROUGH*/
   2243  1.39  christos 		case OBOW:
   2244  1.39  christos 		case OEOW:
   2245  1.39  christos 		case OWBND:
   2246  1.39  christos 		case ONWBND:
   2247  1.39  christos 		case OLPAREN:
   2248  1.39  christos 		case ORPAREN:
   2249  1.39  christos 		case OOR2:
   2250  1.39  christos 			break;
   2251  1.39  christos 		default:
   2252  1.39  christos 			try = -1;
   2253  1.39  christos 			break;
   2254  1.39  christos 		}
   2255  1.39  christos 		if (try == -1)
   2256  1.39  christos 			return -1;
   2257  1.39  christos 		s = *scan++;
   2258  1.39  christos 	}
   2259  1.39  christos 
   2260  1.39  christos 	if (try > largest)
   2261  1.39  christos 		largest = try;
   2262  1.39  christos 
   2263  1.39  christos 	return largest+offset;
   2264  1.39  christos }
   2265  1.39  christos 
   2266  1.39  christos /*
   2267  1.39  christos  - computejumps - compute char jumps for BM scan
   2268  1.39  christos  == static void computejumps(struct parse *p, struct re_guts *g);
   2269  1.39  christos  *
   2270  1.39  christos  * This algorithm assumes g->must exists and is has size greater than
   2271  1.39  christos  * zero. It's based on the algorithm found on Computer Algorithms by
   2272  1.39  christos  * Sara Baase.
   2273  1.39  christos  *
   2274  1.39  christos  * A char jump is the number of characters one needs to jump based on
   2275  1.39  christos  * the value of the character from the text that was mismatched.
   2276  1.39  christos  */
   2277  1.39  christos static void
   2278  1.39  christos computejumps(struct parse *p, struct re_guts *g)
   2279  1.39  christos {
   2280  1.39  christos 	int ch;
   2281  1.39  christos 	size_t mindex;
   2282  1.39  christos 
   2283  1.39  christos 	_DIAGASSERT(p != NULL);
   2284  1.39  christos 	_DIAGASSERT(g != NULL);
   2285  1.39  christos 
   2286  1.39  christos 	/* Avoid making errors worse */
   2287  1.39  christos 	if (p->error != 0)
   2288  1.39  christos 		return;
   2289  1.39  christos 
   2290  1.39  christos 	g->charjump = calloc((NC_MAX + 1), sizeof(*g->charjump));
   2291  1.39  christos 	if (g->charjump == NULL)	/* Not a fatal error */
   2292  1.39  christos 		return;
   2293  1.39  christos 	/* Adjust for signed chars, if necessary */
   2294  1.39  christos 	g->charjump = &g->charjump[-(CHAR_MIN)];
   2295  1.39  christos 
   2296  1.39  christos 	/* If the character does not exist in the pattern, the jump
   2297  1.39  christos 	 * is equal to the number of characters in the pattern.
   2298  1.39  christos 	 */
   2299  1.39  christos 	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
   2300  1.39  christos 		g->charjump[ch] = g->mlen;
   2301  1.39  christos 
   2302  1.39  christos 	/* If the character does exist, compute the jump that would
   2303  1.39  christos 	 * take us to the last character in the pattern equal to it
   2304  1.39  christos 	 * (notice that we match right to left, so that last character
   2305  1.39  christos 	 * is the first one that would be matched).
   2306  1.39  christos 	 */
   2307  1.39  christos 	for (mindex = 0; mindex < g->mlen; mindex++)
   2308  1.39  christos 		g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
   2309  1.39  christos }
   2310  1.39  christos 
   2311  1.39  christos /*
   2312  1.39  christos  - computematchjumps - compute match jumps for BM scan
   2313  1.39  christos  == static void computematchjumps(struct parse *p, struct re_guts *g);
   2314  1.39  christos  *
   2315  1.39  christos  * This algorithm assumes g->must exists and is has size greater than
   2316  1.39  christos  * zero. It's based on the algorithm found on Computer Algorithms by
   2317  1.39  christos  * Sara Baase.
   2318  1.39  christos  *
   2319  1.39  christos  * A match jump is the number of characters one needs to advance based
   2320  1.39  christos  * on the already-matched suffix.
   2321  1.39  christos  * Notice that all values here are minus (g->mlen-1), because of the way
   2322  1.39  christos  * the search algorithm works.
   2323  1.39  christos  */
   2324  1.39  christos static void
   2325  1.39  christos computematchjumps(struct parse *p, struct re_guts *g)
   2326  1.39  christos {
   2327  1.39  christos 	size_t mindex;		/* General "must" iterator */
   2328  1.39  christos 	size_t suffix;		/* Keeps track of matching suffix */
   2329  1.39  christos 	size_t ssuffix;		/* Keeps track of suffixes' suffix */
   2330  1.39  christos 	size_t* pmatches;	/* pmatches[k] points to the next i
   2331  1.39  christos 				 * such that i+1...mlen is a substring
   2332  1.39  christos 				 * of k+1...k+mlen-i-1
   2333  1.39  christos 				 */
   2334  1.39  christos 
   2335  1.39  christos 	_DIAGASSERT(p != NULL);
   2336  1.39  christos 	_DIAGASSERT(g != NULL);
   2337  1.39  christos 
   2338  1.39  christos 	/* Avoid making errors worse */
   2339  1.39  christos 	if (p->error != 0)
   2340  1.39  christos 		return;
   2341  1.39  christos 
   2342  1.39  christos 	pmatches = calloc(g->mlen, sizeof(*pmatches));
   2343  1.39  christos 	if (pmatches == NULL) {
   2344  1.39  christos 		g->matchjump = NULL;
   2345  1.39  christos 		return;
   2346  1.39  christos 	}
   2347  1.39  christos 
   2348  1.39  christos 	g->matchjump = calloc(g->mlen, sizeof(*g->matchjump));
   2349  1.39  christos 	if (g->matchjump == NULL) {	/* Not a fatal error */
   2350  1.39  christos 		free(pmatches);
   2351  1.39  christos 		return;
   2352  1.39  christos 	}
   2353  1.39  christos 
   2354  1.39  christos 	/* Set maximum possible jump for each character in the pattern */
   2355  1.39  christos 	for (mindex = 0; mindex < g->mlen; mindex++)
   2356  1.39  christos 		g->matchjump[mindex] = 2 * g->mlen - mindex - 1;
   2357  1.39  christos 
   2358  1.39  christos 	/* Compute pmatches[] */
   2359  1.39  christos 	for (suffix = mindex = g->mlen; mindex-- > 0; suffix--) {
   2360  1.39  christos 		pmatches[mindex] = suffix;
   2361  1.39  christos 
   2362  1.39  christos 		/* If a mismatch is found, interrupting the substring,
   2363  1.39  christos 		 * compute the matchjump for that position. If no
   2364  1.39  christos 		 * mismatch is found, then a text substring mismatched
   2365  1.39  christos 		 * against the suffix will also mismatch against the
   2366  1.39  christos 		 * substring.
   2367  1.39  christos 		 */
   2368  1.39  christos 		while (suffix < g->mlen
   2369  1.39  christos 		    && g->must[mindex] != g->must[suffix]) {
   2370  1.39  christos 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
   2371  1.39  christos 			    g->mlen - mindex - 1);
   2372  1.39  christos 			suffix = pmatches[suffix];
   2373  1.39  christos 		}
   2374  1.39  christos 	}
   2375  1.39  christos 
   2376  1.39  christos 	/* Compute the matchjump up to the last substring found to jump
   2377  1.39  christos 	 * to the beginning of the largest must pattern prefix matching
   2378  1.39  christos 	 * it's own suffix.
   2379  1.39  christos 	 */
   2380  1.39  christos 	for (mindex = 0; mindex <= suffix; mindex++)
   2381  1.39  christos 		g->matchjump[mindex] = MIN(g->matchjump[mindex],
   2382  1.39  christos 		    g->mlen + suffix - mindex);
   2383  1.39  christos 
   2384  1.39  christos         ssuffix = pmatches[suffix];
   2385  1.39  christos         while (suffix < g->mlen) {
   2386  1.39  christos                 while (suffix <= ssuffix && suffix < g->mlen) {
   2387  1.39  christos                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
   2388  1.39  christos 			    g->mlen + ssuffix - suffix);
   2389  1.39  christos                         suffix++;
   2390  1.39  christos                 }
   2391  1.39  christos 		if (suffix < g->mlen)
   2392  1.39  christos                 	ssuffix = pmatches[ssuffix];
   2393  1.39  christos         }
   2394  1.39  christos 
   2395  1.39  christos 	free(pmatches);
   2396  1.39  christos }
   2397  1.39  christos 
   2398  1.39  christos /*
   2399   1.1       jtc  - pluscount - count + nesting
   2400   1.9     perry  == static sopno pluscount(struct parse *p, struct re_guts *g);
   2401   1.1       jtc  */
   2402   1.1       jtc static sopno			/* nesting depth */
   2403  1.39  christos pluscount(struct parse *p, struct re_guts *g)
   2404   1.1       jtc {
   2405   1.9     perry 	sop *scan;
   2406   1.9     perry 	sop s;
   2407   1.9     perry 	sopno plusnest = 0;
   2408   1.9     perry 	sopno maxnest = 0;
   2409  1.14     lukem 
   2410  1.14     lukem 	_DIAGASSERT(p != NULL);
   2411  1.14     lukem 	_DIAGASSERT(g != NULL);
   2412   1.1       jtc 
   2413   1.1       jtc 	if (p->error != 0)
   2414   1.1       jtc 		return(0);	/* there may not be an OEND */
   2415   1.1       jtc 
   2416   1.1       jtc 	scan = g->strip + 1;
   2417   1.1       jtc 	do {
   2418   1.1       jtc 		s = *scan++;
   2419   1.1       jtc 		switch (OP(s)) {
   2420   1.1       jtc 		case OPLUS_:
   2421   1.1       jtc 			plusnest++;
   2422   1.1       jtc 			break;
   2423   1.1       jtc 		case O_PLUS:
   2424   1.1       jtc 			if (plusnest > maxnest)
   2425   1.1       jtc 				maxnest = plusnest;
   2426   1.1       jtc 			plusnest--;
   2427   1.1       jtc 			break;
   2428   1.1       jtc 		}
   2429   1.1       jtc 	} while (OP(s) != OEND);
   2430   1.1       jtc 	if (plusnest != 0)
   2431   1.1       jtc 		g->iflags |= BAD;
   2432   1.1       jtc 	return(maxnest);
   2433   1.1       jtc }
   2434