Home | History | Annotate | Line # | Download | only in regex
      1 /*	$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-3-Clause
      5  *
      6  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
      7  * Copyright (c) 1992, 1993, 1994
      8  *	The Regents of the University of California.  All rights reserved.
      9  *
     10  * This code is derived from software contributed to Berkeley by
     11  * Henry Spencer.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. Neither the name of the University nor the names of its contributors
     22  *    may be used to endorse or promote products derived from this software
     23  *    without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     35  * SUCH DAMAGE.
     36  *
     37  *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
     38  */
     39 
     40 #if HAVE_NBTOOL_CONFIG_H
     41 #include "nbtool_config.h"
     42 #endif
     43 
     44 #include <sys/cdefs.h>
     45 #if 0
     46 static char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
     47 __FBSDID("$FreeBSD: head/lib/libc/regex/regexec.c 326025 2017-11-20 19:49:47Z pfg $");
     48 #endif
     49 __RCSID("$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $");
     50 
     51 /*
     52  * the outer shell of regexec()
     53  *
     54  * This file includes engine.c three times, after muchos fiddling with the
     55  * macros that code uses.  This lets the same code operate on two different
     56  * representations for state sets and characters.
     57  */
     58 
     59 #ifndef LIBHACK
     60 #include "namespace.h"
     61 #endif
     62 #include <sys/types.h>
     63 #include <stdio.h>
     64 #include <stdlib.h>
     65 #include <string.h>
     66 #include <limits.h>
     67 #include <ctype.h>
     68 #include <regex.h>
     69 
     70 #if defined(__weak_alias) && !defined(LIBHACK)
     71 __weak_alias(regexec,_regexec)
     72 #endif
     73 
     74 #include "utils.h"
     75 #include "regex2.h"
     76 
/*
 * xmbrtowc - decode one character from s for the multibyte matcher
 *
 * Stores the decoded character through wi (when wi is not NULL) and returns
 * the number of bytes consumed, which is always at least 1.
 *
 * With NLS this wraps mbrtowc(): a decoded NUL counts as one byte, and an
 * invalid or incomplete sequence resets *mbs, yields `dummy' in place of a
 * character and consumes exactly one byte.
 *
 * Without NLS every byte is one character; n, mbs and dummy are not used.
 */
static __inline size_t
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
{
#ifdef NLS
	size_t nr;
	wchar_t wc;

	nr = mbrtowc(&wc, s, n, mbs);
	if (nr == (size_t)-1 || nr == (size_t)-2) {
		/* wc was not written; never read it on this path */
		memset(mbs, 0, sizeof(*mbs));
		if (wi != NULL)
			*wi = dummy;
		return (1);
	}
	if (wi != NULL)
		*wi = wc;
	/* mbrtowc() reports 0 for NUL, but the NUL still occupies one byte */
	return (nr == 0 ? 1 : nr);
#else
	/*
	 * Go through unsigned char so bytes >= 0x80 are not sign-extended
	 * where plain char is signed; this matches xmbrtowc_dummy().
	 */
	if (wi != NULL)
		*wi = (unsigned char)*s;
	return (1);
#endif
}
    102 
/*
 * xmbrtowc_dummy - single-byte stand-in with the same signature as xmbrtowc
 *
 * Treats the byte at s as one complete character: when wi is not NULL the
 * byte is stored through it as an unsigned value.  Always reports that one
 * byte was consumed.  n, mbs and dummy exist only to match the signature.
 */
static __inline size_t
xmbrtowc_dummy(wint_t *wi, const char *s, size_t n __unused,
    mbstate_t *mbs __unused, wint_t dummy __unused)
{
	const size_t consumed = 1;

	if (wi == NULL)
		return (consumed);

	*wi = (unsigned char)*s;
	return (consumed);
}
    115 
    116 /* macros for manipulating states, small version */
    117 #define	states	long
    118 #define	states1	states		/* for later use in regexec() decision */
    119 #define	CLEAR(v)	((v) = 0)
    120 #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
    121 #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
    122 #define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
    123 #define	ASSIGN(d, s)	((d) = (s))
    124 #define	EQ(a, b)	((a) == (b))
    125 #define	STATEVARS	long dummy	/* dummy version */
    126 #define	STATESETUP(m, n)	/* nothing */
    127 #define	STATETEARDOWN(m)	/* nothing */
    128 #define	SETUP(v)	((v) = 0)
    129 #define	onestate	long
    130 #define	INIT(o, n)	((o) = (unsigned long)1 << (n))
    131 #define	INC(o)	((o) <<= 1)
    132 #define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
    133 /* some abbreviations; note that some of these know variable names! */
    134 /* do "if I'm here, I can also be there" etc without branches */
    135 #define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
    136 #define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
    137 #define	ISSETBACK(v, n)	(((v) & ((unsigned long)here >> (n))) != 0)
    138 /* no multibyte support */
    139 #define	XMBRTOWC	xmbrtowc_dummy
    140 #define	ZAPSTATE(mbs)	((void)(mbs))
    141 /* function names */
    142 #define SNAMES			/* engine.c looks after details */
    143 
    144 #include "engine.c"
    145 
/*
 * now undo things, so the next representation can define its own set
 * (states1 is deliberately left alone: regexec() still uses it later)
 */
/* representation and per-match storage */
#undef	states
#undef	onestate
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP
/* whole-set operations */
#undef	CLEAR
#undef	ASSIGN
#undef	EQ
/* single-state operations */
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	INIT
#undef	INC
#undef	ISSTATEIN
/* abbreviations relative to `here' */
#undef	FWD
#undef	BACK
#undef	ISSETBACK
/* character decoding and function-name selection */
#undef	XMBRTOWC
#undef	ZAPSTATE
#undef	SNAMES
    168 
    169 /* macros for manipulating states, large version */
    170 #define	states	char *
    171 #define	CLEAR(v)	memset(v, 0, m->g->nstates)
    172 #define	SET0(v, n)	((v)[n] = 0)
    173 #define	SET1(v, n)	((v)[n] = 1)
    174 #define	ISSET(v, n)	((v)[n])
    175 #define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
    176 #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
    177 #define	STATEVARS	long vn; char *space
    178 #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
    179 				if ((m)->space == NULL) return(REG_ESPACE); \
    180 				(m)->vn = 0; }
    181 #define	STATETEARDOWN(m)	{ free((m)->space); }
    182 #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
    183 #define	onestate	long
    184 #define	INIT(o, n)	((o) = (n))
    185 #define	INC(o)	((o)++)
    186 #define	ISSTATEIN(v, o)	((v)[o])
    187 /* some abbreviations; note that some of these know variable names! */
    188 /* do "if I'm here, I can also be there" etc without branches */
    189 #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
    190 #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
    191 #define	ISSETBACK(v, n)	((v)[here - (n)])
    192 /* no multibyte support */
    193 #define	XMBRTOWC	xmbrtowc_dummy
    194 #define	ZAPSTATE(mbs)	((void)(mbs))
    195 /* function names */
    196 #define	LNAMES			/* flag */
    197 
    198 #include "engine.c"
    199 
    200 /* multibyte character & large states version */
    201 #undef	LNAMES
    202 #undef	XMBRTOWC
    203 #undef	ZAPSTATE
    204 #define	XMBRTOWC	xmbrtowc
    205 #define	ZAPSTATE(mbs)	memset((mbs), 0, sizeof(*(mbs)))
    206 #define	MNAMES
    207 
    208 #include "engine.c"
    209 
    210 /*
    211  - regexec - interface for matching
    212  = extern int regexec(const regex_t *, const char *, size_t, \
    213  =					regmatch_t [], int);
    214  = #define	REG_NOTBOL	00001
    215  = #define	REG_NOTEOL	00002
    216  = #define	REG_STARTEND	00004
    217  = #define	REG_TRACE	00400	// tracing of execution
    218  = #define	REG_LARGE	01000	// force large representation
    219  = #define	REG_BACKR	02000	// force use of backref code
    220  *
    221  * We put this here so we can exploit knowledge of the state representation
    222  * when choosing which matcher to call.  Also, by this point the matchers
    223  * have been prototyped.
    224  */
    225 int				/* 0 success, REG_NOMATCH failure */
    226 regexec(const regex_t * __restrict preg,
    227 	const char * __restrict string,
    228 	size_t nmatch,
    229 	regmatch_t pmatch[__restrict],
    230 	int eflags)
    231 {
    232 	struct re_guts *g = preg->re_g;
    233 #ifdef REDEBUG
    234 #	define	GOODFLAGS(f)	(f)
    235 #else
    236 #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
    237 #endif
    238 	_DIAGASSERT(preg != NULL);
    239 	_DIAGASSERT(string != NULL);
    240 
    241 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
    242 		return(REG_BADPAT);
    243 	assert(!(g->iflags&BAD));
    244 	if (g->iflags&BAD)		/* backstop for no-debug case */
    245 		return(REG_BADPAT);
    246 	eflags = GOODFLAGS(eflags);
    247 
    248 	if (MB_CUR_MAX > 1)
    249 		return(mmatcher(g, string, nmatch, pmatch, eflags));
    250 	else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
    251 		return(smatcher(g, string, nmatch, pmatch, eflags));
    252 	else
    253 		return(lmatcher(g, string, nmatch, pmatch, eflags));
    254 }
    255