1 1.26 christos /* $NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $ */ 2 1.6 cgd 3 1.4 cgd /*- 4 1.23 christos * SPDX-License-Identifier: BSD-3-Clause 5 1.23 christos * 6 1.23 christos * Copyright (c) 1992, 1993, 1994 Henry Spencer. 7 1.4 cgd * Copyright (c) 1992, 1993, 1994 8 1.4 cgd * The Regents of the University of California. All rights reserved. 9 1.4 cgd * 10 1.4 cgd * This code is derived from software contributed to Berkeley by 11 1.4 cgd * Henry Spencer. 12 1.4 cgd * 13 1.4 cgd * Redistribution and use in source and binary forms, with or without 14 1.4 cgd * modification, are permitted provided that the following conditions 15 1.4 cgd * are met: 16 1.4 cgd * 1. Redistributions of source code must retain the above copyright 17 1.4 cgd * notice, this list of conditions and the following disclaimer. 18 1.4 cgd * 2. Redistributions in binary form must reproduce the above copyright 19 1.4 cgd * notice, this list of conditions and the following disclaimer in the 20 1.4 cgd * documentation and/or other materials provided with the distribution. 21 1.17 agc * 3. Neither the name of the University nor the names of its contributors 22 1.17 agc * may be used to endorse or promote products derived from this software 23 1.17 agc * without specific prior written permission. 24 1.17 agc * 25 1.17 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 1.17 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 1.17 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 1.17 agc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 1.17 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 1.17 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 1.17 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 1.17 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 1.17 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 1.17 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 1.17 agc * SUCH DAMAGE. 36 1.17 agc * 37 1.17 agc * @(#)regexec.c 8.3 (Berkeley) 3/20/94 38 1.17 agc */ 39 1.17 agc 40 1.26 christos #if HAVE_NBTOOL_CONFIG_H 41 1.26 christos #include "nbtool_config.h" 42 1.26 christos #endif 43 1.26 christos 44 1.8 christos #include <sys/cdefs.h> 45 1.6 cgd #if 0 46 1.4 cgd static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; 47 1.23 christos __FBSDID("$FreeBSD: head/lib/libc/regex/regexec.c 326025 2017-11-20 19:49:47Z pfg $"); 48 1.6 cgd #endif 49 1.26 christos __RCSID("$NetBSD: regexec.c,v 1.26 2021/02/26 19:24:47 christos Exp $"); 50 1.4 cgd 51 1.1 jtc /* 52 1.1 jtc * the outer shell of regexec() 53 1.1 jtc * 54 1.23 christos * This file includes engine.c three times, after muchos fiddling with the 55 1.1 jtc * macros that code uses. This lets the same code operate on two different 56 1.23 christos * representations for state sets and characters. 57 1.1 jtc */ 58 1.23 christos 59 1.25 christos #ifndef LIBHACK 60 1.9 jtc #include "namespace.h" 61 1.25 christos #endif 62 1.1 jtc #include <sys/types.h> 63 1.1 jtc #include <stdio.h> 64 1.1 jtc #include <stdlib.h> 65 1.1 jtc #include <string.h> 66 1.23 christos #include <limits.h> 67 1.23 christos #include <ctype.h> 68 1.20 junyoung #include <regex.h> 69 1.9 jtc 70 1.25 christos #if defined(__weak_alias) && !defined(LIBHACK) 71 1.15 mycroft __weak_alias(regexec,_regexec) 72 1.9 jtc #endif 73 1.1 jtc 74 1.1 jtc #include "utils.h" 75 1.1 jtc #include "regex2.h" 76 1.1 jtc 77 1.23 christos static __inline size_t 78 1.23 christos xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) 79 1.23 christos { 80 1.25 christos #ifdef NLS 81 1.23 christos size_t nr; 82 1.23 christos wchar_t wc; 83 1.23 christos 84 1.23 christos nr = mbrtowc(&wc, s, n, mbs); 85 1.23 christos if (wi != NULL) 86 1.23 christos *wi = wc; 87 1.23 christos if (nr == 0) 88 1.23 christos return (1); 89 1.23 christos else if (nr == (size_t)-1 || nr == (size_t)-2) { 90 1.23 christos memset(mbs, 0, sizeof(*mbs)); 91 1.23 christos if (wi != NULL) 92 1.23 christos *wi = dummy; 93 1.23 christos return (1); 94 1.23 christos } else 95 1.23 christos return (nr); 96 1.25 christos #else 97 1.25 christos if (wi) 98 1.25 christos *wi = *s; 99 1.25 christos return 1; 100 1.25 christos #endif 101 1.23 christos } 102 1.23 christos 103 1.23 christos static __inline size_t 104 1.23 christos xmbrtowc_dummy(wint_t *wi, 105 1.23 christos const char *s, 106 1.23 christos size_t n __unused, 107 1.23 christos mbstate_t *mbs __unused, 108 1.23 christos wint_t dummy __unused) 109 1.23 christos { 110 1.23 christos 111 1.23 christos if (wi != NULL) 112 1.23 christos *wi = (unsigned char)*s; 113 1.23 christos return (1); 114 1.23 christos } 115 1.23 christos 116 1.1 jtc /* macros for manipulating states, small version */ 117 1.23 christos #define states long 118 1.23 christos #define states1 states /* for later use in regexec() decision */ 119 1.1 jtc #define CLEAR(v) ((v) = 0) 120 1.12 drochner #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) 121 1.12 drochner #define SET1(v, n) ((v) |= (unsigned long)1 << (n)) 122 1.12 drochner #define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) 123 1.1 jtc #define ASSIGN(d, s) ((d) = (s)) 124 1.1 jtc #define EQ(a, b) ((a) == (b)) 125 1.23 christos #define STATEVARS long dummy /* dummy version */ 126 1.1 jtc #define STATESETUP(m, n) /* nothing */ 127 1.1 jtc #define STATETEARDOWN(m) /* nothing */ 128 1.1 jtc #define SETUP(v) ((v) = 0) 129 1.23 christos #define onestate long 130 1.12 drochner #define INIT(o, n) ((o) = (unsigned long)1 << (n)) 131 1.7 cgd #define INC(o) ((o) <<= 1) 132 1.5 cgd #define ISSTATEIN(v, o) (((v) & (o)) != 0) 133 1.1 jtc /* some abbreviations; note that some of these know variable names! */ 134 1.1 jtc /* do "if I'm here, I can also be there" etc without branches */ 135 1.12 drochner #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) 136 1.12 drochner #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) 137 1.12 drochner #define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) 138 1.23 christos /* no multibyte support */ 139 1.23 christos #define XMBRTOWC xmbrtowc_dummy 140 1.23 christos #define ZAPSTATE(mbs) ((void)(mbs)) 141 1.1 jtc /* function names */ 142 1.1 jtc #define SNAMES /* engine.c looks after details */ 143 1.1 jtc 144 1.1 jtc #include "engine.c" 145 1.1 jtc 146 1.1 jtc /* now undo things */ 147 1.1 jtc #undef states 148 1.1 jtc #undef CLEAR 149 1.1 jtc #undef SET0 150 1.1 jtc #undef SET1 151 1.1 jtc #undef ISSET 152 1.1 jtc #undef ASSIGN 153 1.1 jtc #undef EQ 154 1.1 jtc #undef STATEVARS 155 1.1 jtc #undef STATESETUP 156 1.1 jtc #undef STATETEARDOWN 157 1.1 jtc #undef SETUP 158 1.1 jtc #undef onestate 159 1.1 jtc #undef INIT 160 1.1 jtc #undef INC 161 1.1 jtc #undef ISSTATEIN 162 1.1 jtc #undef FWD 163 1.1 jtc #undef BACK 164 1.1 jtc #undef ISSETBACK 165 1.1 jtc #undef SNAMES 166 1.23 christos #undef XMBRTOWC 167 1.23 christos #undef ZAPSTATE 168 1.1 jtc 169 1.1 jtc /* macros for manipulating states, large version */ 170 1.1 jtc #define states char * 171 1.23 christos #define CLEAR(v) memset(v, 0, m->g->nstates) 172 1.1 jtc #define SET0(v, n) ((v)[n] = 0) 173 1.1 jtc #define SET1(v, n) ((v)[n] = 1) 174 1.1 jtc #define ISSET(v, n) ((v)[n]) 175 1.23 christos #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) 176 1.23 christos #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 177 1.23 christos #define STATEVARS long vn; char *space 178 1.23 christos #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 179 1.23 christos if ((m)->space == NULL) return(REG_ESPACE); \ 180 1.23 christos (m)->vn = 0; } 181 1.23 christos #define STATETEARDOWN(m) { free((m)->space); } 182 1.23 christos #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 183 1.23 christos #define onestate long 184 1.23 christos #define INIT(o, n) ((o) = (n)) 185 1.1 jtc #define INC(o) ((o)++) 186 1.1 jtc #define ISSTATEIN(v, o) ((v)[o]) 187 1.1 jtc /* some abbreviations; note that some of these know variable names! */ 188 1.1 jtc /* do "if I'm here, I can also be there" etc without branches */ 189 1.1 jtc #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 190 1.1 jtc #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 191 1.1 jtc #define ISSETBACK(v, n) ((v)[here - (n)]) 192 1.23 christos /* no multibyte support */ 193 1.23 christos #define XMBRTOWC xmbrtowc_dummy 194 1.23 christos #define ZAPSTATE(mbs) ((void)(mbs)) 195 1.1 jtc /* function names */ 196 1.1 jtc #define LNAMES /* flag */ 197 1.1 jtc 198 1.1 jtc #include "engine.c" 199 1.1 jtc 200 1.23 christos /* multibyte character & large states version */ 201 1.23 christos #undef LNAMES 202 1.23 christos #undef XMBRTOWC 203 1.23 christos #undef ZAPSTATE 204 1.23 christos #define XMBRTOWC xmbrtowc 205 1.23 christos #define ZAPSTATE(mbs) memset((mbs), 0, sizeof(*(mbs))) 206 1.23 christos #define MNAMES 207 1.23 christos 208 1.23 christos #include "engine.c" 209 1.23 christos 210 1.1 jtc /* 211 1.1 jtc - regexec - interface for matching 212 1.2 jtc = extern int regexec(const regex_t *, const char *, size_t, \ 213 1.2 jtc = regmatch_t [], int); 214 1.1 jtc = #define REG_NOTBOL 00001 215 1.1 jtc = #define REG_NOTEOL 00002 216 1.1 jtc = #define REG_STARTEND 00004 217 1.1 jtc = #define REG_TRACE 00400 // tracing of execution 218 1.1 jtc = #define REG_LARGE 01000 // force large representation 219 1.1 jtc = #define REG_BACKR 02000 // force use of backref code 220 1.1 jtc * 221 1.1 jtc * We put this here so we can exploit knowledge of the state representation 222 1.1 jtc * when choosing which matcher to call. Also, by this point the matchers 223 1.1 jtc * have been prototyped. 224 1.1 jtc */ 225 1.1 jtc int /* 0 success, REG_NOMATCH failure */ 226 1.23 christos regexec(const regex_t * __restrict preg, 227 1.23 christos const char * __restrict string, 228 1.23 christos size_t nmatch, 229 1.23 christos regmatch_t pmatch[__restrict], 230 1.23 christos int eflags) 231 1.1 jtc { 232 1.10 perry struct re_guts *g = preg->re_g; 233 1.1 jtc #ifdef REDEBUG 234 1.1 jtc # define GOODFLAGS(f) (f) 235 1.1 jtc #else 236 1.1 jtc # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 237 1.13 lukem #endif 238 1.13 lukem _DIAGASSERT(preg != NULL); 239 1.13 lukem _DIAGASSERT(string != NULL); 240 1.1 jtc 241 1.1 jtc if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 242 1.1 jtc return(REG_BADPAT); 243 1.1 jtc assert(!(g->iflags&BAD)); 244 1.1 jtc if (g->iflags&BAD) /* backstop for no-debug case */ 245 1.1 jtc return(REG_BADPAT); 246 1.3 jtc eflags = GOODFLAGS(eflags); 247 1.1 jtc 248 1.23 christos if (MB_CUR_MAX > 1) 249 1.23 christos return(mmatcher(g, string, nmatch, pmatch, eflags)); 250 1.23 christos else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) 251 1.23 christos return(smatcher(g, string, nmatch, pmatch, eflags)); 252 1.1 jtc else 253 1.23 christos return(lmatcher(g, string, nmatch, pmatch, eflags)); 254 1.1 jtc } 255