1 1.1 christos /* Extended regular expression matching and search library, 2 1.1 christos version 0.12. 3 1.1 christos (Implements POSIX draft P1003.2/D11.2, except for some of the 4 1.1 christos internationalization features.) 5 1.1 christos 6 1.1.1.10 christos Copyright (C) 1993-2024 Free Software Foundation, Inc. 7 1.1 christos This file is part of the GNU C Library. 8 1.1 christos 9 1.1 christos The GNU C Library is free software; you can redistribute it and/or 10 1.1 christos modify it under the terms of the GNU Lesser General Public 11 1.1 christos License as published by the Free Software Foundation; either 12 1.1 christos version 2.1 of the License, or (at your option) any later version. 13 1.1 christos 14 1.1 christos The GNU C Library is distributed in the hope that it will be useful, 15 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 16 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 1.1 christos Lesser General Public License for more details. 18 1.1 christos 19 1.1 christos You should have received a copy of the GNU Lesser General Public 20 1.1 christos License along with the GNU C Library; if not, write to the Free 21 1.1 christos Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 22 1.1 christos 02110-1301 USA. */ 23 1.1 christos 24 1.1 christos /* This file has been modified for usage in libiberty. It includes "xregex.h" 25 1.1 christos instead of <regex.h>. The "xregex.h" header file renames all external 26 1.1 christos routines with an "x" prefix so they do not collide with the native regex 27 1.1 christos routines or with other components regex routines. */ 28 1.1 christos /* AIX requires this to be the first thing in the file. 
*/ 29 1.1 christos #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC 30 1.1 christos #pragma alloca 31 1.1 christos #endif 32 1.1 christos 33 1.1.1.9 christos #if __GNUC__ >= 12 34 1.1.1.9 christos # pragma GCC diagnostic ignored "-Wuse-after-free" 35 1.1.1.9 christos #endif 36 1.1.1.9 christos 37 1.1 christos #undef _GNU_SOURCE 38 1.1 christos #define _GNU_SOURCE 39 1.1 christos 40 1.1 christos #ifndef INSIDE_RECURSION 41 1.1 christos # ifdef HAVE_CONFIG_H 42 1.1 christos # include <config.h> 43 1.1 christos # endif 44 1.1 christos #endif 45 1.1 christos 46 1.1 christos #include <ansidecl.h> 47 1.1 christos 48 1.1 christos #ifndef INSIDE_RECURSION 49 1.1 christos 50 1.1 christos # if defined STDC_HEADERS && !defined emacs 51 1.1 christos # include <stddef.h> 52 1.1.1.2 christos # define PTR_INT_TYPE ptrdiff_t 53 1.1 christos # else 54 1.1 christos /* We need this for `regex.h', and perhaps for the Emacs include files. */ 55 1.1 christos # include <sys/types.h> 56 1.1.1.2 christos # define PTR_INT_TYPE long 57 1.1 christos # endif 58 1.1 christos 59 1.1 christos # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) 60 1.1 christos 61 1.1 christos /* For platforms which support the ISO C Amendment 1 functionality we 62 1.1 christos support user defined character classes. */ 63 1.1 christos # if defined _LIBC || WIDE_CHAR_SUPPORT 64 1.1 christos /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 65 1.1 christos # include <wchar.h> 66 1.1 christos # include <wctype.h> 67 1.1 christos # endif 68 1.1 christos 69 1.1 christos # ifdef _LIBC 70 1.1 christos /* We have to keep the namespace clean. 
*/ 71 1.1 christos # define regfree(preg) __regfree (preg) 72 1.1 christos # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 73 1.1 christos # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 74 1.1 christos # define regerror(errcode, preg, errbuf, errbuf_size) \ 75 1.1 christos __regerror(errcode, preg, errbuf, errbuf_size) 76 1.1 christos # define re_set_registers(bu, re, nu, st, en) \ 77 1.1 christos __re_set_registers (bu, re, nu, st, en) 78 1.1 christos # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 79 1.1 christos __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 80 1.1 christos # define re_match(bufp, string, size, pos, regs) \ 81 1.1 christos __re_match (bufp, string, size, pos, regs) 82 1.1 christos # define re_search(bufp, string, size, startpos, range, regs) \ 83 1.1 christos __re_search (bufp, string, size, startpos, range, regs) 84 1.1 christos # define re_compile_pattern(pattern, length, bufp) \ 85 1.1 christos __re_compile_pattern (pattern, length, bufp) 86 1.1 christos # define re_set_syntax(syntax) __re_set_syntax (syntax) 87 1.1 christos # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ 88 1.1 christos __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) 89 1.1 christos # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) 90 1.1 christos 91 1.1 christos # define btowc __btowc 92 1.1 christos 93 1.1 christos /* We are also using some library internals. */ 94 1.1 christos # include <locale/localeinfo.h> 95 1.1 christos # include <locale/elem-hash.h> 96 1.1 christos # include <langinfo.h> 97 1.1 christos # include <locale/coll-lookup.h> 98 1.1 christos # endif 99 1.1 christos 100 1.1 christos /* This is for other GNU distributions with internationalized messages. 
*/ 101 1.1 christos # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC 102 1.1 christos # include <libintl.h> 103 1.1 christos # ifdef _LIBC 104 1.1 christos # undef gettext 105 1.1 christos # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) 106 1.1 christos # endif 107 1.1 christos # else 108 1.1 christos # define gettext(msgid) (msgid) 109 1.1 christos # endif 110 1.1 christos 111 1.1 christos # ifndef gettext_noop 112 1.1 christos /* This define is so xgettext can find the internationalizable 113 1.1 christos strings. */ 114 1.1 christos # define gettext_noop(String) String 115 1.1 christos # endif 116 1.1 christos 117 1.1 christos /* The `emacs' switch turns on certain matching commands 118 1.1 christos that make sense only in Emacs. */ 119 1.1 christos # ifdef emacs 120 1.1 christos 121 1.1 christos # include "lisp.h" 122 1.1 christos # include "buffer.h" 123 1.1 christos # include "syntax.h" 124 1.1 christos 125 1.1 christos # else /* not emacs */ 126 1.1 christos 127 1.1 christos /* If we are not linking with Emacs proper, 128 1.1 christos we can't use the relocating allocator 129 1.1 christos even if config.h says that we can. */ 130 1.1 christos # undef REL_ALLOC 131 1.1 christos 132 1.1 christos # if defined STDC_HEADERS || defined _LIBC 133 1.1 christos # include <stdlib.h> 134 1.1 christos # else 135 1.1 christos char *malloc (); 136 1.1 christos char *realloc (); 137 1.1 christos # endif 138 1.1 christos 139 1.1 christos /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 140 1.1 christos If nothing else has been done, use the method below. 
*/ 141 1.1 christos # ifdef INHIBIT_STRING_HEADER 142 1.1 christos # if !(defined HAVE_BZERO && defined HAVE_BCOPY) 143 1.1 christos # if !defined bzero && !defined bcopy 144 1.1 christos # undef INHIBIT_STRING_HEADER 145 1.1 christos # endif 146 1.1 christos # endif 147 1.1 christos # endif 148 1.1 christos 149 1.1 christos /* This is the normal way of making sure we have a bcopy and a bzero. 150 1.1 christos This is used in most programs--a few other programs avoid this 151 1.1 christos by defining INHIBIT_STRING_HEADER. */ 152 1.1 christos # ifndef INHIBIT_STRING_HEADER 153 1.1 christos # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 154 1.1 christos # include <string.h> 155 1.1 christos # ifndef bzero 156 1.1 christos # ifndef _LIBC 157 1.1.1.4 christos # define bzero(s, n) ((void) memset (s, '\0', n)) 158 1.1 christos # else 159 1.1 christos # define bzero(s, n) __bzero (s, n) 160 1.1 christos # endif 161 1.1 christos # endif 162 1.1 christos # else 163 1.1 christos # include <strings.h> 164 1.1 christos # ifndef memcmp 165 1.1 christos # define memcmp(s1, s2, n) bcmp (s1, s2, n) 166 1.1 christos # endif 167 1.1 christos # ifndef memcpy 168 1.1 christos # define memcpy(d, s, n) (bcopy (s, d, n), (d)) 169 1.1 christos # endif 170 1.1 christos # endif 171 1.1 christos # endif 172 1.1 christos 173 1.1 christos /* Define the syntax stuff for \<, \>, etc. */ 174 1.1 christos 175 1.1 christos /* This must be nonzero for the wordchar and notwordchar pattern 176 1.1 christos commands in re_match_2. 
*/ 177 1.1 christos # ifndef Sword 178 1.1 christos # define Sword 1 179 1.1 christos # endif 180 1.1 christos 181 1.1 christos # ifdef SWITCH_ENUM_BUG 182 1.1 christos # define SWITCH_ENUM_CAST(x) ((int)(x)) 183 1.1 christos # else 184 1.1 christos # define SWITCH_ENUM_CAST(x) (x) 185 1.1 christos # endif 186 1.1 christos 187 1.1 christos # endif /* not emacs */ 188 1.1 christos 189 1.1 christos # if defined _LIBC || HAVE_LIMITS_H 190 1.1 christos # include <limits.h> 191 1.1 christos # endif 192 1.1 christos 193 1.1 christos # ifndef MB_LEN_MAX 194 1.1 christos # define MB_LEN_MAX 1 195 1.1 christos # endif 196 1.1 christos 197 1.1 christos /* Get the interface, including the syntax bits. */ 199 1.1 christos # include "xregex.h" /* change for libiberty */ 200 1.1 christos 201 1.1 christos /* isalpha etc. are used for the character classes. */ 202 1.1 christos # include <ctype.h> 203 1.1 christos 204 1.1 christos /* Jim Meyering writes: 205 1.1 christos 206 1.1 christos "... Some ctype macros are valid only for character codes that 207 1.1 christos isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 208 1.1 christos using /bin/cc or gcc but without giving an ansi option). So, all 209 1.1 christos ctype uses should be through macros like ISPRINT... If 210 1.1 christos STDC_HEADERS is defined, then autoconf has verified that the ctype 211 1.1 christos macros don't need to be guarded with references to isascii. ... 212 1.1 christos Defining isascii to 1 should let any compiler worth its salt 213 1.1 christos eliminate the && through constant folding." 214 1.1 christos Solaris defines some of these symbols so we must undefine them first. 
*/ 215 1.1 christos 216 1.1 christos # undef ISASCII 217 1.1 christos # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 218 1.1 christos # define ISASCII(c) 1 219 1.1 christos # else 220 1.1 christos # define ISASCII(c) isascii(c) 221 1.1 christos # endif 222 1.1 christos 223 1.1 christos # ifdef isblank 224 1.1 christos # define ISBLANK(c) (ISASCII (c) && isblank (c)) 225 1.1 christos # else 226 1.1 christos # define ISBLANK(c) ((c) == ' ' || (c) == '\t') 227 1.1 christos # endif 228 1.1 christos # ifdef isgraph 229 1.1 christos # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 230 1.1 christos # else 231 1.1 christos # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 232 1.1 christos # endif 233 1.1 christos 234 1.1 christos # undef ISPRINT 235 1.1 christos # define ISPRINT(c) (ISASCII (c) && isprint (c)) 236 1.1 christos # define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 237 1.1 christos # define ISALNUM(c) (ISASCII (c) && isalnum (c)) 238 1.1 christos # define ISALPHA(c) (ISASCII (c) && isalpha (c)) 239 1.1 christos # define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 240 1.1 christos # define ISLOWER(c) (ISASCII (c) && islower (c)) 241 1.1 christos # define ISPUNCT(c) (ISASCII (c) && ispunct (c)) 242 1.1 christos # define ISSPACE(c) (ISASCII (c) && isspace (c)) 243 1.1 christos # define ISUPPER(c) (ISASCII (c) && isupper (c)) 244 1.1 christos # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 245 1.1 christos 246 1.1 christos # ifdef _tolower 247 1.1 christos # define TOLOWER(c) _tolower(c) 248 1.1 christos # else 249 1.1 christos # define TOLOWER(c) tolower(c) 250 1.1 christos # endif 251 1.1 christos 252 1.1 christos # ifndef NULL 253 1.1 christos # define NULL (void *)0 254 1.1 christos # endif 255 1.1 christos 256 1.1 christos /* We remove any previous definition of `SIGN_EXTEND_CHAR', 257 1.1 christos since ours (we hope) works properly with all combinations of 258 1.1 christos machines, compilers, `char' and `unsigned char' 
argument types. 259 1.1 christos (Per Bothner suggested the basic approach.) */ 260 1.1 christos # undef SIGN_EXTEND_CHAR 261 1.1 christos # if __STDC__ 262 1.1 christos # define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 263 1.1 christos # else /* not __STDC__ */ 264 1.1 christos /* As in Harbison and Steele. */ 265 1.1 christos # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 266 1.1 christos # endif 267 1.1 christos 268 1.1 christos # ifndef emacs 270 1.1 christos /* How many characters in the character set. */ 271 1.1 christos # define CHAR_SET_SIZE 256 272 1.1 christos 273 1.1 christos # ifdef SYNTAX_TABLE 274 1.1 christos 275 1.1 christos extern char *re_syntax_table; 276 1.1 christos 277 1.1 christos # else /* not SYNTAX_TABLE */ 278 1.1 christos 279 1.1 christos static char re_syntax_table[CHAR_SET_SIZE]; 280 1.1 christos 281 1.1 christos static void init_syntax_once (void); 282 1.1 christos 283 1.1 christos static void 284 1.1 christos init_syntax_once (void) 285 1.1 christos { 286 1.1 christos register int c; 287 1.1 christos static int done = 0; 288 1.1 christos 289 1.1 christos if (done) 290 1.1 christos return; 291 1.1 christos bzero (re_syntax_table, sizeof re_syntax_table); 292 1.1 christos 293 1.1 christos for (c = 0; c < CHAR_SET_SIZE; ++c) 294 1.1 christos if (ISALNUM (c)) 295 1.1 christos re_syntax_table[c] = Sword; 296 1.1 christos 297 1.1 christos re_syntax_table['_'] = Sword; 298 1.1 christos 299 1.1 christos done = 1; 300 1.1 christos } 301 1.1 christos 302 1.1 christos # endif /* not SYNTAX_TABLE */ 303 1.1 christos 304 1.1 christos # define SYNTAX(c) re_syntax_table[(unsigned char) (c)] 305 1.1 christos 306 1.1 christos # endif /* emacs */ 307 1.1 christos 308 1.1 christos /* Integer type for pointers. */ 310 1.1 christos # if !defined _LIBC && !defined HAVE_UINTPTR_T 311 1.1 christos typedef unsigned long int uintptr_t; 312 1.1 christos # endif 313 1.1 christos 314 1.1 christos /* Should we use malloc or alloca? 
If REGEX_MALLOC is not defined, we 315 1.1 christos use `alloca' instead of `malloc'. This is because using malloc in 316 1.1 christos re_search* or re_match* could cause memory leaks when C-g is used in 317 1.1 christos Emacs; also, malloc is slower and causes storage fragmentation. On 318 1.1 christos the other hand, malloc is more portable, and easier to debug. 319 1.1 christos 320 1.1 christos Because we sometimes use alloca, some routines have to be macros, 321 1.1 christos not functions -- `alloca'-allocated space disappears at the end of the 322 1.1 christos function it is called in. */ 323 1.1 christos 324 1.1 christos # ifdef REGEX_MALLOC 325 1.1 christos 326 1.1 christos # define REGEX_ALLOCATE malloc 327 1.1 christos # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 328 1.1 christos # define REGEX_FREE free 329 1.1 christos 330 1.1 christos # else /* not REGEX_MALLOC */ 331 1.1 christos 332 1.1 christos /* Emacs already defines alloca, sometimes. */ 333 1.1 christos # ifndef alloca 334 1.1 christos 335 1.1 christos /* Make alloca work the best possible way. */ 336 1.1 christos # ifdef __GNUC__ 337 1.1 christos # define alloca __builtin_alloca 338 1.1 christos # else /* not __GNUC__ */ 339 1.1 christos # if HAVE_ALLOCA_H 340 1.1 christos # include <alloca.h> 341 1.1 christos # endif /* HAVE_ALLOCA_H */ 342 1.1 christos # endif /* not __GNUC__ */ 343 1.1 christos 344 1.1 christos # endif /* not alloca */ 345 1.1 christos 346 1.1 christos # define REGEX_ALLOCATE alloca 347 1.1 christos 348 1.1 christos /* Assumes a `char *destination' variable. */ 349 1.1 christos # define REGEX_REALLOCATE(source, osize, nsize) \ 350 1.1 christos (destination = (char *) alloca (nsize), \ 351 1.1 christos memcpy (destination, source, osize)) 352 1.1 christos 353 1.1 christos /* No need to do anything to free, after alloca. */ 354 1.1 christos # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. 
*/ 355 1.1 christos 356 1.1 christos # endif /* not REGEX_MALLOC */ 357 1.1 christos 358 1.1 christos /* Define how to allocate the failure stack. */ 359 1.1 christos 360 1.1 christos # if defined REL_ALLOC && defined REGEX_MALLOC 361 1.1 christos 362 1.1 christos # define REGEX_ALLOCATE_STACK(size) \ 363 1.1 christos r_alloc (&failure_stack_ptr, (size)) 364 1.1 christos # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 365 1.1 christos r_re_alloc (&failure_stack_ptr, (nsize)) 366 1.1 christos # define REGEX_FREE_STACK(ptr) \ 367 1.1 christos r_alloc_free (&failure_stack_ptr) 368 1.1 christos 369 1.1 christos # else /* not using relocating allocator */ 370 1.1 christos 371 1.1 christos # ifdef REGEX_MALLOC 372 1.1 christos 373 1.1 christos # define REGEX_ALLOCATE_STACK malloc 374 1.1 christos # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 375 1.1 christos # define REGEX_FREE_STACK free 376 1.1 christos 377 1.1 christos # else /* not REGEX_MALLOC */ 378 1.1 christos 379 1.1 christos # define REGEX_ALLOCATE_STACK alloca 380 1.1 christos 381 1.1 christos # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 382 1.1 christos REGEX_REALLOCATE (source, osize, nsize) 383 1.1 christos /* No need to explicitly free anything. */ 384 1.1 christos # define REGEX_FREE_STACK(arg) 385 1.1 christos 386 1.1 christos # endif /* not REGEX_MALLOC */ 387 1.1 christos # endif /* not using relocating allocator */ 388 1.1 christos 389 1.1 christos 390 1.1 christos /* True if `size1' is non-NULL and PTR is pointing anywhere inside 391 1.1 christos `string1' or just past its end. This works if PTR is NULL, which is 392 1.1 christos a good thing. */ 393 1.1 christos # define FIRST_STRING_P(ptr) \ 394 1.1 christos (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 395 1.1 christos 396 1.1 christos /* (Re)Allocate N items of type T using malloc, or fail. 
*/ 397 1.1 christos # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 398 1.1 christos # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 399 1.1 christos # define RETALLOC_IF(addr, n, t) \ 400 1.1 christos if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 401 1.1 christos # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 402 1.1 christos 403 1.1 christos # define BYTEWIDTH 8 /* In bits. */ 404 1.1 christos 405 1.1 christos # define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 406 1.1 christos 407 1.1 christos # undef MAX 408 1.1 christos # undef MIN 409 1.1 christos # define MAX(a, b) ((a) > (b) ? (a) : (b)) 410 1.1 christos # define MIN(a, b) ((a) < (b) ? (a) : (b)) 411 1.1 christos 412 1.1 christos typedef char boolean; 413 1.1 christos # define false 0 414 1.1 christos # define true 1 415 1.1 christos 416 1.1 christos static reg_errcode_t byte_regex_compile (const char *pattern, size_t size, 417 1.1 christos reg_syntax_t syntax, 418 1.1 christos struct re_pattern_buffer *bufp); 419 1.1 christos 420 1.1 christos static int byte_re_match_2_internal (struct re_pattern_buffer *bufp, 421 1.1 christos const char *string1, int size1, 422 1.1 christos const char *string2, int size2, 423 1.1 christos int pos, 424 1.1 christos struct re_registers *regs, 425 1.1 christos int stop); 426 1.1 christos static int byte_re_search_2 (struct re_pattern_buffer *bufp, 427 1.1 christos const char *string1, int size1, 428 1.1 christos const char *string2, int size2, 429 1.1 christos int startpos, int range, 430 1.1 christos struct re_registers *regs, int stop); 431 1.1 christos static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp); 432 1.1 christos 433 1.1 christos #ifdef MBS_SUPPORT 434 1.1 christos static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size, 435 1.1 christos reg_syntax_t syntax, 436 1.1 christos struct re_pattern_buffer *bufp); 437 1.1 christos 438 1.1 christos 439 1.1 
christos static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp, 440 1.1 christos const char *cstring1, int csize1, 441 1.1 christos const char *cstring2, int csize2, 442 1.1 christos int pos, 443 1.1 christos struct re_registers *regs, 444 1.1 christos int stop, 445 1.1 christos wchar_t *string1, int size1, 446 1.1 christos wchar_t *string2, int size2, 447 1.1 christos int *mbs_offset1, int *mbs_offset2); 448 1.1 christos static int wcs_re_search_2 (struct re_pattern_buffer *bufp, 449 1.1 christos const char *string1, int size1, 450 1.1 christos const char *string2, int size2, 451 1.1 christos int startpos, int range, 452 1.1 christos struct re_registers *regs, int stop); 453 1.1 christos static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp); 454 1.1 christos #endif 455 1.1 christos 456 1.1 christos /* These are the command codes that appear in compiled regular 458 1.1 christos expressions. Some opcodes are followed by argument bytes. A 459 1.1 christos command code can specify any interpretation whatsoever for its 460 1.1 christos arguments. Zero bytes may appear in the compiled regular expression. */ 461 1.1 christos 462 1.1 christos typedef enum 463 1.1 christos { 464 1.1 christos no_op = 0, /* Opcode value 0; presumably does nothing, per its name -- confirm in the matcher. */ 465 1.1 christos 466 1.1 christos /* Succeed right away--no more backtracking. */ 467 1.1 christos succeed, 468 1.1 christos 469 1.1 christos /* Followed by one byte giving n, then by n literal bytes. */ 470 1.1 christos exactn, 471 1.1 christos 472 1.1 christos # ifdef MBS_SUPPORT 473 1.1 christos /* Same as exactn, but contains binary data. */ 474 1.1 christos exactn_bin, 475 1.1 christos # endif 476 1.1 christos 477 1.1 christos /* Matches any (more or less) character. */ 478 1.1 christos anychar, 479 1.1 christos 480 1.1 christos /* Matches any one char belonging to specified set. First 481 1.1 christos following byte is number of bitmap bytes. Then come bytes 482 1.1 christos for a bitmap saying which chars are in. 
Bits in each byte 483 1.1 christos are ordered low-bit-first. A character is in the set if its 484 1.1 christos bit is 1. A character too large to have a bit in the map is 485 1.1 christos automatically not in the set. */ 486 1.1 christos /* ifdef MBS_SUPPORT, following element is length of character 487 1.1 christos classes, length of collating symbols, length of equivalence 488 1.1 christos classes, length of character ranges, and length of characters. 489 1.1 christos Next, character class element, collating symbols elements, 490 1.1 christos equivalence class elements, range elements, and character 491 1.1 christos elements follow. 492 1.1 christos See regex_compile function. */ 493 1.1 christos charset, 494 1.1 christos 495 1.1 christos /* Same parameters as charset, but match any character that is 496 1.1 christos not one of those specified. */ 497 1.1 christos charset_not, 498 1.1 christos 499 1.1 christos /* Start remembering the text that is matched, for storing in a 500 1.1 christos register. Followed by one byte with the register number, in 501 1.1 christos the range 0 to one less than the pattern buffer's re_nsub 502 1.1 christos field. Then followed by one byte with the number of groups 503 1.1 christos inner to this one. (This last has to be part of the 504 1.1 christos start_memory only because we need it in the on_failure_jump 505 1.1 christos of re_match_2.) */ 506 1.1 christos start_memory, 507 1.1 christos 508 1.1 christos /* Stop remembering the text that is matched and store it in a 509 1.1 christos memory register. Followed by one byte with the register 510 1.1 christos number, in the range 0 to one less than `re_nsub' in the 511 1.1 christos pattern buffer, and one byte with the number of inner groups, 512 1.1 christos just like `start_memory'. (We need the number of inner 513 1.1 christos groups here because we don't have any easy way of finding the 514 1.1 christos corresponding start_memory when we're at a stop_memory.) 
*/ 515 1.1 christos stop_memory, 516 1.1 christos 517 1.1 christos /* Match a duplicate of something remembered. Followed by one 518 1.1 christos byte containing the register number. */ 519 1.1 christos duplicate, 520 1.1 christos 521 1.1 christos /* Fail unless at beginning of line. */ 522 1.1 christos begline, 523 1.1 christos 524 1.1 christos /* Fail unless at end of line. */ 525 1.1 christos endline, 526 1.1 christos 527 1.1 christos /* Succeeds if at beginning of buffer (if emacs) or at beginning 528 1.1 christos of string to be matched (if not). */ 529 1.1 christos begbuf, 530 1.1 christos 531 1.1 christos /* Analogously, for end of buffer/string. */ 532 1.1 christos endbuf, 533 1.1 christos 534 1.1 christos /* Followed by two-byte relative address to which to jump. */ 535 1.1 christos jump, 536 1.1 christos 537 1.1 christos /* Same as jump, but marks the end of an alternative. */ 538 1.1 christos jump_past_alt, 539 1.1 christos 540 1.1 christos /* Followed by two-byte relative address of place to resume at 541 1.1 christos in case of failure. */ 542 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 543 1.1 christos on_failure_jump, 544 1.1 christos 545 1.1 christos /* Like on_failure_jump, but pushes a placeholder instead of the 546 1.1 christos current string position when executed. */ 547 1.1 christos on_failure_keep_string_jump, 548 1.1 christos 549 1.1 christos /* Throw away latest failure point and then jump to following 550 1.1 christos two-byte relative address. */ 551 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 552 1.1 christos pop_failure_jump, 553 1.1 christos 554 1.1 christos /* Change to pop_failure_jump if we know we won't have to backtrack to 555 1.1 christos match; otherwise change to jump. This is used to jump 556 1.1 christos back to the beginning of a repeat. 
If what follows this jump 557 1.1 christos clearly won't match what the repeat does, such that we can be 558 1.1 christos sure that there is no use backtracking out of repetitions 559 1.1 christos already matched, then we change it to a pop_failure_jump. 560 1.1 christos Followed by two-byte address. */ 561 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 562 1.1 christos maybe_pop_jump, 563 1.1 christos 564 1.1 christos /* Jump to following two-byte address, and push a dummy failure 565 1.1 christos point. This failure point will be thrown away if an attempt 566 1.1 christos is made to use it for a failure. A `+' construct makes this 567 1.1 christos before the first repeat. Also used as an intermediary kind 568 1.1 christos of jump when compiling an alternative. */ 569 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 570 1.1 christos dummy_failure_jump, 571 1.1 christos 572 1.1 christos /* Push a dummy failure point and continue. Used at the end of 573 1.1 christos alternatives. */ 574 1.1 christos push_dummy_failure, 575 1.1 christos 576 1.1 christos /* Followed by two-byte relative address and two-byte number n. 577 1.1 christos After matching N times, jump to the address upon failure. */ 578 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 579 1.1 christos succeed_n, 580 1.1 christos 581 1.1 christos /* Followed by two-byte relative address, and two-byte number n. 582 1.1 christos Jump to the address N times, then fail. */ 583 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 584 1.1 christos jump_n, 585 1.1 christos 586 1.1 christos /* Set the following two-byte relative address to the 587 1.1 christos subsequent two-byte number. The address *includes* the two 588 1.1 christos bytes of number. */ 589 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 590 1.1 christos set_number_at, 591 1.1 christos 592 1.1 christos wordchar, /* Matches any word-constituent character. 
*/ 593 1.1 christos notwordchar, /* Matches any char that is not a word-constituent. */ 594 1.1 christos 595 1.1 christos wordbeg, /* Succeeds if at word beginning. */ 596 1.1 christos wordend, /* Succeeds if at word end. */ 597 1.1 christos 598 1.1 christos wordbound, /* Succeeds if at a word boundary. */ 599 1.1 christos notwordbound /* Succeeds if not at a word boundary. */ 600 1.1 christos 601 1.1 christos # ifdef emacs 602 1.1 christos ,before_dot, /* Succeeds if before point. */ 603 1.1 christos at_dot, /* Succeeds if at point. */ 604 1.1 christos after_dot, /* Succeeds if after point. */ 605 1.1 christos 606 1.1 christos /* Matches any character whose syntax is specified. Followed by 607 1.1 christos a byte which contains a syntax code, e.g., Sword. */ 608 1.1 christos syntaxspec, 609 1.1 christos 610 1.1 christos /* Matches any character whose syntax is not that specified. */ 611 1.1 christos notsyntaxspec 612 1.1 christos # endif /* emacs */ 613 1.1 christos } re_opcode_t; 614 1.1 christos #endif /* not INSIDE_RECURSION */ 615 1.1 christos 616 1.1 christos 618 1.1 christos #ifdef BYTE 619 1.1 christos # define CHAR_T char 620 1.1 christos # define UCHAR_T unsigned char 621 1.1 christos # define COMPILED_BUFFER_VAR bufp->buffer 622 1.1 christos # define OFFSET_ADDRESS_SIZE 2 623 1.1 christos # define PREFIX(name) byte_##name 624 1.1 christos # define ARG_PREFIX(name) name 625 1.1 christos # define PUT_CHAR(c) putchar (c) 626 1.1 christos #else 627 1.1 christos # ifdef WCHAR 628 1.1 christos # define CHAR_T wchar_t 629 1.1 christos # define UCHAR_T wchar_t 630 1.1 christos # define COMPILED_BUFFER_VAR wc_buffer 631 1.1 christos # define OFFSET_ADDRESS_SIZE 1 /* the size the STORE_NUMBER macro uses */ 632 1.1 christos # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) 633 1.1 christos # define PREFIX(name) wcs_##name 634 1.1 christos # define ARG_PREFIX(name) c##name 635 1.1 christos /* Should we use wide stream?? 
*/ 636 1.1 christos # define PUT_CHAR(c) printf ("%C", c); 637 1.1 christos # define TRUE 1 638 1.1 christos # define FALSE 0 639 1.1 christos # else 640 1.1 christos # ifdef MBS_SUPPORT 641 1.1 christos # define WCHAR 642 1.1 christos # define INSIDE_RECURSION 643 1.1 christos # include "regex.c" 644 1.1 christos # undef INSIDE_RECURSION 645 1.1 christos # endif 646 1.1 christos # define BYTE 647 1.1 christos # define INSIDE_RECURSION 648 1.1 christos # include "regex.c" 649 1.1 christos # undef INSIDE_RECURSION 650 1.1 christos # endif 651 1.1 christos #endif 652 1.1 christos 653 1.1 christos #ifdef INSIDE_RECURSION 654 1.1 christos /* Common operations on the compiled pattern. */ 655 1.1 christos 656 1.1 christos /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 657 1.1 christos /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 658 1.1 christos 659 1.1 christos # ifdef WCHAR 660 1.1 christos # define STORE_NUMBER(destination, number) \ 661 1.1 christos do { \ 662 1.1 christos *(destination) = (UCHAR_T)(number); \ 663 1.1 christos } while (0) 664 1.1 christos # else /* BYTE */ 665 1.1 christos # define STORE_NUMBER(destination, number) \ 666 1.1 christos do { \ 667 1.1 christos (destination)[0] = (number) & 0377; \ 668 1.1 christos (destination)[1] = (number) >> 8; \ 669 1.1 christos } while (0) 670 1.1 christos # endif /* WCHAR */ 671 1.1 christos 672 1.1 christos /* Same as STORE_NUMBER, except increment DESTINATION to 673 1.1 christos the byte after where the number is stored. Therefore, DESTINATION 674 1.1 christos must be an lvalue. */ 675 1.1 christos /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. 
*/ 676 1.1 christos 677 1.1 christos # define STORE_NUMBER_AND_INCR(destination, number) \ 678 1.1 christos do { \ 679 1.1 christos STORE_NUMBER (destination, number); \ 680 1.1 christos (destination) += OFFSET_ADDRESS_SIZE; \ 681 1.1 christos } while (0) 682 1.1 christos 683 1.1 christos /* Put into DESTINATION a number stored in two contiguous bytes starting 684 1.1 christos at SOURCE. */ 685 1.1 christos /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 686 1.1 christos 687 1.1 christos # ifdef WCHAR 688 1.1 christos # define EXTRACT_NUMBER(destination, source) \ 689 1.1 christos do { \ 690 1.1 christos (destination) = *(source); \ 691 1.1.1.5 christos } while (0) 692 1.1 christos # else /* BYTE */ 693 1.1 christos # define EXTRACT_NUMBER(destination, source) \ 694 1.1 christos do { \ 695 1.1 christos (destination) = *(source) & 0377; \ 696 1.1 christos (destination) += ((unsigned) SIGN_EXTEND_CHAR (*((source) + 1))) << 8; \ 697 1.1 christos } while (0) 698 1.1 christos # endif 699 1.1 christos 700 1.1 christos # ifdef DEBUG 701 1.1 christos static void PREFIX(extract_number) (int *dest, UCHAR_T *source); 702 1.1 christos static void 703 1.1 christos PREFIX(extract_number) (int *dest, UCHAR_T *source) 704 1.1 christos { 705 1.1 christos # ifdef WCHAR 706 1.1 christos *dest = *source; 707 1.1 christos # else /* BYTE */ 708 1.1 christos int temp = SIGN_EXTEND_CHAR (*(source + 1)); 709 1.1 christos *dest = *source & 0377; 710 1.1 christos *dest += temp << 8; 711 1.1 christos # endif 712 1.1 christos } 713 1.1 christos 714 1.1 christos # ifndef EXTRACT_MACROS /* To debug the macros. */ 715 1.1 christos # undef EXTRACT_NUMBER 716 1.1 christos # define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) 717 1.1 christos # endif /* not EXTRACT_MACROS */ 718 1.1 christos 719 1.1 christos # endif /* DEBUG */ 720 1.1 christos 721 1.1 christos /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 722 1.1 christos SOURCE must be an lvalue. 
*/ 723 1.1 christos 724 1.1 christos # define EXTRACT_NUMBER_AND_INCR(destination, source) \ 725 1.1 christos do { \ 726 1.1 christos EXTRACT_NUMBER (destination, source); \ 727 1.1 christos (source) += OFFSET_ADDRESS_SIZE; \ 728 1.1 christos } while (0) 729 1.1 christos 730 1.1 christos # ifdef DEBUG 731 1.1 christos static void PREFIX(extract_number_and_incr) (int *destination, 732 1.1 christos UCHAR_T **source); 733 1.1 christos static void 734 1.1 christos PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source) 735 1.1 christos { 736 1.1 christos PREFIX(extract_number) (destination, *source); 737 1.1 christos *source += OFFSET_ADDRESS_SIZE; 738 1.1 christos } 739 1.1 christos 740 1.1 christos # ifndef EXTRACT_MACROS 741 1.1 christos # undef EXTRACT_NUMBER_AND_INCR 742 1.1 christos # define EXTRACT_NUMBER_AND_INCR(dest, src) \ 743 1.1 christos PREFIX(extract_number_and_incr) (&dest, &src) 744 1.1 christos # endif /* not EXTRACT_MACROS */ 745 1.1 christos 746 1.1 christos # endif /* DEBUG */ 747 1.1 christos 748 1.1 christos 749 1.1 christos 751 1.1 christos /* If DEBUG is defined, Regex prints many voluminous messages about what 752 1.1 christos it is doing (if the variable `debug' is nonzero). If linked with the 753 1.1 christos main program in `iregex.c', you can enter patterns and strings 754 1.1 christos interactively. And if linked with the main program in `main.c' and 755 1.1 christos the other test files, you can run the already-written tests. */ 756 1.1 christos 757 1.1 christos # ifdef DEBUG 758 1.1 christos 759 1.1 christos # ifndef DEFINED_ONCE 760 1.1 christos 761 1.1 christos /* We use standard I/O for debugging. */ 762 1.1 christos # include <stdio.h> 763 1.1 christos 764 1.1 christos /* It is useful to test things that ``must'' be true when debugging. 
*/ 765 1.1 christos # include <assert.h> 766 1.1 christos 767 1.1 christos static int debug; 768 1.1 christos 769 1.1 christos # define DEBUG_STATEMENT(e) e 770 1.1 christos # define DEBUG_PRINT1(x) if (debug) printf (x) 771 1.1 christos # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 772 1.1 christos # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 773 1.1 christos # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 774 1.1 christos # endif /* not DEFINED_ONCE */ 775 1.1 christos 776 1.1 christos # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 777 1.1 christos if (debug) PREFIX(print_partial_compiled_pattern) (s, e) 778 1.1 christos # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 779 1.1 christos if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) 780 1.1 christos 781 1.1 christos 782 1.1 christos /* Print the fastmap in human-readable form. */ 783 1.1 christos 784 1.1 christos # ifndef DEFINED_ONCE 785 1.1 christos void 786 1.1 christos print_fastmap (char *fastmap) 787 1.1 christos { 788 1.1 christos unsigned was_a_range = 0; 789 1.1 christos unsigned i = 0; 790 1.1 christos 791 1.1 christos while (i < (1 << BYTEWIDTH)) 792 1.1 christos { 793 1.1 christos if (fastmap[i++]) 794 1.1 christos { 795 1.1 christos was_a_range = 0; 796 1.1 christos putchar (i - 1); 797 1.1 christos while (i < (1 << BYTEWIDTH) && fastmap[i]) 798 1.1 christos { 799 1.1 christos was_a_range = 1; 800 1.1 christos i++; 801 1.1 christos } 802 1.1 christos if (was_a_range) 803 1.1 christos { 804 1.1 christos printf ("-"); 805 1.1 christos putchar (i - 1); 806 1.1 christos } 807 1.1 christos } 808 1.1 christos } 809 1.1 christos putchar ('\n'); 810 1.1 christos } 811 1.1 christos # endif /* not DEFINED_ONCE */ 812 1.1 christos 813 1.1 christos 814 1.1 christos /* Print a compiled pattern string in human-readable form, starting at 815 1.1 christos the START pointer into it and ending just before the pointer END. 
*/ 816 1.1 christos 817 1.1 christos void 818 1.1 christos PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end) 819 1.1 christos { 820 1.1 christos int mcnt, mcnt2; 821 1.1 christos UCHAR_T *p1; 822 1.1 christos UCHAR_T *p = start; 823 1.1 christos UCHAR_T *pend = end; 824 1.1 christos 825 1.1 christos if (start == NULL) 826 1.1 christos { 827 1.1 christos printf ("(null)\n"); 828 1.1 christos return; 829 1.1 christos } 830 1.1 christos 831 1.1 christos /* Loop over pattern commands. */ 832 1.1 christos while (p < pend) 833 1.1 christos { 834 1.1 christos # ifdef _LIBC 835 1.1 christos printf ("%td:\t", p - start); 836 1.1 christos # else 837 1.1 christos printf ("%ld:\t", (long int) (p - start)); 838 1.1 christos # endif 839 1.1 christos 840 1.1 christos switch ((re_opcode_t) *p++) 841 1.1 christos { 842 1.1 christos case no_op: 843 1.1 christos printf ("/no_op"); 844 1.1 christos break; 845 1.1 christos 846 1.1 christos case exactn: 847 1.1 christos mcnt = *p++; 848 1.1 christos printf ("/exactn/%d", mcnt); 849 1.1 christos do 850 1.1 christos { 851 1.1 christos putchar ('/'); 852 1.1 christos PUT_CHAR (*p++); 853 1.1 christos } 854 1.1 christos while (--mcnt); 855 1.1 christos break; 856 1.1 christos 857 1.1 christos # ifdef MBS_SUPPORT 858 1.1 christos case exactn_bin: 859 1.1 christos mcnt = *p++; 860 1.1 christos printf ("/exactn_bin/%d", mcnt); 861 1.1 christos do 862 1.1 christos { 863 1.1 christos printf("/%lx", (long int) *p++); 864 1.1 christos } 865 1.1 christos while (--mcnt); 866 1.1 christos break; 867 1.1 christos # endif /* MBS_SUPPORT */ 868 1.1 christos 869 1.1 christos case start_memory: 870 1.1 christos mcnt = *p++; 871 1.1 christos printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); 872 1.1 christos break; 873 1.1 christos 874 1.1 christos case stop_memory: 875 1.1 christos mcnt = *p++; 876 1.1 christos printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); 877 1.1 christos break; 878 1.1 christos 879 1.1 christos case 
duplicate: 880 1.1 christos printf ("/duplicate/%ld", (long int) *p++); 881 1.1 christos break; 882 1.1 christos 883 1.1 christos case anychar: 884 1.1 christos printf ("/anychar"); 885 1.1 christos break; 886 1.1 christos 887 1.1 christos case charset: 888 1.1 christos case charset_not: 889 1.1 christos { 890 1.1 christos # ifdef WCHAR 891 1.1 christos int i, length; 892 1.1 christos wchar_t *workp = p; 893 1.1 christos printf ("/charset [%s", 894 1.1 christos (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); 895 1.1 christos p += 5; 896 1.1 christos length = *workp++; /* the length of char_classes */ 897 1.1 christos for (i=0 ; i<length ; i++) 898 1.1 christos printf("[:%lx:]", (long int) *p++); 899 1.1 christos length = *workp++; /* the length of collating_symbol */ 900 1.1 christos for (i=0 ; i<length ;) 901 1.1 christos { 902 1.1 christos printf("[."); 903 1.1 christos while(*p != 0) 904 1.1 christos PUT_CHAR((i++,*p++)); 905 1.1 christos i++,p++; 906 1.1 christos printf(".]"); 907 1.1 christos } 908 1.1 christos length = *workp++; /* the length of equivalence_class */ 909 1.1 christos for (i=0 ; i<length ;) 910 1.1 christos { 911 1.1 christos printf("[="); 912 1.1 christos while(*p != 0) 913 1.1 christos PUT_CHAR((i++,*p++)); 914 1.1 christos i++,p++; 915 1.1 christos printf("=]"); 916 1.1 christos } 917 1.1 christos length = *workp++; /* the length of char_range */ 918 1.1 christos for (i=0 ; i<length ; i++) 919 1.1 christos { 920 1.1 christos wchar_t range_start = *p++; 921 1.1 christos wchar_t range_end = *p++; 922 1.1 christos printf("%C-%C", range_start, range_end); 923 1.1 christos } 924 1.1 christos length = *workp++; /* the length of char */ 925 1.1 christos for (i=0 ; i<length ; i++) 926 1.1 christos printf("%C", *p++); 927 1.1 christos putchar (']'); 928 1.1 christos # else 929 1.1 christos register int c, last = -100; 930 1.1 christos register int in_range = 0; 931 1.1 christos 932 1.1 christos printf ("/charset [%s", 933 1.1 christos 
(re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 934 1.1 christos 935 1.1 christos assert (p + *p < pend); 936 1.1 christos 937 1.1 christos for (c = 0; c < 256; c++) 938 1.1 christos if (c / 8 < *p 939 1.1 christos && (p[1 + (c/8)] & (1 << (c % 8)))) 940 1.1 christos { 941 1.1 christos /* Are we starting a range? */ 942 1.1 christos if (last + 1 == c && ! in_range) 943 1.1 christos { 944 1.1 christos putchar ('-'); 945 1.1 christos in_range = 1; 946 1.1 christos } 947 1.1 christos /* Have we broken a range? */ 948 1.1 christos else if (last + 1 != c && in_range) 949 1.1 christos { 950 1.1 christos putchar (last); 951 1.1 christos in_range = 0; 952 1.1 christos } 953 1.1 christos 954 1.1 christos if (! in_range) 955 1.1 christos putchar (c); 956 1.1 christos 957 1.1 christos last = c; 958 1.1 christos } 959 1.1 christos 960 1.1 christos if (in_range) 961 1.1 christos putchar (last); 962 1.1 christos 963 1.1 christos putchar (']'); 964 1.1 christos 965 1.1 christos p += 1 + *p; 966 1.1 christos # endif /* WCHAR */ 967 1.1 christos } 968 1.1 christos break; 969 1.1 christos 970 1.1 christos case begline: 971 1.1 christos printf ("/begline"); 972 1.1 christos break; 973 1.1 christos 974 1.1 christos case endline: 975 1.1 christos printf ("/endline"); 976 1.1 christos break; 977 1.1 christos 978 1.1 christos case on_failure_jump: 979 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 980 1.1 christos # ifdef _LIBC 981 1.1 christos printf ("/on_failure_jump to %td", p + mcnt - start); 982 1.1 christos # else 983 1.1 christos printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start)); 984 1.1 christos # endif 985 1.1 christos break; 986 1.1 christos 987 1.1 christos case on_failure_keep_string_jump: 988 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 989 1.1 christos # ifdef _LIBC 990 1.1 christos printf ("/on_failure_keep_string_jump to %td", p + mcnt - start); 991 1.1 christos # else 992 1.1 christos printf ("/on_failure_keep_string_jump to 
%ld", 993 1.1 christos (long int) (p + mcnt - start)); 994 1.1 christos # endif 995 1.1 christos break; 996 1.1 christos 997 1.1 christos case dummy_failure_jump: 998 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 999 1.1 christos # ifdef _LIBC 1000 1.1 christos printf ("/dummy_failure_jump to %td", p + mcnt - start); 1001 1.1 christos # else 1002 1.1 christos printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); 1003 1.1 christos # endif 1004 1.1 christos break; 1005 1.1 christos 1006 1.1 christos case push_dummy_failure: 1007 1.1 christos printf ("/push_dummy_failure"); 1008 1.1 christos break; 1009 1.1 christos 1010 1.1 christos case maybe_pop_jump: 1011 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1012 1.1 christos # ifdef _LIBC 1013 1.1 christos printf ("/maybe_pop_jump to %td", p + mcnt - start); 1014 1.1 christos # else 1015 1.1 christos printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); 1016 1.1 christos # endif 1017 1.1 christos break; 1018 1.1 christos 1019 1.1 christos case pop_failure_jump: 1020 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1021 1.1 christos # ifdef _LIBC 1022 1.1 christos printf ("/pop_failure_jump to %td", p + mcnt - start); 1023 1.1 christos # else 1024 1.1 christos printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); 1025 1.1 christos # endif 1026 1.1 christos break; 1027 1.1 christos 1028 1.1 christos case jump_past_alt: 1029 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1030 1.1 christos # ifdef _LIBC 1031 1.1 christos printf ("/jump_past_alt to %td", p + mcnt - start); 1032 1.1 christos # else 1033 1.1 christos printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start)); 1034 1.1 christos # endif 1035 1.1 christos break; 1036 1.1 christos 1037 1.1 christos case jump: 1038 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1039 1.1 christos # ifdef _LIBC 1040 1.1 christos printf ("/jump to %td", p + mcnt - start); 1041 1.1 
christos # else 1042 1.1 christos printf ("/jump to %ld", (long int) (p + mcnt - start)); 1043 1.1 christos # endif 1044 1.1 christos break; 1045 1.1 christos 1046 1.1 christos case succeed_n: 1047 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1048 1.1 christos p1 = p + mcnt; 1049 1.1 christos PREFIX(extract_number_and_incr) (&mcnt2, &p); 1050 1.1 christos # ifdef _LIBC 1051 1.1 christos printf ("/succeed_n to %td, %d times", p1 - start, mcnt2); 1052 1.1 christos # else 1053 1.1 christos printf ("/succeed_n to %ld, %d times", 1054 1.1 christos (long int) (p1 - start), mcnt2); 1055 1.1 christos # endif 1056 1.1 christos break; 1057 1.1 christos 1058 1.1 christos case jump_n: 1059 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1060 1.1 christos p1 = p + mcnt; 1061 1.1 christos PREFIX(extract_number_and_incr) (&mcnt2, &p); 1062 1.1 christos printf ("/jump_n to %d, %d times", p1 - start, mcnt2); 1063 1.1 christos break; 1064 1.1 christos 1065 1.1 christos case set_number_at: 1066 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1067 1.1 christos p1 = p + mcnt; 1068 1.1 christos PREFIX(extract_number_and_incr) (&mcnt2, &p); 1069 1.1 christos # ifdef _LIBC 1070 1.1 christos printf ("/set_number_at location %td to %d", p1 - start, mcnt2); 1071 1.1 christos # else 1072 1.1 christos printf ("/set_number_at location %ld to %d", 1073 1.1 christos (long int) (p1 - start), mcnt2); 1074 1.1 christos # endif 1075 1.1 christos break; 1076 1.1 christos 1077 1.1 christos case wordbound: 1078 1.1 christos printf ("/wordbound"); 1079 1.1 christos break; 1080 1.1 christos 1081 1.1 christos case notwordbound: 1082 1.1 christos printf ("/notwordbound"); 1083 1.1 christos break; 1084 1.1 christos 1085 1.1 christos case wordbeg: 1086 1.1 christos printf ("/wordbeg"); 1087 1.1 christos break; 1088 1.1 christos 1089 1.1 christos case wordend: 1090 1.1 christos printf ("/wordend"); 1091 1.1 christos break; 1092 1.1 christos 1093 1.1 christos # ifdef emacs 1094 
1.1 christos case before_dot: 1095 1.1 christos printf ("/before_dot"); 1096 1.1 christos break; 1097 1.1 christos 1098 1.1 christos case at_dot: 1099 1.1 christos printf ("/at_dot"); 1100 1.1 christos break; 1101 1.1 christos 1102 1.1 christos case after_dot: 1103 1.1 christos printf ("/after_dot"); 1104 1.1 christos break; 1105 1.1 christos 1106 1.1 christos case syntaxspec: 1107 1.1 christos printf ("/syntaxspec"); 1108 1.1 christos mcnt = *p++; 1109 1.1 christos printf ("/%d", mcnt); 1110 1.1 christos break; 1111 1.1 christos 1112 1.1 christos case notsyntaxspec: 1113 1.1 christos printf ("/notsyntaxspec"); 1114 1.1 christos mcnt = *p++; 1115 1.1 christos printf ("/%d", mcnt); 1116 1.1 christos break; 1117 1.1 christos # endif /* emacs */ 1118 1.1 christos 1119 1.1 christos case wordchar: 1120 1.1 christos printf ("/wordchar"); 1121 1.1 christos break; 1122 1.1 christos 1123 1.1 christos case notwordchar: 1124 1.1 christos printf ("/notwordchar"); 1125 1.1 christos break; 1126 1.1 christos 1127 1.1 christos case begbuf: 1128 1.1 christos printf ("/begbuf"); 1129 1.1 christos break; 1130 1.1 christos 1131 1.1 christos case endbuf: 1132 1.1 christos printf ("/endbuf"); 1133 1.1 christos break; 1134 1.1 christos 1135 1.1 christos default: 1136 1.1 christos printf ("?%ld", (long int) *(p-1)); 1137 1.1 christos } 1138 1.1 christos 1139 1.1 christos putchar ('\n'); 1140 1.1 christos } 1141 1.1 christos 1142 1.1 christos # ifdef _LIBC 1143 1.1 christos printf ("%td:\tend of pattern.\n", p - start); 1144 1.1 christos # else 1145 1.1 christos printf ("%ld:\tend of pattern.\n", (long int) (p - start)); 1146 1.1 christos # endif 1147 1.1 christos } 1148 1.1 christos 1149 1.1 christos 1150 1.1 christos void 1151 1.1 christos PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp) 1152 1.1 christos { 1153 1.1 christos UCHAR_T *buffer = (UCHAR_T*) bufp->buffer; 1154 1.1 christos 1155 1.1 christos PREFIX(print_partial_compiled_pattern) (buffer, buffer 1156 1.1 
christos + bufp->used / sizeof(UCHAR_T)); 1157 1.1 christos printf ("%ld bytes used/%ld bytes allocated.\n", 1158 1.1 christos bufp->used, bufp->allocated); 1159 1.1 christos 1160 1.1 christos if (bufp->fastmap_accurate && bufp->fastmap) 1161 1.1 christos { 1162 1.1 christos printf ("fastmap: "); 1163 1.1 christos print_fastmap (bufp->fastmap); 1164 1.1 christos } 1165 1.1 christos 1166 1.1 christos # ifdef _LIBC 1167 1.1 christos printf ("re_nsub: %Zd\t", bufp->re_nsub); 1168 1.1 christos # else 1169 1.1 christos printf ("re_nsub: %ld\t", (long int) bufp->re_nsub); 1170 1.1 christos # endif 1171 1.1 christos printf ("regs_alloc: %d\t", bufp->regs_allocated); 1172 1.1 christos printf ("can_be_null: %d\t", bufp->can_be_null); 1173 1.1 christos printf ("newline_anchor: %d\n", bufp->newline_anchor); 1174 1.1 christos printf ("no_sub: %d\t", bufp->no_sub); 1175 1.1 christos printf ("not_bol: %d\t", bufp->not_bol); 1176 1.1 christos printf ("not_eol: %d\t", bufp->not_eol); 1177 1.1 christos printf ("syntax: %lx\n", bufp->syntax); 1178 1.1 christos /* Perhaps we should print the translate table? 
*/ 1179 1.1 christos } 1180 1.1 christos 1181 1.1 christos 1182 1.1 christos void 1183 1.1 christos PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1, 1184 1.1 christos int size1, const CHAR_T *string2, int size2) 1185 1.1 christos { 1186 1.1 christos int this_char; 1187 1.1 christos 1188 1.1 christos if (where == NULL) 1189 1.1 christos printf ("(null)"); 1190 1.1 christos else 1191 1.1 christos { 1192 1.1 christos int cnt; 1193 1.1 christos 1194 1.1 christos if (FIRST_STRING_P (where)) 1195 1.1 christos { 1196 1.1 christos for (this_char = where - string1; this_char < size1; this_char++) 1197 1.1 christos PUT_CHAR (string1[this_char]); 1198 1.1 christos 1199 1.1 christos where = string2; 1200 1.1 christos } 1201 1.1 christos 1202 1.1 christos cnt = 0; 1203 1.1 christos for (this_char = where - string2; this_char < size2; this_char++) 1204 1.1 christos { 1205 1.1 christos PUT_CHAR (string2[this_char]); 1206 1.1 christos if (++cnt > 100) 1207 1.1 christos { 1208 1.1 christos fputs ("...", stdout); 1209 1.1 christos break; 1210 1.1 christos } 1211 1.1 christos } 1212 1.1 christos } 1213 1.1 christos } 1214 1.1 christos 1215 1.1 christos # ifndef DEFINED_ONCE 1216 1.1 christos void 1217 1.1 christos printchar (int c) 1218 1.1 christos { 1219 1.1 christos putc (c, stderr); 1220 1.1 christos } 1221 1.1 christos # endif 1222 1.1 christos 1223 1.1 christos # else /* not DEBUG */ 1224 1.1 christos 1225 1.1 christos # ifndef DEFINED_ONCE 1226 1.1 christos # undef assert 1227 1.1 christos # define assert(e) 1228 1.1 christos 1229 1.1 christos # define DEBUG_STATEMENT(e) 1230 1.1 christos # define DEBUG_PRINT1(x) 1231 1.1 christos # define DEBUG_PRINT2(x1, x2) 1232 1.1 christos # define DEBUG_PRINT3(x1, x2, x3) 1233 1.1 christos # define DEBUG_PRINT4(x1, x2, x3, x4) 1234 1.1 christos # endif /* not DEFINED_ONCE */ 1235 1.1 christos # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 1236 1.1 christos # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 
1237 1.1 christos 1238 1.1 christos # endif /* not DEBUG */ 1239 1.1 christos 1240 1.1 christos 1241 1.1 christos 1243 1.1 christos # ifdef WCHAR 1244 1.1 christos /* This convert a multibyte string to a wide character string. 1245 1.1 christos And write their correspondances to offset_buffer(see below) 1246 1.1 christos and write whether each wchar_t is binary data to is_binary. 1247 1.1 christos This assume invalid multibyte sequences as binary data. 1248 1.1 christos We assume offset_buffer and is_binary is already allocated 1249 1.1 christos enough space. */ 1250 1.1 christos 1251 1.1 christos static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src, 1252 1.1 christos size_t len, int *offset_buffer, 1253 1.1 christos char *is_binary); 1254 1.1 christos static size_t 1255 1.1 christos convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len, 1256 1.1 christos int *offset_buffer, char *is_binary) 1257 1.1 christos /* It hold correspondances between src(char string) and 1258 1.1 christos dest(wchar_t string) for optimization. 1259 1.1 christos e.g. src = "xxxyzz" 1260 1.1 christos dest = {'X', 'Y', 'Z'} 1261 1.1 christos (each "xxx", "y" and "zz" represent one multibyte character 1262 1.1 christos corresponding to 'X', 'Y' and 'Z'.) 1263 1.1 christos offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")} 1264 1.1 christos = {0, 3, 4, 6} 1265 1.1 christos */ 1266 1.1 christos { 1267 1.1 christos wchar_t *pdest = dest; 1268 1.1 christos const unsigned char *psrc = src; 1269 1.1 christos size_t wc_count = 0; 1270 1.1 christos 1271 1.1 christos mbstate_t mbs; 1272 1.1 christos int i, consumed; 1273 1.1 christos size_t mb_remain = len; 1274 1.1 christos size_t mb_count = 0; 1275 1.1 christos 1276 1.1 christos /* Initialize the conversion state. 
*/ 1277 1.1 christos memset (&mbs, 0, sizeof (mbstate_t)); 1278 1.1 christos 1279 1.1 christos offset_buffer[0] = 0; 1280 1.1 christos for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, 1281 1.1 christos psrc += consumed) 1282 1.1 christos { 1283 1.1 christos #ifdef _LIBC 1284 1.1 christos consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs); 1285 1.1 christos #else 1286 1.1 christos consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); 1287 1.1 christos #endif 1288 1.1 christos 1289 1.1 christos if (consumed <= 0) 1290 1.1 christos /* failed to convert. maybe src contains binary data. 1291 1.1 christos So we consume 1 byte manualy. */ 1292 1.1 christos { 1293 1.1 christos *pdest = *psrc; 1294 1.1 christos consumed = 1; 1295 1.1 christos is_binary[wc_count] = TRUE; 1296 1.1 christos } 1297 1.1 christos else 1298 1.1 christos is_binary[wc_count] = FALSE; 1299 1.1 christos /* In sjis encoding, we use yen sign as escape character in 1300 1.1 christos place of reverse solidus. So we convert 0x5c(yen sign in 1301 1.1 christos sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse 1302 1.1 christos solidus in UCS2). */ 1303 1.1 christos if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) 1304 1.1 christos *pdest = (wchar_t) *psrc; 1305 1.1 christos 1306 1.1 christos offset_buffer[wc_count + 1] = mb_count += consumed; 1307 1.1 christos } 1308 1.1 christos 1309 1.1 christos /* Fill remain of the buffer with sentinel. */ 1310 1.1 christos for (i = wc_count + 1 ; i <= len ; i++) 1311 1.1 christos offset_buffer[i] = mb_count + 1; 1312 1.1 christos 1313 1.1 christos return wc_count; 1314 1.1 christos } 1315 1.1 christos 1316 1.1 christos # endif /* WCHAR */ 1317 1.1 christos 1318 1.1 christos #else /* not INSIDE_RECURSION */ 1319 1.1 christos 1320 1.1 christos /* Set by `re_set_syntax' to the current regexp syntax to recognize. 
Can 1321 1.1 christos also be assigned to arbitrarily: each pattern buffer stores its own 1322 1.1 christos syntax, so it can be changed between regex compilations. */ 1323 1.1 christos /* This has no initializer because initialized variables in Emacs 1324 1.1 christos become read-only after dumping. */ 1325 1.1 christos reg_syntax_t re_syntax_options; 1326 1.1 christos 1327 1.1 christos 1328 1.1 christos /* Specify the precise syntax of regexps for compilation. This provides 1329 1.1 christos for compatibility for various utilities which historically have 1330 1.1 christos different, incompatible syntaxes. 1331 1.1 christos 1332 1.1 christos The argument SYNTAX is a bit mask comprised of the various bits 1333 1.1 christos defined in regex.h. We return the old syntax. */ 1334 1.1 christos 1335 1.1 christos reg_syntax_t 1336 1.1 christos re_set_syntax (reg_syntax_t syntax) 1337 1.1 christos { 1338 1.1 christos reg_syntax_t ret = re_syntax_options; 1339 1.1 christos 1340 1.1 christos re_syntax_options = syntax; 1341 1.1 christos # ifdef DEBUG 1342 1.1 christos if (syntax & RE_DEBUG) 1343 1.1 christos debug = 1; 1344 1.1 christos else if (debug) /* was on but now is not */ 1345 1.1 christos debug = 0; 1346 1.1 christos # endif /* DEBUG */ 1347 1.1 christos return ret; 1348 1.1 christos } 1349 1.1 christos # ifdef _LIBC 1350 1.1 christos weak_alias (__re_set_syntax, re_set_syntax) 1351 1.1 christos # endif 1352 1.1 christos 1353 1.1 christos /* This table gives an error message for each of the error codes listed 1355 1.1 christos in regex.h. Obviously the order here has to be same as there. 1356 1.1 christos POSIX doesn't require that we do anything for REG_NOERROR, 1357 1.1 christos but why not be nice? 
*/ 1358 1.1 christos 1359 1.1 christos static const char *re_error_msgid[] = 1360 1.1 christos { 1361 1.1 christos gettext_noop ("Success"), /* REG_NOERROR */ 1362 1.1 christos gettext_noop ("No match"), /* REG_NOMATCH */ 1363 1.1 christos gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ 1364 1.1 christos gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ 1365 1.1 christos gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ 1366 1.1 christos gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ 1367 1.1 christos gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ 1368 1.1 christos gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ 1369 1.1 christos gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ 1370 1.1 christos gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ 1371 1.1 christos gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ 1372 1.1 christos gettext_noop ("Invalid range end"), /* REG_ERANGE */ 1373 1.1 christos gettext_noop ("Memory exhausted"), /* REG_ESPACE */ 1374 1.1 christos gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ 1375 1.1 christos gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 1376 1.1 christos gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 1377 1.1 christos gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ 1378 1.1 christos }; 1379 1.1 christos 1380 1.1 christos #endif /* INSIDE_RECURSION */ 1382 1.1 christos 1383 1.1 christos #ifndef DEFINED_ONCE 1384 1.1 christos /* Avoiding alloca during matching, to placate r_alloc. */ 1385 1.1 christos 1386 1.1 christos /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1387 1.1 christos searching and matching functions should not call alloca. 
On some 1388 1.1 christos systems, alloca is implemented in terms of malloc, and if we're 1389 1.1 christos using the relocating allocator routines, then malloc could cause a 1390 1.1 christos relocation, which might (if the strings being searched are in the 1391 1.1 christos ralloc heap) shift the data out from underneath the regexp 1392 1.1 christos routines. 1393 1.1 christos 1394 1.1 christos Here's another reason to avoid allocation: Emacs 1395 1.1 christos processes input from X in a signal handler; processing X input may 1396 1.1 christos call malloc; if input arrives while a matching routine is calling 1397 1.1 christos malloc, then we're scrod. But Emacs can't just block input while 1398 1.1 christos calling matching routines; then we don't notice interrupts when 1399 1.1 christos they come in. So, Emacs blocks input around all regexp calls 1400 1.1 christos except the matching calls, which it leaves unprotected, in the 1401 1.1 christos faith that they will not malloc. */ 1402 1.1 christos 1403 1.1 christos /* Normally, this is fine. */ 1404 1.1 christos # define MATCH_MAY_ALLOCATE 1405 1.1 christos 1406 1.1 christos /* When using GNU C, we are not REALLY using the C alloca, no matter 1407 1.1 christos what config.h may say. So don't take precautions for it. */ 1408 1.1 christos # ifdef __GNUC__ 1409 1.1 christos # undef C_ALLOCA 1410 1.1 christos # endif 1411 1.1 christos 1412 1.1 christos /* The match routines may not allocate if (1) they would do it with malloc 1413 1.1 christos and (2) it's not safe for them to use malloc. 1414 1.1 christos Note that if REL_ALLOC is defined, matching would not use malloc for the 1415 1.1 christos failure stack, but we would still use it for the register vectors; 1416 1.1 christos so REL_ALLOC should not affect this. 
*/ 1417 1.1 christos # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 1418 1.1 christos # undef MATCH_MAY_ALLOCATE 1419 1.1 christos # endif 1420 1.1 christos #endif /* not DEFINED_ONCE */ 1421 1.1 christos 1422 1.1 christos #ifdef INSIDE_RECURSION 1424 1.1 christos /* Failure stack declarations and macros; both re_compile_fastmap and 1425 1.1 christos re_match_2 use a failure stack. These have to be macros because of 1426 1.1 christos REGEX_ALLOCATE_STACK. */ 1427 1.1 christos 1428 1.1 christos 1429 1.1 christos /* Number of failure points for which to initially allocate space 1430 1.1 christos when matching. If this number is exceeded, we allocate more 1431 1.1 christos space, so it is not a hard limit. */ 1432 1.1 christos # ifndef INIT_FAILURE_ALLOC 1433 1.1 christos # define INIT_FAILURE_ALLOC 5 1434 1.1 christos # endif 1435 1.1 christos 1436 1.1 christos /* Roughly the maximum number of failure points on the stack. Would be 1437 1.1 christos exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 1438 1.1 christos This is a variable only so users of regex can assign to it; we never 1439 1.1 christos change it ourselves. */ 1440 1.1 christos 1441 1.1 christos # ifdef INT_IS_16BIT 1442 1.1 christos 1443 1.1 christos # ifndef DEFINED_ONCE 1444 1.1 christos # if defined MATCH_MAY_ALLOCATE 1445 1.1 christos /* 4400 was enough to cause a crash on Alpha OSF/1, 1446 1.1 christos whose default stack limit is 2mb. 
*/ 1447 1.1 christos long int re_max_failures = 4000; 1448 1.1 christos # else 1449 1.1 christos long int re_max_failures = 2000; 1450 1.1 christos # endif 1451 1.1 christos # endif 1452 1.1 christos 1453 1.1 christos union PREFIX(fail_stack_elt) 1454 1.1 christos { 1455 1.1 christos UCHAR_T *pointer; 1456 1.1 christos long int integer; 1457 1.1 christos }; 1458 1.1 christos 1459 1.1 christos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 1460 1.1 christos 1461 1.1 christos typedef struct 1462 1.1 christos { 1463 1.1 christos PREFIX(fail_stack_elt_t) *stack; 1464 1.1 christos unsigned long int size; 1465 1.1 christos unsigned long int avail; /* Offset of next open position. */ 1466 1.1 christos } PREFIX(fail_stack_type); 1467 1.1 christos 1468 1.1 christos # else /* not INT_IS_16BIT */ 1469 1.1 christos 1470 1.1 christos # ifndef DEFINED_ONCE 1471 1.1 christos # if defined MATCH_MAY_ALLOCATE 1472 1.1 christos /* 4400 was enough to cause a crash on Alpha OSF/1, 1473 1.1 christos whose default stack limit is 2mb. */ 1474 1.1 christos int re_max_failures = 4000; 1475 1.1 christos # else 1476 1.1 christos int re_max_failures = 2000; 1477 1.1 christos # endif 1478 1.1 christos # endif 1479 1.1 christos 1480 1.1 christos union PREFIX(fail_stack_elt) 1481 1.1 christos { 1482 1.1 christos UCHAR_T *pointer; 1483 1.1 christos int integer; 1484 1.1 christos }; 1485 1.1 christos 1486 1.1 christos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 1487 1.1 christos 1488 1.1 christos typedef struct 1489 1.1 christos { 1490 1.1 christos PREFIX(fail_stack_elt_t) *stack; 1491 1.1 christos unsigned size; 1492 1.1 christos unsigned avail; /* Offset of next open position. 
*/ 1493 1.1 christos } PREFIX(fail_stack_type); 1494 1.1 christos 1495 1.1 christos # endif /* INT_IS_16BIT */ 1496 1.1 christos 1497 1.1 christos # ifndef DEFINED_ONCE 1498 1.1 christos # define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 1499 1.1 christos # define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 1500 1.1 christos # define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1501 1.1 christos # endif 1502 1.1 christos 1503 1.1 christos 1504 1.1 christos /* Define macros to initialize and free the failure stack. 1505 1.1 christos Do `return -2' if the alloc fails. */ 1506 1.1 christos 1507 1.1 christos # ifdef MATCH_MAY_ALLOCATE 1508 1.1 christos # define INIT_FAIL_STACK() \ 1509 1.1 christos do { \ 1510 1.1 christos fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \ 1511 1.1 christos REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \ 1512 1.1 christos \ 1513 1.1 christos if (fail_stack.stack == NULL) \ 1514 1.1 christos return -2; \ 1515 1.1 christos \ 1516 1.1 christos fail_stack.size = INIT_FAILURE_ALLOC; \ 1517 1.1 christos fail_stack.avail = 0; \ 1518 1.1 christos } while (0) 1519 1.1 christos 1520 1.1 christos # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 1521 1.1 christos # else 1522 1.1 christos # define INIT_FAIL_STACK() \ 1523 1.1 christos do { \ 1524 1.1 christos fail_stack.avail = 0; \ 1525 1.1 christos } while (0) 1526 1.1 christos 1527 1.1 christos # define RESET_FAIL_STACK() 1528 1.1 christos # endif 1529 1.1 christos 1530 1.1 christos 1531 1.1 christos /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 1532 1.1 christos 1533 1.1 christos Return 1 if succeeds, and 0 if either ran out of memory 1534 1.1 christos allocating space for it or it was already too large. 1535 1.1 christos 1536 1.1 christos REGEX_REALLOCATE_STACK requires `destination' be declared. 
*/ 1537 1.1 christos 1538 1.1 christos # define DOUBLE_FAIL_STACK(fail_stack) \ 1539 1.1 christos ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ 1540 1.1 christos ? 0 \ 1541 1.1 christos : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \ 1542 1.1 christos REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1543 1.1 christos (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \ 1544 1.1 christos ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\ 1545 1.1 christos \ 1546 1.1 christos (fail_stack).stack == NULL \ 1547 1.1 christos ? 0 \ 1548 1.1 christos : ((fail_stack).size <<= 1, \ 1549 1.1 christos 1))) 1550 1.1 christos 1551 1.1 christos 1552 1.1 christos /* Push pointer POINTER on FAIL_STACK. 1553 1.1 christos Return 1 if was able to do so and 0 if ran out of memory allocating 1554 1.1 christos space to do so. */ 1555 1.1 christos # define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 1556 1.1 christos ((FAIL_STACK_FULL () \ 1557 1.1 christos && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ 1558 1.1 christos ? 0 \ 1559 1.1 christos : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1560 1.1 christos 1)) 1561 1.1 christos 1562 1.1 christos /* Push a pointer value onto the failure stack. 1563 1.1 christos Assumes the variable `fail_stack'. Probably should only 1564 1.1 christos be called from within `PUSH_FAILURE_POINT'. */ 1565 1.1 christos # define PUSH_FAILURE_POINTER(item) \ 1566 1.1 christos fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item) 1567 1.1 christos 1568 1.1 christos /* This pushes an integer-valued item onto the failure stack. 1569 1.1 christos Assumes the variable `fail_stack'. Probably should only 1570 1.1 christos be called from within `PUSH_FAILURE_POINT'. */ 1571 1.1 christos # define PUSH_FAILURE_INT(item) \ 1572 1.1 christos fail_stack.stack[fail_stack.avail++].integer = (item) 1573 1.1 christos 1574 1.1 christos /* Push a fail_stack_elt_t value onto the failure stack. 
1575 1.1 christos Assumes the variable `fail_stack'. Probably should only 1576 1.1 christos be called from within `PUSH_FAILURE_POINT'. */ 1577 1.1 christos # define PUSH_FAILURE_ELT(item) \ 1578 1.1 christos fail_stack.stack[fail_stack.avail++] = (item) 1579 1.1 christos 1580 1.1 christos /* These three POP... operations complement the three PUSH... operations. 1581 1.1 christos All assume that `fail_stack' is nonempty. */ 1582 1.1 christos # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 1583 1.1 christos # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 1584 1.1 christos # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 1585 1.1 christos 1586 1.1 christos /* Used to omit pushing failure point id's when we're not debugging. */ 1587 1.1 christos # ifdef DEBUG 1588 1.1 christos # define DEBUG_PUSH PUSH_FAILURE_INT 1589 1.1 christos # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () 1590 1.1 christos # else 1591 1.1 christos # define DEBUG_PUSH(item) 1592 1.1 christos # define DEBUG_POP(item_addr) 1593 1.1 christos # endif 1594 1.1 christos 1595 1.1 christos 1596 1.1 christos /* Push the information about the state we will need 1597 1.1 christos if we ever fail back to it. 1598 1.1 christos 1599 1.1 christos Requires variables fail_stack, regstart, regend, reg_info, and 1600 1.1 christos num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' 1601 1.1 christos be declared. 1602 1.1 christos 1603 1.1 christos Does `return FAILURE_CODE' if runs out of memory. */ 1604 1.1 christos 1605 1.1 christos # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ 1606 1.1 christos do { \ 1607 1.1 christos char *destination; \ 1608 1.1 christos /* Must be int, so when we don't save any registers, the arithmetic \ 1609 1.1 christos of 0 + -1 isn't done as unsigned. 
*/ \ 1610 1.1 christos /* Can't be int, since there is not a shred of a guarantee that int \ 1611 1.1 christos is wide enough to hold a value of something to which pointer can \ 1612 1.1 christos be assigned */ \ 1613 1.1 christos active_reg_t this_reg; \ 1614 1.1 christos \ 1615 1.1 christos DEBUG_STATEMENT (failure_id++); \ 1616 1.1 christos DEBUG_STATEMENT (nfailure_points_pushed++); \ 1617 1.1 christos DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1618 1.1 christos DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 1619 1.1 christos DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1620 1.1 christos \ 1621 1.1 christos DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ 1622 1.1 christos DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 1623 1.1 christos \ 1624 1.1 christos /* Ensure we have enough space allocated for what we will push. */ \ 1625 1.1 christos while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 1626 1.1 christos { \ 1627 1.1 christos if (!DOUBLE_FAIL_STACK (fail_stack)) \ 1628 1.1 christos return failure_code; \ 1629 1.1 christos \ 1630 1.1 christos DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 1631 1.1 christos (fail_stack).size); \ 1632 1.1 christos DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 1633 1.1 christos } \ 1634 1.1 christos \ 1635 1.1 christos /* Push the info, starting with the registers. 
*/ \ 1636 1.1 christos DEBUG_PRINT1 ("\n"); \ 1637 1.1 christos \ 1638 1.1 christos if (1) \ 1639 1.1 christos for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ 1640 1.1 christos this_reg++) \ 1641 1.1 christos { \ 1642 1.1 christos DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ 1643 1.1 christos DEBUG_STATEMENT (num_regs_pushed++); \ 1644 1.1 christos \ 1645 1.1 christos DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 1646 1.1 christos PUSH_FAILURE_POINTER (regstart[this_reg]); \ 1647 1.1 christos \ 1648 1.1 christos DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 1649 1.1 christos PUSH_FAILURE_POINTER (regend[this_reg]); \ 1650 1.1 christos \ 1651 1.1 christos DEBUG_PRINT2 (" info: %p\n ", \ 1652 1.1 christos reg_info[this_reg].word.pointer); \ 1653 1.1 christos DEBUG_PRINT2 (" match_null=%d", \ 1654 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ 1655 1.1 christos DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ 1656 1.1 christos DEBUG_PRINT2 (" matched_something=%d", \ 1657 1.1 christos MATCHED_SOMETHING (reg_info[this_reg])); \ 1658 1.1 christos DEBUG_PRINT2 (" ever_matched=%d", \ 1659 1.1 christos EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ 1660 1.1 christos DEBUG_PRINT1 ("\n"); \ 1661 1.1 christos PUSH_FAILURE_ELT (reg_info[this_reg].word); \ 1662 1.1 christos } \ 1663 1.1 christos \ 1664 1.1 christos DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ 1665 1.1 christos PUSH_FAILURE_INT (lowest_active_reg); \ 1666 1.1 christos \ 1667 1.1 christos DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ 1668 1.1 christos PUSH_FAILURE_INT (highest_active_reg); \ 1669 1.1 christos \ 1670 1.1 christos DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ 1671 1.1 christos DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 1672 1.1 christos PUSH_FAILURE_POINTER (pattern_place); \ 1673 1.1 christos \ 1674 1.1 christos DEBUG_PRINT2 (" Pushing string %p: `", 
string_place); \ 1675 1.1 christos DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 1676 1.1 christos size2); \ 1677 1.1 christos DEBUG_PRINT1 ("'\n"); \ 1678 1.1 christos PUSH_FAILURE_POINTER (string_place); \ 1679 1.1 christos \ 1680 1.1 christos DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 1681 1.1 christos DEBUG_PUSH (failure_id); \ 1682 1.1 christos } while (0) 1683 1.1 christos 1684 1.1 christos # ifndef DEFINED_ONCE 1685 1.1 christos /* This is the number of items that are pushed and popped on the stack 1686 1.1 christos for each register. */ 1687 1.1 christos # define NUM_REG_ITEMS 3 1688 1.1 christos 1689 1.1 christos /* Individual items aside from the registers. */ 1690 1.1 christos # ifdef DEBUG 1691 1.1 christos # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ 1692 1.1 christos # else 1693 1.1 christos # define NUM_NONREG_ITEMS 4 1694 1.1 christos # endif 1695 1.1 christos 1696 1.1 christos /* We push at most this many items on the stack. */ 1697 1.1 christos /* We used to use (num_regs - 1), which is the number of registers 1698 1.1 christos this regexp will save; but that was changed to 5 1699 1.1 christos to avoid stack overflow for a regexp with lots of parens. */ 1700 1.1 christos # define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) 1701 1.1 christos 1702 1.1 christos /* We actually push this many items. */ 1703 1.1 christos # define NUM_FAILURE_ITEMS \ 1704 1.1 christos (((0 \ 1705 1.1 christos ? 0 : highest_active_reg - lowest_active_reg + 1) \ 1706 1.1 christos * NUM_REG_ITEMS) \ 1707 1.1 christos + NUM_NONREG_ITEMS) 1708 1.1 christos 1709 1.1 christos /* How many items can still be added to the stack without overflowing it. */ 1710 1.1 christos # define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) 1711 1.1 christos # endif /* not DEFINED_ONCE */ 1712 1.1 christos 1713 1.1 christos 1714 1.1 christos /* Pops what PUSH_FAIL_STACK pushes. 
1715 1.1 christos 1716 1.1 christos We restore into the parameters, all of which should be lvalues: 1717 1.1 christos STR -- the saved data position. 1718 1.1 christos PAT -- the saved pattern position. 1719 1.1 christos LOW_REG, HIGH_REG -- the highest and lowest active registers. 1720 1.1 christos REGSTART, REGEND -- arrays of string positions. 1721 1.1 christos REG_INFO -- array of information about each subexpression. 1722 1.1 christos 1723 1.1 christos Also assumes the variables `fail_stack' and (if debugging), `bufp', 1724 1.1 christos `pend', `string1', `size1', `string2', and `size2'. */ 1725 1.1 christos # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 1726 1.1 christos { \ 1727 1.1 christos DEBUG_STATEMENT (unsigned failure_id;) \ 1728 1.1 christos active_reg_t this_reg; \ 1729 1.1 christos const UCHAR_T *string_temp; \ 1730 1.1 christos \ 1731 1.1 christos assert (!FAIL_STACK_EMPTY ()); \ 1732 1.1 christos \ 1733 1.1 christos /* Remove failure points and point to how many regs pushed. */ \ 1734 1.1 christos DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 1735 1.1 christos DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 1736 1.1 christos DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 1737 1.1 christos \ 1738 1.1 christos assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 1739 1.1 christos \ 1740 1.1 christos DEBUG_POP (&failure_id); \ 1741 1.1 christos DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 1742 1.1 christos \ 1743 1.1 christos /* If the saved string location is NULL, it came from an \ 1744 1.1 christos on_failure_keep_string_jump opcode, and we want to throw away the \ 1745 1.1 christos saved NULL, thus retaining our current position in the string. 
*/ \ 1746 1.1 christos string_temp = POP_FAILURE_POINTER (); \ 1747 1.1 christos if (string_temp != NULL) \ 1748 1.1 christos str = (const CHAR_T *) string_temp; \ 1749 1.1 christos \ 1750 1.1 christos DEBUG_PRINT2 (" Popping string %p: `", str); \ 1751 1.1 christos DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 1752 1.1 christos DEBUG_PRINT1 ("'\n"); \ 1753 1.1 christos \ 1754 1.1 christos pat = (UCHAR_T *) POP_FAILURE_POINTER (); \ 1755 1.1 christos DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ 1756 1.1 christos DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ 1757 1.1 christos \ 1758 1.1 christos /* Restore register info. */ \ 1759 1.1 christos high_reg = (active_reg_t) POP_FAILURE_INT (); \ 1760 1.1 christos DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ 1761 1.1 christos \ 1762 1.1 christos low_reg = (active_reg_t) POP_FAILURE_INT (); \ 1763 1.1 christos DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ 1764 1.1 christos \ 1765 1.1 christos if (1) \ 1766 1.1 christos for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 1767 1.1 christos { \ 1768 1.1 christos DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ 1769 1.1 christos \ 1770 1.1 christos reg_info[this_reg].word = POP_FAILURE_ELT (); \ 1771 1.1 christos DEBUG_PRINT2 (" info: %p\n", \ 1772 1.1 christos reg_info[this_reg].word.pointer); \ 1773 1.1 christos \ 1774 1.1 christos regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ 1775 1.1 christos DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 1776 1.1 christos \ 1777 1.1 christos regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ 1778 1.1 christos DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 1779 1.1 christos } \ 1780 1.1 christos else \ 1781 1.1 christos { \ 1782 1.1 christos for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 1783 1.1 christos { \ 1784 1.1 christos reg_info[this_reg].word.integer = 0; \ 1785 1.1 christos regend[this_reg] = 0; \ 1786 
1.1 christos regstart[this_reg] = 0; \ 1787 1.1 christos } \ 1788 1.1 christos highest_active_reg = high_reg; \ 1789 1.1 christos } \ 1790 1.1 christos \ 1791 1.1 christos set_regs_matched_done = 0; \ 1792 1.1 christos DEBUG_STATEMENT (nfailure_points_popped++); \ 1793 1.1 christos } /* POP_FAILURE_POINT */ 1794 1.1 christos 1795 1.1 christos /* Structure for per-register (a.k.a. per-group) information. 1797 1.1 christos Other register information, such as the 1798 1.1 christos starting and ending positions (which are addresses), and the list of 1799 1.1 christos inner groups (which is a bits list) are maintained in separate 1800 1.1 christos variables. 1801 1.1 christos 1802 1.1 christos We are making a (strictly speaking) nonportable assumption here: that 1803 1.1 christos the compiler will pack our bit fields into something that fits into 1804 1.1 christos the type of `word', i.e., is something that fits into one item on the 1805 1.1 christos failure stack. */ 1806 1.1 christos 1807 1.1 christos 1808 1.1 christos /* Declarations and macros for re_match_2. */ 1809 1.1 christos 1810 1.1 christos typedef union 1811 1.1 christos { 1812 1.1 christos PREFIX(fail_stack_elt_t) word; 1813 1.1 christos struct 1814 1.1 christos { 1815 1.1 christos /* This field is one if this group can match the empty string, 1816 1.1 christos zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. 
*/ 1817 1.1 christos # define MATCH_NULL_UNSET_VALUE 3 1818 1.1 christos unsigned match_null_string_p : 2; 1819 1.1 christos unsigned is_active : 1; 1820 1.1 christos unsigned matched_something : 1; 1821 1.1 christos unsigned ever_matched_something : 1; 1822 1.1 christos } bits; 1823 1.1 christos } PREFIX(register_info_type); 1824 1.1 christos 1825 1.1 christos # ifndef DEFINED_ONCE 1826 1.1 christos # define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) 1827 1.1 christos # define IS_ACTIVE(R) ((R).bits.is_active) 1828 1.1 christos # define MATCHED_SOMETHING(R) ((R).bits.matched_something) 1829 1.1 christos # define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) 1830 1.1 christos 1831 1.1 christos 1832 1.1 christos /* Call this when have matched a real character; it sets `matched' flags 1833 1.1 christos for the subexpressions which we are currently inside. Also records 1834 1.1 christos that those subexprs have matched. */ 1835 1.1 christos # define SET_REGS_MATCHED() \ 1836 1.1 christos do \ 1837 1.1 christos { \ 1838 1.1 christos if (!set_regs_matched_done) \ 1839 1.1 christos { \ 1840 1.1 christos active_reg_t r; \ 1841 1.1 christos set_regs_matched_done = 1; \ 1842 1.1 christos for (r = lowest_active_reg; r <= highest_active_reg; r++) \ 1843 1.1 christos { \ 1844 1.1 christos MATCHED_SOMETHING (reg_info[r]) \ 1845 1.1 christos = EVER_MATCHED_SOMETHING (reg_info[r]) \ 1846 1.1 christos = 1; \ 1847 1.1 christos } \ 1848 1.1 christos } \ 1849 1.1 christos } \ 1850 1.1 christos while (0) 1851 1.1 christos # endif /* not DEFINED_ONCE */ 1852 1.1 christos 1853 1.1 christos /* Registers are set to a sentinel when they haven't yet matched. */ 1854 1.1 christos static CHAR_T PREFIX(reg_unset_dummy); 1855 1.1 christos # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy)) 1856 1.1 christos # define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 1857 1.1 christos 1858 1.1 christos /* Subroutine declarations and macros for regex_compile. 
*/ 1859 1.1 christos static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg); 1860 1.1 christos static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, 1861 1.1 christos int arg1, int arg2); 1862 1.1 christos static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, 1863 1.1 christos int arg, UCHAR_T *end); 1864 1.1 christos static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, 1865 1.1 christos int arg1, int arg2, UCHAR_T *end); 1866 1.1 christos static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern, 1867 1.1 christos const CHAR_T *p, 1868 1.1 christos reg_syntax_t syntax); 1869 1.1 christos static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p, 1870 1.1 christos const CHAR_T *pend, 1871 1.1 christos reg_syntax_t syntax); 1872 1.1 christos # ifdef WCHAR 1873 1.1 christos static reg_errcode_t wcs_compile_range (CHAR_T range_start, 1874 1.1 christos const CHAR_T **p_ptr, 1875 1.1 christos const CHAR_T *pend, 1876 1.1 christos char *translate, 1877 1.1 christos reg_syntax_t syntax, 1878 1.1 christos UCHAR_T *b, 1879 1.1 christos CHAR_T *char_set); 1880 1.1 christos static void insert_space (int num, CHAR_T *loc, CHAR_T *end); 1881 1.1 christos # else /* BYTE */ 1882 1.1 christos static reg_errcode_t byte_compile_range (unsigned int range_start, 1883 1.1 christos const char **p_ptr, 1884 1.1 christos const char *pend, 1885 1.1 christos char *translate, 1886 1.1 christos reg_syntax_t syntax, 1887 1.1 christos unsigned char *b); 1888 1.1 christos # endif /* WCHAR */ 1889 1.1 christos 1890 1.1 christos /* Fetch the next character in the uncompiled pattern---translating it 1891 1.1 christos if necessary. Also cast from a signed character in the constant 1892 1.1 christos string passed to us by the user to an unsigned char that we can use 1893 1.1 christos as an array index (in, e.g., `translate'). 
*/ 1894 1.1 christos /* ifdef MBS_SUPPORT, we translate only if character <= 0xff, 1895 1.1 christos because it is impossible to allocate 4GB array for some encodings 1896 1.1 christos which have 4 byte character_set like UCS4. */ 1897 1.1 christos # ifndef PATFETCH 1898 1.1 christos # ifdef WCHAR 1899 1.1 christos # define PATFETCH(c) \ 1900 1.1 christos do {if (p == pend) return REG_EEND; \ 1901 1.1 christos c = (UCHAR_T) *p++; \ 1902 1.1 christos if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \ 1903 1.1 christos } while (0) 1904 1.1 christos # else /* BYTE */ 1905 1.1 christos # define PATFETCH(c) \ 1906 1.1 christos do {if (p == pend) return REG_EEND; \ 1907 1.1 christos c = (unsigned char) *p++; \ 1908 1.1 christos if (translate) c = (unsigned char) translate[c]; \ 1909 1.1 christos } while (0) 1910 1.1 christos # endif /* WCHAR */ 1911 1.1 christos # endif 1912 1.1 christos 1913 1.1 christos /* Fetch the next character in the uncompiled pattern, with no 1914 1.1 christos translation. */ 1915 1.1 christos # define PATFETCH_RAW(c) \ 1916 1.1 christos do {if (p == pend) return REG_EEND; \ 1917 1.1 christos c = (UCHAR_T) *p++; \ 1918 1.1 christos } while (0) 1919 1.1 christos 1920 1.1 christos /* Go backwards one character in the pattern. */ 1921 1.1 christos # define PATUNFETCH p-- 1922 1.1 christos 1923 1.1 christos 1924 1.1 christos /* If `translate' is non-null, return translate[D], else just D. We 1925 1.1 christos cast the subscript to translate because some data is declared as 1926 1.1 christos `char *', to avoid warnings when a string constant is passed. But 1927 1.1 christos when we use a character as a subscript we must make it unsigned. */ 1928 1.1 christos /* ifdef MBS_SUPPORT, we translate only if character <= 0xff, 1929 1.1 christos because it is impossible to allocate 4GB array for some encodings 1930 1.1 christos which have 4 byte character_set like UCS4. 
*/ 1931 1.1 christos 1932 1.1 christos # ifndef TRANSLATE 1933 1.1 christos # ifdef WCHAR 1934 1.1 christos # define TRANSLATE(d) \ 1935 1.1 christos ((translate && ((UCHAR_T) (d)) <= 0xff) \ 1936 1.1 christos ? (char) translate[(unsigned char) (d)] : (d)) 1937 1.1 christos # else /* BYTE */ 1938 1.1 christos # define TRANSLATE(d) \ 1939 1.1 christos (translate ? (char) translate[(unsigned char) (d)] : (char) (d)) 1940 1.1 christos # endif /* WCHAR */ 1941 1.1 christos # endif 1942 1.1 christos 1943 1.1 christos 1944 1.1 christos /* Macros for outputting the compiled pattern into `buffer'. */ 1945 1.1 christos 1946 1.1 christos /* If the buffer isn't allocated when it comes in, use this. */ 1947 1.1 christos # define INIT_BUF_SIZE (32 * sizeof(UCHAR_T)) 1948 1.1 christos 1949 1.1 christos /* Make sure we have at least N more bytes of space in buffer. */ 1950 1.1 christos # ifdef WCHAR 1951 1.1 christos # define GET_BUFFER_SPACE(n) \ 1952 1.1 christos while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \ 1953 1.1 christos + (n)*sizeof(CHAR_T)) > bufp->allocated) \ 1954 1.1 christos EXTEND_BUFFER () 1955 1.1 christos # else /* BYTE */ 1956 1.1 christos # define GET_BUFFER_SPACE(n) \ 1957 1.1 christos while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ 1958 1.1 christos EXTEND_BUFFER () 1959 1.1 christos # endif /* WCHAR */ 1960 1.1 christos 1961 1.1 christos /* Make sure we have one more byte of buffer space and then add C to it. */ 1962 1.1 christos # define BUF_PUSH(c) \ 1963 1.1 christos do { \ 1964 1.1 christos GET_BUFFER_SPACE (1); \ 1965 1.1 christos *b++ = (UCHAR_T) (c); \ 1966 1.1 christos } while (0) 1967 1.1 christos 1968 1.1 christos 1969 1.1 christos /* Ensure we have two more bytes of buffer space and then append C1 and C2. 
*/ 1970 1.1 christos # define BUF_PUSH_2(c1, c2) \ 1971 1.1 christos do { \ 1972 1.1 christos GET_BUFFER_SPACE (2); \ 1973 1.1 christos *b++ = (UCHAR_T) (c1); \ 1974 1.1 christos *b++ = (UCHAR_T) (c2); \ 1975 1.1 christos } while (0) 1976 1.1 christos 1977 1.1 christos 1978 1.1 christos /* As with BUF_PUSH_2, except for three bytes. */ 1979 1.1 christos # define BUF_PUSH_3(c1, c2, c3) \ 1980 1.1 christos do { \ 1981 1.1 christos GET_BUFFER_SPACE (3); \ 1982 1.1 christos *b++ = (UCHAR_T) (c1); \ 1983 1.1 christos *b++ = (UCHAR_T) (c2); \ 1984 1.1 christos *b++ = (UCHAR_T) (c3); \ 1985 1.1 christos } while (0) 1986 1.1 christos 1987 1.1 christos /* Store a jump with opcode OP at LOC to location TO. We store a 1988 1.1 christos relative address offset by the three bytes the jump itself occupies. */ 1989 1.1 christos # define STORE_JUMP(op, loc, to) \ 1990 1.1 christos PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE))) 1991 1.1 christos 1992 1.1 christos /* Likewise, for a two-argument jump. */ 1993 1.1 christos # define STORE_JUMP2(op, loc, to, arg) \ 1994 1.1 christos PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg) 1995 1.1 christos 1996 1.1 christos /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 1997 1.1 christos # define INSERT_JUMP(op, loc, to) \ 1998 1.1 christos PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b) 1999 1.1 christos 2000 1.1 christos /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 2001 1.1 christos # define INSERT_JUMP2(op, loc, to, arg) \ 2002 1.1 christos PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\ 2003 1.1 christos arg, b) 2004 1.1 christos 2005 1.1 christos /* This is not an arbitrary limit: the arguments which represent offsets 2006 1.1 christos into the pattern are two bytes long. 
So if 2^16 bytes turns out to 2007 1.1 christos be too small, many things would have to change. */ 2008 1.1 christos /* Any other compiler which, like MSC, has allocation limit below 2^16 2009 1.1 christos bytes will have to use approach similar to what was done below for 2010 1.1 christos MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up 2011 1.1 christos reallocating to 0 bytes. Such thing is not going to work too well. 2012 1.1 christos You have been warned!! */ 2013 1.1 christos # ifndef DEFINED_ONCE 2014 1.1 christos # if defined _MSC_VER && !defined WIN32 2015 1.1 christos /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. 2016 1.1 christos The REALLOC define eliminates a flurry of conversion warnings, 2017 1.1 christos but is not required. */ 2018 1.1 christos # define MAX_BUF_SIZE 65500L 2019 1.1 christos # define REALLOC(p,s) realloc ((p), (size_t) (s)) 2020 1.1 christos # else 2021 1.1 christos # define MAX_BUF_SIZE (1L << 16) 2022 1.1 christos # define REALLOC(p,s) realloc ((p), (s)) 2023 1.1 christos # endif 2024 1.1 christos 2025 1.1 christos /* Extend the buffer by twice its current size via realloc and 2026 1.1 christos reset the pointers that pointed into the old block to point to the 2027 1.1 christos correct places in the new one. If extending the buffer results in it 2028 1.1 christos being larger than MAX_BUF_SIZE, then flag memory exhausted. 
*/ 2029 1.1 christos # if __BOUNDED_POINTERS__ 2030 1.1 christos # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) 2031 1.1 christos # define MOVE_BUFFER_POINTER(P) \ 2032 1.1 christos (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) 2033 1.1 christos # define ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2034 1.1 christos else \ 2035 1.1 christos { \ 2036 1.1 christos SET_HIGH_BOUND (b); \ 2037 1.1 christos SET_HIGH_BOUND (begalt); \ 2038 1.1 christos if (fixup_alt_jump) \ 2039 1.1 christos SET_HIGH_BOUND (fixup_alt_jump); \ 2040 1.1 christos if (laststart) \ 2041 1.1 christos SET_HIGH_BOUND (laststart); \ 2042 1.1 christos if (pending_exact) \ 2043 1.1 christos SET_HIGH_BOUND (pending_exact); \ 2044 1.1 christos } 2045 1.1 christos # else 2046 1.1 christos # define MOVE_BUFFER_POINTER(P) (P) += incr 2047 1.1 christos # define ELSE_EXTEND_BUFFER_HIGH_BOUND 2048 1.1 christos # endif 2049 1.1 christos # endif /* not DEFINED_ONCE */ 2050 1.1 christos 2051 1.1 christos # ifdef WCHAR 2052 1.1 christos # define EXTEND_BUFFER() \ 2053 1.1.1.2 christos do { \ 2054 1.1 christos UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 2055 1.1 christos int wchar_count; \ 2056 1.1 christos if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \ 2057 1.1 christos return REG_ESIZE; \ 2058 1.1 christos bufp->allocated <<= 1; \ 2059 1.1 christos if (bufp->allocated > MAX_BUF_SIZE) \ 2060 1.1 christos bufp->allocated = MAX_BUF_SIZE; \ 2061 1.1 christos /* How many characters the new buffer can have? */ \ 2062 1.1.1.2 christos wchar_count = bufp->allocated / sizeof(UCHAR_T); \ 2063 1.1 christos if (wchar_count == 0) wchar_count = 1; \ 2064 1.1 christos /* Truncate the buffer to CHAR_T align. 
*/ \ 2065 1.1 christos bufp->allocated = wchar_count * sizeof(UCHAR_T); \ 2066 1.1 christos RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \ 2067 1.1 christos bufp->buffer = (char*)COMPILED_BUFFER_VAR; \ 2068 1.1 christos if (COMPILED_BUFFER_VAR == NULL) \ 2069 1.1 christos return REG_ESPACE; \ 2070 1.1 christos /* If the buffer moved, move all the pointers into it. */ \ 2071 1.1 christos if (old_buffer != COMPILED_BUFFER_VAR) \ 2072 1.1 christos { \ 2073 1.1 christos PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer; \ 2074 1.1 christos MOVE_BUFFER_POINTER (b); \ 2075 1.1 christos MOVE_BUFFER_POINTER (begalt); \ 2076 1.1 christos if (fixup_alt_jump) \ 2077 1.1 christos MOVE_BUFFER_POINTER (fixup_alt_jump); \ 2078 1.1 christos if (laststart) \ 2079 1.1 christos MOVE_BUFFER_POINTER (laststart); \ 2080 1.1 christos if (pending_exact) \ 2081 1.1 christos MOVE_BUFFER_POINTER (pending_exact); \ 2082 1.1 christos } \ 2083 1.1 christos ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2084 1.1 christos } while (0) 2085 1.1 christos # else /* BYTE */ 2086 1.1 christos # define EXTEND_BUFFER() \ 2087 1.1 christos do { \ 2088 1.1 christos UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 2089 1.1 christos if (bufp->allocated == MAX_BUF_SIZE) \ 2090 1.1.1.2 christos return REG_ESIZE; \ 2091 1.1 christos bufp->allocated <<= 1; \ 2092 1.1 christos if (bufp->allocated > MAX_BUF_SIZE) \ 2093 1.1 christos bufp->allocated = MAX_BUF_SIZE; \ 2094 1.1 christos bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \ 2095 1.1 christos bufp->allocated); \ 2096 1.1 christos if (COMPILED_BUFFER_VAR == NULL) \ 2097 1.1 christos return REG_ESPACE; \ 2098 1.1 christos /* If the buffer moved, move all the pointers into it. 
*/ \ 2099 1.1 christos if (old_buffer != COMPILED_BUFFER_VAR) \ 2100 1.1 christos { \ 2101 1.1 christos PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer; \ 2102 1.1 christos MOVE_BUFFER_POINTER (b); \ 2103 1.1 christos MOVE_BUFFER_POINTER (begalt); \ 2104 1.1 christos if (fixup_alt_jump) \ 2105 1.1 christos MOVE_BUFFER_POINTER (fixup_alt_jump); \ 2106 1.1 christos if (laststart) \ 2107 1.1 christos MOVE_BUFFER_POINTER (laststart); \ 2108 1.1 christos if (pending_exact) \ 2109 1.1 christos MOVE_BUFFER_POINTER (pending_exact); \ 2110 1.1 christos } \ 2111 1.1 christos ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2112 1.1 christos } while (0) 2113 1.1 christos # endif /* WCHAR */ 2114 1.1 christos 2115 1.1 christos # ifndef DEFINED_ONCE 2116 1.1 christos /* Since we have one byte reserved for the register number argument to 2117 1.1 christos {start,stop}_memory, the maximum number of groups we can report 2118 1.1 christos things about is what fits in that byte. */ 2119 1.1 christos # define MAX_REGNUM 255 2120 1.1 christos 2121 1.1 christos /* But patterns can have more than `MAX_REGNUM' registers. We just 2122 1.1 christos ignore the excess. */ 2123 1.1 christos typedef unsigned regnum_t; 2124 1.1 christos 2125 1.1 christos 2126 1.1 christos /* Macros for the compile stack. */ 2127 1.1 christos 2128 1.1 christos /* Since offsets can go either forwards or backwards, this type needs to 2129 1.1 christos be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 2130 1.1 christos /* int may be not enough when sizeof(int) == 2. 
*/ 2131 1.1 christos typedef long pattern_offset_t; 2132 1.1 christos 2133 1.1 christos typedef struct 2134 1.1 christos { 2135 1.1 christos pattern_offset_t begalt_offset; 2136 1.1 christos pattern_offset_t fixup_alt_jump; 2137 1.1 christos pattern_offset_t inner_group_offset; 2138 1.1 christos pattern_offset_t laststart_offset; 2139 1.1 christos regnum_t regnum; 2140 1.1 christos } compile_stack_elt_t; 2141 1.1 christos 2142 1.1 christos 2143 1.1 christos typedef struct 2144 1.1 christos { 2145 1.1 christos compile_stack_elt_t *stack; 2146 1.1 christos unsigned size; 2147 1.1 christos unsigned avail; /* Offset of next open position. */ 2148 1.1 christos } compile_stack_type; 2149 1.1 christos 2150 1.1 christos 2151 1.1 christos # define INIT_COMPILE_STACK_SIZE 32 2152 1.1 christos 2153 1.1 christos # define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 2154 1.1 christos # define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 2155 1.1 christos 2156 1.1 christos /* The next available element. */ 2157 1.1 christos # define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 2158 1.1 christos 2159 1.1 christos # endif /* not DEFINED_ONCE */ 2160 1.1 christos 2161 1.1 christos /* Set the bit for character C in a list. */ 2162 1.1 christos # ifndef DEFINED_ONCE 2163 1.1 christos # define SET_LIST_BIT(c) \ 2164 1.1 christos (b[((unsigned char) (c)) / BYTEWIDTH] \ 2165 1.1 christos |= 1 << (((unsigned char) c) % BYTEWIDTH)) 2166 1.1 christos # endif /* DEFINED_ONCE */ 2167 1.1 christos 2168 1.1 christos /* Get the next unsigned number in the uncompiled pattern. 
*/ 2169 1.1 christos # define GET_UNSIGNED_NUMBER(num) \ 2170 1.1 christos { \ 2171 1.1 christos while (p != pend) \ 2172 1.1 christos { \ 2173 1.1 christos PATFETCH (c); \ 2174 1.1 christos if (c < '0' || c > '9') \ 2175 1.1 christos break; \ 2176 1.1 christos if (num <= RE_DUP_MAX) \ 2177 1.1 christos { \ 2178 1.1 christos if (num < 0) \ 2179 1.1 christos num = 0; \ 2180 1.1 christos num = num * 10 + c - '0'; \ 2181 1.1 christos } \ 2182 1.1 christos } \ 2183 1.1 christos } 2184 1.1 christos 2185 1.1 christos # ifndef DEFINED_ONCE 2186 1.1 christos # if defined _LIBC || WIDE_CHAR_SUPPORT 2187 1.1 christos /* The GNU C library provides support for user-defined character classes 2188 1.1 christos and the functions from ISO C amendement 1. */ 2189 1.1 christos # ifdef CHARCLASS_NAME_MAX 2190 1.1 christos # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX 2191 1.1 christos # else 2192 1.1 christos /* This shouldn't happen but some implementation might still have this 2193 1.1 christos problem. Use a reasonable default value. */ 2194 1.1 christos # define CHAR_CLASS_MAX_LENGTH 256 2195 1.1 christos # endif 2196 1.1 christos 2197 1.1 christos # ifdef _LIBC 2198 1.1 christos # define IS_CHAR_CLASS(string) __wctype (string) 2199 1.1 christos # else 2200 1.1 christos # define IS_CHAR_CLASS(string) wctype (string) 2201 1.1 christos # endif 2202 1.1 christos # else 2203 1.1 christos # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. 
*/ 2204 1.1 christos 2205 1.1 christos # define IS_CHAR_CLASS(string) \ 2206 1.1 christos (STREQ (string, "alpha") || STREQ (string, "upper") \ 2207 1.1 christos || STREQ (string, "lower") || STREQ (string, "digit") \ 2208 1.1 christos || STREQ (string, "alnum") || STREQ (string, "xdigit") \ 2209 1.1 christos || STREQ (string, "space") || STREQ (string, "print") \ 2210 1.1 christos || STREQ (string, "punct") || STREQ (string, "graph") \ 2211 1.1 christos || STREQ (string, "cntrl") || STREQ (string, "blank")) 2212 1.1 christos # endif 2213 1.1 christos # endif /* DEFINED_ONCE */ 2214 1.1 christos 2215 1.1 christos # ifndef MATCH_MAY_ALLOCATE 2217 1.1 christos 2218 1.1 christos /* If we cannot allocate large objects within re_match_2_internal, 2219 1.1 christos we make the fail stack and register vectors global. 2220 1.1 christos The fail stack, we grow to the maximum size when a regexp 2221 1.1 christos is compiled. 2222 1.1 christos The register vectors, we adjust in size each time we 2223 1.1 christos compile a regexp, according to the number of registers it needs. */ 2224 1.1 christos 2225 1.1 christos static PREFIX(fail_stack_type) fail_stack; 2226 1.1 christos 2227 1.1 christos /* Size with which the following vectors are currently allocated. 2228 1.1 christos That is so we can make them bigger as needed, 2229 1.1 christos but never make them smaller. 
*/ 2230 1.1 christos # ifdef DEFINED_ONCE 2231 1.1 christos static int regs_allocated_size; 2232 1.1 christos 2233 1.1 christos static const char ** regstart, ** regend; 2234 1.1 christos static const char ** old_regstart, ** old_regend; 2235 1.1 christos static const char **best_regstart, **best_regend; 2236 1.1 christos static const char **reg_dummy; 2237 1.1 christos # endif /* DEFINED_ONCE */ 2238 1.1 christos 2239 1.1 christos static PREFIX(register_info_type) *PREFIX(reg_info); 2240 1.1 christos static PREFIX(register_info_type) *PREFIX(reg_info_dummy); 2241 1.1 christos 2242 1.1 christos /* Make the register vectors big enough for NUM_REGS registers, 2243 1.1 christos but don't make them smaller. */ 2244 1.1 christos 2245 1.1 christos static void 2246 1.1 christos PREFIX(regex_grow_registers) (int num_regs) 2247 1.1 christos { 2248 1.1 christos if (num_regs > regs_allocated_size) 2249 1.1 christos { 2250 1.1 christos RETALLOC_IF (regstart, num_regs, const char *); 2251 1.1 christos RETALLOC_IF (regend, num_regs, const char *); 2252 1.1 christos RETALLOC_IF (old_regstart, num_regs, const char *); 2253 1.1 christos RETALLOC_IF (old_regend, num_regs, const char *); 2254 1.1 christos RETALLOC_IF (best_regstart, num_regs, const char *); 2255 1.1 christos RETALLOC_IF (best_regend, num_regs, const char *); 2256 1.1 christos RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type)); 2257 1.1 christos RETALLOC_IF (reg_dummy, num_regs, const char *); 2258 1.1 christos RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type)); 2259 1.1 christos 2260 1.1 christos regs_allocated_size = num_regs; 2261 1.1 christos } 2262 1.1 christos } 2263 1.1 christos 2264 1.1 christos # endif /* not MATCH_MAY_ALLOCATE */ 2265 1.1 christos 2266 1.1 christos # ifndef DEFINED_ONCE 2268 1.1 christos static boolean group_in_compile_stack (compile_stack_type compile_stack, 2269 1.1 christos regnum_t regnum); 2270 1.1 christos # endif /* not DEFINED_ONCE */ 2271 
1.1 christos 2272 1.1 christos /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. 2273 1.1 christos Returns one of error codes defined in `regex.h', or zero for success. 2274 1.1 christos 2275 1.1 christos Assumes the `allocated' (and perhaps `buffer') and `translate' 2276 1.1 christos fields are set in BUFP on entry. 2277 1.1 christos 2278 1.1 christos If it succeeds, results are put in BUFP (if it returns an error, the 2279 1.1 christos contents of BUFP are undefined): 2280 1.1 christos `buffer' is the compiled pattern; 2281 1.1 christos `syntax' is set to SYNTAX; 2282 1.1 christos `used' is set to the length of the compiled pattern; 2283 1.1 christos `fastmap_accurate' is zero; 2284 1.1 christos `re_nsub' is the number of subexpressions in PATTERN; 2285 1.1 christos `not_bol' and `not_eol' are zero; 2286 1.1 christos 2287 1.1 christos The `fastmap' and `newline_anchor' fields are neither 2288 1.1 christos examined nor set. */ 2289 1.1 christos 2290 1.1 christos /* Return, freeing storage we allocated. */ 2291 1.1 christos # ifdef WCHAR 2292 1.1 christos # define FREE_STACK_RETURN(value) \ 2293 1.1 christos return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value) 2294 1.1 christos # else 2295 1.1 christos # define FREE_STACK_RETURN(value) \ 2296 1.1 christos return (free (compile_stack.stack), value) 2297 1.1 christos # endif /* WCHAR */ 2298 1.1 christos 2299 1.1 christos static reg_errcode_t 2300 1.1 christos PREFIX(regex_compile) (const char *ARG_PREFIX(pattern), 2301 1.1 christos size_t ARG_PREFIX(size), reg_syntax_t syntax, 2302 1.1 christos struct re_pattern_buffer *bufp) 2303 1.1 christos { 2304 1.1 christos /* We fetch characters from PATTERN here. Even though PATTERN is 2305 1.1 christos `char *' (i.e., signed), we declare these variables as unsigned, so 2306 1.1 christos they can be reliably used as array indices. 
*/ 2307 1.1 christos register UCHAR_T c, c1; 2308 1.1 christos 2309 1.1 christos #ifdef WCHAR 2310 1.1 christos /* A temporary space to keep wchar_t pattern and compiled pattern. */ 2311 1.1 christos CHAR_T *pattern, *COMPILED_BUFFER_VAR; 2312 1.1 christos size_t size; 2313 1.1 christos /* offset buffer for optimization. See convert_mbs_to_wc. */ 2314 1.1 christos int *mbs_offset = NULL; 2315 1.1 christos /* It hold whether each wchar_t is binary data or not. */ 2316 1.1 christos char *is_binary = NULL; 2317 1.1 christos /* A flag whether exactn is handling binary data or not. */ 2318 1.1 christos char is_exactn_bin = FALSE; 2319 1.1 christos #endif /* WCHAR */ 2320 1.1 christos 2321 1.1 christos /* A random temporary spot in PATTERN. */ 2322 1.1 christos const CHAR_T *p1; 2323 1.1 christos 2324 1.1 christos /* Points to the end of the buffer, where we should append. */ 2325 1.1 christos register UCHAR_T *b; 2326 1.1 christos 2327 1.1 christos /* Keeps track of unclosed groups. */ 2328 1.1 christos compile_stack_type compile_stack; 2329 1.1 christos 2330 1.1 christos /* Points to the current (ending) position in the pattern. */ 2331 1.1 christos #ifdef WCHAR 2332 1.1 christos const CHAR_T *p; 2333 1.1 christos const CHAR_T *pend; 2334 1.1 christos #else /* BYTE */ 2335 1.1 christos const CHAR_T *p = pattern; 2336 1.1 christos const CHAR_T *pend = pattern + size; 2337 1.1 christos #endif /* WCHAR */ 2338 1.1 christos 2339 1.1 christos /* How to translate the characters in the pattern. */ 2340 1.1 christos RE_TRANSLATE_TYPE translate = bufp->translate; 2341 1.1 christos 2342 1.1 christos /* Address of the count-byte of the most recently inserted `exactn' 2343 1.1 christos command. This makes it possible to tell if a new exact-match 2344 1.1 christos character can be added to that command or if the character requires 2345 1.1 christos a new `exactn' command. 
*/ 2346 1.1 christos UCHAR_T *pending_exact = 0; 2347 1.1 christos 2348 1.1 christos /* Address of start of the most recently finished expression. 2349 1.1 christos This tells, e.g., postfix * where to find the start of its 2350 1.1 christos operand. Reset at the beginning of groups and alternatives. */ 2351 1.1 christos UCHAR_T *laststart = 0; 2352 1.1 christos 2353 1.1 christos /* Address of beginning of regexp, or inside of last group. */ 2354 1.1 christos UCHAR_T *begalt; 2355 1.1 christos 2356 1.1 christos /* Address of the place where a forward jump should go to the end of 2357 1.1 christos the containing expression. Each alternative of an `or' -- except the 2358 1.1 christos last -- ends with a forward jump of this sort. */ 2359 1.1 christos UCHAR_T *fixup_alt_jump = 0; 2360 1.1 christos 2361 1.1 christos /* Counts open-groups as they are encountered. Remembered for the 2362 1.1 christos matching close-group on the compile stack, so the same register 2363 1.1 christos number is put in the stop_memory as the start_memory. */ 2364 1.1 christos regnum_t regnum = 0; 2365 1.1 christos 2366 1.1 christos #ifdef WCHAR 2367 1.1 christos /* Initialize the wchar_t PATTERN and offset_buffer. 
*/ 2368 1.1 christos p = pend = pattern = TALLOC(csize + 1, CHAR_T); 2369 1.1 christos mbs_offset = TALLOC(csize + 1, int); 2370 1.1 christos is_binary = TALLOC(csize + 1, char); 2371 1.1 christos if (pattern == NULL || mbs_offset == NULL || is_binary == NULL) 2372 1.1 christos { 2373 1.1 christos free(pattern); 2374 1.1 christos free(mbs_offset); 2375 1.1 christos free(is_binary); 2376 1.1 christos return REG_ESPACE; 2377 1.1 christos } 2378 1.1 christos pattern[csize] = L'\0'; /* sentinel */ 2379 1.1 christos size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary); 2380 1.1 christos pend = p + size; 2381 1.1 christos if (size < 0) 2382 1.1 christos { 2383 1.1 christos free(pattern); 2384 1.1 christos free(mbs_offset); 2385 1.1 christos free(is_binary); 2386 1.1 christos return REG_BADPAT; 2387 1.1 christos } 2388 1.1 christos #endif 2389 1.1 christos 2390 1.1 christos #ifdef DEBUG 2391 1.1 christos DEBUG_PRINT1 ("\nCompiling pattern: "); 2392 1.1 christos if (debug) 2393 1.1 christos { 2394 1.1 christos unsigned debug_count; 2395 1.1 christos 2396 1.1 christos for (debug_count = 0; debug_count < size; debug_count++) 2397 1.1 christos PUT_CHAR (pattern[debug_count]); 2398 1.1 christos putchar ('\n'); 2399 1.1 christos } 2400 1.1 christos #endif /* DEBUG */ 2401 1.1 christos 2402 1.1 christos /* Initialize the compile stack. */ 2403 1.1 christos compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 2404 1.1 christos if (compile_stack.stack == NULL) 2405 1.1 christos { 2406 1.1 christos #ifdef WCHAR 2407 1.1 christos free(pattern); 2408 1.1 christos free(mbs_offset); 2409 1.1 christos free(is_binary); 2410 1.1 christos #endif 2411 1.1 christos return REG_ESPACE; 2412 1.1 christos } 2413 1.1 christos 2414 1.1 christos compile_stack.size = INIT_COMPILE_STACK_SIZE; 2415 1.1 christos compile_stack.avail = 0; 2416 1.1 christos 2417 1.1 christos /* Initialize the pattern buffer. 
*/ 2418 1.1 christos bufp->syntax = syntax; 2419 1.1 christos bufp->fastmap_accurate = 0; 2420 1.1 christos bufp->not_bol = bufp->not_eol = 0; 2421 1.1 christos 2422 1.1 christos /* Set `used' to zero, so that if we return an error, the pattern 2423 1.1 christos printer (for debugging) will think there's no pattern. We reset it 2424 1.1 christos at the end. */ 2425 1.1 christos bufp->used = 0; 2426 1.1 christos 2427 1.1 christos /* Always count groups, whether or not bufp->no_sub is set. */ 2428 1.1 christos bufp->re_nsub = 0; 2429 1.1 christos 2430 1.1 christos #if !defined emacs && !defined SYNTAX_TABLE 2431 1.1 christos /* Initialize the syntax table. */ 2432 1.1 christos init_syntax_once (); 2433 1.1 christos #endif 2434 1.1 christos 2435 1.1 christos if (bufp->allocated == 0) 2436 1.1 christos { 2437 1.1 christos if (bufp->buffer) 2438 1.1 christos { /* If zero allocated, but buffer is non-null, try to realloc 2439 1.1 christos enough space. This loses if buffer's address is bogus, but 2440 1.1 christos that is the user's responsibility. */ 2441 1.1 christos #ifdef WCHAR 2442 1.1 christos /* Free bufp->buffer and allocate an array for wchar_t pattern 2443 1.1 christos buffer. */ 2444 1.1 christos free(bufp->buffer); 2445 1.1 christos COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T), 2446 1.1 christos UCHAR_T); 2447 1.1 christos #else 2448 1.1 christos RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T); 2449 1.1 christos #endif /* WCHAR */ 2450 1.1 christos } 2451 1.1 christos else 2452 1.1 christos { /* Caller did not allocate a buffer. Do it for them. 
*/ 2453 1.1 christos COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T), 2454 1.1 christos UCHAR_T); 2455 1.1 christos } 2456 1.1 christos 2457 1.1 christos if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE); 2458 1.1 christos #ifdef WCHAR 2459 1.1 christos bufp->buffer = (char*)COMPILED_BUFFER_VAR; 2460 1.1 christos #endif /* WCHAR */ 2461 1.1 christos bufp->allocated = INIT_BUF_SIZE; 2462 1.1 christos } 2463 1.1 christos #ifdef WCHAR 2464 1.1 christos else 2465 1.1 christos COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer; 2466 1.1 christos #endif 2467 1.1 christos 2468 1.1 christos begalt = b = COMPILED_BUFFER_VAR; 2469 1.1 christos 2470 1.1 christos /* Loop through the uncompiled pattern until we're at the end. */ 2471 1.1 christos while (p != pend) 2472 1.1 christos { 2473 1.1 christos PATFETCH (c); 2474 1.1 christos 2475 1.1 christos switch (c) 2476 1.1 christos { 2477 1.1 christos case '^': 2478 1.1 christos { 2479 1.1 christos if ( /* If at start of pattern, it's an operator. */ 2480 1.1 christos p == pattern + 1 2481 1.1 christos /* If context independent, it's an operator. */ 2482 1.1 christos || syntax & RE_CONTEXT_INDEP_ANCHORS 2483 1.1 christos /* Otherwise, depends on what's come before. */ 2484 1.1 christos || PREFIX(at_begline_loc_p) (pattern, p, syntax)) 2485 1.1 christos BUF_PUSH (begline); 2486 1.1 christos else 2487 1.1 christos goto normal_char; 2488 1.1 christos } 2489 1.1 christos break; 2490 1.1 christos 2491 1.1 christos 2492 1.1 christos case '$': 2493 1.1 christos { 2494 1.1 christos if ( /* If at end of pattern, it's an operator. */ 2495 1.1 christos p == pend 2496 1.1 christos /* If context independent, it's an operator. */ 2497 1.1 christos || syntax & RE_CONTEXT_INDEP_ANCHORS 2498 1.1 christos /* Otherwise, depends on what's next. 
*/ 2499 1.1.1.6 christos || PREFIX(at_endline_loc_p) (p, pend, syntax)) 2500 1.1 christos BUF_PUSH (endline); 2501 1.1 christos else 2502 1.1 christos goto normal_char; 2503 1.1 christos } 2504 1.1 christos break; 2505 1.1 christos 2506 1.1 christos 2507 1.1 christos case '+': 2508 1.1 christos case '?': 2509 1.1 christos if ((syntax & RE_BK_PLUS_QM) 2510 1.1 christos || (syntax & RE_LIMITED_OPS)) 2511 1.1 christos goto normal_char; 2512 1.1 christos /* Fall through. */ 2513 1.1 christos handle_plus: 2514 1.1 christos case '*': 2515 1.1 christos /* If there is no previous pattern... */ 2516 1.1 christos if (!laststart) 2517 1.1 christos { 2518 1.1 christos if (syntax & RE_CONTEXT_INVALID_OPS) 2519 1.1 christos FREE_STACK_RETURN (REG_BADRPT); 2520 1.1 christos else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 2521 1.1 christos goto normal_char; 2522 1.1 christos } 2523 1.1 christos 2524 1.1 christos { 2525 1.1 christos /* Are we optimizing this jump? */ 2526 1.1 christos boolean keep_string_p = false; 2527 1.1 christos 2528 1.1 christos /* 1 means zero (many) matches is allowed. */ 2529 1.1 christos char zero_times_ok = 0, many_times_ok = 0; 2530 1.1 christos 2531 1.1 christos /* If there is a sequence of repetition chars, collapse it 2532 1.1 christos down to just one (the right one). We can't combine 2533 1.1 christos interval operators with these because of, e.g., `a{2}*', 2534 1.1 christos which should only match an even number of `a's. 
*/ 2535 1.1 christos 2536 1.1 christos for (;;) 2537 1.1 christos { 2538 1.1 christos zero_times_ok |= c != '+'; 2539 1.1 christos many_times_ok |= c != '?'; 2540 1.1 christos 2541 1.1 christos if (p == pend) 2542 1.1 christos break; 2543 1.1 christos 2544 1.1 christos PATFETCH (c); 2545 1.1 christos 2546 1.1 christos if (c == '*' 2547 1.1 christos || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 2548 1.1 christos ; 2549 1.1 christos 2550 1.1 christos else if (syntax & RE_BK_PLUS_QM && c == '\\') 2551 1.1 christos { 2552 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2553 1.1 christos 2554 1.1 christos PATFETCH (c1); 2555 1.1 christos if (!(c1 == '+' || c1 == '?')) 2556 1.1 christos { 2557 1.1 christos PATUNFETCH; 2558 1.1 christos PATUNFETCH; 2559 1.1 christos break; 2560 1.1 christos } 2561 1.1 christos 2562 1.1 christos c = c1; 2563 1.1 christos } 2564 1.1 christos else 2565 1.1 christos { 2566 1.1 christos PATUNFETCH; 2567 1.1 christos break; 2568 1.1 christos } 2569 1.1 christos 2570 1.1 christos /* If we get here, we found another repeat character. */ 2571 1.1 christos } 2572 1.1 christos 2573 1.1 christos /* Star, etc. applied to an empty pattern is equivalent 2574 1.1 christos to an empty pattern. */ 2575 1.1 christos if (!laststart) 2576 1.1 christos break; 2577 1.1 christos 2578 1.1 christos /* Now we know whether or not zero matches is allowed 2579 1.1 christos and also whether or not two or more matches is allowed. */ 2580 1.1 christos if (many_times_ok) 2581 1.1 christos { /* More than one repetition is allowed, so put in at the 2582 1.1 christos end a backward relative jump from `b' to before the next 2583 1.1 christos jump we're going to put in below (which jumps from 2584 1.1 christos laststart to after this jump). 2585 1.1 christos 2586 1.1 christos But if we are at the `*' in the exact sequence `.*\n', 2587 1.1 christos insert an unconditional jump backwards to the ., 2588 1.1 christos instead of the beginning of the loop. 
This way we only 2589 1.1 christos push a failure point once, instead of every time 2590 1.1 christos through the loop. */ 2591 1.1 christos assert (p - 1 > pattern); 2592 1.1 christos 2593 1.1 christos /* Allocate the space for the jump. */ 2594 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2595 1.1 christos 2596 1.1 christos /* We know we are not at the first character of the pattern, 2597 1.1 christos because laststart was nonzero. And we've already 2598 1.1 christos incremented `p', by the way, to be the character after 2599 1.1 christos the `*'. Do we have to do something analogous here 2600 1.1 christos for null bytes, because of RE_DOT_NOT_NULL? */ 2601 1.1 christos if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 2602 1.1 christos && zero_times_ok 2603 1.1 christos && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 2604 1.1 christos && !(syntax & RE_DOT_NEWLINE)) 2605 1.1 christos { /* We have .*\n. */ 2606 1.1 christos STORE_JUMP (jump, b, laststart); 2607 1.1 christos keep_string_p = true; 2608 1.1 christos } 2609 1.1 christos else 2610 1.1 christos /* Anything else. */ 2611 1.1 christos STORE_JUMP (maybe_pop_jump, b, laststart - 2612 1.1 christos (1 + OFFSET_ADDRESS_SIZE)); 2613 1.1 christos 2614 1.1 christos /* We've added more stuff to the buffer. */ 2615 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 2616 1.1 christos } 2617 1.1 christos 2618 1.1 christos /* On failure, jump from laststart to b + 3, which will be the 2619 1.1 christos end of the buffer after this jump is inserted. */ 2620 1.1 christos /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of 2621 1.1 christos 'b + 3'. */ 2622 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2623 1.1 christos INSERT_JUMP (keep_string_p ? 
on_failure_keep_string_jump 2624 1.1 christos : on_failure_jump, 2625 1.1 christos laststart, b + 1 + OFFSET_ADDRESS_SIZE); 2626 1.1 christos pending_exact = 0; 2627 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 2628 1.1 christos 2629 1.1 christos if (!zero_times_ok) 2630 1.1 christos { 2631 1.1 christos /* At least one repetition is required, so insert a 2632 1.1 christos `dummy_failure_jump' before the initial 2633 1.1 christos `on_failure_jump' instruction of the loop. This 2634 1.1 christos effects a skip over that instruction the first time 2635 1.1 christos we hit that loop. */ 2636 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2637 1.1 christos INSERT_JUMP (dummy_failure_jump, laststart, laststart + 2638 1.1 christos 2 + 2 * OFFSET_ADDRESS_SIZE); 2639 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 2640 1.1 christos } 2641 1.1 christos } 2642 1.1 christos break; 2643 1.1 christos 2644 1.1 christos 2645 1.1 christos case '.': 2646 1.1 christos laststart = b; 2647 1.1 christos BUF_PUSH (anychar); 2648 1.1 christos break; 2649 1.1 christos 2650 1.1 christos 2651 1.1 christos case '[': 2652 1.1 christos { 2653 1.1 christos boolean had_char_class = false; 2654 1.1 christos #ifdef WCHAR 2655 1.1 christos CHAR_T range_start = 0xffffffff; 2656 1.1 christos #else 2657 1.1 christos unsigned int range_start = 0xffffffff; 2658 1.1 christos #endif 2659 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2660 1.1 christos 2661 1.1 christos #ifdef WCHAR 2662 1.1 christos /* We assume a charset(_not) structure as a wchar_t array. 
2663 1.1 christos charset[0] = (re_opcode_t) charset(_not) 2664 1.1 christos charset[1] = l (= length of char_classes) 2665 1.1 christos charset[2] = m (= length of collating_symbols) 2666 1.1 christos charset[3] = n (= length of equivalence_classes) 2667 1.1 christos charset[4] = o (= length of char_ranges) 2668 1.1 christos charset[5] = p (= length of chars) 2669 1.1 christos 2670 1.1 christos charset[6] = char_class (wctype_t) 2671 1.1 christos charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t) 2672 1.1 christos ... 2673 1.1 christos charset[l+5] = char_class (wctype_t) 2674 1.1 christos 2675 1.1 christos charset[l+6] = collating_symbol (wchar_t) 2676 1.1 christos ... 2677 1.1 christos charset[l+m+5] = collating_symbol (wchar_t) 2678 1.1 christos ifdef _LIBC we use the index if 2679 1.1 christos _NL_COLLATE_SYMB_EXTRAMB instead of 2680 1.1 christos wchar_t string. 2681 1.1 christos 2682 1.1 christos charset[l+m+6] = equivalence_classes (wchar_t) 2683 1.1 christos ... 2684 1.1 christos charset[l+m+n+5] = equivalence_classes (wchar_t) 2685 1.1 christos ifdef _LIBC we use the index in 2686 1.1 christos _NL_COLLATE_WEIGHT instead of 2687 1.1 christos wchar_t string. 2688 1.1 christos 2689 1.1 christos charset[l+m+n+6] = range_start 2690 1.1 christos charset[l+m+n+7] = range_end 2691 1.1 christos ... 2692 1.1 christos charset[l+m+n+2o+4] = range_start 2693 1.1 christos charset[l+m+n+2o+5] = range_end 2694 1.1 christos ifdef _LIBC we use the value looked up 2695 1.1 christos in _NL_COLLATE_COLLSEQ instead of 2696 1.1 christos wchar_t character. 2697 1.1 christos 2698 1.1 christos charset[l+m+n+2o+6] = char 2699 1.1 christos ... 
2700 1.1 christos charset[l+m+n+2o+p+5] = char 2701 1.1 christos 2702 1.1 christos */ 2703 1.1 christos 2704 1.1 christos /* We need at least 6 spaces: the opcode, the length of 2705 1.1 christos char_classes, the length of collating_symbols, the length of 2706 1.1 christos equivalence_classes, the length of char_ranges, the length of 2707 1.1 christos chars. */ 2708 1.1 christos GET_BUFFER_SPACE (6); 2709 1.1 christos 2710 1.1 christos /* Save b as laststart. And We use laststart as the pointer 2711 1.1 christos to the first element of the charset here. 2712 1.1 christos In other words, laststart[i] indicates charset[i]. */ 2713 1.1 christos laststart = b; 2714 1.1 christos 2715 1.1 christos /* We test `*p == '^' twice, instead of using an if 2716 1.1 christos statement, so we only need one BUF_PUSH. */ 2717 1.1 christos BUF_PUSH (*p == '^' ? charset_not : charset); 2718 1.1 christos if (*p == '^') 2719 1.1 christos p++; 2720 1.1 christos 2721 1.1 christos /* Push the length of char_classes, the length of 2722 1.1 christos collating_symbols, the length of equivalence_classes, the 2723 1.1 christos length of char_ranges and the length of chars. */ 2724 1.1 christos BUF_PUSH_3 (0, 0, 0); 2725 1.1 christos BUF_PUSH_2 (0, 0); 2726 1.1 christos 2727 1.1 christos /* Remember the first position in the bracket expression. */ 2728 1.1 christos p1 = p; 2729 1.1 christos 2730 1.1 christos /* charset_not matches newline according to a syntax bit. */ 2731 1.1 christos if ((re_opcode_t) b[-6] == charset_not 2732 1.1 christos && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2733 1.1 christos { 2734 1.1 christos BUF_PUSH('\n'); 2735 1.1 christos laststart[5]++; /* Update the length of characters */ 2736 1.1 christos } 2737 1.1 christos 2738 1.1 christos /* Read in characters and ranges, setting map bits. 
*/ 2739 1.1 christos for (;;) 2740 1.1 christos { 2741 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2742 1.1 christos 2743 1.1 christos PATFETCH (c); 2744 1.1 christos 2745 1.1 christos /* \ might escape characters inside [...] and [^...]. */ 2746 1.1 christos if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 2747 1.1 christos { 2748 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2749 1.1 christos 2750 1.1 christos PATFETCH (c1); 2751 1.1 christos BUF_PUSH(c1); 2752 1.1 christos laststart[5]++; /* Update the length of chars */ 2753 1.1 christos range_start = c1; 2754 1.1 christos continue; 2755 1.1 christos } 2756 1.1 christos 2757 1.1 christos /* Could be the end of the bracket expression. If it's 2758 1.1 christos not (i.e., when the bracket expression is `[]' so 2759 1.1 christos far), the ']' character bit gets set way below. */ 2760 1.1 christos if (c == ']' && p != p1 + 1) 2761 1.1 christos break; 2762 1.1 christos 2763 1.1 christos /* Look ahead to see if it's a range when the last thing 2764 1.1 christos was a character class. */ 2765 1.1 christos if (had_char_class && c == '-' && *p != ']') 2766 1.1 christos FREE_STACK_RETURN (REG_ERANGE); 2767 1.1 christos 2768 1.1 christos /* Look ahead to see if it's a range when the last thing 2769 1.1 christos was a character: if this is a hyphen not at the 2770 1.1 christos beginning or the end of a list, then it's the range 2771 1.1 christos operator. */ 2772 1.1 christos if (c == '-' 2773 1.1 christos && !(p - 2 >= pattern && p[-2] == '[') 2774 1.1 christos && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 2775 1.1 christos && *p != ']') 2776 1.1 christos { 2777 1.1 christos reg_errcode_t ret; 2778 1.1 christos /* Allocate the space for range_start and range_end. */ 2779 1.1 christos GET_BUFFER_SPACE (2); 2780 1.1 christos /* Update the pointer to indicate end of buffer. 
*/ 2781 1.1 christos b += 2; 2782 1.1 christos ret = wcs_compile_range (range_start, &p, pend, translate, 2783 1.1 christos syntax, b, laststart); 2784 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2785 1.1 christos range_start = 0xffffffff; 2786 1.1 christos } 2787 1.1 christos else if (p[0] == '-' && p[1] != ']') 2788 1.1 christos { /* This handles ranges made up of characters only. */ 2789 1.1 christos reg_errcode_t ret; 2790 1.1 christos 2791 1.1 christos /* Move past the `-'. */ 2792 1.1 christos PATFETCH (c1); 2793 1.1 christos /* Allocate the space for range_start and range_end. */ 2794 1.1 christos GET_BUFFER_SPACE (2); 2795 1.1 christos /* Update the pointer to indicate end of buffer. */ 2796 1.1 christos b += 2; 2797 1.1 christos ret = wcs_compile_range (c, &p, pend, translate, syntax, b, 2798 1.1 christos laststart); 2799 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2800 1.1 christos range_start = 0xffffffff; 2801 1.1 christos } 2802 1.1 christos 2803 1.1 christos /* See if we're at the beginning of a possible character 2804 1.1 christos class. */ 2805 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2806 1.1 christos { /* Leave room for the null. */ 2807 1.1 christos char str[CHAR_CLASS_MAX_LENGTH + 1]; 2808 1.1 christos 2809 1.1 christos PATFETCH (c); 2810 1.1 christos c1 = 0; 2811 1.1 christos 2812 1.1 christos /* If pattern is `[[:'. */ 2813 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2814 1.1 christos 2815 1.1 christos for (;;) 2816 1.1 christos { 2817 1.1 christos PATFETCH (c); 2818 1.1 christos if ((c == ':' && *p == ']') || p == pend) 2819 1.1 christos break; 2820 1.1 christos if (c1 < CHAR_CLASS_MAX_LENGTH) 2821 1.1 christos str[c1++] = c; 2822 1.1 christos else 2823 1.1 christos /* This is in any case an invalid class name. 
*/ 2824 1.1 christos str[0] = '\0'; 2825 1.1 christos } 2826 1.1 christos str[c1] = '\0'; 2827 1.1 christos 2828 1.1 christos /* If isn't a word bracketed by `[:' and `:]': 2829 1.1 christos undo the ending character, the letters, and leave 2830 1.1 christos the leading `:' and `[' (but store them as character). */ 2831 1.1 christos if (c == ':' && *p == ']') 2832 1.1 christos { 2833 1.1 christos wctype_t wt; 2834 1.1 christos uintptr_t alignedp; 2835 1.1 christos 2836 1.1 christos /* Query the character class as wctype_t. */ 2837 1.1 christos wt = IS_CHAR_CLASS (str); 2838 1.1 christos if (wt == 0) 2839 1.1 christos FREE_STACK_RETURN (REG_ECTYPE); 2840 1.1 christos 2841 1.1 christos /* Throw away the ] at the end of the character 2842 1.1 christos class. */ 2843 1.1 christos PATFETCH (c); 2844 1.1 christos 2845 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2846 1.1 christos 2847 1.1 christos /* Allocate the space for character class. */ 2848 1.1 christos GET_BUFFER_SPACE(CHAR_CLASS_SIZE); 2849 1.1 christos /* Update the pointer to indicate end of buffer. */ 2850 1.1 christos b += CHAR_CLASS_SIZE; 2851 1.1 christos /* Move data which follow character classes 2852 1.1 christos not to violate the data. */ 2853 1.1 christos insert_space(CHAR_CLASS_SIZE, 2854 1.1 christos laststart + 6 + laststart[1], 2855 1.1 christos b - 1); 2856 1.1 christos alignedp = ((uintptr_t)(laststart + 6 + laststart[1]) 2857 1.1 christos + __alignof__(wctype_t) - 1) 2858 1.1 christos & ~(uintptr_t)(__alignof__(wctype_t) - 1); 2859 1.1 christos /* Store the character class. 
*/ 2860 1.1 christos *((wctype_t*)alignedp) = wt; 2861 1.1 christos /* Update length of char_classes */ 2862 1.1 christos laststart[1] += CHAR_CLASS_SIZE; 2863 1.1 christos 2864 1.1 christos had_char_class = true; 2865 1.1 christos } 2866 1.1 christos else 2867 1.1 christos { 2868 1.1 christos c1++; 2869 1.1 christos while (c1--) 2870 1.1 christos PATUNFETCH; 2871 1.1 christos BUF_PUSH ('['); 2872 1.1 christos BUF_PUSH (':'); 2873 1.1 christos laststart[5] += 2; /* Update the length of characters */ 2874 1.1 christos range_start = ':'; 2875 1.1 christos had_char_class = false; 2876 1.1 christos } 2877 1.1 christos } 2878 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '=' 2879 1.1 christos || *p == '.')) 2880 1.1 christos { 2881 1.1 christos CHAR_T str[128]; /* Should be large enough. */ 2882 1.1 christos CHAR_T delim = *p; /* '=' or '.' */ 2883 1.1 christos # ifdef _LIBC 2884 1.1 christos uint32_t nrules = 2885 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 2886 1.1 christos # endif 2887 1.1 christos PATFETCH (c); 2888 1.1 christos c1 = 0; 2889 1.1 christos 2890 1.1 christos /* If pattern is `[[=' or '[[.'. */ 2891 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2892 1.1 christos 2893 1.1 christos for (;;) 2894 1.1 christos { 2895 1.1 christos PATFETCH (c); 2896 1.1 christos if ((c == delim && *p == ']') || p == pend) 2897 1.1 christos break; 2898 1.1 christos if (c1 < sizeof (str) - 1) 2899 1.1 christos str[c1++] = c; 2900 1.1 christos else 2901 1.1 christos /* This is in any case an invalid class name. */ 2902 1.1 christos str[0] = '\0'; 2903 1.1 christos } 2904 1.1 christos str[c1] = '\0'; 2905 1.1 christos 2906 1.1 christos if (c == delim && *p == ']' && str[0] != '\0') 2907 1.1 christos { 2908 1.1 christos unsigned int i, offset; 2909 1.1 christos /* If we have no collation data we use the default 2910 1.1 christos collation in which each character is in a class 2911 1.1 christos by itself. 
It also means that ASCII is the 2912 1.1 christos character set and therefore we cannot have character 2913 1.1 christos with more than one byte in the multibyte 2914 1.1 christos representation. */ 2915 1.1 christos 2916 1.1 christos /* If not defined _LIBC, we push the name and 2917 1.1 christos `\0' for the sake of matching performance. */ 2918 1.1 christos int datasize = c1 + 1; 2919 1.1 christos 2920 1.1 christos # ifdef _LIBC 2921 1.1 christos int32_t idx = 0; 2922 1.1 christos if (nrules == 0) 2923 1.1 christos # endif 2924 1.1 christos { 2925 1.1 christos if (c1 != 1) 2926 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 2927 1.1 christos } 2928 1.1 christos # ifdef _LIBC 2929 1.1 christos else 2930 1.1 christos { 2931 1.1 christos const int32_t *table; 2932 1.1 christos const int32_t *weights; 2933 1.1 christos const int32_t *extra; 2934 1.1 christos const int32_t *indirect; 2935 1.1 christos wint_t *cp; 2936 1.1 christos 2937 1.1 christos /* This #include defines a local function! */ 2938 1.1 christos # include <locale/weightwc.h> 2939 1.1 christos 2940 1.1 christos if(delim == '=') 2941 1.1 christos { 2942 1.1 christos /* We push the index for equivalence class. */ 2943 1.1 christos cp = (wint_t*)str; 2944 1.1 christos 2945 1.1 christos table = (const int32_t *) 2946 1.1 christos _NL_CURRENT (LC_COLLATE, 2947 1.1 christos _NL_COLLATE_TABLEWC); 2948 1.1 christos weights = (const int32_t *) 2949 1.1 christos _NL_CURRENT (LC_COLLATE, 2950 1.1 christos _NL_COLLATE_WEIGHTWC); 2951 1.1 christos extra = (const int32_t *) 2952 1.1 christos _NL_CURRENT (LC_COLLATE, 2953 1.1 christos _NL_COLLATE_EXTRAWC); 2954 1.1 christos indirect = (const int32_t *) 2955 1.1 christos _NL_CURRENT (LC_COLLATE, 2956 1.1 christos _NL_COLLATE_INDIRECTWC); 2957 1.1 christos 2958 1.1 christos idx = findidx ((const wint_t**)&cp); 2959 1.1 christos if (idx == 0 || cp < (wint_t*) str + c1) 2960 1.1 christos /* This is no valid character. 
*/ 2961 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 2962 1.1 christos 2963 1.1 christos str[0] = (wchar_t)idx; 2964 1.1 christos } 2965 1.1 christos else /* delim == '.' */ 2966 1.1 christos { 2967 1.1 christos /* We push collation sequence value 2968 1.1 christos for collating symbol. */ 2969 1.1 christos int32_t table_size; 2970 1.1 christos const int32_t *symb_table; 2971 1.1 christos const unsigned char *extra; 2972 1.1 christos int32_t idx; 2973 1.1 christos int32_t elem; 2974 1.1 christos int32_t second; 2975 1.1 christos int32_t hash; 2976 1.1 christos char char_str[c1]; 2977 1.1 christos 2978 1.1 christos /* We have to convert the name to a single-byte 2979 1.1 christos string. This is possible since the names 2980 1.1 christos consist of ASCII characters and the internal 2981 1.1 christos representation is UCS4. */ 2982 1.1 christos for (i = 0; i < c1; ++i) 2983 1.1 christos char_str[i] = str[i]; 2984 1.1 christos 2985 1.1 christos table_size = 2986 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, 2987 1.1 christos _NL_COLLATE_SYMB_HASH_SIZEMB); 2988 1.1 christos symb_table = (const int32_t *) 2989 1.1 christos _NL_CURRENT (LC_COLLATE, 2990 1.1 christos _NL_COLLATE_SYMB_TABLEMB); 2991 1.1 christos extra = (const unsigned char *) 2992 1.1 christos _NL_CURRENT (LC_COLLATE, 2993 1.1 christos _NL_COLLATE_SYMB_EXTRAMB); 2994 1.1 christos 2995 1.1 christos /* Locate the character in the hashing table. */ 2996 1.1 christos hash = elem_hash (char_str, c1); 2997 1.1 christos 2998 1.1 christos idx = 0; 2999 1.1 christos elem = hash % table_size; 3000 1.1 christos second = hash % (table_size - 2); 3001 1.1 christos while (symb_table[2 * elem] != 0) 3002 1.1 christos { 3003 1.1 christos /* First compare the hashing value. 
*/ 3004 1.1 christos if (symb_table[2 * elem] == hash 3005 1.1 christos && c1 == extra[symb_table[2 * elem + 1]] 3006 1.1 christos && memcmp (char_str, 3007 1.1 christos &extra[symb_table[2 * elem + 1] 3008 1.1 christos + 1], c1) == 0) 3009 1.1 christos { 3010 1.1 christos /* Yep, this is the entry. */ 3011 1.1 christos idx = symb_table[2 * elem + 1]; 3012 1.1 christos idx += 1 + extra[idx]; 3013 1.1 christos break; 3014 1.1 christos } 3015 1.1 christos 3016 1.1 christos /* Next entry. */ 3017 1.1 christos elem += second; 3018 1.1 christos } 3019 1.1 christos 3020 1.1 christos if (symb_table[2 * elem] != 0) 3021 1.1 christos { 3022 1.1 christos /* Compute the index of the byte sequence 3023 1.1 christos in the table. */ 3024 1.1 christos idx += 1 + extra[idx]; 3025 1.1 christos /* Adjust for the alignment. */ 3026 1.1 christos idx = (idx + 3) & ~3; 3027 1.1 christos 3028 1.1 christos str[0] = (wchar_t) idx + 4; 3029 1.1 christos } 3030 1.1 christos else if (symb_table[2 * elem] == 0 && c1 == 1) 3031 1.1 christos { 3032 1.1 christos /* No valid character. Match it as a 3033 1.1 christos single byte character. */ 3034 1.1 christos had_char_class = false; 3035 1.1 christos BUF_PUSH(str[0]); 3036 1.1 christos /* Update the length of characters */ 3037 1.1 christos laststart[5]++; 3038 1.1 christos range_start = str[0]; 3039 1.1 christos 3040 1.1 christos /* Throw away the ] at the end of the 3041 1.1 christos collating symbol. */ 3042 1.1 christos PATFETCH (c); 3043 1.1 christos /* exit from the switch block. */ 3044 1.1 christos continue; 3045 1.1 christos } 3046 1.1 christos else 3047 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3048 1.1 christos } 3049 1.1 christos datasize = 1; 3050 1.1 christos } 3051 1.1 christos # endif 3052 1.1 christos /* Throw away the ] at the end of the equivalence 3053 1.1 christos class (or collating symbol). 
*/ 3054 1.1 christos PATFETCH (c); 3055 1.1 christos 3056 1.1 christos /* Allocate the space for the equivalence class 3057 1.1 christos (or collating symbol) (and '\0' if needed). */ 3058 1.1 christos GET_BUFFER_SPACE(datasize); 3059 1.1 christos /* Update the pointer to indicate end of buffer. */ 3060 1.1 christos b += datasize; 3061 1.1 christos 3062 1.1 christos if (delim == '=') 3063 1.1 christos { /* equivalence class */ 3064 1.1 christos /* Calculate the offset of char_ranges, 3065 1.1 christos which is next to equivalence_classes. */ 3066 1.1 christos offset = laststart[1] + laststart[2] 3067 1.1 christos + laststart[3] +6; 3068 1.1 christos /* Insert space. */ 3069 1.1 christos insert_space(datasize, laststart + offset, b - 1); 3070 1.1 christos 3071 1.1 christos /* Write the equivalence_class and \0. */ 3072 1.1 christos for (i = 0 ; i < datasize ; i++) 3073 1.1 christos laststart[offset + i] = str[i]; 3074 1.1 christos 3075 1.1 christos /* Update the length of equivalence_classes. */ 3076 1.1 christos laststart[3] += datasize; 3077 1.1 christos had_char_class = true; 3078 1.1 christos } 3079 1.1 christos else /* delim == '.' */ 3080 1.1 christos { /* collating symbol */ 3081 1.1 christos /* Calculate the offset of the equivalence_classes, 3082 1.1 christos which is next to collating_symbols. */ 3083 1.1 christos offset = laststart[1] + laststart[2] + 6; 3084 1.1 christos /* Insert space and write the collationg_symbol 3085 1.1 christos and \0. */ 3086 1.1 christos insert_space(datasize, laststart + offset, b-1); 3087 1.1 christos for (i = 0 ; i < datasize ; i++) 3088 1.1 christos laststart[offset + i] = str[i]; 3089 1.1 christos 3090 1.1 christos /* In re_match_2_internal if range_start < -1, we 3091 1.1 christos assume -range_start is the offset of the 3092 1.1 christos collating symbol which is specified as 3093 1.1 christos the character of the range start. 
So we assign 3094 1.1 christos -(laststart[1] + laststart[2] + 6) to 3095 1.1 christos range_start. */ 3096 1.1 christos range_start = -(laststart[1] + laststart[2] + 6); 3097 1.1 christos /* Update the length of collating_symbol. */ 3098 1.1 christos laststart[2] += datasize; 3099 1.1 christos had_char_class = false; 3100 1.1 christos } 3101 1.1 christos } 3102 1.1 christos else 3103 1.1 christos { 3104 1.1 christos c1++; 3105 1.1 christos while (c1--) 3106 1.1 christos PATUNFETCH; 3107 1.1 christos BUF_PUSH ('['); 3108 1.1 christos BUF_PUSH (delim); 3109 1.1 christos laststart[5] += 2; /* Update the length of characters */ 3110 1.1 christos range_start = delim; 3111 1.1 christos had_char_class = false; 3112 1.1 christos } 3113 1.1 christos } 3114 1.1 christos else 3115 1.1 christos { 3116 1.1 christos had_char_class = false; 3117 1.1 christos BUF_PUSH(c); 3118 1.1 christos laststart[5]++; /* Update the length of characters */ 3119 1.1 christos range_start = c; 3120 1.1 christos } 3121 1.1 christos } 3122 1.1 christos 3123 1.1 christos #else /* BYTE */ 3124 1.1 christos /* Ensure that we have enough space to push a charset: the 3125 1.1 christos opcode, the length count, and the bitset; 34 bytes in all. */ 3126 1.1 christos GET_BUFFER_SPACE (34); 3127 1.1 christos 3128 1.1 christos laststart = b; 3129 1.1 christos 3130 1.1 christos /* We test `*p == '^' twice, instead of using an if 3131 1.1 christos statement, so we only need one BUF_PUSH. */ 3132 1.1 christos BUF_PUSH (*p == '^' ? charset_not : charset); 3133 1.1 christos if (*p == '^') 3134 1.1 christos p++; 3135 1.1 christos 3136 1.1 christos /* Remember the first position in the bracket expression. */ 3137 1.1 christos p1 = p; 3138 1.1 christos 3139 1.1 christos /* Push the number of bytes in the bitmap. */ 3140 1.1 christos BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 3141 1.1 christos 3142 1.1 christos /* Clear the whole map. 
*/ 3143 1.1 christos bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 3144 1.1 christos 3145 1.1 christos /* charset_not matches newline according to a syntax bit. */ 3146 1.1 christos if ((re_opcode_t) b[-2] == charset_not 3147 1.1 christos && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 3148 1.1 christos SET_LIST_BIT ('\n'); 3149 1.1 christos 3150 1.1 christos /* Read in characters and ranges, setting map bits. */ 3151 1.1 christos for (;;) 3152 1.1 christos { 3153 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3154 1.1 christos 3155 1.1 christos PATFETCH (c); 3156 1.1 christos 3157 1.1 christos /* \ might escape characters inside [...] and [^...]. */ 3158 1.1 christos if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 3159 1.1 christos { 3160 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 3161 1.1 christos 3162 1.1 christos PATFETCH (c1); 3163 1.1 christos SET_LIST_BIT (c1); 3164 1.1 christos range_start = c1; 3165 1.1 christos continue; 3166 1.1 christos } 3167 1.1 christos 3168 1.1 christos /* Could be the end of the bracket expression. If it's 3169 1.1 christos not (i.e., when the bracket expression is `[]' so 3170 1.1 christos far), the ']' character bit gets set way below. */ 3171 1.1 christos if (c == ']' && p != p1 + 1) 3172 1.1 christos break; 3173 1.1 christos 3174 1.1 christos /* Look ahead to see if it's a range when the last thing 3175 1.1 christos was a character class. */ 3176 1.1 christos if (had_char_class && c == '-' && *p != ']') 3177 1.1 christos FREE_STACK_RETURN (REG_ERANGE); 3178 1.1 christos 3179 1.1 christos /* Look ahead to see if it's a range when the last thing 3180 1.1 christos was a character: if this is a hyphen not at the 3181 1.1 christos beginning or the end of a list, then it's the range 3182 1.1 christos operator. 
*/ 3183 1.1 christos if (c == '-' 3184 1.1 christos && !(p - 2 >= pattern && p[-2] == '[') 3185 1.1 christos && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 3186 1.1 christos && *p != ']') 3187 1.1 christos { 3188 1.1 christos reg_errcode_t ret 3189 1.1 christos = byte_compile_range (range_start, &p, pend, translate, 3190 1.1 christos syntax, b); 3191 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 3192 1.1 christos range_start = 0xffffffff; 3193 1.1 christos } 3194 1.1 christos 3195 1.1 christos else if (p[0] == '-' && p[1] != ']') 3196 1.1 christos { /* This handles ranges made up of characters only. */ 3197 1.1 christos reg_errcode_t ret; 3198 1.1 christos 3199 1.1 christos /* Move past the `-'. */ 3200 1.1 christos PATFETCH (c1); 3201 1.1 christos 3202 1.1 christos ret = byte_compile_range (c, &p, pend, translate, syntax, b); 3203 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 3204 1.1 christos range_start = 0xffffffff; 3205 1.1 christos } 3206 1.1 christos 3207 1.1 christos /* See if we're at the beginning of a possible character 3208 1.1 christos class. */ 3209 1.1 christos 3210 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 3211 1.1 christos { /* Leave room for the null. */ 3212 1.1 christos char str[CHAR_CLASS_MAX_LENGTH + 1]; 3213 1.1 christos 3214 1.1 christos PATFETCH (c); 3215 1.1 christos c1 = 0; 3216 1.1 christos 3217 1.1 christos /* If pattern is `[[:'. */ 3218 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3219 1.1 christos 3220 1.1 christos for (;;) 3221 1.1 christos { 3222 1.1 christos PATFETCH (c); 3223 1.1 christos if ((c == ':' && *p == ']') || p == pend) 3224 1.1 christos break; 3225 1.1 christos if (c1 < CHAR_CLASS_MAX_LENGTH) 3226 1.1 christos str[c1++] = c; 3227 1.1 christos else 3228 1.1 christos /* This is in any case an invalid class name. 
*/ 3229 1.1 christos str[0] = '\0'; 3230 1.1 christos } 3231 1.1 christos str[c1] = '\0'; 3232 1.1 christos 3233 1.1 christos /* If isn't a word bracketed by `[:' and `:]': 3234 1.1 christos undo the ending character, the letters, and leave 3235 1.1 christos the leading `:' and `[' (but set bits for them). */ 3236 1.1 christos if (c == ':' && *p == ']') 3237 1.1 christos { 3238 1.1 christos # if defined _LIBC || WIDE_CHAR_SUPPORT 3239 1.1 christos boolean is_lower = STREQ (str, "lower"); 3240 1.1 christos boolean is_upper = STREQ (str, "upper"); 3241 1.1 christos wctype_t wt; 3242 1.1 christos int ch; 3243 1.1 christos 3244 1.1 christos wt = IS_CHAR_CLASS (str); 3245 1.1 christos if (wt == 0) 3246 1.1 christos FREE_STACK_RETURN (REG_ECTYPE); 3247 1.1 christos 3248 1.1 christos /* Throw away the ] at the end of the character 3249 1.1 christos class. */ 3250 1.1 christos PATFETCH (c); 3251 1.1 christos 3252 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3253 1.1 christos 3254 1.1 christos for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 3255 1.1 christos { 3256 1.1 christos # ifdef _LIBC 3257 1.1 christos if (__iswctype (__btowc (ch), wt)) 3258 1.1 christos SET_LIST_BIT (ch); 3259 1.1 christos # else 3260 1.1 christos if (iswctype (btowc (ch), wt)) 3261 1.1 christos SET_LIST_BIT (ch); 3262 1.1 christos # endif 3263 1.1 christos 3264 1.1 christos if (translate && (is_upper || is_lower) 3265 1.1 christos && (ISUPPER (ch) || ISLOWER (ch))) 3266 1.1 christos SET_LIST_BIT (ch); 3267 1.1 christos } 3268 1.1 christos 3269 1.1 christos had_char_class = true; 3270 1.1 christos # else 3271 1.1 christos int ch; 3272 1.1 christos boolean is_alnum = STREQ (str, "alnum"); 3273 1.1 christos boolean is_alpha = STREQ (str, "alpha"); 3274 1.1 christos boolean is_blank = STREQ (str, "blank"); 3275 1.1 christos boolean is_cntrl = STREQ (str, "cntrl"); 3276 1.1 christos boolean is_digit = STREQ (str, "digit"); 3277 1.1 christos boolean is_graph = STREQ (str, "graph"); 3278 1.1 
christos boolean is_lower = STREQ (str, "lower"); 3279 1.1 christos boolean is_print = STREQ (str, "print"); 3280 1.1 christos boolean is_punct = STREQ (str, "punct"); 3281 1.1 christos boolean is_space = STREQ (str, "space"); 3282 1.1 christos boolean is_upper = STREQ (str, "upper"); 3283 1.1 christos boolean is_xdigit = STREQ (str, "xdigit"); 3284 1.1 christos 3285 1.1 christos if (!IS_CHAR_CLASS (str)) 3286 1.1 christos FREE_STACK_RETURN (REG_ECTYPE); 3287 1.1 christos 3288 1.1 christos /* Throw away the ] at the end of the character 3289 1.1 christos class. */ 3290 1.1 christos PATFETCH (c); 3291 1.1 christos 3292 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3293 1.1 christos 3294 1.1 christos for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 3295 1.1 christos { 3296 1.1 christos /* This was split into 3 if's to 3297 1.1 christos avoid an arbitrary limit in some compiler. */ 3298 1.1 christos if ( (is_alnum && ISALNUM (ch)) 3299 1.1 christos || (is_alpha && ISALPHA (ch)) 3300 1.1 christos || (is_blank && ISBLANK (ch)) 3301 1.1 christos || (is_cntrl && ISCNTRL (ch))) 3302 1.1 christos SET_LIST_BIT (ch); 3303 1.1 christos if ( (is_digit && ISDIGIT (ch)) 3304 1.1 christos || (is_graph && ISGRAPH (ch)) 3305 1.1 christos || (is_lower && ISLOWER (ch)) 3306 1.1 christos || (is_print && ISPRINT (ch))) 3307 1.1 christos SET_LIST_BIT (ch); 3308 1.1 christos if ( (is_punct && ISPUNCT (ch)) 3309 1.1 christos || (is_space && ISSPACE (ch)) 3310 1.1 christos || (is_upper && ISUPPER (ch)) 3311 1.1 christos || (is_xdigit && ISXDIGIT (ch))) 3312 1.1 christos SET_LIST_BIT (ch); 3313 1.1 christos if ( translate && (is_upper || is_lower) 3314 1.1 christos && (ISUPPER (ch) || ISLOWER (ch))) 3315 1.1 christos SET_LIST_BIT (ch); 3316 1.1 christos } 3317 1.1 christos had_char_class = true; 3318 1.1 christos # endif /* libc || wctype.h */ 3319 1.1 christos } 3320 1.1 christos else 3321 1.1 christos { 3322 1.1 christos c1++; 3323 1.1 christos while (c1--) 3324 1.1 christos 
PATUNFETCH; 3325 1.1 christos SET_LIST_BIT ('['); 3326 1.1 christos SET_LIST_BIT (':'); 3327 1.1 christos range_start = ':'; 3328 1.1 christos had_char_class = false; 3329 1.1 christos } 3330 1.1 christos } 3331 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=') 3332 1.1 christos { 3333 1.1 christos unsigned char str[MB_LEN_MAX + 1]; 3334 1.1 christos # ifdef _LIBC 3335 1.1 christos uint32_t nrules = 3336 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3337 1.1 christos # endif 3338 1.1 christos 3339 1.1 christos PATFETCH (c); 3340 1.1 christos c1 = 0; 3341 1.1 christos 3342 1.1 christos /* If pattern is `[[='. */ 3343 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3344 1.1 christos 3345 1.1 christos for (;;) 3346 1.1 christos { 3347 1.1 christos PATFETCH (c); 3348 1.1 christos if ((c == '=' && *p == ']') || p == pend) 3349 1.1 christos break; 3350 1.1 christos if (c1 < MB_LEN_MAX) 3351 1.1 christos str[c1++] = c; 3352 1.1 christos else 3353 1.1 christos /* This is in any case an invalid class name. */ 3354 1.1 christos str[0] = '\0'; 3355 1.1 christos } 3356 1.1 christos str[c1] = '\0'; 3357 1.1 christos 3358 1.1 christos if (c == '=' && *p == ']' && str[0] != '\0') 3359 1.1 christos { 3360 1.1 christos /* If we have no collation data we use the default 3361 1.1 christos collation in which each character is in a class 3362 1.1 christos by itself. It also means that ASCII is the 3363 1.1 christos character set and therefore we cannot have character 3364 1.1 christos with more than one byte in the multibyte 3365 1.1 christos representation. */ 3366 1.1 christos # ifdef _LIBC 3367 1.1 christos if (nrules == 0) 3368 1.1 christos # endif 3369 1.1 christos { 3370 1.1 christos if (c1 != 1) 3371 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3372 1.1 christos 3373 1.1 christos /* Throw away the ] at the end of the equivalence 3374 1.1 christos class. 
*/ 3375 1.1 christos PATFETCH (c); 3376 1.1 christos 3377 1.1 christos /* Set the bit for the character. */ 3378 1.1 christos SET_LIST_BIT (str[0]); 3379 1.1 christos } 3380 1.1 christos # ifdef _LIBC 3381 1.1 christos else 3382 1.1 christos { 3383 1.1 christos /* Try to match the byte sequence in `str' against 3384 1.1 christos those known to the collate implementation. 3385 1.1 christos First find out whether the bytes in `str' are 3386 1.1 christos actually from exactly one character. */ 3387 1.1 christos const int32_t *table; 3388 1.1 christos const unsigned char *weights; 3389 1.1 christos const unsigned char *extra; 3390 1.1 christos const int32_t *indirect; 3391 1.1 christos int32_t idx; 3392 1.1 christos const unsigned char *cp = str; 3393 1.1 christos int ch; 3394 1.1 christos 3395 1.1 christos /* This #include defines a local function! */ 3396 1.1 christos # include <locale/weight.h> 3397 1.1 christos 3398 1.1 christos table = (const int32_t *) 3399 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3400 1.1 christos weights = (const unsigned char *) 3401 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 3402 1.1 christos extra = (const unsigned char *) 3403 1.1.1.3 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3404 1.1 christos indirect = (const int32_t *) 3405 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3406 1.1 christos 3407 1.1 christos idx = findidx (&cp); 3408 1.1 christos if (idx == 0 || cp < str + c1) 3409 1.1 christos /* This is no valid character. */ 3410 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3411 1.1 christos 3412 1.1 christos /* Throw away the ] at the end of the equivalence 3413 1.1 christos class. */ 3414 1.1 christos PATFETCH (c); 3415 1.1 christos 3416 1.1 christos /* Now we have to go through the whole table 3417 1.1 christos and find all characters which have the same 3418 1.1 christos first level weight. 
3419 1.1 christos 3420 1.1 christos XXX Note that this is not entirely correct. 3421 1.1 christos we would have to match multibyte sequences 3422 1.1 christos but this is not possible with the current 3423 1.1 christos implementation. */ 3424 1.1 christos for (ch = 1; ch < 256; ++ch) 3425 1.1 christos /* XXX This test would have to be changed if we 3426 1.1 christos would allow matching multibyte sequences. */ 3427 1.1 christos if (table[ch] > 0) 3428 1.1 christos { 3429 1.1 christos int32_t idx2 = table[ch]; 3430 1.1 christos size_t len = weights[idx2]; 3431 1.1 christos 3432 1.1 christos /* Test whether the lenghts match. */ 3433 1.1 christos if (weights[idx] == len) 3434 1.1 christos { 3435 1.1 christos /* They do. New compare the bytes of 3436 1.1 christos the weight. */ 3437 1.1 christos size_t cnt = 0; 3438 1.1 christos 3439 1.1 christos while (cnt < len 3440 1.1 christos && (weights[idx + 1 + cnt] 3441 1.1 christos == weights[idx2 + 1 + cnt])) 3442 1.1 christos ++cnt; 3443 1.1 christos 3444 1.1 christos if (cnt == len) 3445 1.1 christos /* They match. Mark the character as 3446 1.1 christos acceptable. */ 3447 1.1 christos SET_LIST_BIT (ch); 3448 1.1 christos } 3449 1.1 christos } 3450 1.1 christos } 3451 1.1 christos # endif 3452 1.1 christos had_char_class = true; 3453 1.1 christos } 3454 1.1 christos else 3455 1.1 christos { 3456 1.1 christos c1++; 3457 1.1 christos while (c1--) 3458 1.1 christos PATUNFETCH; 3459 1.1 christos SET_LIST_BIT ('['); 3460 1.1 christos SET_LIST_BIT ('='); 3461 1.1 christos range_start = '='; 3462 1.1 christos had_char_class = false; 3463 1.1 christos } 3464 1.1 christos } 3465 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.') 3466 1.1 christos { 3467 1.1 christos unsigned char str[128]; /* Should be large enough. 
*/ 3468 1.1 christos # ifdef _LIBC 3469 1.1 christos uint32_t nrules = 3470 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3471 1.1 christos # endif 3472 1.1 christos 3473 1.1 christos PATFETCH (c); 3474 1.1 christos c1 = 0; 3475 1.1 christos 3476 1.1 christos /* If pattern is `[[.'. */ 3477 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3478 1.1 christos 3479 1.1 christos for (;;) 3480 1.1 christos { 3481 1.1 christos PATFETCH (c); 3482 1.1 christos if ((c == '.' && *p == ']') || p == pend) 3483 1.1 christos break; 3484 1.1 christos if (c1 < sizeof (str)) 3485 1.1 christos str[c1++] = c; 3486 1.1 christos else 3487 1.1 christos /* This is in any case an invalid class name. */ 3488 1.1 christos str[0] = '\0'; 3489 1.1 christos } 3490 1.1 christos str[c1] = '\0'; 3491 1.1 christos 3492 1.1 christos if (c == '.' && *p == ']' && str[0] != '\0') 3493 1.1 christos { 3494 1.1 christos /* If we have no collation data we use the default 3495 1.1 christos collation in which each character is the name 3496 1.1 christos for its own class which contains only the one 3497 1.1 christos character. It also means that ASCII is the 3498 1.1 christos character set and therefore we cannot have character 3499 1.1 christos with more than one byte in the multibyte 3500 1.1 christos representation. */ 3501 1.1 christos # ifdef _LIBC 3502 1.1 christos if (nrules == 0) 3503 1.1 christos # endif 3504 1.1 christos { 3505 1.1 christos if (c1 != 1) 3506 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3507 1.1 christos 3508 1.1 christos /* Throw away the ] at the end of the equivalence 3509 1.1 christos class. */ 3510 1.1 christos PATFETCH (c); 3511 1.1 christos 3512 1.1 christos /* Set the bit for the character. 
*/ 3513 1.1 christos SET_LIST_BIT (str[0]); 3514 1.1 christos range_start = ((const unsigned char *) str)[0]; 3515 1.1 christos } 3516 1.1 christos # ifdef _LIBC 3517 1.1 christos else 3518 1.1 christos { 3519 1.1 christos /* Try to match the byte sequence in `str' against 3520 1.1 christos those known to the collate implementation. 3521 1.1 christos First find out whether the bytes in `str' are 3522 1.1 christos actually from exactly one character. */ 3523 1.1 christos int32_t table_size; 3524 1.1 christos const int32_t *symb_table; 3525 1.1 christos const unsigned char *extra; 3526 1.1 christos int32_t idx; 3527 1.1 christos int32_t elem; 3528 1.1 christos int32_t second; 3529 1.1 christos int32_t hash; 3530 1.1 christos 3531 1.1 christos table_size = 3532 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, 3533 1.1 christos _NL_COLLATE_SYMB_HASH_SIZEMB); 3534 1.1 christos symb_table = (const int32_t *) 3535 1.1 christos _NL_CURRENT (LC_COLLATE, 3536 1.1 christos _NL_COLLATE_SYMB_TABLEMB); 3537 1.1 christos extra = (const unsigned char *) 3538 1.1 christos _NL_CURRENT (LC_COLLATE, 3539 1.1 christos _NL_COLLATE_SYMB_EXTRAMB); 3540 1.1 christos 3541 1.1 christos /* Locate the character in the hashing table. */ 3542 1.1 christos hash = elem_hash (str, c1); 3543 1.1 christos 3544 1.1 christos idx = 0; 3545 1.1 christos elem = hash % table_size; 3546 1.1 christos second = hash % (table_size - 2); 3547 1.1 christos while (symb_table[2 * elem] != 0) 3548 1.1 christos { 3549 1.1 christos /* First compare the hashing value. */ 3550 1.1 christos if (symb_table[2 * elem] == hash 3551 1.1 christos && c1 == extra[symb_table[2 * elem + 1]] 3552 1.1 christos && memcmp (str, 3553 1.1 christos &extra[symb_table[2 * elem + 1] 3554 1.1 christos + 1], 3555 1.1 christos c1) == 0) 3556 1.1 christos { 3557 1.1 christos /* Yep, this is the entry. 
*/ 3558 1.1 christos idx = symb_table[2 * elem + 1]; 3559 1.1 christos idx += 1 + extra[idx]; 3560 1.1 christos break; 3561 1.1 christos } 3562 1.1 christos 3563 1.1 christos /* Next entry. */ 3564 1.1 christos elem += second; 3565 1.1 christos } 3566 1.1 christos 3567 1.1 christos if (symb_table[2 * elem] == 0) 3568 1.1 christos /* This is no valid character. */ 3569 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3570 1.1 christos 3571 1.1 christos /* Throw away the ] at the end of the equivalence 3572 1.1 christos class. */ 3573 1.1 christos PATFETCH (c); 3574 1.1 christos 3575 1.1 christos /* Now add the multibyte character(s) we found 3576 1.1 christos to the accept list. 3577 1.1 christos 3578 1.1 christos XXX Note that this is not entirely correct. 3579 1.1 christos we would have to match multibyte sequences 3580 1.1 christos but this is not possible with the current 3581 1.1 christos implementation. Also, we have to match 3582 1.1 christos collating symbols, which expand to more than 3583 1.1 christos one file, as a whole and not allow the 3584 1.1 christos individual bytes. 
*/ 3585 1.1 christos c1 = extra[idx++]; 3586 1.1 christos if (c1 == 1) 3587 1.1 christos range_start = extra[idx]; 3588 1.1 christos while (c1-- > 0) 3589 1.1 christos { 3590 1.1 christos SET_LIST_BIT (extra[idx]); 3591 1.1 christos ++idx; 3592 1.1 christos } 3593 1.1 christos } 3594 1.1 christos # endif 3595 1.1 christos had_char_class = false; 3596 1.1 christos } 3597 1.1 christos else 3598 1.1 christos { 3599 1.1 christos c1++; 3600 1.1 christos while (c1--) 3601 1.1 christos PATUNFETCH; 3602 1.1 christos SET_LIST_BIT ('['); 3603 1.1 christos SET_LIST_BIT ('.'); 3604 1.1 christos range_start = '.'; 3605 1.1 christos had_char_class = false; 3606 1.1 christos } 3607 1.1 christos } 3608 1.1 christos else 3609 1.1 christos { 3610 1.1 christos had_char_class = false; 3611 1.1 christos SET_LIST_BIT (c); 3612 1.1 christos range_start = c; 3613 1.1 christos } 3614 1.1 christos } 3615 1.1 christos 3616 1.1 christos /* Discard any (non)matching list bytes that are all 0 at the 3617 1.1 christos end of the map. Decrease the map-length byte too. 
*/ 3618 1.1 christos while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 3619 1.1 christos b[-1]--; 3620 1.1 christos b += b[-1]; 3621 1.1 christos #endif /* WCHAR */ 3622 1.1 christos } 3623 1.1 christos break; 3624 1.1 christos 3625 1.1 christos 3626 1.1 christos case '(': 3627 1.1 christos if (syntax & RE_NO_BK_PARENS) 3628 1.1 christos goto handle_open; 3629 1.1 christos else 3630 1.1 christos goto normal_char; 3631 1.1 christos 3632 1.1 christos 3633 1.1 christos case ')': 3634 1.1 christos if (syntax & RE_NO_BK_PARENS) 3635 1.1 christos goto handle_close; 3636 1.1 christos else 3637 1.1 christos goto normal_char; 3638 1.1 christos 3639 1.1 christos 3640 1.1 christos case '\n': 3641 1.1 christos if (syntax & RE_NEWLINE_ALT) 3642 1.1 christos goto handle_alt; 3643 1.1 christos else 3644 1.1 christos goto normal_char; 3645 1.1 christos 3646 1.1 christos 3647 1.1 christos case '|': 3648 1.1 christos if (syntax & RE_NO_BK_VBAR) 3649 1.1 christos goto handle_alt; 3650 1.1 christos else 3651 1.1 christos goto normal_char; 3652 1.1 christos 3653 1.1 christos 3654 1.1 christos case '{': 3655 1.1 christos if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 3656 1.1 christos goto handle_interval; 3657 1.1 christos else 3658 1.1 christos goto normal_char; 3659 1.1 christos 3660 1.1 christos 3661 1.1 christos case '\\': 3662 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 3663 1.1 christos 3664 1.1 christos /* Do not translate the character after the \, so that we can 3665 1.1 christos distinguish, e.g., \B from \b, even if we normally would 3666 1.1 christos translate, e.g., B to b. 
*/ 3667 1.1 christos PATFETCH_RAW (c); 3668 1.1 christos 3669 1.1 christos switch (c) 3670 1.1 christos { 3671 1.1 christos case '(': 3672 1.1 christos if (syntax & RE_NO_BK_PARENS) 3673 1.1 christos goto normal_backslash; 3674 1.1 christos 3675 1.1 christos handle_open: 3676 1.1 christos bufp->re_nsub++; 3677 1.1 christos regnum++; 3678 1.1 christos 3679 1.1 christos if (COMPILE_STACK_FULL) 3680 1.1 christos { 3681 1.1 christos RETALLOC (compile_stack.stack, compile_stack.size << 1, 3682 1.1 christos compile_stack_elt_t); 3683 1.1 christos if (compile_stack.stack == NULL) return REG_ESPACE; 3684 1.1 christos 3685 1.1 christos compile_stack.size <<= 1; 3686 1.1 christos } 3687 1.1 christos 3688 1.1 christos /* These are the values to restore when we hit end of this 3689 1.1 christos group. They are all relative offsets, so that if the 3690 1.1 christos whole pattern moves because of realloc, they will still 3691 1.1 christos be valid. */ 3692 1.1 christos COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR; 3693 1.1 christos COMPILE_STACK_TOP.fixup_alt_jump 3694 1.1 christos = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0; 3695 1.1 christos COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR; 3696 1.1 christos COMPILE_STACK_TOP.regnum = regnum; 3697 1.1 christos 3698 1.1 christos /* We will eventually replace the 0 with the number of 3699 1.1 christos groups inner to this one. But do not push a 3700 1.1 christos start_memory for groups beyond the last one we can 3701 1.1 christos represent in the compiled pattern. 
*/ 3702 1.1 christos if (regnum <= MAX_REGNUM) 3703 1.1 christos { 3704 1.1 christos COMPILE_STACK_TOP.inner_group_offset = b 3705 1.1 christos - COMPILED_BUFFER_VAR + 2; 3706 1.1 christos BUF_PUSH_3 (start_memory, regnum, 0); 3707 1.1 christos } 3708 1.1 christos 3709 1.1 christos compile_stack.avail++; 3710 1.1 christos 3711 1.1 christos fixup_alt_jump = 0; 3712 1.1 christos laststart = 0; 3713 1.1 christos begalt = b; 3714 1.1 christos /* If we've reached MAX_REGNUM groups, then this open 3715 1.1 christos won't actually generate any code, so we'll have to 3716 1.1 christos clear pending_exact explicitly. */ 3717 1.1 christos pending_exact = 0; 3718 1.1 christos break; 3719 1.1 christos 3720 1.1 christos 3721 1.1 christos case ')': 3722 1.1 christos if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 3723 1.1 christos 3724 1.1 christos if (COMPILE_STACK_EMPTY) 3725 1.1 christos { 3726 1.1 christos if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 3727 1.1 christos goto normal_backslash; 3728 1.1 christos else 3729 1.1 christos FREE_STACK_RETURN (REG_ERPAREN); 3730 1.1 christos } 3731 1.1 christos 3732 1.1 christos handle_close: 3733 1.1 christos if (fixup_alt_jump) 3734 1.1 christos { /* Push a dummy failure point at the end of the 3735 1.1 christos alternative for a possible future 3736 1.1 christos `pop_failure_jump' to pop. See comments at 3737 1.1 christos `push_dummy_failure' in `re_match_2'. */ 3738 1.1 christos BUF_PUSH (push_dummy_failure); 3739 1.1 christos 3740 1.1 christos /* We allocated space for this jump when we assigned 3741 1.1 christos to `fixup_alt_jump', in the `handle_alt' case below. */ 3742 1.1 christos STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 3743 1.1 christos } 3744 1.1 christos 3745 1.1 christos /* See similar code for backslashed left paren above. 
*/ 3746 1.1 christos if (COMPILE_STACK_EMPTY) 3747 1.1 christos { 3748 1.1 christos if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 3749 1.1 christos goto normal_char; 3750 1.1 christos else 3751 1.1 christos FREE_STACK_RETURN (REG_ERPAREN); 3752 1.1 christos } 3753 1.1 christos 3754 1.1 christos /* Since we just checked for an empty stack above, this 3755 1.1 christos ``can't happen''. */ 3756 1.1 christos assert (compile_stack.avail != 0); 3757 1.1 christos { 3758 1.1 christos /* We don't just want to restore into `regnum', because 3759 1.1 christos later groups should continue to be numbered higher, 3760 1.1 christos as in `(ab)c(de)' -- the second group is #2. */ 3761 1.1 christos regnum_t this_group_regnum; 3762 1.1 christos 3763 1.1 christos compile_stack.avail--; 3764 1.1 christos begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset; 3765 1.1 christos fixup_alt_jump 3766 1.1 christos = COMPILE_STACK_TOP.fixup_alt_jump 3767 1.1 christos ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1 3768 1.1 christos : 0; 3769 1.1 christos laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset; 3770 1.1 christos this_group_regnum = COMPILE_STACK_TOP.regnum; 3771 1.1 christos /* If we've reached MAX_REGNUM groups, then this open 3772 1.1 christos won't actually generate any code, so we'll have to 3773 1.1 christos clear pending_exact explicitly. */ 3774 1.1 christos pending_exact = 0; 3775 1.1 christos 3776 1.1 christos /* We're at the end of the group, so now we know how many 3777 1.1 christos groups were inside this one. 
*/ 3778 1.1 christos if (this_group_regnum <= MAX_REGNUM) 3779 1.1 christos { 3780 1.1 christos UCHAR_T *inner_group_loc 3781 1.1 christos = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset; 3782 1.1 christos 3783 1.1 christos *inner_group_loc = regnum - this_group_regnum; 3784 1.1 christos BUF_PUSH_3 (stop_memory, this_group_regnum, 3785 1.1 christos regnum - this_group_regnum); 3786 1.1 christos } 3787 1.1 christos } 3788 1.1 christos break; 3789 1.1 christos 3790 1.1 christos 3791 1.1 christos case '|': /* `\|'. */ 3792 1.1 christos if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 3793 1.1 christos goto normal_backslash; 3794 1.1 christos handle_alt: 3795 1.1 christos if (syntax & RE_LIMITED_OPS) 3796 1.1 christos goto normal_char; 3797 1.1 christos 3798 1.1 christos /* Insert before the previous alternative a jump which 3799 1.1 christos jumps to this alternative if the former fails. */ 3800 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3801 1.1 christos INSERT_JUMP (on_failure_jump, begalt, 3802 1.1 christos b + 2 + 2 * OFFSET_ADDRESS_SIZE); 3803 1.1 christos pending_exact = 0; 3804 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 3805 1.1 christos 3806 1.1 christos /* The alternative before this one has a jump after it 3807 1.1 christos which gets executed if it gets matched. Adjust that 3808 1.1 christos jump so it will jump to this alternative's analogous 3809 1.1 christos jump (put in below, which in turn will jump to the next 3810 1.1 christos (if any) alternative's such jump, etc.). The last such 3811 1.1 christos jump jumps to the correct final destination. A picture: 3812 1.1 christos _____ _____ 3813 1.1 christos | | | | 3814 1.1 christos | v | v 3815 1.1 christos a | b | c 3816 1.1 christos 3817 1.1 christos If we are at `b', then fixup_alt_jump right now points to a 3818 1.1 christos three-byte space after `a'. 
We'll put in the jump, set 3819 1.1 christos fixup_alt_jump to right after `b', and leave behind three 3820 1.1 christos bytes which we'll fill in when we get to after `c'. */ 3821 1.1 christos 3822 1.1 christos if (fixup_alt_jump) 3823 1.1 christos STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 3824 1.1 christos 3825 1.1 christos /* Mark and leave space for a jump after this alternative, 3826 1.1 christos to be filled in later either by next alternative or 3827 1.1 christos when know we're at the end of a series of alternatives. */ 3828 1.1 christos fixup_alt_jump = b; 3829 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3830 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 3831 1.1 christos 3832 1.1 christos laststart = 0; 3833 1.1 christos begalt = b; 3834 1.1 christos break; 3835 1.1 christos 3836 1.1 christos 3837 1.1 christos case '{': 3838 1.1 christos /* If \{ is a literal. */ 3839 1.1 christos if (!(syntax & RE_INTERVALS) 3840 1.1 christos /* If we're at `\{' and it's not the open-interval 3841 1.1 christos operator. */ 3842 1.1 christos || (syntax & RE_NO_BK_BRACES)) 3843 1.1 christos goto normal_backslash; 3844 1.1 christos 3845 1.1 christos handle_interval: 3846 1.1 christos { 3847 1.1 christos /* If got here, then the syntax allows intervals. */ 3848 1.1 christos 3849 1.1 christos /* At least (most) this many matches must be made. */ 3850 1.1 christos int lower_bound = -1, upper_bound = -1; 3851 1.1 christos 3852 1.1 christos /* Place in the uncompiled pattern (i.e., just after 3853 1.1 christos the '{') to go back to if the interval is invalid. 
*/ 3854 1.1 christos const CHAR_T *beg_interval = p; 3855 1.1 christos 3856 1.1 christos if (p == pend) 3857 1.1 christos goto invalid_interval; 3858 1.1 christos 3859 1.1 christos GET_UNSIGNED_NUMBER (lower_bound); 3860 1.1 christos 3861 1.1 christos if (c == ',') 3862 1.1 christos { 3863 1.1 christos GET_UNSIGNED_NUMBER (upper_bound); 3864 1.1 christos if (upper_bound < 0) 3865 1.1 christos upper_bound = RE_DUP_MAX; 3866 1.1 christos } 3867 1.1 christos else 3868 1.1 christos /* Interval such as `{1}' => match exactly once. */ 3869 1.1 christos upper_bound = lower_bound; 3870 1.1 christos 3871 1.1 christos if (! (0 <= lower_bound && lower_bound <= upper_bound)) 3872 1.1 christos goto invalid_interval; 3873 1.1 christos 3874 1.1 christos if (!(syntax & RE_NO_BK_BRACES)) 3875 1.1 christos { 3876 1.1 christos if (c != '\\' || p == pend) 3877 1.1 christos goto invalid_interval; 3878 1.1 christos PATFETCH (c); 3879 1.1 christos } 3880 1.1 christos 3881 1.1 christos if (c != '}') 3882 1.1 christos goto invalid_interval; 3883 1.1 christos 3884 1.1 christos /* If it's invalid to have no preceding re. */ 3885 1.1 christos if (!laststart) 3886 1.1 christos { 3887 1.1 christos if (syntax & RE_CONTEXT_INVALID_OPS 3888 1.1 christos && !(syntax & RE_INVALID_INTERVAL_ORD)) 3889 1.1 christos FREE_STACK_RETURN (REG_BADRPT); 3890 1.1 christos else if (syntax & RE_CONTEXT_INDEP_OPS) 3891 1.1 christos laststart = b; 3892 1.1 christos else 3893 1.1 christos goto unfetch_interval; 3894 1.1 christos } 3895 1.1 christos 3896 1.1 christos /* We just parsed a valid interval. */ 3897 1.1 christos 3898 1.1 christos if (RE_DUP_MAX < upper_bound) 3899 1.1 christos FREE_STACK_RETURN (REG_BADBR); 3900 1.1 christos 3901 1.1 christos /* If the upper bound is zero, don't want to succeed at 3902 1.1 christos all; jump from `laststart' to `b + 3', which will be 3903 1.1 christos the end of the buffer after we insert the jump. 
*/ 3904 1.1 christos /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' 3905 1.1 christos instead of 'b + 3'. */ 3906 1.1 christos if (upper_bound == 0) 3907 1.1 christos { 3908 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3909 1.1 christos INSERT_JUMP (jump, laststart, b + 1 3910 1.1 christos + OFFSET_ADDRESS_SIZE); 3911 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 3912 1.1 christos } 3913 1.1 christos 3914 1.1 christos /* Otherwise, we have a nontrivial interval. When 3915 1.1 christos we're all done, the pattern will look like: 3916 1.1 christos set_number_at <jump count> <upper bound> 3917 1.1 christos set_number_at <succeed_n count> <lower bound> 3918 1.1 christos succeed_n <after jump addr> <succeed_n count> 3919 1.1 christos <body of loop> 3920 1.1 christos jump_n <succeed_n addr> <jump count> 3921 1.1 christos (The upper bound and `jump_n' are omitted if 3922 1.1 christos `upper_bound' is 1, though.) */ 3923 1.1 christos else 3924 1.1 christos { /* If the upper bound is > 1, we need to insert 3925 1.1 christos more at the end of the loop. */ 3926 1.1 christos unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE + 3927 1.1 christos (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE); 3928 1.1 christos 3929 1.1 christos GET_BUFFER_SPACE (nbytes); 3930 1.1 christos 3931 1.1 christos /* Initialize lower bound of the `succeed_n', even 3932 1.1 christos though it will be set during matching by its 3933 1.1 christos attendant `set_number_at' (inserted next), 3934 1.1 christos because `re_compile_fastmap' needs to know. 3935 1.1 christos Jump to the `jump_n' we might insert below. */ 3936 1.1 christos INSERT_JUMP2 (succeed_n, laststart, 3937 1.1 christos b + 1 + 2 * OFFSET_ADDRESS_SIZE 3938 1.1 christos + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE) 3939 1.1 christos , lower_bound); 3940 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3941 1.1 christos 3942 1.1 christos /* Code to initialize the lower bound. Insert 3943 1.1 christos before the `succeed_n'. 
The `5' is the last two 3944 1.1 christos bytes of this `set_number_at', plus 3 bytes of 3945 1.1 christos the following `succeed_n'. */ 3946 1.1 christos /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE' 3947 1.1 christos is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE' 3948 1.1 christos of the following `succeed_n'. */ 3949 1.1 christos PREFIX(insert_op2) (set_number_at, laststart, 1 3950 1.1 christos + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b); 3951 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3952 1.1 christos 3953 1.1 christos if (upper_bound > 1) 3954 1.1 christos { /* More than one repetition is allowed, so 3955 1.1 christos append a backward jump to the `succeed_n' 3956 1.1 christos that starts this interval. 3957 1.1 christos 3958 1.1 christos When we've reached this during matching, 3959 1.1 christos we'll have matched the interval once, so 3960 1.1 christos jump back only `upper_bound - 1' times. */ 3961 1.1 christos STORE_JUMP2 (jump_n, b, laststart 3962 1.1 christos + 2 * OFFSET_ADDRESS_SIZE + 1, 3963 1.1 christos upper_bound - 1); 3964 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3965 1.1 christos 3966 1.1 christos /* The location we want to set is the second 3967 1.1 christos parameter of the `jump_n'; that is `b-2' as 3968 1.1 christos an absolute address. `laststart' will be 3969 1.1 christos the `set_number_at' we're about to insert; 3970 1.1 christos `laststart+3' the number to set, the source 3971 1.1 christos for the relative address. But we are 3972 1.1 christos inserting into the middle of the pattern -- 3973 1.1 christos so everything is getting moved up by 5. 3974 1.1 christos Conclusion: (b - 2) - (laststart + 3) + 5, 3975 1.1 christos i.e., b - laststart. 3976 1.1 christos 3977 1.1 christos We insert this at the beginning of the loop 3978 1.1 christos so that if we fail during matching, we'll 3979 1.1 christos reinitialize the bounds. 
*/ 3980 1.1 christos PREFIX(insert_op2) (set_number_at, laststart, 3981 1.1 christos b - laststart, 3982 1.1 christos upper_bound - 1, b); 3983 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3984 1.1 christos } 3985 1.1 christos } 3986 1.1 christos pending_exact = 0; 3987 1.1 christos break; 3988 1.1 christos 3989 1.1 christos invalid_interval: 3990 1.1 christos if (!(syntax & RE_INVALID_INTERVAL_ORD)) 3991 1.1 christos FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR); 3992 1.1 christos unfetch_interval: 3993 1.1 christos /* Match the characters as literals. */ 3994 1.1 christos p = beg_interval; 3995 1.1 christos c = '{'; 3996 1.1 christos if (syntax & RE_NO_BK_BRACES) 3997 1.1 christos goto normal_char; 3998 1.1 christos else 3999 1.1 christos goto normal_backslash; 4000 1.1 christos } 4001 1.1 christos 4002 1.1 christos #ifdef emacs 4003 1.1 christos /* There is no way to specify the before_dot and after_dot 4004 1.1 christos operators. rms says this is ok. --karl */ 4005 1.1 christos case '=': 4006 1.1 christos BUF_PUSH (at_dot); 4007 1.1 christos break; 4008 1.1 christos 4009 1.1 christos case 's': 4010 1.1 christos laststart = b; 4011 1.1 christos PATFETCH (c); 4012 1.1 christos BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 4013 1.1 christos break; 4014 1.1 christos 4015 1.1 christos case 'S': 4016 1.1 christos laststart = b; 4017 1.1 christos PATFETCH (c); 4018 1.1 christos BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 4019 1.1 christos break; 4020 1.1 christos #endif /* emacs */ 4021 1.1 christos 4022 1.1 christos 4023 1.1 christos case 'w': 4024 1.1 christos if (syntax & RE_NO_GNU_OPS) 4025 1.1 christos goto normal_char; 4026 1.1 christos laststart = b; 4027 1.1 christos BUF_PUSH (wordchar); 4028 1.1 christos break; 4029 1.1 christos 4030 1.1 christos 4031 1.1 christos case 'W': 4032 1.1 christos if (syntax & RE_NO_GNU_OPS) 4033 1.1 christos goto normal_char; 4034 1.1 christos laststart = b; 4035 1.1 christos BUF_PUSH (notwordchar); 4036 1.1 
christos break; 4037 1.1 christos 4038 1.1 christos 4039 1.1 christos case '<': 4040 1.1 christos if (syntax & RE_NO_GNU_OPS) 4041 1.1 christos goto normal_char; 4042 1.1 christos BUF_PUSH (wordbeg); 4043 1.1 christos break; 4044 1.1 christos 4045 1.1 christos case '>': 4046 1.1 christos if (syntax & RE_NO_GNU_OPS) 4047 1.1 christos goto normal_char; 4048 1.1 christos BUF_PUSH (wordend); 4049 1.1 christos break; 4050 1.1 christos 4051 1.1 christos case 'b': 4052 1.1 christos if (syntax & RE_NO_GNU_OPS) 4053 1.1 christos goto normal_char; 4054 1.1 christos BUF_PUSH (wordbound); 4055 1.1 christos break; 4056 1.1 christos 4057 1.1 christos case 'B': 4058 1.1 christos if (syntax & RE_NO_GNU_OPS) 4059 1.1 christos goto normal_char; 4060 1.1 christos BUF_PUSH (notwordbound); 4061 1.1 christos break; 4062 1.1 christos 4063 1.1 christos case '`': 4064 1.1 christos if (syntax & RE_NO_GNU_OPS) 4065 1.1 christos goto normal_char; 4066 1.1 christos BUF_PUSH (begbuf); 4067 1.1 christos break; 4068 1.1 christos 4069 1.1 christos case '\'': 4070 1.1 christos if (syntax & RE_NO_GNU_OPS) 4071 1.1 christos goto normal_char; 4072 1.1 christos BUF_PUSH (endbuf); 4073 1.1 christos break; 4074 1.1 christos 4075 1.1 christos case '1': case '2': case '3': case '4': case '5': 4076 1.1 christos case '6': case '7': case '8': case '9': 4077 1.1 christos if (syntax & RE_NO_BK_REFS) 4078 1.1 christos goto normal_char; 4079 1.1 christos 4080 1.1 christos c1 = c - '0'; 4081 1.1 christos 4082 1.1 christos if (c1 > regnum) 4083 1.1 christos FREE_STACK_RETURN (REG_ESUBREG); 4084 1.1 christos 4085 1.1 christos /* Can't back reference to a subexpression if inside of it. 
*/ 4086 1.1 christos if (group_in_compile_stack (compile_stack, (regnum_t) c1)) 4087 1.1 christos goto normal_char; 4088 1.1 christos 4089 1.1 christos laststart = b; 4090 1.1 christos BUF_PUSH_2 (duplicate, c1); 4091 1.1 christos break; 4092 1.1 christos 4093 1.1 christos 4094 1.1 christos case '+': 4095 1.1 christos case '?': 4096 1.1 christos if (syntax & RE_BK_PLUS_QM) 4097 1.1 christos goto handle_plus; 4098 1.1 christos else 4099 1.1 christos goto normal_backslash; 4100 1.1 christos 4101 1.1 christos default: 4102 1.1 christos normal_backslash: 4103 1.1 christos /* You might think it would be useful for \ to mean 4104 1.1 christos not to translate; but if we don't translate it 4105 1.1 christos it will never match anything. */ 4106 1.1 christos c = TRANSLATE (c); 4107 1.1 christos goto normal_char; 4108 1.1 christos } 4109 1.1 christos break; 4110 1.1 christos 4111 1.1 christos 4112 1.1 christos default: 4113 1.1 christos /* Expects the character in `c'. */ 4114 1.1 christos normal_char: 4115 1.1 christos /* If no exactn currently being built. */ 4116 1.1 christos if (!pending_exact 4117 1.1 christos #ifdef WCHAR 4118 1.1 christos /* If last exactn handle binary(or character) and 4119 1.1 christos new exactn handle character(or binary). */ 4120 1.1 christos || is_exactn_bin != is_binary[p - 1 - pattern] 4121 1.1 christos #endif /* WCHAR */ 4122 1.1 christos 4123 1.1 christos /* If last exactn not at current position. */ 4124 1.1 christos || pending_exact + *pending_exact + 1 != b 4125 1.1 christos 4126 1.1 christos /* We have only one byte following the exactn for the count. */ 4127 1.1 christos || *pending_exact == (1 << BYTEWIDTH) - 1 4128 1.1 christos 4129 1.1 christos /* If followed by a repetition operator. */ 4130 1.1 christos || *p == '*' || *p == '^' 4131 1.1 christos || ((syntax & RE_BK_PLUS_QM) 4132 1.1 christos ? 
*p == '\\' && (p[1] == '+' || p[1] == '?') 4133 1.1 christos : (*p == '+' || *p == '?')) 4134 1.1 christos || ((syntax & RE_INTERVALS) 4135 1.1 christos && ((syntax & RE_NO_BK_BRACES) 4136 1.1 christos ? *p == '{' 4137 1.1 christos : (p[0] == '\\' && p[1] == '{')))) 4138 1.1 christos { 4139 1.1 christos /* Start building a new exactn. */ 4140 1.1 christos 4141 1.1 christos laststart = b; 4142 1.1 christos 4143 1.1 christos #ifdef WCHAR 4144 1.1 christos /* Is this exactn binary data or character? */ 4145 1.1 christos is_exactn_bin = is_binary[p - 1 - pattern]; 4146 1.1 christos if (is_exactn_bin) 4147 1.1 christos BUF_PUSH_2 (exactn_bin, 0); 4148 1.1 christos else 4149 1.1 christos BUF_PUSH_2 (exactn, 0); 4150 1.1 christos #else 4151 1.1 christos BUF_PUSH_2 (exactn, 0); 4152 1.1 christos #endif /* WCHAR */ 4153 1.1 christos pending_exact = b - 1; 4154 1.1 christos } 4155 1.1 christos 4156 1.1 christos BUF_PUSH (c); 4157 1.1 christos (*pending_exact)++; 4158 1.1 christos break; 4159 1.1 christos } /* switch (c) */ 4160 1.1 christos } /* while p != pend */ 4161 1.1 christos 4162 1.1 christos 4163 1.1 christos /* Through the pattern now. */ 4164 1.1 christos 4165 1.1 christos if (fixup_alt_jump) 4166 1.1 christos STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 4167 1.1 christos 4168 1.1 christos if (!COMPILE_STACK_EMPTY) 4169 1.1 christos FREE_STACK_RETURN (REG_EPAREN); 4170 1.1 christos 4171 1.1 christos /* If we don't want backtracking, force success 4172 1.1 christos the first time we reach the end of the compiled pattern. */ 4173 1.1 christos if (syntax & RE_NO_POSIX_BACKTRACKING) 4174 1.1 christos BUF_PUSH (succeed); 4175 1.1 christos 4176 1.1 christos #ifdef WCHAR 4177 1.1 christos free (pattern); 4178 1.1 christos free (mbs_offset); 4179 1.1 christos free (is_binary); 4180 1.1 christos #endif 4181 1.1 christos free (compile_stack.stack); 4182 1.1 christos 4183 1.1 christos /* We have succeeded; set the length of the buffer. 
*/ 4184 1.1 christos #ifdef WCHAR 4185 1.1 christos bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR; 4186 1.1 christos #else 4187 1.1 christos bufp->used = b - bufp->buffer; 4188 1.1 christos #endif 4189 1.1 christos 4190 1.1 christos #ifdef DEBUG 4191 1.1 christos if (debug) 4192 1.1 christos { 4193 1.1 christos DEBUG_PRINT1 ("\nCompiled pattern: \n"); 4194 1.1 christos PREFIX(print_compiled_pattern) (bufp); 4195 1.1 christos } 4196 1.1 christos #endif /* DEBUG */ 4197 1.1 christos 4198 1.1 christos #ifndef MATCH_MAY_ALLOCATE 4199 1.1 christos /* Initialize the failure stack to the largest possible stack. This 4200 1.1 christos isn't necessary unless we're trying to avoid calling alloca in 4201 1.1 christos the search and match routines. */ 4202 1.1 christos { 4203 1.1 christos int num_regs = bufp->re_nsub + 1; 4204 1.1 christos 4205 1.1 christos /* Since DOUBLE_FAIL_STACK refuses to double only if the current size 4206 1.1 christos is strictly greater than re_max_failures, the largest possible stack 4207 1.1 christos is 2 * re_max_failures failure points. */ 4208 1.1 christos if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) 4209 1.1 christos { 4210 1.1 christos fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 4211 1.1 christos 4212 1.1 christos # ifdef emacs 4213 1.1 christos if (! fail_stack.stack) 4214 1.1 christos fail_stack.stack 4215 1.1 christos = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size 4216 1.1 christos * sizeof (PREFIX(fail_stack_elt_t))); 4217 1.1 christos else 4218 1.1 christos fail_stack.stack 4219 1.1 christos = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack, 4220 1.1 christos (fail_stack.size 4221 1.1 christos * sizeof (PREFIX(fail_stack_elt_t)))); 4222 1.1 christos # else /* not emacs */ 4223 1.1 christos if (! 
fail_stack.stack) 4224 1.1 christos fail_stack.stack 4225 1.1 christos = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size 4226 1.1 christos * sizeof (PREFIX(fail_stack_elt_t))); 4227 1.1 christos else 4228 1.1 christos fail_stack.stack 4229 1.1 christos = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack, 4230 1.1 christos (fail_stack.size 4231 1.1 christos * sizeof (PREFIX(fail_stack_elt_t)))); 4232 1.1 christos # endif /* not emacs */ 4233 1.1 christos } 4234 1.1 christos 4235 1.1 christos PREFIX(regex_grow_registers) (num_regs); 4236 1.1 christos } 4237 1.1 christos #endif /* not MATCH_MAY_ALLOCATE */ 4238 1.1 christos 4239 1.1 christos return REG_NOERROR; 4240 1.1 christos } /* regex_compile */

/* Subroutines for `regex_compile'.  */

/* Write the opcode OP into the slot at LOC and encode ARG directly
   behind it with STORE_NUMBER.  The file describes ARG as a two-byte
   integer, or a single wchar_t when WCHAR is defined.  */

static void
PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
{
  UCHAR_T *operand = loc + 1;

  loc[0] = (UCHAR_T) op;
  STORE_NUMBER (operand, arg);
}


/* Same as `store_op1', except that two integers follow the opcode:
   ARG1 directly after OP, and ARG2 OFFSET_ADDRESS_SIZE units after
   ARG1.  Under WCHAR each integer occupies one wchar_t.  */

static void
PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
{
  UCHAR_T *first = loc + 1;
  UCHAR_T *second = first + OFFSET_ADDRESS_SIZE;

  loc[0] = (UCHAR_T) op;
  STORE_NUMBER (first, arg1);
  STORE_NUMBER (second, arg2);
}


/* Copy the bytes from LOC to END to open up three bytes of space at LOC
   for OP followed by two-byte integer parameter ARG.  
*/ 4269 1.1 christos /* ifdef WCHAR, integer parameter is 1 wchar_t. */ 4270 1.1 christos 4271 1.1 christos static void 4272 1.1 christos PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end) 4273 1.1 christos { 4274 1.1 christos register UCHAR_T *pfrom = end; 4275 1.1 christos register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE; 4276 1.1 christos 4277 1.1 christos while (pfrom != loc) 4278 1.1 christos *--pto = *--pfrom; 4279 1.1 christos 4280 1.1 christos PREFIX(store_op1) (op, loc, arg); 4281 1.1 christos } 4282 1.1 christos 4283 1.1 christos 4284 1.1 christos /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 4285 1.1 christos /* ifdef WCHAR, integer parameter is 1 wchar_t. */ 4286 1.1 christos 4287 1.1 christos static void 4288 1.1 christos PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, 4289 1.1 christos int arg2, UCHAR_T *end) 4290 1.1 christos { 4291 1.1 christos register UCHAR_T *pfrom = end; 4292 1.1 christos register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE; 4293 1.1 christos 4294 1.1 christos while (pfrom != loc) 4295 1.1 christos *--pto = *--pfrom; 4296 1.1 christos 4297 1.1 christos PREFIX(store_op2) (op, loc, arg1, arg2); 4298 1.1 christos } 4299 1.1 christos 4300 1.1 christos 4301 1.1 christos /* P points to just after a ^ in PATTERN. Return true if that ^ comes 4302 1.1 christos after an alternative or a begin-subexpression. We assume there is at 4303 1.1 christos least one character before the ^. */ 4304 1.1 christos 4305 1.1 christos static boolean 4306 1.1 christos PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p, 4307 1.1 christos reg_syntax_t syntax) 4308 1.1 christos { 4309 1.1 christos const CHAR_T *prev = p - 2; 4310 1.1 christos boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 4311 1.1 christos 4312 1.1 christos return 4313 1.1 christos /* After a subexpression? 
*/ 4314 1.1 christos (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 4315 1.1 christos /* After an alternative? */ 4316 1.1 christos || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 4317 1.1 christos } 4318 1.1 christos 4319 1.1 christos 4320 1.1 christos /* The dual of at_begline_loc_p. This one is for $. We assume there is 4321 1.1 christos at least one character after the $, i.e., `P < PEND'. */ 4322 1.1 christos 4323 1.1 christos static boolean 4324 1.1 christos PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend, 4325 1.1 christos reg_syntax_t syntax) 4326 1.1 christos { 4327 1.1 christos const CHAR_T *next = p; 4328 1.1 christos boolean next_backslash = *next == '\\'; 4329 1.1 christos const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0; 4330 1.1 christos 4331 1.1 christos return 4332 1.1 christos /* Before a subexpression? */ 4333 1.1 christos (syntax & RE_NO_BK_PARENS ? *next == ')' 4334 1.1 christos : next_backslash && next_next && *next_next == ')') 4335 1.1 christos /* Before an alternative? */ 4336 1.1 christos || (syntax & RE_NO_BK_VBAR ? *next == '|' 4337 1.1 christos : next_backslash && next_next && *next_next == '|'); 4338 1.1 christos } 4339 1.1 christos 4340 1.1 christos #else /* not INSIDE_RECURSION */ 4341 1.1 christos 4342 1.1 christos /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 4343 1.1 christos false if it's not. 
*/ 4344 1.1 christos 4345 1.1 christos static boolean 4346 1.1 christos group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) 4347 1.1 christos { 4348 1.1 christos int this_element; 4349 1.1 christos 4350 1.1 christos for (this_element = compile_stack.avail - 1; 4351 1.1 christos this_element >= 0; 4352 1.1 christos this_element--) 4353 1.1 christos if (compile_stack.stack[this_element].regnum == regnum) 4354 1.1 christos return true; 4355 1.1 christos 4356 1.1 christos return false; 4357 1.1 christos } 4358 1.1 christos #endif /* not INSIDE_RECURSION */ 4359 1.1 christos 4360 1.1 christos #ifdef INSIDE_RECURSION 4361 1.1 christos 4362 1.1 christos #ifdef WCHAR 4363 1.1 christos /* This insert space, which size is "num", into the pattern at "loc". 4364 1.1 christos "end" must point the end of the allocated buffer. */ 4365 1.1 christos static void 4366 1.1 christos insert_space (int num, CHAR_T *loc, CHAR_T *end) 4367 1.1 christos { 4368 1.1 christos register CHAR_T *pto = end; 4369 1.1 christos register CHAR_T *pfrom = end - num; 4370 1.1 christos 4371 1.1 christos while (pfrom >= loc) 4372 1.1 christos *pto-- = *pfrom--; 4373 1.1 christos } 4374 1.1 christos #endif /* WCHAR */ 4375 1.1 christos 4376 1.1 christos #ifdef WCHAR 4377 1.1 christos static reg_errcode_t 4378 1.1 christos wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr, 4379 1.1 christos const CHAR_T *pend, RE_TRANSLATE_TYPE translate, 4380 1.1 christos reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set) 4381 1.1 christos { 4382 1.1 christos const CHAR_T *p = *p_ptr; 4383 1.1 christos CHAR_T range_start, range_end; 4384 1.1 christos reg_errcode_t ret; 4385 1.1 christos # ifdef _LIBC 4386 1.1 christos uint32_t nrules; 4387 1.1 christos uint32_t start_val, end_val; 4388 1.1 christos # endif 4389 1.1 christos if (p == pend) 4390 1.1 christos return REG_ERANGE; 4391 1.1 christos 4392 1.1 christos # ifdef _LIBC 4393 1.1 christos nrules = _NL_CURRENT_WORD (LC_COLLATE, 
_NL_COLLATE_NRULES); 4394 1.1 christos if (nrules != 0) 4395 1.1 christos { 4396 1.1 christos const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE, 4397 1.1 christos _NL_COLLATE_COLLSEQWC); 4398 1.1 christos const unsigned char *extra = (const unsigned char *) 4399 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 4400 1.1 christos 4401 1.1 christos if (range_start_char < -1) 4402 1.1 christos { 4403 1.1 christos /* range_start is a collating symbol. */ 4404 1.1 christos int32_t *wextra; 4405 1.1 christos /* Retreive the index and get collation sequence value. */ 4406 1.1 christos wextra = (int32_t*)(extra + char_set[-range_start_char]); 4407 1.1 christos start_val = wextra[1 + *wextra]; 4408 1.1 christos } 4409 1.1 christos else 4410 1.1 christos start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char)); 4411 1.1 christos 4412 1.1 christos end_val = collseq_table_lookup (collseq, TRANSLATE (p[0])); 4413 1.1 christos 4414 1.1 christos /* Report an error if the range is empty and the syntax prohibits 4415 1.1 christos this. */ 4416 1.1 christos ret = ((syntax & RE_NO_EMPTY_RANGES) 4417 1.1 christos && (start_val > end_val))? REG_ERANGE : REG_NOERROR; 4418 1.1 christos 4419 1.1 christos /* Insert space to the end of the char_ranges. */ 4420 1.1 christos insert_space(2, b - char_set[5] - 2, b - 1); 4421 1.1 christos *(b - char_set[5] - 2) = (wchar_t)start_val; 4422 1.1 christos *(b - char_set[5] - 1) = (wchar_t)end_val; 4423 1.1 christos char_set[4]++; /* ranges_index */ 4424 1.1 christos } 4425 1.1 christos else 4426 1.1 christos # endif 4427 1.1 christos { 4428 1.1 christos range_start = (range_start_char >= 0)? TRANSLATE (range_start_char): 4429 1.1 christos range_start_char; 4430 1.1 christos range_end = TRANSLATE (p[0]); 4431 1.1 christos /* Report an error if the range is empty and the syntax prohibits 4432 1.1 christos this. 
*/ 4433 1.1 christos ret = ((syntax & RE_NO_EMPTY_RANGES) 4434 1.1 christos && (range_start > range_end))? REG_ERANGE : REG_NOERROR; 4435 1.1 christos 4436 1.1 christos /* Insert space to the end of the char_ranges. */ 4437 1.1 christos insert_space(2, b - char_set[5] - 2, b - 1); 4438 1.1 christos *(b - char_set[5] - 2) = range_start; 4439 1.1 christos *(b - char_set[5] - 1) = range_end; 4440 1.1 christos char_set[4]++; /* ranges_index */ 4441 1.1 christos } 4442 1.1 christos /* Have to increment the pointer into the pattern string, so the 4443 1.1 christos caller isn't still at the ending character. */ 4444 1.1 christos (*p_ptr)++; 4445 1.1 christos 4446 1.1 christos return ret; 4447 1.1 christos } 4448 1.1 christos #else /* BYTE */ 4449 1.1 christos /* Read the ending character of a range (in a bracket expression) from the 4450 1.1 christos uncompiled pattern *P_PTR (which ends at PEND). We assume the 4451 1.1 christos starting character is in `P[-2]'. (`P[-1]' is the character `-'.) 4452 1.1 christos Then we set the translation of all bits between the starting and 4453 1.1 christos ending characters (inclusive) in the compiled pattern B. 4454 1.1 christos 4455 1.1 christos Return an error code. 4456 1.1 christos 4457 1.1 christos We use these short variable names so we can use the same macros as 4458 1.1 christos `regex_compile' itself. 
*/ 4459 1.1 christos 4460 1.1 christos static reg_errcode_t 4461 1.1 christos byte_compile_range (unsigned int range_start_char, const char **p_ptr, 4462 1.1 christos const char *pend, RE_TRANSLATE_TYPE translate, 4463 1.1 christos reg_syntax_t syntax, unsigned char *b) 4464 1.1 christos { 4465 1.1 christos unsigned this_char; 4466 1.1 christos const char *p = *p_ptr; 4467 1.1 christos reg_errcode_t ret; 4468 1.1 christos # if _LIBC 4469 1.1 christos const unsigned char *collseq; 4470 1.1 christos unsigned int start_colseq; 4471 1.1 christos unsigned int end_colseq; 4472 1.1 christos # else 4473 1.1 christos unsigned end_char; 4474 1.1 christos # endif 4475 1.1 christos 4476 1.1 christos if (p == pend) 4477 1.1 christos return REG_ERANGE; 4478 1.1 christos 4479 1.1 christos /* Have to increment the pointer into the pattern string, so the 4480 1.1 christos caller isn't still at the ending character. */ 4481 1.1 christos (*p_ptr)++; 4482 1.1 christos 4483 1.1 christos /* Report an error if the range is empty and the syntax prohibits this. */ 4484 1.1 christos ret = syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; 4485 1.1 christos 4486 1.1 christos # if _LIBC 4487 1.1 christos collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 4488 1.1 christos _NL_COLLATE_COLLSEQMB); 4489 1.1 christos 4490 1.1 christos start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)]; 4491 1.1 christos end_colseq = collseq[(unsigned char) TRANSLATE (p[0])]; 4492 1.1 christos for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) 4493 1.1 christos { 4494 1.1 christos unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)]; 4495 1.1 christos 4496 1.1 christos if (start_colseq <= this_colseq && this_colseq <= end_colseq) 4497 1.1 christos { 4498 1.1 christos SET_LIST_BIT (TRANSLATE (this_char)); 4499 1.1 christos ret = REG_NOERROR; 4500 1.1 christos } 4501 1.1 christos } 4502 1.1 christos # else 4503 1.1 christos /* Here we see why `this_char' has to be larger than an `unsigned 4504 1.1 christos char' -- we would otherwise go into an infinite loop, since all 4505 1.1 christos characters <= 0xff. */ 4506 1.1 christos range_start_char = TRANSLATE (range_start_char); 4507 1.1 christos /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE, 4508 1.1 christos and some compilers cast it to int implicitly, so following for_loop 4509 1.1 christos may fall to (almost) infinite loop. 4510 1.1 christos e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff. 4511 1.1 christos To avoid this, we cast p[0] to unsigned int and truncate it. 
*/ 4512 1.1 christos end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1)); 4513 1.1 christos 4514 1.1 christos for (this_char = range_start_char; this_char <= end_char; ++this_char) 4515 1.1 christos { 4516 1.1 christos SET_LIST_BIT (TRANSLATE (this_char)); 4517 1.1 christos ret = REG_NOERROR; 4518 1.1 christos } 4519 1.1 christos # endif 4520 1.1 christos 4521 1.1 christos return ret; 4522 1.1 christos } 4523 1.1 christos #endif /* WCHAR */ 4524 1.1 christos 4525 1.1 christos /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 4527 1.1 christos BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 4528 1.1 christos characters can start a string that matches the pattern. This fastmap 4529 1.1 christos is used by re_search to skip quickly over impossible starting points. 4530 1.1 christos 4531 1.1 christos The caller must supply the address of a (1 << BYTEWIDTH)-byte data 4532 1.1 christos area as BUFP->fastmap. 4533 1.1 christos 4534 1.1 christos We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 4535 1.1 christos the pattern buffer. 4536 1.1 christos 4537 1.1 christos Returns 0 if we succeed, -2 if an internal error. */ 4538 1.1 christos 4539 1.1 christos #ifdef WCHAR 4540 1.1 christos /* local function for re_compile_fastmap. 4541 1.1 christos truncate wchar_t character to char. */ 4542 1.1 christos static unsigned char truncate_wchar (CHAR_T c); 4543 1.1 christos 4544 1.1 christos static unsigned char 4545 1.1 christos truncate_wchar (CHAR_T c) 4546 1.1 christos { 4547 1.1 christos unsigned char buf[MB_CUR_MAX]; 4548 1.1 christos mbstate_t state; 4549 1.1 christos int retval; 4550 1.1 christos memset (&state, '\0', sizeof (state)); 4551 1.1 christos # ifdef _LIBC 4552 1.1 christos retval = __wcrtomb (buf, c, &state); 4553 1.1 christos # else 4554 1.1 christos retval = wcrtomb (buf, c, &state); 4555 1.1 christos # endif 4556 1.1 christos return retval > 0 ? 
buf[0] : (unsigned char) c;
}
#endif /* WCHAR */

/* Worker behind re_compile_fastmap for one character width (PREFIX
   selects the byte or wide-character variant).

   Walks the compiled pattern in BUFP->buffer opcode by opcode.  Each
   alternative reached through an `on_failure_jump'-style opcode is
   remembered on a local failure stack and explored once the current path
   has either consumed a character or run off the end of the pattern.
   Every byte that can start a match is marked in BUFP->fastmap.

   Sets BUFP->fastmap_accurate and BUFP->can_be_null.
   Returns 0 on success, -2 if PUSH_PATTERN_OP reports failure.  */
static int
PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
{
  int j, k;
#ifdef MATCH_MAY_ALLOCATE
  PREFIX(fail_stack_type) fail_stack;
#endif
#ifndef REGEX_MALLOC
  /* NOTE(review): not referenced directly below; presumably used by the
     alloca-based stack macros -- confirm before removing.  */
  char *destination;
#endif

  register char *fastmap = bufp->fastmap;

#ifdef WCHAR
  /* We need to cast pattern to (wchar_t*), because we cast this compiled
     pattern to (char*) in regex_compile.  */
  UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
  register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
#else /* BYTE */
  UCHAR_T *pattern = bufp->buffer;
  register UCHAR_T *pend = pattern + bufp->used;
#endif /* WCHAR */
  UCHAR_T *p = pattern;

#ifdef REL_ALLOC
  /* This holds the pointer to the failure stack, when
     it is allocated relocatably.  */
  fail_stack_elt_t *failure_stack_ptr;
#endif

  /* Assume that each path through the pattern can be null until
     proven otherwise.  We set this false at the bottom of switch
     statement, to which we get only if a particular path doesn't
     match the empty string.  */
  boolean path_can_be_null = true;

  /* We aren't doing a `succeed_n' to begin with.  */
  boolean succeed_n_p = false;

  assert (fastmap != NULL && p != NULL);

  INIT_FAIL_STACK ();
  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
  bufp->can_be_null = 0;

  while (1)
    {
      if (p == pend || *p == (UCHAR_T) succeed)
        {
          /* We have reached the (effective) end of pattern.  */
          if (!FAIL_STACK_EMPTY ())
            {
              bufp->can_be_null |= path_can_be_null;

              /* Reset for next path.  */
              path_can_be_null = true;

              /* Resume at the most recently saved alternative.  */
              p = fail_stack.stack[--fail_stack.avail].pointer;

              continue;
            }
          else
            break;
        }

      /* We should never be about to go beyond the end of the pattern.  */
      assert (p < pend);

      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
        {

        /* I guess the idea here is to simply not bother with a fastmap
           if a backreference is used, since it's too hard to figure out
           the fastmap for the corresponding group.  Setting
           `can_be_null' stops `re_search_2' from using the fastmap, so
           that is all we do.  */
        case duplicate:
          bufp->can_be_null = 1;
          goto done;


      /* Following are the cases which match a character.  These end
         with `break'.  */

#ifdef WCHAR
        case exactn:
          /* Only the first byte of the multibyte form goes in the map.
             NOTE(review): p[0] is presumably the literal count and p[1]
             the first character -- confirm against regex_compile.  */
          fastmap[truncate_wchar(p[1])] = 1;
          break;
#else /* BYTE */
        case exactn:
          fastmap[p[1]] = 1;
          break;
#endif /* WCHAR */
#ifdef MBS_SUPPORT
        case exactn_bin:
          fastmap[p[1]] = 1;
          break;
#endif

#ifdef WCHAR
        /* It is hard to distinguish fastmap from (multi byte) characters
           which depends on current locale.  So give up on the fastmap
           for these opcodes, the same way `duplicate' does.  */
        case charset:
        case charset_not:
        case wordchar:
        case notwordchar:
          bufp->can_be_null = 1;
          goto done;
#else /* BYTE */
        case charset:
          /* *p is the bitmap length in bytes; copy every set bit.  */
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
              fastmap[j] = 1;
          break;


        case charset_not:
          /* Chars beyond end of map must be allowed.  */
          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
            fastmap[j] = 1;

          /* Within the map, allow every character whose bit is clear.  */
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
              fastmap[j] = 1;
          break;


        case wordchar:
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) == Sword)
              fastmap[j] = 1;
          break;


        case notwordchar:
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) != Sword)
              fastmap[j] = 1;
          break;
#endif /* WCHAR */

        case anychar:
          {
            /* Remember the newline entry so it can be restored below.  */
            int fastmap_newline = fastmap['\n'];

            /* `.' matches anything ...  */
            for (j = 0; j < (1 << BYTEWIDTH); j++)
              fastmap[j] = 1;

            /* ... except perhaps newline.  */
            if (!(bufp->syntax & RE_DOT_NEWLINE))
              fastmap['\n'] = fastmap_newline;

            /* Return if we have already set `can_be_null'; if we have,
               then the fastmap is irrelevant.  Something's wrong here.
               NOTE(review): this `else' pairs with the RE_DOT_NEWLINE
               test, so the early exit is only taken when `.' also
               matches newline.  */
            else if (bufp->can_be_null)
              goto done;

            /* Otherwise, have to check alternative paths.  */
            break;
          }

#ifdef emacs
        case syntaxspec:
          k = *p++;
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) == (enum syntaxcode) k)
              fastmap[j] = 1;
          break;


        case notsyntaxspec:
          k = *p++;
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) != (enum syntaxcode) k)
              fastmap[j] = 1;
          break;


      /* All cases after this match the empty string.  These end with
         `continue'.  */


        case before_dot:
        case at_dot:
        case after_dot:
          continue;
#endif /* emacs */


        case no_op:
        case begline:
        case endline:
        case begbuf:
        case endbuf:
        case wordbound:
        case notwordbound:
        case wordbeg:
        case wordend:
        case push_dummy_failure:
          continue;


        case jump_n:
        case pop_failure_jump:
        case maybe_pop_jump:
        case jump:
        case jump_past_alt:
        case dummy_failure_jump:
          EXTRACT_NUMBER_AND_INCR (j, p);
          p += j;
          if (j > 0)
            continue;

          /* Jump backward implies we just went through the body of a
             loop and matched nothing.  Opcode jumped to should be
             `on_failure_jump' or `succeed_n'.  Just treat it like an
             ordinary jump.  For a * loop, it has pushed its failure
             point already; if so, discard that as redundant.  */
          if ((re_opcode_t) *p != on_failure_jump
              && (re_opcode_t) *p != succeed_n)
            continue;

          /* Step over that opcode and follow its own jump offset.  */
          p++;
          EXTRACT_NUMBER_AND_INCR (j, p);
          p += j;

          /* If what's on the stack is where we are now, pop it.  */
          if (!FAIL_STACK_EMPTY ()
              && fail_stack.stack[fail_stack.avail - 1].pointer == p)
            fail_stack.avail--;

          continue;


        case on_failure_jump:
        case on_failure_keep_string_jump:
        handle_on_failure_jump:
          EXTRACT_NUMBER_AND_INCR (j, p);

          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
             end of the pattern.  We don't want to push such a point,
             since when we restore it above, entering the switch will
             increment `p' past the end of the pattern.  We don't need
             to push such a point since we obviously won't find any more
             fastmap entries beyond `pend'.  Such a pattern can match
             the null string, though.  */
          if (p + j < pend)
            {
              if (!PUSH_PATTERN_OP (p + j, fail_stack))
                {
                  RESET_FAIL_STACK ();
                  return -2;
                }
            }
          else
            bufp->can_be_null = 1;

          /* Arrived here from `succeed_n' with a zero count: the count
             operand still follows the jump offset.  */
          if (succeed_n_p)
            {
              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
              succeed_n_p = false;
            }

          continue;


        case succeed_n:
          /* Get to the number of times to succeed.  */
          p += OFFSET_ADDRESS_SIZE;

          /* Increment p past the n for when k != 0.  */
          EXTRACT_NUMBER_AND_INCR (k, p);
          if (k == 0)
            {
              /* Rewind to the jump offset and handle this opcode as an
                 `on_failure_jump'.  */
              p -= 2 * OFFSET_ADDRESS_SIZE;
              succeed_n_p = true;  /* Spaghetti code alert.  */
              goto handle_on_failure_jump;
            }
          continue;


        case set_number_at:
          p += 2 * OFFSET_ADDRESS_SIZE;
          continue;


        case start_memory:
        case stop_memory:
          p += 2;
          continue;


        default:
          abort (); /* We have listed all the cases.  */
        } /* switch *p++ */

      /* Getting here means we have found the possible starting
         characters for one path of the pattern -- and that the empty
         string does not match.  We need not follow this path further.
         Instead, look at the next alternative (remembered on the
         stack), or quit if no more.  The test at the top of the loop
         does these things.  */
      path_can_be_null = false;
      p = pend;
    } /* while p */

  /* Set `can_be_null' for the last path (also the first path, if the
     pattern is empty).  */
  bufp->can_be_null |= path_can_be_null;

 done:
  RESET_FAIL_STACK ();
  return 0;
}

#else /* not INSIDE_RECURSION */

/* Public entry point: compute the fastmap for BUFP.  With MBS_SUPPORT
   the wide-character worker is used when MB_CUR_MAX is not 1; otherwise
   the byte worker is used.  The worker's result is returned as is.  */
int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap(bufp);
  else
# endif
    return byte_re_compile_fastmap(bufp);
} /* re_compile_fastmap */
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   this memory for recording register information.  STARTS and ENDS
   must be allocated using the malloc library routine, and must each
   be at least NUM_REGS * sizeof (regoff_t) bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   PATTERN_BUFFER will allocate its own register data, without
   freeing the old data.
*/ 4907 1.1 christos 4908 1.1 christos void 4909 1.1 christos re_set_registers (struct re_pattern_buffer *bufp, 4910 1.1 christos struct re_registers *regs, unsigned num_regs, 4911 1.1 christos regoff_t *starts, regoff_t *ends) 4912 1.1 christos { 4913 1.1 christos if (num_regs) 4914 1.1 christos { 4915 1.1 christos bufp->regs_allocated = REGS_REALLOCATE; 4916 1.1 christos regs->num_regs = num_regs; 4917 1.1 christos regs->start = starts; 4918 1.1 christos regs->end = ends; 4919 1.1 christos } 4920 1.1 christos else 4921 1.1 christos { 4922 1.1 christos bufp->regs_allocated = REGS_UNALLOCATED; 4923 1.1 christos regs->num_regs = 0; 4924 1.1 christos regs->start = regs->end = (regoff_t *) 0; 4925 1.1 christos } 4926 1.1 christos } 4927 1.1 christos #ifdef _LIBC 4928 1.1 christos weak_alias (__re_set_registers, re_set_registers) 4929 1.1 christos #endif 4930 1.1 christos 4931 1.1 christos /* Searching routines. */ 4933 1.1 christos 4934 1.1 christos /* Like re_search_2, below, but only one string is specified, and 4935 1.1 christos doesn't let you say where to stop matching. */ 4936 1.1 christos 4937 1.1 christos int 4938 1.1 christos re_search (struct re_pattern_buffer *bufp, const char *string, int size, 4939 1.1 christos int startpos, int range, struct re_registers *regs) 4940 1.1 christos { 4941 1.1 christos return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 4942 1.1 christos regs, size); 4943 1.1 christos } 4944 1.1 christos #ifdef _LIBC 4945 1.1 christos weak_alias (__re_search, re_search) 4946 1.1 christos #endif 4947 1.1 christos 4948 1.1 christos 4949 1.1 christos /* Using the compiled pattern in BUFP->buffer, first tries to match the 4950 1.1 christos virtual concatenation of STRING1 and STRING2, starting first at index 4951 1.1 christos STARTPOS, then at STARTPOS + 1, and so on. 4952 1.1 christos 4953 1.1 christos STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 
4954 1.1 christos 4955 1.1 christos RANGE is how far to scan while trying to match. RANGE = 0 means try 4956 1.1 christos only at STARTPOS; in general, the last start tried is STARTPOS + 4957 1.1 christos RANGE. 4958 1.1 christos 4959 1.1 christos In REGS, return the indices of the virtual concatenation of STRING1 4960 1.1 christos and STRING2 that matched the entire BUFP->buffer and its contained 4961 1.1 christos subexpressions. 4962 1.1 christos 4963 1.1 christos Do not consider matching one past the index STOP in the virtual 4964 1.1 christos concatenation of STRING1 and STRING2. 4965 1.1 christos 4966 1.1 christos We return either the position in the strings at which the match was 4967 1.1 christos found, -1 if no match, or -2 if error (such as failure 4968 1.1 christos stack overflow). */ 4969 1.1 christos 4970 1.1 christos int 4971 1.1 christos re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, 4972 1.1 christos const char *string2, int size2, int startpos, int range, 4973 1.1 christos struct re_registers *regs, int stop) 4974 1.1 christos { 4975 1.1 christos # ifdef MBS_SUPPORT 4976 1.1 christos if (MB_CUR_MAX != 1) 4977 1.1 christos return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos, 4978 1.1 christos range, regs, stop); 4979 1.1.1.2 christos else 4980 1.1 christos # endif 4981 1.1 christos return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos, 4982 1.1 christos range, regs, stop); 4983 1.1 christos } /* re_search_2 */ 4984 1.1 christos #ifdef _LIBC 4985 1.1 christos weak_alias (__re_search_2, re_search_2) 4986 1.1 christos #endif 4987 1.1 christos 4988 1.1 christos #endif /* not INSIDE_RECURSION */ 4989 1.1 christos 4990 1.1 christos #ifdef INSIDE_RECURSION 4991 1.1 christos 4992 1.1 christos #ifdef MATCH_MAY_ALLOCATE 4993 1.1 christos # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL 4994 1.1 christos #else 4995 1.1 christos # define FREE_VAR(var) free (var); var = NULL 4996 1.1 
christos #endif 4997 1.1 christos 4998 1.1 christos #ifdef WCHAR 4999 1.1 christos # define MAX_ALLOCA_SIZE 2000 5000 1.1 christos 5001 1.1 christos # define FREE_WCS_BUFFERS() \ 5002 1.1 christos do { \ 5003 1.1 christos if (size1 > MAX_ALLOCA_SIZE) \ 5004 1.1 christos { \ 5005 1.1 christos free (wcs_string1); \ 5006 1.1 christos free (mbs_offset1); \ 5007 1.1 christos } \ 5008 1.1 christos else \ 5009 1.1 christos { \ 5010 1.1 christos FREE_VAR (wcs_string1); \ 5011 1.1 christos FREE_VAR (mbs_offset1); \ 5012 1.1 christos } \ 5013 1.1 christos if (size2 > MAX_ALLOCA_SIZE) \ 5014 1.1 christos { \ 5015 1.1 christos free (wcs_string2); \ 5016 1.1 christos free (mbs_offset2); \ 5017 1.1 christos } \ 5018 1.1 christos else \ 5019 1.1 christos { \ 5020 1.1 christos FREE_VAR (wcs_string2); \ 5021 1.1 christos FREE_VAR (mbs_offset2); \ 5022 1.1 christos } \ 5023 1.1 christos } while (0) 5024 1.1 christos 5025 1.1 christos #endif 5026 1.1 christos 5027 1.1 christos 5028 1.1 christos static int 5029 1.1 christos PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1, 5030 1.1 christos int size1, const char *string2, int size2, 5031 1.1 christos int startpos, int range, 5032 1.1 christos struct re_registers *regs, int stop) 5033 1.1 christos { 5034 1.1 christos int val; 5035 1.1 christos register char *fastmap = bufp->fastmap; 5036 1.1 christos register RE_TRANSLATE_TYPE translate = bufp->translate; 5037 1.1 christos int total_size = size1 + size2; 5038 1.1 christos int endpos = startpos + range; 5039 1.1 christos #ifdef WCHAR 5040 1.1 christos /* We need wchar_t* buffers correspond to cstring1, cstring2. */ 5041 1.1 christos wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL; 5042 1.1 christos /* We need the size of wchar_t buffers correspond to csize1, csize2. */ 5043 1.1 christos int wcs_size1 = 0, wcs_size2 = 0; 5044 1.1 christos /* offset buffer for optimizatoin. See convert_mbs_to_wc. 
*/ 5045 1.1 christos int *mbs_offset1 = NULL, *mbs_offset2 = NULL; 5046 1.1 christos /* They hold whether each wchar_t is binary data or not. */ 5047 1.1 christos char *is_binary = NULL; 5048 1.1 christos #endif /* WCHAR */ 5049 1.1 christos 5050 1.1 christos /* Check for out-of-range STARTPOS. */ 5051 1.1 christos if (startpos < 0 || startpos > total_size) 5052 1.1 christos return -1; 5053 1.1 christos 5054 1.1 christos /* Fix up RANGE if it might eventually take us outside 5055 1.1 christos the virtual concatenation of STRING1 and STRING2. 5056 1.1 christos Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ 5057 1.1 christos if (endpos < 0) 5058 1.1 christos range = 0 - startpos; 5059 1.1 christos else if (endpos > total_size) 5060 1.1 christos range = total_size - startpos; 5061 1.1 christos 5062 1.1 christos /* If the search isn't to be a backwards one, don't waste time in a 5063 1.1 christos search for a pattern that must be anchored. */ 5064 1.1 christos if (bufp->used > 0 && range > 0 5065 1.1 christos && ((re_opcode_t) bufp->buffer[0] == begbuf 5066 1.1 christos /* `begline' is like `begbuf' if it cannot match at newlines. */ 5067 1.1 christos || ((re_opcode_t) bufp->buffer[0] == begline 5068 1.1 christos && !bufp->newline_anchor))) 5069 1.1 christos { 5070 1.1 christos if (startpos > 0) 5071 1.1 christos return -1; 5072 1.1 christos else 5073 1.1 christos range = 1; 5074 1.1 christos } 5075 1.1 christos 5076 1.1 christos #ifdef emacs 5077 1.1 christos /* In a forward search for something that starts with \=. 5078 1.1 christos don't keep searching past point. */ 5079 1.1 christos if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 5080 1.1 christos { 5081 1.1 christos range = PT - startpos; 5082 1.1 christos if (range <= 0) 5083 1.1 christos return -1; 5084 1.1 christos } 5085 1.1 christos #endif /* emacs */ 5086 1.1 christos 5087 1.1 christos /* Update the fastmap now if not correct already. 
*/ 5088 1.1 christos if (fastmap && !bufp->fastmap_accurate) 5089 1.1 christos if (re_compile_fastmap (bufp) == -2) 5090 1.1 christos return -2; 5091 1.1 christos 5092 1.1 christos #ifdef WCHAR 5093 1.1 christos /* Allocate wchar_t array for wcs_string1 and wcs_string2 and 5094 1.1 christos fill them with converted string. */ 5095 1.1 christos if (size1 != 0) 5096 1.1 christos { 5097 1.1 christos if (size1 > MAX_ALLOCA_SIZE) 5098 1.1 christos { 5099 1.1 christos wcs_string1 = TALLOC (size1 + 1, CHAR_T); 5100 1.1 christos mbs_offset1 = TALLOC (size1 + 1, int); 5101 1.1 christos is_binary = TALLOC (size1 + 1, char); 5102 1.1 christos } 5103 1.1 christos else 5104 1.1 christos { 5105 1.1 christos wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T); 5106 1.1 christos mbs_offset1 = REGEX_TALLOC (size1 + 1, int); 5107 1.1 christos is_binary = REGEX_TALLOC (size1 + 1, char); 5108 1.1 christos } 5109 1.1 christos if (!wcs_string1 || !mbs_offset1 || !is_binary) 5110 1.1 christos { 5111 1.1 christos if (size1 > MAX_ALLOCA_SIZE) 5112 1.1 christos { 5113 1.1 christos free (wcs_string1); 5114 1.1 christos free (mbs_offset1); 5115 1.1 christos free (is_binary); 5116 1.1 christos } 5117 1.1 christos else 5118 1.1 christos { 5119 1.1 christos FREE_VAR (wcs_string1); 5120 1.1 christos FREE_VAR (mbs_offset1); 5121 1.1 christos FREE_VAR (is_binary); 5122 1.1 christos } 5123 1.1 christos return -2; 5124 1.1 christos } 5125 1.1 christos wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1, 5126 1.1 christos mbs_offset1, is_binary); 5127 1.1 christos wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */ 5128 1.1 christos if (size1 > MAX_ALLOCA_SIZE) 5129 1.1 christos free (is_binary); 5130 1.1 christos else 5131 1.1 christos FREE_VAR (is_binary); 5132 1.1 christos } 5133 1.1 christos if (size2 != 0) 5134 1.1 christos { 5135 1.1 christos if (size2 > MAX_ALLOCA_SIZE) 5136 1.1 christos { 5137 1.1 christos wcs_string2 = TALLOC (size2 + 1, CHAR_T); 5138 1.1 christos mbs_offset2 = TALLOC 
(size2 + 1, int); 5139 1.1 christos is_binary = TALLOC (size2 + 1, char); 5140 1.1 christos } 5141 1.1 christos else 5142 1.1 christos { 5143 1.1 christos wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T); 5144 1.1 christos mbs_offset2 = REGEX_TALLOC (size2 + 1, int); 5145 1.1 christos is_binary = REGEX_TALLOC (size2 + 1, char); 5146 1.1 christos } 5147 1.1 christos if (!wcs_string2 || !mbs_offset2 || !is_binary) 5148 1.1 christos { 5149 1.1 christos FREE_WCS_BUFFERS (); 5150 1.1 christos if (size2 > MAX_ALLOCA_SIZE) 5151 1.1 christos free (is_binary); 5152 1.1 christos else 5153 1.1 christos FREE_VAR (is_binary); 5154 1.1 christos return -2; 5155 1.1 christos } 5156 1.1 christos wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2, 5157 1.1 christos mbs_offset2, is_binary); 5158 1.1 christos wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */ 5159 1.1 christos if (size2 > MAX_ALLOCA_SIZE) 5160 1.1 christos free (is_binary); 5161 1.1 christos else 5162 1.1 christos FREE_VAR (is_binary); 5163 1.1 christos } 5164 1.1 christos #endif /* WCHAR */ 5165 1.1 christos 5166 1.1 christos 5167 1.1 christos /* Loop through the string, looking for a place to start matching. */ 5168 1.1 christos for (;;) 5169 1.1 christos { 5170 1.1 christos /* If a fastmap is supplied, skip quickly over characters that 5171 1.1 christos cannot be the start of a match. If the pattern can match the 5172 1.1 christos null string, however, we don't need to skip characters; we want 5173 1.1 christos the first null string. */ 5174 1.1 christos if (fastmap && startpos < total_size && !bufp->can_be_null) 5175 1.1 christos { 5176 1.1 christos if (range > 0) /* Searching forwards. 
*/ 5177 1.1 christos { 5178 1.1 christos register const char *d; 5179 1.1 christos register int lim = 0; 5180 1.1 christos int irange = range; 5181 1.1 christos 5182 1.1 christos if (startpos < size1 && startpos + range >= size1) 5183 1.1 christos lim = range - (size1 - startpos); 5184 1.1 christos 5185 1.1 christos d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 5186 1.1 christos 5187 1.1 christos /* Written out as an if-else to avoid testing `translate' 5188 1.1 christos inside the loop. */ 5189 1.1 christos if (translate) 5190 1.1 christos while (range > lim 5191 1.1 christos && !fastmap[(unsigned char) 5192 1.1 christos translate[(unsigned char) *d++]]) 5193 1.1 christos range--; 5194 1.1 christos else 5195 1.1 christos while (range > lim && !fastmap[(unsigned char) *d++]) 5196 1.1 christos range--; 5197 1.1 christos 5198 1.1 christos startpos += irange - range; 5199 1.1 christos } 5200 1.1 christos else /* Searching backwards. */ 5201 1.1 christos { 5202 1.1 christos register CHAR_T c = (size1 == 0 || startpos >= size1 5203 1.1 christos ? string2[startpos - size1] 5204 1.1 christos : string1[startpos]); 5205 1.1 christos 5206 1.1 christos if (!fastmap[(unsigned char) TRANSLATE (c)]) 5207 1.1 christos goto advance; 5208 1.1 christos } 5209 1.1 christos } 5210 1.1 christos 5211 1.1 christos /* If can't match the null string, and that's all we have left, fail. 
*/ 5212 1.1 christos if (range >= 0 && startpos == total_size && fastmap 5213 1.1 christos && !bufp->can_be_null) 5214 1.1 christos { 5215 1.1 christos #ifdef WCHAR 5216 1.1 christos FREE_WCS_BUFFERS (); 5217 1.1 christos #endif 5218 1.1 christos return -1; 5219 1.1 christos } 5220 1.1 christos 5221 1.1 christos #ifdef WCHAR 5222 1.1 christos val = wcs_re_match_2_internal (bufp, string1, size1, string2, 5223 1.1 christos size2, startpos, regs, stop, 5224 1.1 christos wcs_string1, wcs_size1, 5225 1.1 christos wcs_string2, wcs_size2, 5226 1.1 christos mbs_offset1, mbs_offset2); 5227 1.1 christos #else /* BYTE */ 5228 1.1 christos val = byte_re_match_2_internal (bufp, string1, size1, string2, 5229 1.1 christos size2, startpos, regs, stop); 5230 1.1 christos #endif /* BYTE */ 5231 1.1 christos 5232 1.1 christos #ifndef REGEX_MALLOC 5233 1.1 christos # ifdef C_ALLOCA 5234 1.1 christos alloca (0); 5235 1.1 christos # endif 5236 1.1 christos #endif 5237 1.1 christos 5238 1.1 christos if (val >= 0) 5239 1.1 christos { 5240 1.1 christos #ifdef WCHAR 5241 1.1 christos FREE_WCS_BUFFERS (); 5242 1.1 christos #endif 5243 1.1 christos return startpos; 5244 1.1 christos } 5245 1.1 christos 5246 1.1 christos if (val == -2) 5247 1.1 christos { 5248 1.1 christos #ifdef WCHAR 5249 1.1 christos FREE_WCS_BUFFERS (); 5250 1.1 christos #endif 5251 1.1 christos return -2; 5252 1.1 christos } 5253 1.1 christos 5254 1.1 christos advance: 5255 1.1 christos if (!range) 5256 1.1 christos break; 5257 1.1 christos else if (range > 0) 5258 1.1 christos { 5259 1.1 christos range--; 5260 1.1 christos startpos++; 5261 1.1 christos } 5262 1.1 christos else 5263 1.1 christos { 5264 1.1 christos range++; 5265 1.1 christos startpos--; 5266 1.1 christos } 5267 1.1 christos } 5268 1.1 christos #ifdef WCHAR 5269 1.1 christos FREE_WCS_BUFFERS (); 5270 1.1 christos #endif 5271 1.1 christos return -1; 5272 1.1 christos } 5273 1.1 christos 5274 1.1 christos #ifdef WCHAR 5275 1.1 christos /* This converts PTR, 
a pointer into one of the search wchar_t strings
   `string1' and `string2' into a multibyte string offset from the
   beginning of that string.  We use mbs_offset to optimize.
   See convert_mbs_to_wcs.  A missing (NULL) offset table yields offset 0
   for that string.  */
# define POINTER_TO_OFFSET(ptr)                                         \
  (FIRST_STRING_P (ptr)                                                 \
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0)) \
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0) \
                 + csize1)))
#else /* BYTE */
/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2' into an offset from the beginning of that string.  */
# define POINTER_TO_OFFSET(ptr)                 \
  (FIRST_STRING_P (ptr)                         \
   ? ((regoff_t) ((ptr) - string1))             \
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif /* WCHAR */

/* Macros for dealing with the split strings in re_match_2.  They refer
   to locals of the function they are expanded in (d, dend, string1,
   string2, size1, size2, end1, end2, end_match_1, end_match_2) and to
   its `fail' label.  */

#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary.  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* End of string2 => fail.  */                                    \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* End of string1 => advance to string2.  */                      \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }

/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)


/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX.  A character counts as
   word-constituent here if iswalnum accepts it or if it is L'_'.  */
# define WORDCHAR_P(d)                                                  \
  (iswalnum ((wint_t)((d) == end1 ? *string2                            \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0             \
   || ((d) == end1 ? *string2                                           \
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d)                                                  \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif

/* Free everything we malloc.  FREE_VARIABLES has four variants, chosen
   by MATCH_MAY_ALLOCATE and WCHAR.  The WCHAR variants also release the
   wide-character string copies and offset tables unless
   `cant_free_wcs_buf' is set.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
# endif /* WCHAR */
#else
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */

/* These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)

#else /* not INSIDE_RECURSION */

/* Matching routines.  */

#ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.
*/ 5414 1.1 christos 5415 1.1 christos int 5416 1.1 christos re_match (struct re_pattern_buffer *bufp, const char *string, 5417 1.1 christos int size, int pos, struct re_registers *regs) 5418 1.1 christos { 5419 1.1 christos int result; 5420 1.1 christos # ifdef MBS_SUPPORT 5421 1.1 christos if (MB_CUR_MAX != 1) 5422 1.1 christos result = wcs_re_match_2_internal (bufp, NULL, 0, string, size, 5423 1.1 christos pos, regs, size, 5424 1.1 christos NULL, 0, NULL, 0, NULL, NULL); 5425 1.1 christos else 5426 1.1 christos # endif 5427 1.1 christos result = byte_re_match_2_internal (bufp, NULL, 0, string, size, 5428 1.1 christos pos, regs, size); 5429 1.1 christos # ifndef REGEX_MALLOC 5430 1.1 christos # ifdef C_ALLOCA 5431 1.1 christos alloca (0); 5432 1.1 christos # endif 5433 1.1 christos # endif 5434 1.1 christos return result; 5435 1.1 christos } 5436 1.1 christos # ifdef _LIBC 5437 1.1 christos weak_alias (__re_match, re_match) 5438 1.1 christos # endif 5439 1.1 christos #endif /* not emacs */ 5440 1.1 christos 5441 1.1 christos #endif /* not INSIDE_RECURSION */ 5442 1.1 christos 5443 1.1 christos #ifdef INSIDE_RECURSION 5444 1.1 christos static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p, 5445 1.1 christos UCHAR_T *end, 5446 1.1 christos PREFIX(register_info_type) *reg_info); 5447 1.1 christos static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p, 5448 1.1 christos UCHAR_T *end, 5449 1.1 christos PREFIX(register_info_type) *reg_info); 5450 1.1 christos static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p, 5451 1.1 christos UCHAR_T *end, 5452 1.1 christos PREFIX(register_info_type) *reg_info); 5453 1.1 christos static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, 5454 1.1 christos int len, char *translate); 5455 1.1 christos #else /* not INSIDE_RECURSION */ 5456 1.1 christos 5457 1.1 christos /* re_match_2 matches the compiled pattern in BUFP against the 5458 1.1 christos the (virtual) concatenation of STRING1 and 
STRING2 (of length SIZE1 5459 1.1 christos and SIZE2, respectively). We start matching at POS, and stop 5460 1.1 christos matching at STOP. 5461 1.1 christos 5462 1.1 christos If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 5463 1.1 christos store offsets for the substring each group matched in REGS. See the 5464 1.1 christos documentation for exactly how many groups we fill. 5465 1.1 christos 5466 1.1 christos We return -1 if no match, -2 if an internal error (such as the 5467 1.1 christos failure stack overflowing). Otherwise, we return the length of the 5468 1.1 christos matched substring. */ 5469 1.1 christos 5470 1.1 christos int 5471 1.1 christos re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, 5472 1.1 christos const char *string2, int size2, int pos, 5473 1.1 christos struct re_registers *regs, int stop) 5474 1.1 christos { 5475 1.1 christos int result; 5476 1.1 christos # ifdef MBS_SUPPORT 5477 1.1 christos if (MB_CUR_MAX != 1) 5478 1.1 christos result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2, 5479 1.1 christos pos, regs, stop, 5480 1.1 christos NULL, 0, NULL, 0, NULL, NULL); 5481 1.1 christos else 5482 1.1 christos # endif 5483 1.1 christos result = byte_re_match_2_internal (bufp, string1, size1, string2, size2, 5484 1.1 christos pos, regs, stop); 5485 1.1 christos 5486 1.1 christos #ifndef REGEX_MALLOC 5487 1.1 christos # ifdef C_ALLOCA 5488 1.1 christos alloca (0); 5489 1.1 christos # endif 5490 1.1 christos #endif 5491 1.1 christos return result; 5492 1.1 christos } 5493 1.1 christos #ifdef _LIBC 5494 1.1 christos weak_alias (__re_match_2, re_match_2) 5495 1.1 christos #endif 5496 1.1 christos 5497 1.1 christos #endif /* not INSIDE_RECURSION */ 5498 1.1 christos 5499 1.1 christos #ifdef INSIDE_RECURSION 5500 1.1 christos 5501 1.1 christos #ifdef WCHAR 5502 1.1 christos static int count_mbs_length (int *, int); 5503 1.1 christos 5504 1.1 christos /* This check the substring (from 0, to 
length) of the multibyte string, 5505 1.1 christos to which offset_buffer correspond. And count how many wchar_t_characters 5506 1.1 christos the substring occupy. We use offset_buffer to optimization. 5507 1.1 christos See convert_mbs_to_wcs. */ 5508 1.1 christos 5509 1.1 christos static int 5510 1.1 christos count_mbs_length(int *offset_buffer, int length) 5511 1.1 christos { 5512 1.1 christos int upper, lower; 5513 1.1 christos 5514 1.1 christos /* Check whether the size is valid. */ 5515 1.1 christos if (length < 0) 5516 1.1 christos return -1; 5517 1.1 christos 5518 1.1 christos if (offset_buffer == NULL) 5519 1.1 christos return 0; 5520 1.1 christos 5521 1.1 christos /* If there are no multibyte character, offset_buffer[i] == i. 5522 1.1 christos Optmize for this case. */ 5523 1.1 christos if (offset_buffer[length] == length) 5524 1.1 christos return length; 5525 1.1 christos 5526 1.1 christos /* Set up upper with length. (because for all i, offset_buffer[i] >= i) */ 5527 1.1 christos upper = length; 5528 1.1 christos lower = 0; 5529 1.1 christos 5530 1.1 christos while (true) 5531 1.1 christos { 5532 1.1 christos int middle = (lower + upper) / 2; 5533 1.1 christos if (middle == lower || middle == upper) 5534 1.1 christos break; 5535 1.1 christos if (offset_buffer[middle] > length) 5536 1.1 christos upper = middle; 5537 1.1 christos else if (offset_buffer[middle] < length) 5538 1.1 christos lower = middle; 5539 1.1 christos else 5540 1.1 christos return middle; 5541 1.1 christos } 5542 1.1 christos 5543 1.1 christos return -1; 5544 1.1 christos } 5545 1.1 christos #endif /* WCHAR */ 5546 1.1 christos 5547 1.1 christos /* This is a separate function so that we can force an alloca cleanup 5548 1.1 christos afterwards. 
*/ 5549 1.1 christos #ifdef WCHAR 5550 1.1 christos static int 5551 1.1 christos wcs_re_match_2_internal (struct re_pattern_buffer *bufp, 5552 1.1 christos const char *cstring1, int csize1, 5553 1.1 christos const char *cstring2, int csize2, 5554 1.1 christos int pos, 5555 1.1 christos struct re_registers *regs, 5556 1.1 christos int stop, 5557 1.1 christos /* string1 == string2 == NULL means string1/2, size1/2 and 5558 1.1 christos mbs_offset1/2 need seting up in this function. */ 5559 1.1 christos /* We need wchar_t* buffers correspond to cstring1, cstring2. */ 5560 1.1 christos wchar_t *string1, int size1, 5561 1.1 christos wchar_t *string2, int size2, 5562 1.1 christos /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ 5563 1.1 christos int *mbs_offset1, int *mbs_offset2) 5564 1.1 christos #else /* BYTE */ 5565 1.1 christos static int 5566 1.1 christos byte_re_match_2_internal (struct re_pattern_buffer *bufp, 5567 1.1 christos const char *string1, int size1, 5568 1.1 christos const char *string2, int size2, 5569 1.1 christos int pos, 5570 1.1 christos struct re_registers *regs, int stop) 5571 1.1 christos #endif /* BYTE */ 5572 1.1 christos { 5573 1.1 christos /* General temporaries. */ 5574 1.1 christos int mcnt; 5575 1.1 christos UCHAR_T *p1; 5576 1.1 christos #ifdef WCHAR 5577 1.1 christos /* They hold whether each wchar_t is binary data or not. */ 5578 1.1 christos char *is_binary = NULL; 5579 1.1 christos /* If true, we can't free string1/2, mbs_offset1/2. */ 5580 1.1 christos int cant_free_wcs_buf = 1; 5581 1.1 christos #endif /* WCHAR */ 5582 1.1 christos 5583 1.1 christos /* Just past the end of the corresponding string. */ 5584 1.1 christos const CHAR_T *end1, *end2; 5585 1.1 christos 5586 1.1 christos /* Pointers into string1 and string2, just past the last characters in 5587 1.1 christos each to consider matching. 
*/ 5588 1.1 christos const CHAR_T *end_match_1, *end_match_2; 5589 1.1 christos 5590 1.1 christos /* Where we are in the data, and the end of the current string. */ 5591 1.1 christos const CHAR_T *d, *dend; 5592 1.1 christos 5593 1.1 christos /* Where we are in the pattern, and the end of the pattern. */ 5594 1.1 christos #ifdef WCHAR 5595 1.1 christos UCHAR_T *pattern, *p; 5596 1.1 christos register UCHAR_T *pend; 5597 1.1 christos #else /* BYTE */ 5598 1.1 christos UCHAR_T *p = bufp->buffer; 5599 1.1 christos register UCHAR_T *pend = p + bufp->used; 5600 1.1.1.11 christos #endif /* WCHAR */ 5601 1.1 christos 5602 1.1 christos /* Mark the opcode just after a start_memory, so we can test for an 5603 1.1 christos empty subpattern when we get to the stop_memory. */ 5604 1.1 christos UCHAR_T *just_past_start_mem = 0; 5605 1.1 christos 5606 1.1 christos /* We use this to map every character in the string. */ 5607 1.1 christos RE_TRANSLATE_TYPE translate = bufp->translate; 5608 1.1 christos 5609 1.1 christos /* Failure point stack. Each place that can handle a failure further 5610 1.1 christos down the line pushes a failure point on this stack. It consists of 5611 1.1 christos restart, regend, and reg_info for all registers corresponding to 5612 1.1 christos the subexpressions we're currently inside, plus the number of such 5613 1.1 christos registers, and, finally, two char *'s. The first char * is where 5614 1.1 christos to resume scanning the pattern; the second one is where to resume 5615 1.1 christos scanning the strings. If the latter is zero, the failure point is 5616 1.1 christos a ``dummy''; if a failure happens and the failure point is a dummy, 5617 1.1 christos it gets discarded and the next one is tried. */ 5618 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. 
*/ 5619 1.1 christos PREFIX(fail_stack_type) fail_stack; 5620 1.1 christos #endif 5621 1.1 christos #ifdef DEBUG 5622 1.1 christos static unsigned failure_id; 5623 1.1 christos unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 5624 1.1 christos #endif 5625 1.1 christos 5626 1.1 christos #ifdef REL_ALLOC 5627 1.1 christos /* This holds the pointer to the failure stack, when 5628 1.1 christos it is allocated relocatably. */ 5629 1.1 christos fail_stack_elt_t *failure_stack_ptr; 5630 1.1 christos #endif 5631 1.1 christos 5632 1.1 christos /* We fill all the registers internally, independent of what we 5633 1.1 christos return, for use in backreferences. The number here includes 5634 1.1 christos an element for register zero. */ 5635 1.1 christos size_t num_regs = bufp->re_nsub + 1; 5636 1.1 christos 5637 1.1 christos /* The currently active registers. */ 5638 1.1 christos active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; 5639 1.1 christos active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; 5640 1.1 christos 5641 1.1 christos /* Information on the contents of registers. These are pointers into 5642 1.1 christos the input strings; they record just what was matched (on this 5643 1.1 christos attempt) by a subexpression part of the pattern, that is, the 5644 1.1 christos regnum-th regstart pointer points to where in the pattern we began 5645 1.1 christos matching and the regnum-th regend points to right after where we 5646 1.1 christos stopped matching the regnum-th subexpression. (The zeroth register 5647 1.1 christos keeps track of what the whole pattern matches.) */ 5648 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ 5649 1.1 christos const CHAR_T **regstart, **regend; 5650 1.1 christos #endif 5651 1.1 christos 5652 1.1 christos /* If a group that's operated upon by a repetition operator fails to 5653 1.1 christos match anything, then the register for its start will need to be 5654 1.1 christos restored because it will have been set to wherever in the string we 5655 1.1 christos are when we last see its open-group operator. Similarly for a 5656 1.1 christos register's end. */ 5657 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5658 1.1 christos const CHAR_T **old_regstart, **old_regend; 5659 1.1 christos #endif 5660 1.1 christos 5661 1.1 christos /* The is_active field of reg_info helps us keep track of which (possibly 5662 1.1 christos nested) subexpressions we are currently in. The matched_something 5663 1.1 christos field of reg_info[reg_num] helps us tell whether or not we have 5664 1.1 christos matched any of the pattern so far this time through the reg_num-th 5665 1.1 christos subexpression. These two fields get reset each time through any 5666 1.1 christos loop their register is in. */ 5667 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 5668 1.1 christos PREFIX(register_info_type) *reg_info; 5669 1.1 christos #endif 5670 1.1 christos 5671 1.1 christos /* The following record the register info as found in the above 5672 1.1 christos variables when we find a match better than any we've seen before. 5673 1.1 christos This happens as we backtrack through the failure points, which in 5674 1.1 christos turn happens only if we have not yet matched the entire string. */ 5675 1.1 christos unsigned best_regs_set = false; 5676 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5677 1.1 christos const CHAR_T **best_regstart, **best_regend; 5678 1.1 christos #endif 5679 1.1 christos 5680 1.1 christos /* Logically, this is `best_regend[0]'. 
But we don't want to have to 5681 1.1 christos allocate space for that if we're not allocating space for anything 5682 1.1 christos else (see below). Also, we never need info about register 0 for 5683 1.1 christos any of the other register vectors, and it seems rather a kludge to 5684 1.1 christos treat `best_regend' differently than the rest. So we keep track of 5685 1.1 christos the end of the best match so far in a separate variable. We 5686 1.1 christos initialize this to NULL so that when we backtrack the first time 5687 1.1 christos and need to test it, it's not garbage. */ 5688 1.1 christos const CHAR_T *match_end = NULL; 5689 1.1 christos 5690 1.1 christos /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 5691 1.1 christos int set_regs_matched_done = 0; 5692 1.1 christos 5693 1.1 christos /* Used when we pop values we don't care about. */ 5694 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5695 1.1 christos const CHAR_T **reg_dummy; 5696 1.1 christos PREFIX(register_info_type) *reg_info_dummy; 5697 1.1 christos #endif 5698 1.1 christos 5699 1.1 christos #ifdef DEBUG 5700 1.1 christos /* Counts the total number of registers pushed. */ 5701 1.1 christos unsigned num_regs_pushed = 0; 5702 1.1 christos #endif 5703 1.1 christos 5704 1.1 christos DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 5705 1.1 christos 5706 1.1 christos INIT_FAIL_STACK (); 5707 1.1 christos 5708 1.1 christos #ifdef MATCH_MAY_ALLOCATE 5709 1.1 christos /* Do not bother to initialize all the register variables if there are 5710 1.1 christos no groups in the pattern, as it takes a fair amount of time. If 5711 1.1 christos there are groups, we include space for register 0 (the whole 5712 1.1 christos pattern), even though we never use it, since it simplifies the 5713 1.1 christos array indexing. We should fix this. 
*/ 5714 1.1 christos if (bufp->re_nsub) 5715 1.1 christos { 5716 1.1 christos regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5717 1.1 christos regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5718 1.1 christos old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5719 1.1 christos old_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5720 1.1 christos best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5721 1.1 christos best_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5722 1.1 christos reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 5723 1.1 christos reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *); 5724 1.1 christos reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 5725 1.1 christos 5726 1.1 christos if (!(regstart && regend && old_regstart && old_regend && reg_info 5727 1.1 christos && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 5728 1.1 christos { 5729 1.1 christos FREE_VARIABLES (); 5730 1.1 christos return -2; 5731 1.1 christos } 5732 1.1 christos } 5733 1.1 christos else 5734 1.1 christos { 5735 1.1 christos /* We must initialize all our variables to NULL, so that 5736 1.1 christos `FREE_VARIABLES' doesn't try to free them. */ 5737 1.1 christos regstart = regend = old_regstart = old_regend = best_regstart 5738 1.1 christos = best_regend = reg_dummy = NULL; 5739 1.1 christos reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL; 5740 1.1 christos } 5741 1.1 christos #endif /* MATCH_MAY_ALLOCATE */ 5742 1.1 christos 5743 1.1 christos /* The starting position is bogus. 
*/ 5744 1.1 christos #ifdef WCHAR 5745 1.1 christos if (pos < 0 || pos > csize1 + csize2) 5746 1.1 christos #else /* BYTE */ 5747 1.1 christos if (pos < 0 || pos > size1 + size2) 5748 1.1 christos #endif 5749 1.1 christos { 5750 1.1 christos FREE_VARIABLES (); 5751 1.1 christos return -1; 5752 1.1 christos } 5753 1.1 christos 5754 1.1 christos #ifdef WCHAR 5755 1.1 christos /* Allocate wchar_t array for string1 and string2 and 5756 1.1 christos fill them with converted string. */ 5757 1.1 christos if (string1 == NULL && string2 == NULL) 5758 1.1 christos { 5759 1.1 christos /* We need seting up buffers here. */ 5760 1.1 christos 5761 1.1 christos /* We must free wcs buffers in this function. */ 5762 1.1 christos cant_free_wcs_buf = 0; 5763 1.1 christos 5764 1.1 christos if (csize1 != 0) 5765 1.1 christos { 5766 1.1 christos string1 = REGEX_TALLOC (csize1 + 1, CHAR_T); 5767 1.1 christos mbs_offset1 = REGEX_TALLOC (csize1 + 1, int); 5768 1.1 christos is_binary = REGEX_TALLOC (csize1 + 1, char); 5769 1.1 christos if (!string1 || !mbs_offset1 || !is_binary) 5770 1.1 christos { 5771 1.1 christos FREE_VAR (string1); 5772 1.1 christos FREE_VAR (mbs_offset1); 5773 1.1 christos FREE_VAR (is_binary); 5774 1.1 christos return -2; 5775 1.1 christos } 5776 1.1 christos } 5777 1.1 christos if (csize2 != 0) 5778 1.1 christos { 5779 1.1 christos string2 = REGEX_TALLOC (csize2 + 1, CHAR_T); 5780 1.1 christos mbs_offset2 = REGEX_TALLOC (csize2 + 1, int); 5781 1.1 christos is_binary = REGEX_TALLOC (csize2 + 1, char); 5782 1.1 christos if (!string2 || !mbs_offset2 || !is_binary) 5783 1.1 christos { 5784 1.1 christos FREE_VAR (string1); 5785 1.1 christos FREE_VAR (mbs_offset1); 5786 1.1 christos FREE_VAR (string2); 5787 1.1 christos FREE_VAR (mbs_offset2); 5788 1.1 christos FREE_VAR (is_binary); 5789 1.1 christos return -2; 5790 1.1 christos } 5791 1.1 christos size2 = convert_mbs_to_wcs(string2, cstring2, csize2, 5792 1.1 christos mbs_offset2, is_binary); 5793 1.1 christos 
string2[size2] = L'\0'; /* for a sentinel */ 5794 1.1 christos FREE_VAR (is_binary); 5795 1.1 christos } 5796 1.1 christos } 5797 1.1 christos 5798 1.1 christos /* We need to cast pattern to (wchar_t*), because we casted this compiled 5799 1.1 christos pattern to (char*) in regex_compile. */ 5800 1.1 christos p = pattern = (CHAR_T*)bufp->buffer; 5801 1.1 christos pend = (CHAR_T*)(bufp->buffer + bufp->used); 5802 1.1 christos 5803 1.1 christos #endif /* WCHAR */ 5804 1.1 christos 5805 1.1 christos /* Initialize subexpression text positions to -1 to mark ones that no 5806 1.1 christos start_memory/stop_memory has been seen for. Also initialize the 5807 1.1 christos register information struct. */ 5808 1.1 christos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 5809 1.1 christos { 5810 1.1 christos regstart[mcnt] = regend[mcnt] 5811 1.1 christos = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 5812 1.1 christos 5813 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 5814 1.1 christos IS_ACTIVE (reg_info[mcnt]) = 0; 5815 1.1 christos MATCHED_SOMETHING (reg_info[mcnt]) = 0; 5816 1.1 christos EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 5817 1.1 christos } 5818 1.1 christos 5819 1.1 christos /* We move `string1' into `string2' if the latter's empty -- but not if 5820 1.1 christos `string1' is null. */ 5821 1.1 christos if (size2 == 0 && string1 != NULL) 5822 1.1 christos { 5823 1.1 christos string2 = string1; 5824 1.1 christos size2 = size1; 5825 1.1 christos string1 = 0; 5826 1.1 christos size1 = 0; 5827 1.1 christos #ifdef WCHAR 5828 1.1 christos mbs_offset2 = mbs_offset1; 5829 1.1 christos csize2 = csize1; 5830 1.1 christos mbs_offset1 = NULL; 5831 1.1 christos csize1 = 0; 5832 1.1 christos #endif 5833 1.1 christos } 5834 1.1 christos end1 = string1 + size1; 5835 1.1 christos end2 = string2 + size2; 5836 1.1 christos 5837 1.1 christos /* Compute where to stop matching, within the two strings. 
*/ 5838 1.1 christos #ifdef WCHAR 5839 1.1 christos if (stop <= csize1) 5840 1.1 christos { 5841 1.1 christos mcnt = count_mbs_length(mbs_offset1, stop); 5842 1.1 christos end_match_1 = string1 + mcnt; 5843 1.1 christos end_match_2 = string2; 5844 1.1 christos } 5845 1.1 christos else 5846 1.1 christos { 5847 1.1 christos if (stop > csize1 + csize2) 5848 1.1 christos stop = csize1 + csize2; 5849 1.1 christos end_match_1 = end1; 5850 1.1 christos mcnt = count_mbs_length(mbs_offset2, stop-csize1); 5851 1.1 christos end_match_2 = string2 + mcnt; 5852 1.1 christos } 5853 1.1 christos if (mcnt < 0) 5854 1.1 christos { /* count_mbs_length return error. */ 5855 1.1 christos FREE_VARIABLES (); 5856 1.1 christos return -1; 5857 1.1 christos } 5858 1.1 christos #else 5859 1.1 christos if (stop <= size1) 5860 1.1 christos { 5861 1.1 christos end_match_1 = string1 + stop; 5862 1.1 christos end_match_2 = string2; 5863 1.1 christos } 5864 1.1 christos else 5865 1.1 christos { 5866 1.1 christos end_match_1 = end1; 5867 1.1 christos end_match_2 = string2 + stop - size1; 5868 1.1 christos } 5869 1.1 christos #endif /* WCHAR */ 5870 1.1 christos 5871 1.1 christos /* `p' scans through the pattern as `d' scans through the data. 5872 1.1 christos `dend' is the end of the input string that `d' points within. `d' 5873 1.1 christos is advanced into the following input string whenever necessary, but 5874 1.1 christos this happens before fetching; therefore, at the beginning of the 5875 1.1 christos loop, `d' can be pointing at the end of a string, but it cannot 5876 1.1 christos equal `string2'. 
*/ 5877 1.1 christos #ifdef WCHAR 5878 1.1 christos if (size1 > 0 && pos <= csize1) 5879 1.1 christos { 5880 1.1 christos mcnt = count_mbs_length(mbs_offset1, pos); 5881 1.1 christos d = string1 + mcnt; 5882 1.1 christos dend = end_match_1; 5883 1.1 christos } 5884 1.1 christos else 5885 1.1 christos { 5886 1.1 christos mcnt = count_mbs_length(mbs_offset2, pos-csize1); 5887 1.1 christos d = string2 + mcnt; 5888 1.1 christos dend = end_match_2; 5889 1.1 christos } 5890 1.1 christos 5891 1.1 christos if (mcnt < 0) 5892 1.1 christos { /* count_mbs_length return error. */ 5893 1.1 christos FREE_VARIABLES (); 5894 1.1 christos return -1; 5895 1.1 christos } 5896 1.1 christos #else 5897 1.1 christos if (size1 > 0 && pos <= size1) 5898 1.1 christos { 5899 1.1 christos d = string1 + pos; 5900 1.1 christos dend = end_match_1; 5901 1.1 christos } 5902 1.1 christos else 5903 1.1 christos { 5904 1.1 christos d = string2 + pos - size1; 5905 1.1 christos dend = end_match_2; 5906 1.1 christos } 5907 1.1 christos #endif /* WCHAR */ 5908 1.1 christos 5909 1.1 christos DEBUG_PRINT1 ("The compiled pattern is:\n"); 5910 1.1 christos DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 5911 1.1 christos DEBUG_PRINT1 ("The string to match is: `"); 5912 1.1 christos DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 5913 1.1 christos DEBUG_PRINT1 ("'\n"); 5914 1.1 christos 5915 1.1 christos /* This loops over pattern commands. It exits by returning from the 5916 1.1 christos function if the match is complete, or it drops through if the match 5917 1.1 christos fails at this starting point in the input data. */ 5918 1.1 christos for (;;) 5919 1.1 christos { 5920 1.1 christos #ifdef _LIBC 5921 1.1 christos DEBUG_PRINT2 ("\n%p: ", p); 5922 1.1 christos #else 5923 1.1 christos DEBUG_PRINT2 ("\n0x%x: ", p); 5924 1.1 christos #endif 5925 1.1 christos 5926 1.1 christos if (p == pend) 5927 1.1 christos { /* End of pattern means we might have succeeded. 
*/ 5928 1.1 christos DEBUG_PRINT1 ("end of pattern ... "); 5929 1.1 christos 5930 1.1 christos /* If we haven't matched the entire string, and we want the 5931 1.1 christos longest match, try backtracking. */ 5932 1.1 christos if (d != end_match_2) 5933 1.1 christos { 5934 1.1 christos /* 1 if this match ends in the same string (string1 or string2) 5935 1.1 christos as the best previous match. */ 5936 1.1 christos boolean same_str_p; 5937 1.1 christos 5938 1.1 christos /* 1 if this match is the best seen so far. */ 5939 1.1 christos boolean best_match_p; 5940 1.1 christos 5941 1.1 christos same_str_p = (FIRST_STRING_P (match_end) 5942 1.1 christos == MATCHING_IN_FIRST_STRING); 5943 1.1 christos 5944 1.1 christos /* AIX compiler got confused when this was combined 5945 1.1 christos with the previous declaration. */ 5946 1.1 christos if (same_str_p) 5947 1.1 christos best_match_p = d > match_end; 5948 1.1 christos else 5949 1.1 christos best_match_p = !MATCHING_IN_FIRST_STRING; 5950 1.1 christos 5951 1.1 christos DEBUG_PRINT1 ("backtracking.\n"); 5952 1.1 christos 5953 1.1 christos if (!FAIL_STACK_EMPTY ()) 5954 1.1 christos { /* More failure points to try. */ 5955 1.1 christos 5956 1.1 christos /* If exceeds best match so far, save it. */ 5957 1.1 christos if (!best_regs_set || best_match_p) 5958 1.1 christos { 5959 1.1 christos best_regs_set = true; 5960 1.1 christos match_end = d; 5961 1.1 christos 5962 1.1 christos DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 5963 1.1 christos 5964 1.1 christos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 5965 1.1 christos { 5966 1.1 christos best_regstart[mcnt] = regstart[mcnt]; 5967 1.1 christos best_regend[mcnt] = regend[mcnt]; 5968 1.1 christos } 5969 1.1 christos } 5970 1.1 christos goto fail; 5971 1.1 christos } 5972 1.1 christos 5973 1.1 christos /* If no failure points, don't restore garbage. And if 5974 1.1 christos last match is real best match, don't restore second 5975 1.1 christos best one. 
*/ 5976 1.1 christos else if (best_regs_set && !best_match_p) 5977 1.1 christos { 5978 1.1 christos restore_best_regs: 5979 1.1 christos /* Restore best match. It may happen that `dend == 5980 1.1 christos end_match_1' while the restored d is in string2. 5981 1.1 christos For example, the pattern `x.*y.*z' against the 5982 1.1 christos strings `x-' and `y-z-', if the two strings are 5983 1.1 christos not consecutive in memory. */ 5984 1.1 christos DEBUG_PRINT1 ("Restoring best registers.\n"); 5985 1.1 christos 5986 1.1 christos d = match_end; 5987 1.1 christos dend = ((d >= string1 && d <= end1) 5988 1.1 christos ? end_match_1 : end_match_2); 5989 1.1 christos 5990 1.1 christos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 5991 1.1 christos { 5992 1.1 christos regstart[mcnt] = best_regstart[mcnt]; 5993 1.1 christos regend[mcnt] = best_regend[mcnt]; 5994 1.1 christos } 5995 1.1 christos } 5996 1.1 christos } /* d != end_match_2 */ 5997 1.1 christos 5998 1.1 christos succeed_label: 5999 1.1 christos DEBUG_PRINT1 ("Accepting match.\n"); 6000 1.1 christos /* If caller wants register contents data back, do it. */ 6001 1.1 christos if (regs && !bufp->no_sub) 6002 1.1 christos { 6003 1.1 christos /* Have the register data arrays been allocated? */ 6004 1.1 christos if (bufp->regs_allocated == REGS_UNALLOCATED) 6005 1.1 christos { /* No. So allocate them with malloc. We need one 6006 1.1 christos extra element beyond `num_regs' for the `-1' marker 6007 1.1 christos GNU code uses. 
*/ 6008 1.1 christos regs->num_regs = MAX (RE_NREGS, num_regs + 1); 6009 1.1 christos regs->start = TALLOC (regs->num_regs, regoff_t); 6010 1.1 christos regs->end = TALLOC (regs->num_regs, regoff_t); 6011 1.1 christos if (regs->start == NULL || regs->end == NULL) 6012 1.1 christos { 6013 1.1 christos FREE_VARIABLES (); 6014 1.1 christos return -2; 6015 1.1 christos } 6016 1.1 christos bufp->regs_allocated = REGS_REALLOCATE; 6017 1.1 christos } 6018 1.1 christos else if (bufp->regs_allocated == REGS_REALLOCATE) 6019 1.1 christos { /* Yes. If we need more elements than were already 6020 1.1 christos allocated, reallocate them. If we need fewer, just 6021 1.1 christos leave it alone. */ 6022 1.1 christos if (regs->num_regs < num_regs + 1) 6023 1.1 christos { 6024 1.1 christos regs->num_regs = num_regs + 1; 6025 1.1 christos RETALLOC (regs->start, regs->num_regs, regoff_t); 6026 1.1 christos RETALLOC (regs->end, regs->num_regs, regoff_t); 6027 1.1 christos if (regs->start == NULL || regs->end == NULL) 6028 1.1 christos { 6029 1.1 christos FREE_VARIABLES (); 6030 1.1 christos return -2; 6031 1.1 christos } 6032 1.1 christos } 6033 1.1 christos } 6034 1.1 christos else 6035 1.1 christos { 6036 1.1 christos /* These braces fend off a "empty body in an else-statement" 6037 1.1 christos warning under GCC when assert expands to nothing. */ 6038 1.1 christos assert (bufp->regs_allocated == REGS_FIXED); 6039 1.1 christos } 6040 1.1 christos 6041 1.1 christos /* Convert the pointer data in `regstart' and `regend' to 6042 1.1 christos indices. Register zero has to be set differently, 6043 1.1 christos since we haven't kept track of any info for it. */ 6044 1.1 christos if (regs->num_regs > 0) 6045 1.1 christos { 6046 1.1 christos regs->start[0] = pos; 6047 1.1 christos #ifdef WCHAR 6048 1.1 christos if (MATCHING_IN_FIRST_STRING) 6049 1.1 christos regs->end[0] = mbs_offset1 != NULL ? 
6050 1.1 christos mbs_offset1[d-string1] : 0; 6051 1.1 christos else 6052 1.1 christos regs->end[0] = csize1 + (mbs_offset2 != NULL ? 6053 1.1 christos mbs_offset2[d-string2] : 0); 6054 1.1 christos #else 6055 1.1 christos regs->end[0] = (MATCHING_IN_FIRST_STRING 6056 1.1 christos ? ((regoff_t) (d - string1)) 6057 1.1 christos : ((regoff_t) (d - string2 + size1))); 6058 1.1 christos #endif /* WCHAR */ 6059 1.1 christos } 6060 1.1 christos 6061 1.1 christos /* Go through the first `min (num_regs, regs->num_regs)' 6062 1.1 christos registers, since that is all we initialized. */ 6063 1.1 christos for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); 6064 1.1 christos mcnt++) 6065 1.1 christos { 6066 1.1 christos if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 6067 1.1 christos regs->start[mcnt] = regs->end[mcnt] = -1; 6068 1.1 christos else 6069 1.1 christos { 6070 1.1 christos regs->start[mcnt] 6071 1.1 christos = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 6072 1.1 christos regs->end[mcnt] 6073 1.1 christos = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 6074 1.1 christos } 6075 1.1 christos } 6076 1.1 christos 6077 1.1 christos /* If the regs structure we return has more elements than 6078 1.1 christos were in the pattern, set the extra elements to -1. If 6079 1.1 christos we (re)allocated the registers, this is the case, 6080 1.1 christos because we always allocate enough to have at least one 6081 1.1 christos -1 at the end. 
*/ 6082 1.1 christos for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) 6083 1.1 christos regs->start[mcnt] = regs->end[mcnt] = -1; 6084 1.1 christos } /* regs && !bufp->no_sub */ 6085 1.1 christos 6086 1.1 christos DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 6087 1.1 christos nfailure_points_pushed, nfailure_points_popped, 6088 1.1 christos nfailure_points_pushed - nfailure_points_popped); 6089 1.1 christos DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 6090 1.1 christos 6091 1.1 christos #ifdef WCHAR 6092 1.1 christos if (MATCHING_IN_FIRST_STRING) 6093 1.1 christos mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0; 6094 1.1 christos else 6095 1.1 christos mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) + 6096 1.1 christos csize1; 6097 1.1 christos mcnt -= pos; 6098 1.1 christos #else 6099 1.1 christos mcnt = d - pos - (MATCHING_IN_FIRST_STRING 6100 1.1 christos ? string1 6101 1.1 christos : string2 - size1); 6102 1.1 christos #endif /* WCHAR */ 6103 1.1 christos 6104 1.1 christos DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 6105 1.1 christos 6106 1.1 christos FREE_VARIABLES (); 6107 1.1 christos return mcnt; 6108 1.1 christos } 6109 1.1 christos 6110 1.1 christos /* Otherwise match next pattern command. */ 6111 1.1 christos switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 6112 1.1 christos { 6113 1.1 christos /* Ignore these. Used to ignore the n of succeed_n's which 6114 1.1 christos currently have n == 0. */ 6115 1.1 christos case no_op: 6116 1.1 christos DEBUG_PRINT1 ("EXECUTING no_op.\n"); 6117 1.1 christos break; 6118 1.1 christos 6119 1.1 christos case succeed: 6120 1.1 christos DEBUG_PRINT1 ("EXECUTING succeed.\n"); 6121 1.1 christos goto succeed_label; 6122 1.1 christos 6123 1.1 christos /* Match the next n pattern characters exactly. The following 6124 1.1 christos byte in the pattern defines n, and the n bytes after that 6125 1.1 christos are the characters to match. 
*/ 6126 1.1 christos case exactn: 6127 1.1 christos #ifdef MBS_SUPPORT 6128 1.1 christos case exactn_bin: 6129 1.1 christos #endif 6130 1.1 christos mcnt = *p++; 6131 1.1 christos DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 6132 1.1 christos 6133 1.1 christos /* This is written out as an if-else so we don't waste time 6134 1.1 christos testing `translate' inside the loop. */ 6135 1.1 christos if (translate) 6136 1.1 christos { 6137 1.1 christos do 6138 1.1 christos { 6139 1.1 christos PREFETCH (); 6140 1.1 christos #ifdef WCHAR 6141 1.1 christos if (*d <= 0xff) 6142 1.1 christos { 6143 1.1 christos if ((UCHAR_T) translate[(unsigned char) *d++] 6144 1.1 christos != (UCHAR_T) *p++) 6145 1.1 christos goto fail; 6146 1.1 christos } 6147 1.1 christos else 6148 1.1 christos { 6149 1.1 christos if (*d++ != (CHAR_T) *p++) 6150 1.1 christos goto fail; 6151 1.1 christos } 6152 1.1 christos #else 6153 1.1 christos if ((UCHAR_T) translate[(unsigned char) *d++] 6154 1.1 christos != (UCHAR_T) *p++) 6155 1.1 christos goto fail; 6156 1.1 christos #endif /* WCHAR */ 6157 1.1 christos } 6158 1.1 christos while (--mcnt); 6159 1.1 christos } 6160 1.1 christos else 6161 1.1 christos { 6162 1.1 christos do 6163 1.1 christos { 6164 1.1 christos PREFETCH (); 6165 1.1 christos if (*d++ != (CHAR_T) *p++) goto fail; 6166 1.1 christos } 6167 1.1 christos while (--mcnt); 6168 1.1 christos } 6169 1.1 christos SET_REGS_MATCHED (); 6170 1.1 christos break; 6171 1.1 christos 6172 1.1 christos 6173 1.1 christos /* Match any character except possibly a newline or a null. 
*/ 6174 1.1 christos case anychar: 6175 1.1 christos DEBUG_PRINT1 ("EXECUTING anychar.\n"); 6176 1.1 christos 6177 1.1 christos PREFETCH (); 6178 1.1 christos 6179 1.1 christos if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 6180 1.1 christos || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 6181 1.1 christos goto fail; 6182 1.1 christos 6183 1.1 christos SET_REGS_MATCHED (); 6184 1.1 christos DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d); 6185 1.1 christos d++; 6186 1.1 christos break; 6187 1.1 christos 6188 1.1 christos 6189 1.1 christos case charset: 6190 1.1 christos case charset_not: 6191 1.1 christos { 6192 1.1 christos register UCHAR_T c; 6193 1.1 christos #ifdef WCHAR 6194 1.1 christos unsigned int i, char_class_length, coll_symbol_length, 6195 1.1 christos equiv_class_length, ranges_length, chars_length, length; 6196 1.1 christos CHAR_T *workp, *workp2, *charset_top; 6197 1.1 christos #define WORK_BUFFER_SIZE 128 6198 1.1 christos CHAR_T str_buf[WORK_BUFFER_SIZE]; 6199 1.1 christos # ifdef _LIBC 6200 1.1 christos uint32_t nrules; 6201 1.1 christos # endif /* _LIBC */ 6202 1.1 christos #endif /* WCHAR */ 6203 1.1 christos boolean negate = (re_opcode_t) *(p - 1) == charset_not; 6204 1.1 christos 6205 1.1 christos DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : ""); 6206 1.1 christos PREFETCH (); 6207 1.1 christos c = TRANSLATE (*d); /* The character to match. 
*/ 6208 1.1 christos #ifdef WCHAR 6209 1.1 christos # ifdef _LIBC 6210 1.1 christos nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 6211 1.1 christos # endif /* _LIBC */ 6212 1.1 christos charset_top = p - 1; 6213 1.1 christos char_class_length = *p++; 6214 1.1 christos coll_symbol_length = *p++; 6215 1.1 christos equiv_class_length = *p++; 6216 1.1 christos ranges_length = *p++; 6217 1.1 christos chars_length = *p++; 6218 1.1 christos /* p points charset[6], so the address of the next instruction 6219 1.1 christos (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'], 6220 1.1 christos where l=length of char_classes, m=length of collating_symbol, 6221 1.1 christos n=equivalence_class, o=length of char_range, 6222 1.1 christos p'=length of character. */ 6223 1.1 christos workp = p; 6224 1.1 christos /* Update p to indicate the next instruction. */ 6225 1.1 christos p += char_class_length + coll_symbol_length+ equiv_class_length + 6226 1.1 christos 2*ranges_length + chars_length; 6227 1.1 christos 6228 1.1 christos /* match with char_class? */ 6229 1.1 christos for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE) 6230 1.1 christos { 6231 1.1 christos wctype_t wctype; 6232 1.1 christos uintptr_t alignedp = ((uintptr_t)workp 6233 1.1 christos + __alignof__(wctype_t) - 1) 6234 1.1 christos & ~(uintptr_t)(__alignof__(wctype_t) - 1); 6235 1.1 christos wctype = *((wctype_t*)alignedp); 6236 1.1 christos workp += CHAR_CLASS_SIZE; 6237 1.1 christos # ifdef _LIBC 6238 1.1 christos if (__iswctype((wint_t)c, wctype)) 6239 1.1 christos goto char_set_matched; 6240 1.1 christos # else 6241 1.1 christos if (iswctype((wint_t)c, wctype)) 6242 1.1 christos goto char_set_matched; 6243 1.1 christos # endif 6244 1.1 christos } 6245 1.1 christos 6246 1.1 christos /* match with collating_symbol? 
*/ 6247 1.1 christos # ifdef _LIBC 6248 1.1 christos if (nrules != 0) 6249 1.1 christos { 6250 1.1 christos const unsigned char *extra = (const unsigned char *) 6251 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 6252 1.1 christos 6253 1.1 christos for (workp2 = workp + coll_symbol_length ; workp < workp2 ; 6254 1.1 christos workp++) 6255 1.1 christos { 6256 1.1 christos int32_t *wextra; 6257 1.1 christos wextra = (int32_t*)(extra + *workp++); 6258 1.1 christos for (i = 0; i < *wextra; ++i) 6259 1.1 christos if (TRANSLATE(d[i]) != wextra[1 + i]) 6260 1.1 christos break; 6261 1.1 christos 6262 1.1 christos if (i == *wextra) 6263 1.1 christos { 6264 1.1 christos /* Update d, however d will be incremented at 6265 1.1 christos char_set_matched:, we decrement d here. */ 6266 1.1 christos d += i - 1; 6267 1.1 christos goto char_set_matched; 6268 1.1 christos } 6269 1.1 christos } 6270 1.1 christos } 6271 1.1 christos else /* (nrules == 0) */ 6272 1.1 christos # endif 6273 1.1 christos /* If we can't look up collation data, we use wcscoll 6274 1.1 christos instead. */ 6275 1.1 christos { 6276 1.1 christos for (workp2 = workp + coll_symbol_length ; workp < workp2 ;) 6277 1.1 christos { 6278 1.1 christos const CHAR_T *backup_d = d, *backup_dend = dend; 6279 1.1 christos # ifdef _LIBC 6280 1.1 christos length = __wcslen (workp); 6281 1.1 christos # else 6282 1.1 christos length = wcslen (workp); 6283 1.1 christos # endif 6284 1.1 christos 6285 1.1 christos /* If wcscoll(the collating symbol, whole string) > 0, 6286 1.1 christos any substring of the string never match with the 6287 1.1 christos collating symbol. 
*/ 6288 1.1 christos # ifdef _LIBC 6289 1.1 christos if (__wcscoll (workp, d) > 0) 6290 1.1 christos # else 6291 1.1 christos if (wcscoll (workp, d) > 0) 6292 1.1 christos # endif 6293 1.1 christos { 6294 1.1 christos workp += length + 1; 6295 1.1 christos continue; 6296 1.1 christos } 6297 1.1 christos 6298 1.1 christos /* First, we compare the collating symbol with 6299 1.1 christos the first character of the string. 6300 1.1 christos If it don't match, we add the next character to 6301 1.1 christos the compare buffer in turn. */ 6302 1.1 christos for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++) 6303 1.1 christos { 6304 1.1 christos int match; 6305 1.1 christos if (d == dend) 6306 1.1 christos { 6307 1.1 christos if (dend == end_match_2) 6308 1.1 christos break; 6309 1.1 christos d = string2; 6310 1.1 christos dend = end_match_2; 6311 1.1 christos } 6312 1.1 christos 6313 1.1 christos /* add next character to the compare buffer. */ 6314 1.1 christos str_buf[i] = TRANSLATE(*d); 6315 1.1 christos str_buf[i+1] = '\0'; 6316 1.1 christos 6317 1.1 christos # ifdef _LIBC 6318 1.1 christos match = __wcscoll (workp, str_buf); 6319 1.1 christos # else 6320 1.1 christos match = wcscoll (workp, str_buf); 6321 1.1 christos # endif 6322 1.1 christos if (match == 0) 6323 1.1 christos goto char_set_matched; 6324 1.1 christos 6325 1.1 christos if (match < 0) 6326 1.1 christos /* (str_buf > workp) indicate (str_buf + X > workp), 6327 1.1 christos because for all X (str_buf + X > str_buf). 6328 1.1 christos So we don't need continue this loop. */ 6329 1.1 christos break; 6330 1.1 christos 6331 1.1 christos /* Otherwise(str_buf < workp), 6332 1.1 christos (str_buf+next_character) may equals (workp). 6333 1.1 christos So we continue this loop. 
*/ 6334 1.1 christos } 6335 1.1 christos /* not matched */ 6336 1.1 christos d = backup_d; 6337 1.1 christos dend = backup_dend; 6338 1.1 christos workp += length + 1; 6339 1.1 christos } 6340 1.1 christos } 6341 1.1 christos /* match with equivalence_class? */ 6342 1.1 christos # ifdef _LIBC 6343 1.1 christos if (nrules != 0) 6344 1.1 christos { 6345 1.1 christos const CHAR_T *backup_d = d, *backup_dend = dend; 6346 1.1 christos /* Try to match the equivalence class against 6347 1.1 christos those known to the collate implementation. */ 6348 1.1 christos const int32_t *table; 6349 1.1 christos const int32_t *weights; 6350 1.1 christos const int32_t *extra; 6351 1.1 christos const int32_t *indirect; 6352 1.1 christos int32_t idx, idx2; 6353 1.1 christos wint_t *cp; 6354 1.1 christos size_t len; 6355 1.1 christos 6356 1.1 christos /* This #include defines a local function! */ 6357 1.1 christos # include <locale/weightwc.h> 6358 1.1 christos 6359 1.1 christos table = (const int32_t *) 6360 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 6361 1.1 christos weights = (const wint_t *) 6362 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 6363 1.1 christos extra = (const wint_t *) 6364 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 6365 1.1 christos indirect = (const int32_t *) 6366 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 6367 1.1 christos 6368 1.1 christos /* Write 1 collating element to str_buf, and 6369 1.1 christos get its index. 
*/ 6370 1.1 christos idx2 = 0; 6371 1.1 christos 6372 1.1 christos for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++) 6373 1.1 christos { 6374 1.1 christos cp = (wint_t*)str_buf; 6375 1.1 christos if (d == dend) 6376 1.1 christos { 6377 1.1 christos if (dend == end_match_2) 6378 1.1 christos break; 6379 1.1 christos d = string2; 6380 1.1 christos dend = end_match_2; 6381 1.1 christos } 6382 1.1 christos str_buf[i] = TRANSLATE(*(d+i)); 6383 1.1 christos str_buf[i+1] = '\0'; /* sentinel */ 6384 1.1 christos idx2 = findidx ((const wint_t**)&cp); 6385 1.1 christos } 6386 1.1 christos 6387 1.1 christos /* Update d, however d will be incremented at 6388 1.1 christos char_set_matched:, we decrement d here. */ 6389 1.1 christos d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1); 6390 1.1 christos if (d >= dend) 6391 1.1 christos { 6392 1.1 christos if (dend == end_match_2) 6393 1.1 christos d = dend; 6394 1.1 christos else 6395 1.1 christos { 6396 1.1 christos d = string2; 6397 1.1 christos dend = end_match_2; 6398 1.1 christos } 6399 1.1 christos } 6400 1.1 christos 6401 1.1 christos len = weights[idx2]; 6402 1.1 christos 6403 1.1 christos for (workp2 = workp + equiv_class_length ; workp < workp2 ; 6404 1.1 christos workp++) 6405 1.1 christos { 6406 1.1 christos idx = (int32_t)*workp; 6407 1.1 christos /* We already checked idx != 0 in regex_compile. 
*/ 6408 1.1 christos 6409 1.1 christos if (idx2 != 0 && len == weights[idx]) 6410 1.1 christos { 6411 1.1 christos int cnt = 0; 6412 1.1 christos while (cnt < len && (weights[idx + 1 + cnt] 6413 1.1 christos == weights[idx2 + 1 + cnt])) 6414 1.1 christos ++cnt; 6415 1.1 christos 6416 1.1 christos if (cnt == len) 6417 1.1 christos goto char_set_matched; 6418 1.1 christos } 6419 1.1 christos } 6420 1.1 christos /* not matched */ 6421 1.1 christos d = backup_d; 6422 1.1 christos dend = backup_dend; 6423 1.1 christos } 6424 1.1 christos else /* (nrules == 0) */ 6425 1.1 christos # endif 6426 1.1 christos /* If we can't look up collation data, we use wcscoll 6427 1.1 christos instead. */ 6428 1.1 christos { 6429 1.1 christos for (workp2 = workp + equiv_class_length ; workp < workp2 ;) 6430 1.1 christos { 6431 1.1 christos const CHAR_T *backup_d = d, *backup_dend = dend; 6432 1.1 christos # ifdef _LIBC 6433 1.1 christos length = __wcslen (workp); 6434 1.1 christos # else 6435 1.1 christos length = wcslen (workp); 6436 1.1 christos # endif 6437 1.1 christos 6438 1.1 christos /* If wcscoll(the collating symbol, whole string) > 0, 6439 1.1 christos any substring of the string never match with the 6440 1.1 christos collating symbol. */ 6441 1.1 christos # ifdef _LIBC 6442 1.1 christos if (__wcscoll (workp, d) > 0) 6443 1.1 christos # else 6444 1.1 christos if (wcscoll (workp, d) > 0) 6445 1.1 christos # endif 6446 1.1 christos { 6447 1.1 christos workp += length + 1; 6448 1.1 christos break; 6449 1.1 christos } 6450 1.1 christos 6451 1.1 christos /* First, we compare the equivalence class with 6452 1.1 christos the first character of the string. 6453 1.1 christos If it don't match, we add the next character to 6454 1.1 christos the compare buffer in turn. 
*/ 6455 1.1 christos for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++) 6456 1.1 christos { 6457 1.1 christos int match; 6458 1.1 christos if (d == dend) 6459 1.1 christos { 6460 1.1 christos if (dend == end_match_2) 6461 1.1 christos break; 6462 1.1 christos d = string2; 6463 1.1 christos dend = end_match_2; 6464 1.1 christos } 6465 1.1 christos 6466 1.1 christos /* add next character to the compare buffer. */ 6467 1.1 christos str_buf[i] = TRANSLATE(*d); 6468 1.1 christos str_buf[i+1] = '\0'; 6469 1.1 christos 6470 1.1 christos # ifdef _LIBC 6471 1.1 christos match = __wcscoll (workp, str_buf); 6472 1.1 christos # else 6473 1.1 christos match = wcscoll (workp, str_buf); 6474 1.1 christos # endif 6475 1.1 christos 6476 1.1 christos if (match == 0) 6477 1.1 christos goto char_set_matched; 6478 1.1 christos 6479 1.1 christos if (match < 0) 6480 1.1 christos /* (str_buf > workp) indicate (str_buf + X > workp), 6481 1.1 christos because for all X (str_buf + X > str_buf). 6482 1.1 christos So we don't need continue this loop. */ 6483 1.1 christos break; 6484 1.1 christos 6485 1.1 christos /* Otherwise(str_buf < workp), 6486 1.1 christos (str_buf+next_character) may equals (workp). 6487 1.1 christos So we continue this loop. */ 6488 1.1 christos } 6489 1.1 christos /* not matched */ 6490 1.1 christos d = backup_d; 6491 1.1 christos dend = backup_dend; 6492 1.1 christos workp += length + 1; 6493 1.1 christos } 6494 1.1 christos } 6495 1.1 christos 6496 1.1 christos /* match with char_range? 
*/ 6497 1.1 christos # ifdef _LIBC 6498 1.1 christos if (nrules != 0) 6499 1.1 christos { 6500 1.1 christos uint32_t collseqval; 6501 1.1 christos const char *collseq = (const char *) 6502 1.1 christos _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 6503 1.1 christos 6504 1.1 christos collseqval = collseq_table_lookup (collseq, c); 6505 1.1 christos 6506 1.1 christos for (; workp < p - chars_length ;) 6507 1.1 christos { 6508 1.1 christos uint32_t start_val, end_val; 6509 1.1 christos 6510 1.1 christos /* We already compute the collation sequence value 6511 1.1 christos of the characters (or collating symbols). */ 6512 1.1 christos start_val = (uint32_t) *workp++; /* range_start */ 6513 1.1 christos end_val = (uint32_t) *workp++; /* range_end */ 6514 1.1 christos 6515 1.1 christos if (start_val <= collseqval && collseqval <= end_val) 6516 1.1 christos goto char_set_matched; 6517 1.1 christos } 6518 1.1 christos } 6519 1.1 christos else 6520 1.1 christos # endif 6521 1.1 christos { 6522 1.1 christos /* We set range_start_char at str_buf[0], range_end_char 6523 1.1 christos at str_buf[4], and compared char at str_buf[2]. */ 6524 1.1 christos str_buf[1] = 0; 6525 1.1 christos str_buf[2] = c; 6526 1.1 christos str_buf[3] = 0; 6527 1.1 christos str_buf[5] = 0; 6528 1.1 christos for (; workp < p - chars_length ;) 6529 1.1 christos { 6530 1.1 christos wchar_t *range_start_char, *range_end_char; 6531 1.1 christos 6532 1.1 christos /* match if (range_start_char <= c <= range_end_char). */ 6533 1.1 christos 6534 1.1 christos /* If range_start(or end) < 0, we assume -range_start(end) 6535 1.1 christos is the offset of the collating symbol which is specified 6536 1.1 christos as the character of the range start(end). 
*/ 6537 1.1 christos 6538 1.1 christos /* range_start */ 6539 1.1 christos if (*workp < 0) 6540 1.1 christos range_start_char = charset_top - (*workp++); 6541 1.1 christos else 6542 1.1 christos { 6543 1.1 christos str_buf[0] = *workp++; 6544 1.1 christos range_start_char = str_buf; 6545 1.1 christos } 6546 1.1 christos 6547 1.1 christos /* range_end */ 6548 1.1 christos if (*workp < 0) 6549 1.1 christos range_end_char = charset_top - (*workp++); 6550 1.1 christos else 6551 1.1 christos { 6552 1.1 christos str_buf[4] = *workp++; 6553 1.1 christos range_end_char = str_buf + 4; 6554 1.1 christos } 6555 1.1 christos 6556 1.1 christos # ifdef _LIBC 6557 1.1 christos if (__wcscoll (range_start_char, str_buf+2) <= 0 6558 1.1 christos && __wcscoll (str_buf+2, range_end_char) <= 0) 6559 1.1 christos # else 6560 1.1 christos if (wcscoll (range_start_char, str_buf+2) <= 0 6561 1.1 christos && wcscoll (str_buf+2, range_end_char) <= 0) 6562 1.1 christos # endif 6563 1.1 christos goto char_set_matched; 6564 1.1 christos } 6565 1.1 christos } 6566 1.1 christos 6567 1.1 christos /* match with char? */ 6568 1.1 christos for (; workp < p ; workp++) 6569 1.1 christos if (c == *workp) 6570 1.1 christos goto char_set_matched; 6571 1.1 christos 6572 1.1 christos negate = !negate; 6573 1.1 christos 6574 1.1 christos char_set_matched: 6575 1.1 christos if (negate) goto fail; 6576 1.1 christos #else 6577 1.1 christos /* Cast to `unsigned' instead of `unsigned char' in case the 6578 1.1 christos bit list is a full 32 bytes long. 
*/ 6579 1.1 christos if (c < (unsigned) (*p * BYTEWIDTH) 6580 1.1 christos && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 6581 1.1 christos negate = !negate; 6582 1.1 christos 6583 1.1 christos p += 1 + *p; 6584 1.1 christos 6585 1.1 christos if (!negate) goto fail; 6586 1.1 christos #undef WORK_BUFFER_SIZE 6587 1.1 christos #endif /* WCHAR */ 6588 1.1 christos SET_REGS_MATCHED (); 6589 1.1 christos d++; 6590 1.1 christos break; 6591 1.1 christos } 6592 1.1 christos 6593 1.1 christos 6594 1.1 christos /* The beginning of a group is represented by start_memory. 6595 1.1 christos The arguments are the register number in the next byte, and the 6596 1.1 christos number of groups inner to this one in the next. The text 6597 1.1 christos matched within the group is recorded (in the internal 6598 1.1 christos registers data structure) under the register number. */ 6599 1.1 christos case start_memory: 6600 1.1 christos DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n", 6601 1.1 christos (long int) *p, (long int) p[1]); 6602 1.1 christos 6603 1.1 christos /* Find out if this group can match the empty string. */ 6604 1.1 christos p1 = p; /* To send to group_match_null_string_p. */ 6605 1.1 christos 6606 1.1 christos if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 6607 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[*p]) 6608 1.1 christos = PREFIX(group_match_null_string_p) (&p1, pend, reg_info); 6609 1.1 christos 6610 1.1 christos /* Save the position in the string where we were the last time 6611 1.1 christos we were at this open-group operator in case the group is 6612 1.1 christos operated upon by a repetition operator, e.g., with `(a*)*b' 6613 1.1 christos against `ab'; then we want to ignore where we are now in 6614 1.1 christos the string in case this attempt to match fails. */ 6615 1.1 christos old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 6616 1.1 christos ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] 6617 1.1 christos : regstart[*p]; 6618 1.1 christos DEBUG_PRINT2 (" old_regstart: %d\n", 6619 1.1 christos POINTER_TO_OFFSET (old_regstart[*p])); 6620 1.1 christos 6621 1.1 christos regstart[*p] = d; 6622 1.1 christos DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 6623 1.1 christos 6624 1.1 christos IS_ACTIVE (reg_info[*p]) = 1; 6625 1.1 christos MATCHED_SOMETHING (reg_info[*p]) = 0; 6626 1.1 christos 6627 1.1 christos /* Clear this whenever we change the register activity status. */ 6628 1.1 christos set_regs_matched_done = 0; 6629 1.1 christos 6630 1.1 christos /* This is the new highest active register. */ 6631 1.1 christos highest_active_reg = *p; 6632 1.1 christos 6633 1.1 christos /* If nothing was active before, this is the new lowest active 6634 1.1 christos register. */ 6635 1.1 christos if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 6636 1.1 christos lowest_active_reg = *p; 6637 1.1 christos 6638 1.1 christos /* Move past the register number and inner group count. */ 6639 1.1 christos p += 2; 6640 1.1 christos just_past_start_mem = p; 6641 1.1 christos 6642 1.1 christos break; 6643 1.1 christos 6644 1.1 christos 6645 1.1 christos /* The stop_memory opcode represents the end of a group. Its 6646 1.1 christos arguments are the same as start_memory's: the register 6647 1.1 christos number, and the number of inner groups. */ 6648 1.1 christos case stop_memory: 6649 1.1 christos DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n", 6650 1.1 christos (long int) *p, (long int) p[1]); 6651 1.1 christos 6652 1.1 christos /* We need to save the string position the last time we were at 6653 1.1 christos this close-group operator in case the group is operated 6654 1.1 christos upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 6655 1.1 christos against `aba'; then we want to ignore where we are now in 6656 1.1 christos the string in case this attempt to match fails. 
*/ 6657 1.1 christos old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 6658 1.1 christos ? REG_UNSET (regend[*p]) ? d : regend[*p] 6659 1.1 christos : regend[*p]; 6660 1.1 christos DEBUG_PRINT2 (" old_regend: %d\n", 6661 1.1 christos POINTER_TO_OFFSET (old_regend[*p])); 6662 1.1 christos 6663 1.1 christos regend[*p] = d; 6664 1.1 christos DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 6665 1.1 christos 6666 1.1 christos /* This register isn't active anymore. */ 6667 1.1 christos IS_ACTIVE (reg_info[*p]) = 0; 6668 1.1 christos 6669 1.1 christos /* Clear this whenever we change the register activity status. */ 6670 1.1 christos set_regs_matched_done = 0; 6671 1.1 christos 6672 1.1 christos /* If this was the only register active, nothing is active 6673 1.1 christos anymore. */ 6674 1.1 christos if (lowest_active_reg == highest_active_reg) 6675 1.1 christos { 6676 1.1 christos lowest_active_reg = NO_LOWEST_ACTIVE_REG; 6677 1.1 christos highest_active_reg = NO_HIGHEST_ACTIVE_REG; 6678 1.1 christos } 6679 1.1 christos else 6680 1.1 christos { /* We must scan for the new highest active register, since 6681 1.1 christos it isn't necessarily one less than now: consider 6682 1.1 christos (a(b)c(d(e)f)g). When group 3 ends, after the f), the 6683 1.1 christos new highest active register is 1. */ 6684 1.1 christos UCHAR_T r = *p - 1; 6685 1.1 christos while (r > 0 && !IS_ACTIVE (reg_info[r])) 6686 1.1 christos r--; 6687 1.1 christos 6688 1.1 christos /* If we end up at register zero, that means that we saved 6689 1.1 christos the registers as the result of an `on_failure_jump', not 6690 1.1 christos a `start_memory', and we jumped to past the innermost 6691 1.1 christos `stop_memory'. For example, in ((.)*) we save 6692 1.1 christos registers 1 and 2 as a result of the *, but when we pop 6693 1.1 christos back to the second ), we are at the stop_memory 1. 6694 1.1 christos Thus, nothing is active. 
*/ 6695 1.1 christos if (r == 0) 6696 1.1 christos { 6697 1.1 christos lowest_active_reg = NO_LOWEST_ACTIVE_REG; 6698 1.1 christos highest_active_reg = NO_HIGHEST_ACTIVE_REG; 6699 1.1 christos } 6700 1.1 christos else 6701 1.1 christos highest_active_reg = r; 6702 1.1 christos } 6703 1.1 christos 6704 1.1.1.6 christos /* If just failed to match something this time around with a 6705 1.1 christos group that's operated on by a repetition operator, try to 6706 1.1 christos force exit from the ``loop'', and restore the register 6707 1.1 christos information for this group that we had before trying this 6708 1.1 christos last match. */ 6709 1.1 christos if ((!MATCHED_SOMETHING (reg_info[*p]) 6710 1.1 christos || just_past_start_mem == p - 1) 6711 1.1 christos && (p + 2) < pend) 6712 1.1 christos { 6713 1.1 christos boolean is_a_jump_n = false; 6714 1.1 christos 6715 1.1 christos p1 = p + 2; 6716 1.1 christos mcnt = 0; 6717 1.1 christos switch ((re_opcode_t) *p1++) 6718 1.1 christos { 6719 1.1 christos case jump_n: 6720 1.1 christos is_a_jump_n = true; 6721 1.1 christos /* Fall through. */ 6722 1.1 christos case pop_failure_jump: 6723 1.1 christos case maybe_pop_jump: 6724 1.1 christos case jump: 6725 1.1 christos case dummy_failure_jump: 6726 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 6727 1.1 christos if (is_a_jump_n) 6728 1.1 christos p1 += OFFSET_ADDRESS_SIZE; 6729 1.1 christos break; 6730 1.1 christos 6731 1.1 christos default: 6732 1.1 christos /* do nothing */ ; 6733 1.1 christos } 6734 1.1 christos p1 += mcnt; 6735 1.1 christos 6736 1.1 christos /* If the next operation is a jump backwards in the pattern 6737 1.1 christos to an on_failure_jump right before the start_memory 6738 1.1 christos corresponding to this stop_memory, exit from the loop 6739 1.1 christos by forcing a failure after pushing on the stack the 6740 1.1 christos on_failure_jump's jump in the pattern, and d. 
*/ 6741 1.1 christos if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 6742 1.1 christos && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory 6743 1.1 christos && p1[2+OFFSET_ADDRESS_SIZE] == *p) 6744 1.1 christos { 6745 1.1 christos /* If this group ever matched anything, then restore 6746 1.1 christos what its registers were before trying this last 6747 1.1 christos failed match, e.g., with `(a*)*b' against `ab' for 6748 1.1 christos regstart[1], and, e.g., with `((a*)*(b*)*)*' 6749 1.1 christos against `aba' for regend[3]. 6750 1.1 christos 6751 1.1 christos Also restore the registers for inner groups for, 6752 1.1 christos e.g., `((a*)(b*))*' against `aba' (register 3 would 6753 1.1 christos otherwise get trashed). */ 6754 1.1 christos 6755 1.1 christos if (EVER_MATCHED_SOMETHING (reg_info[*p])) 6756 1.1 christos { 6757 1.1 christos unsigned r; 6758 1.1 christos 6759 1.1 christos EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 6760 1.1 christos 6761 1.1 christos /* Restore this and inner groups' (if any) registers. */ 6762 1.1 christos for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); 6763 1.1 christos r++) 6764 1.1 christos { 6765 1.1 christos regstart[r] = old_regstart[r]; 6766 1.1 christos 6767 1.1 christos /* xx why this test? */ 6768 1.1 christos if (old_regend[r] >= regstart[r]) 6769 1.1 christos regend[r] = old_regend[r]; 6770 1.1 christos } 6771 1.1 christos } 6772 1.1 christos p1++; 6773 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 6774 1.1 christos PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 6775 1.1 christos 6776 1.1 christos goto fail; 6777 1.1 christos } 6778 1.1 christos } 6779 1.1 christos 6780 1.1 christos /* Move past the register number and the inner group count. */ 6781 1.1 christos p += 2; 6782 1.1 christos break; 6783 1.1 christos 6784 1.1 christos 6785 1.1 christos /* \<digit> has been turned into a `duplicate' command which is 6786 1.1 christos followed by the numeric value of <digit> as the register number. 
*/ 6787 1.1 christos case duplicate: 6788 1.1 christos { 6789 1.1 christos register const CHAR_T *d2, *dend2; 6790 1.1 christos int regno = *p++; /* Get which register to match against. */ 6791 1.1 christos DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 6792 1.1 christos 6793 1.1 christos /* Can't back reference a group which we've never matched. */ 6794 1.1 christos if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 6795 1.1 christos goto fail; 6796 1.1 christos 6797 1.1 christos /* Where in input to try to start matching. */ 6798 1.1 christos d2 = regstart[regno]; 6799 1.1 christos 6800 1.1 christos /* Where to stop matching; if both the place to start and 6801 1.1 christos the place to stop matching are in the same string, then 6802 1.1 christos set to the place to stop, otherwise, for now have to use 6803 1.1 christos the end of the first string. */ 6804 1.1 christos 6805 1.1 christos dend2 = ((FIRST_STRING_P (regstart[regno]) 6806 1.1 christos == FIRST_STRING_P (regend[regno])) 6807 1.1 christos ? regend[regno] : end_match_1); 6808 1.1 christos for (;;) 6809 1.1 christos { 6810 1.1 christos /* If necessary, advance to next segment in register 6811 1.1 christos contents. */ 6812 1.1 christos while (d2 == dend2) 6813 1.1 christos { 6814 1.1 christos if (dend2 == end_match_2) break; 6815 1.1 christos if (dend2 == regend[regno]) break; 6816 1.1 christos 6817 1.1 christos /* End of string1 => advance to string2. */ 6818 1.1 christos d2 = string2; 6819 1.1 christos dend2 = regend[regno]; 6820 1.1 christos } 6821 1.1 christos /* At end of register contents => success */ 6822 1.1 christos if (d2 == dend2) break; 6823 1.1 christos 6824 1.1 christos /* If necessary, advance to next segment in data. */ 6825 1.1 christos PREFETCH (); 6826 1.1 christos 6827 1.1 christos /* How many characters left in this segment to match. 
*/ 6828 1.1 christos mcnt = dend - d; 6829 1.1 christos 6830 1.1 christos /* Want how many consecutive characters we can match in 6831 1.1 christos one shot, so, if necessary, adjust the count. */ 6832 1.1 christos if (mcnt > dend2 - d2) 6833 1.1 christos mcnt = dend2 - d2; 6834 1.1 christos 6835 1.1 christos /* Compare that many; failure if mismatch, else move 6836 1.1 christos past them. */ 6837 1.1 christos if (translate 6838 1.1 christos ? PREFIX(bcmp_translate) (d, d2, mcnt, translate) 6839 1.1 christos : memcmp (d, d2, mcnt*sizeof(UCHAR_T))) 6840 1.1 christos goto fail; 6841 1.1 christos d += mcnt, d2 += mcnt; 6842 1.1 christos 6843 1.1 christos /* Do this because we've match some characters. */ 6844 1.1 christos SET_REGS_MATCHED (); 6845 1.1 christos } 6846 1.1 christos } 6847 1.1 christos break; 6848 1.1 christos 6849 1.1 christos 6850 1.1 christos /* begline matches the empty string at the beginning of the string 6851 1.1 christos (unless `not_bol' is set in `bufp'), and, if 6852 1.1 christos `newline_anchor' is set, after newlines. */ 6853 1.1 christos case begline: 6854 1.1 christos DEBUG_PRINT1 ("EXECUTING begline.\n"); 6855 1.1 christos 6856 1.1 christos if (AT_STRINGS_BEG (d)) 6857 1.1 christos { 6858 1.1 christos if (!bufp->not_bol) break; 6859 1.1 christos } 6860 1.1 christos else if (d[-1] == '\n' && bufp->newline_anchor) 6861 1.1 christos { 6862 1.1 christos break; 6863 1.1 christos } 6864 1.1 christos /* In all other cases, we fail. */ 6865 1.1 christos goto fail; 6866 1.1 christos 6867 1.1 christos 6868 1.1 christos /* endline is the dual of begline. */ 6869 1.1 christos case endline: 6870 1.1 christos DEBUG_PRINT1 ("EXECUTING endline.\n"); 6871 1.1 christos 6872 1.1 christos if (AT_STRINGS_END (d)) 6873 1.1 christos { 6874 1.1 christos if (!bufp->not_eol) break; 6875 1.1 christos } 6876 1.1 christos 6877 1.1 christos /* We have to ``prefetch'' the next character. */ 6878 1.1 christos else if ((d == end1 ? 
*string2 : *d) == '\n' 6879 1.1 christos && bufp->newline_anchor) 6880 1.1 christos { 6881 1.1 christos break; 6882 1.1 christos } 6883 1.1 christos goto fail; 6884 1.1 christos 6885 1.1 christos 6886 1.1 christos /* Match at the very beginning of the data. */ 6887 1.1 christos case begbuf: 6888 1.1 christos DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 6889 1.1 christos if (AT_STRINGS_BEG (d)) 6890 1.1 christos break; 6891 1.1 christos goto fail; 6892 1.1 christos 6893 1.1 christos 6894 1.1 christos /* Match at the very end of the data. */ 6895 1.1 christos case endbuf: 6896 1.1 christos DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 6897 1.1 christos if (AT_STRINGS_END (d)) 6898 1.1 christos break; 6899 1.1 christos goto fail; 6900 1.1 christos 6901 1.1 christos 6902 1.1 christos /* on_failure_keep_string_jump is used to optimize `.*\n'. It 6903 1.1 christos pushes NULL as the value for the string on the stack. Then 6904 1.1 christos `pop_failure_point' will keep the current value for the 6905 1.1 christos string, instead of restoring it. To see why, consider 6906 1.1 christos matching `foo\nbar' against `.*\n'. The .* matches the foo; 6907 1.1 christos then the . fails against the \n. But the next thing we want 6908 1.1 christos to do is match the \n against the \n; if we restored the 6909 1.1 christos string value, we would be back at the foo. 6910 1.1 christos 6911 1.1 christos Because this is used only in specific cases, we don't need to 6912 1.1 christos check all the things that `on_failure_jump' does, to make 6913 1.1 christos sure the right things get saved on the stack. Hence we don't 6914 1.1 christos share its code. The only reason to push anything on the 6915 1.1 christos stack at all is that otherwise we would have to change 6916 1.1 christos `anychar's code to do something besides goto fail in this 6917 1.1 christos case; that seems worse than this. 
*/ 6918 1.1 christos case on_failure_keep_string_jump: 6919 1.1 christos DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 6920 1.1 christos 6921 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 6922 1.1 christos #ifdef _LIBC 6923 1.1 christos DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); 6924 1.1 christos #else 6925 1.1 christos DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 6926 1.1 christos #endif 6927 1.1 christos 6928 1.1 christos PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 6929 1.1 christos break; 6930 1.1 christos 6931 1.1 christos 6932 1.1 christos /* Uses of on_failure_jump: 6933 1.1 christos 6934 1.1 christos Each alternative starts with an on_failure_jump that points 6935 1.1 christos to the beginning of the next alternative. Each alternative 6936 1.1 christos except the last ends with a jump that in effect jumps past 6937 1.1 christos the rest of the alternatives. (They really jump to the 6938 1.1 christos ending jump of the following alternative, because tensioning 6939 1.1 christos these jumps is a hassle.) 6940 1.1 christos 6941 1.1 christos Repeats start with an on_failure_jump that points past both 6942 1.1 christos the repetition text and either the following jump or 6943 1.1 christos pop_failure_jump back to this on_failure_jump. 
*/ 6944 1.1 christos case on_failure_jump: 6945 1.1 christos on_failure: 6946 1.1 christos DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 6947 1.1 christos 6948 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 6949 1.1 christos #ifdef _LIBC 6950 1.1 christos DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); 6951 1.1 christos #else 6952 1.1 christos DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 6953 1.1 christos #endif 6954 1.1 christos 6955 1.1 christos /* If this on_failure_jump comes right before a group (i.e., 6956 1.1 christos the original * applied to a group), save the information 6957 1.1 christos for that group and all inner ones, so that if we fail back 6958 1.1 christos to this point, the group's information will be correct. 6959 1.1 christos For example, in \(a*\)*\1, we need the preceding group, 6960 1.1 christos and in \(zz\(a*\)b*\)\2, we need the inner group. */ 6961 1.1 christos 6962 1.1 christos /* We can't use `p' to check ahead because we push 6963 1.1 christos a failure point to `p + mcnt' after we do this. */ 6964 1.1 christos p1 = p; 6965 1.1 christos 6966 1.1 christos /* We need to skip no_op's before we look for the 6967 1.1 christos start_memory in case this on_failure_jump is happening as 6968 1.1 christos the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 6969 1.1 christos against aba. */ 6970 1.1 christos while (p1 < pend && (re_opcode_t) *p1 == no_op) 6971 1.1 christos p1++; 6972 1.1 christos 6973 1.1 christos if (p1 < pend && (re_opcode_t) *p1 == start_memory) 6974 1.1 christos { 6975 1.1 christos /* We have a new highest active register now. This will 6976 1.1 christos get reset at the start_memory we are about to get to, 6977 1.1 christos but we will have saved all the registers relevant to 6978 1.1 christos this repetition op, as described above. 
*/ 6979 1.1 christos highest_active_reg = *(p1 + 1) + *(p1 + 2); 6980 1.1 christos if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 6981 1.1 christos lowest_active_reg = *(p1 + 1); 6982 1.1 christos } 6983 1.1 christos 6984 1.1 christos DEBUG_PRINT1 (":\n"); 6985 1.1 christos PUSH_FAILURE_POINT (p + mcnt, d, -2); 6986 1.1 christos break; 6987 1.1 christos 6988 1.1 christos 6989 1.1 christos /* A smart repeat ends with `maybe_pop_jump'. 6990 1.1 christos We change it to either `pop_failure_jump' or `jump'. */ 6991 1.1 christos case maybe_pop_jump: 6992 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 6993 1.1 christos DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 6994 1.1 christos { 6995 1.1 christos register UCHAR_T *p2 = p; 6996 1.1 christos 6997 1.1 christos /* Compare the beginning of the repeat with what in the 6998 1.1 christos pattern follows its end. If we can establish that there 6999 1.1 christos is nothing that they would both match, i.e., that we 7000 1.1 christos would have to backtrack because of (as in, e.g., `a*a') 7001 1.1 christos then we can change to pop_failure_jump, because we'll 7002 1.1 christos never have to backtrack. 7003 1.1 christos 7004 1.1 christos This is not true in the case of alternatives: in 7005 1.1 christos `(a|ab)*' we do need to backtrack to the `ab' alternative 7006 1.1 christos (e.g., if the string was `ab'). But instead of trying to 7007 1.1 christos detect that here, the alternative has put on a dummy 7008 1.1 christos failure point which is what we will end up popping. */ 7009 1.1 christos 7010 1.1 christos /* Skip over open/close-group commands. 7011 1.1 christos If what follows this loop is a ...+ construct, 7012 1.1 christos look at what begins its body, since we will have to 7013 1.1 christos match at least one of that. 
*/ 7014 1.1 christos while (1) 7015 1.1 christos { 7016 1.1 christos if (p2 + 2 < pend 7017 1.1 christos && ((re_opcode_t) *p2 == stop_memory 7018 1.1 christos || (re_opcode_t) *p2 == start_memory)) 7019 1.1 christos p2 += 3; 7020 1.1 christos else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend 7021 1.1 christos && (re_opcode_t) *p2 == dummy_failure_jump) 7022 1.1 christos p2 += 2 + 2 * OFFSET_ADDRESS_SIZE; 7023 1.1 christos else 7024 1.1 christos break; 7025 1.1 christos } 7026 1.1 christos 7027 1.1 christos p1 = p + mcnt; 7028 1.1 christos /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 7029 1.1 christos to the `maybe_finalize_jump' of this case. Examine what 7030 1.1 christos follows. */ 7031 1.1 christos 7032 1.1 christos /* If we're at the end of the pattern, we can change. */ 7033 1.1 christos if (p2 == pend) 7034 1.1 christos { 7035 1.1 christos /* Consider what happens when matching ":\(.*\)" 7036 1.1 christos against ":/". I don't really understand this code 7037 1.1 christos yet. */ 7038 1.1 christos p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 7039 1.1 christos pop_failure_jump; 7040 1.1 christos DEBUG_PRINT1 7041 1.1 christos (" End of pattern: change to `pop_failure_jump'.\n"); 7042 1.1 christos } 7043 1.1 christos 7044 1.1 christos else if ((re_opcode_t) *p2 == exactn 7045 1.1 christos #ifdef MBS_SUPPORT 7046 1.1 christos || (re_opcode_t) *p2 == exactn_bin 7047 1.1 christos #endif 7048 1.1 christos || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 7049 1.1 christos { 7050 1.1 christos register UCHAR_T c 7051 1.1 christos = *p2 == (UCHAR_T) endline ? 
'\n' : p2[2]; 7052 1.1 christos 7053 1.1 christos if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn 7054 1.1 christos #ifdef MBS_SUPPORT 7055 1.1 christos || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin 7056 1.1 christos #endif 7057 1.1 christos ) && p1[3+OFFSET_ADDRESS_SIZE] != c) 7058 1.1 christos { 7059 1.1 christos p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 7060 1.1 christos pop_failure_jump; 7061 1.1 christos #ifdef WCHAR 7062 1.1 christos DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n", 7063 1.1 christos (wint_t) c, 7064 1.1 christos (wint_t) p1[3+OFFSET_ADDRESS_SIZE]); 7065 1.1 christos #else 7066 1.1 christos DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 7067 1.1 christos (char) c, 7068 1.1 christos (char) p1[3+OFFSET_ADDRESS_SIZE]); 7069 1.1 christos #endif 7070 1.1 christos } 7071 1.1 christos 7072 1.1 christos #ifndef WCHAR 7073 1.1 christos else if ((re_opcode_t) p1[3] == charset 7074 1.1 christos || (re_opcode_t) p1[3] == charset_not) 7075 1.1 christos { 7076 1.1 christos int negate = (re_opcode_t) p1[3] == charset_not; 7077 1.1 christos 7078 1.1 christos if (c < (unsigned) (p1[4] * BYTEWIDTH) 7079 1.1 christos && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 7080 1.1 christos negate = !negate; 7081 1.1 christos 7082 1.1 christos /* `negate' is equal to 1 if c would match, which means 7083 1.1 christos that we can't change to pop_failure_jump. */ 7084 1.1 christos if (!negate) 7085 1.1 christos { 7086 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7087 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7088 1.1 christos } 7089 1.1 christos } 7090 1.1 christos #endif /* not WCHAR */ 7091 1.1 christos } 7092 1.1 christos #ifndef WCHAR 7093 1.1 christos else if ((re_opcode_t) *p2 == charset) 7094 1.1 christos { 7095 1.1 christos /* We win if the first character of the loop is not part 7096 1.1 christos of the charset. */ 7097 1.1 christos if ((re_opcode_t) p1[3] == exactn 7098 1.1 christos && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[5] 7099 1.1 christos && (p2[2 + p1[5] / BYTEWIDTH] 7100 1.1 christos & (1 << (p1[5] % BYTEWIDTH))))) 7101 1.1 christos { 7102 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7103 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7104 1.1 christos } 7105 1.1 christos 7106 1.1 christos else if ((re_opcode_t) p1[3] == charset_not) 7107 1.1 christos { 7108 1.1 christos int idx; 7109 1.1 christos /* We win if the charset_not inside the loop 7110 1.1 christos lists every character listed in the charset after. */ 7111 1.1 christos for (idx = 0; idx < (int) p2[1]; idx++) 7112 1.1 christos if (! (p2[2 + idx] == 0 7113 1.1 christos || (idx < (int) p1[4] 7114 1.1 christos && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 7115 1.1 christos break; 7116 1.1 christos 7117 1.1 christos if (idx == p2[1]) 7118 1.1 christos { 7119 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7120 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7121 1.1 christos } 7122 1.1 christos } 7123 1.1 christos else if ((re_opcode_t) p1[3] == charset) 7124 1.1 christos { 7125 1.1 christos int idx; 7126 1.1 christos /* We win if the charset inside the loop 7127 1.1 christos has no overlap with the one after the loop. */ 7128 1.1 christos for (idx = 0; 7129 1.1 christos idx < (int) p2[1] && idx < (int) p1[4]; 7130 1.1 christos idx++) 7131 1.1 christos if ((p2[2 + idx] & p1[5 + idx]) != 0) 7132 1.1 christos break; 7133 1.1.1.6 christos 7134 1.1 christos if (idx == p2[1] || idx == p1[4]) 7135 1.1 christos { 7136 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7137 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7138 1.1 christos } 7139 1.1 christos } 7140 1.1 christos } 7141 1.1 christos #endif /* not WCHAR */ 7142 1.1 christos } 7143 1.1 christos p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. 
*/ 7144 1.1 christos if ((re_opcode_t) p[-1] != pop_failure_jump) 7145 1.1 christos { 7146 1.1 christos p[-1] = (UCHAR_T) jump; 7147 1.1 christos DEBUG_PRINT1 (" Match => jump.\n"); 7148 1.1 christos goto unconditional_jump; 7149 1.1 christos } 7150 1.1 christos /* Fall through. */ 7151 1.1 christos 7152 1.1 christos 7153 1.1 christos /* The end of a simple repeat has a pop_failure_jump back to 7154 1.1 christos its matching on_failure_jump, where the latter will push a 7155 1.1 christos failure point. The pop_failure_jump takes off failure 7156 1.1 christos points put on by this pop_failure_jump's matching 7157 1.1 christos on_failure_jump; we got through the pattern to here from the 7158 1.1.1.6 christos matching on_failure_jump, so didn't fail. */ 7159 1.1 christos case pop_failure_jump: 7160 1.1 christos { 7161 1.1 christos /* We need to pass separate storage for the lowest and 7162 1.1 christos highest registers, even though we don't care about the 7163 1.1 christos actual values. Otherwise, we will restore only one 7164 1.1 christos register from the stack, since lowest will == highest in 7165 1.1 christos `pop_failure_point'. */ 7166 1.1 christos active_reg_t dummy_low_reg, dummy_high_reg; 7167 1.1 christos UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL; 7168 1.1 christos const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL; 7169 1.1 christos 7170 1.1 christos DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 7171 1.1 christos POP_FAILURE_POINT (sdummy, pdummy, 7172 1.1 christos dummy_low_reg, dummy_high_reg, 7173 1.1 christos reg_dummy, reg_dummy, reg_info_dummy); 7174 1.1 christos } 7175 1.1 christos /* Fall through. */ 7176 1.1 christos 7177 1.1 christos unconditional_jump: 7178 1.1 christos #ifdef _LIBC 7179 1.1 christos DEBUG_PRINT2 ("\n%p: ", p); 7180 1.1 christos #else 7181 1.1 christos DEBUG_PRINT2 ("\n0x%x: ", p); 7182 1.1 christos #endif 7183 1.1 christos /* Note fall through. 
*/ 7184 1.1 christos 7185 1.1 christos /* Unconditionally jump (without popping any failure points). */ 7186 1.1 christos case jump: 7187 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 7188 1.1 christos DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 7189 1.1 christos p += mcnt; /* Do the jump. */ 7190 1.1 christos #ifdef _LIBC 7191 1.1 christos DEBUG_PRINT2 ("(to %p).\n", p); 7192 1.1 christos #else 7193 1.1 christos DEBUG_PRINT2 ("(to 0x%x).\n", p); 7194 1.1 christos #endif 7195 1.1 christos break; 7196 1.1 christos 7197 1.1 christos 7198 1.1 christos /* We need this opcode so we can detect where alternatives end 7199 1.1 christos in `group_match_null_string_p' et al. */ 7200 1.1 christos case jump_past_alt: 7201 1.1 christos DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 7202 1.1 christos goto unconditional_jump; 7203 1.1 christos 7204 1.1 christos 7205 1.1 christos /* Normally, the on_failure_jump pushes a failure point, which 7206 1.1 christos then gets popped at pop_failure_jump. We will end up at 7207 1.1 christos pop_failure_jump, also, and with a pattern of, say, `a+', we 7208 1.1 christos are skipping over the on_failure_jump, so we have to push 7209 1.1 christos something meaningless for pop_failure_jump to pop. */ 7210 1.1 christos case dummy_failure_jump: 7211 1.1 christos DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 7212 1.1 christos /* It doesn't matter what we push for the string here. What 7213 1.1 christos the code at `fail' tests is the value for the pattern. */ 7214 1.1 christos PUSH_FAILURE_POINT (NULL, NULL, -2); 7215 1.1 christos goto unconditional_jump; 7216 1.1 christos 7217 1.1 christos 7218 1.1 christos /* At the end of an alternative, we need to push a dummy failure 7219 1.1 christos point in case we are followed by a `pop_failure_jump', because 7220 1.1 christos we don't want the failure point for the alternative to be 7221 1.1 christos popped. 
For example, matching `(a|ab)*' against `aab' 7222 1.1 christos requires that we match the `ab' alternative. */ 7223 1.1 christos case push_dummy_failure: 7224 1.1 christos DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 7225 1.1 christos /* See comments just above at `dummy_failure_jump' about the 7226 1.1 christos two zeroes. */ 7227 1.1 christos PUSH_FAILURE_POINT (NULL, NULL, -2); 7228 1.1 christos break; 7229 1.1 christos 7230 1.1 christos /* Have to succeed matching what follows at least n times. 7231 1.1 christos After that, handle like `on_failure_jump'. */ 7232 1.1 christos case succeed_n: 7233 1.1 christos EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 7234 1.1 christos DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 7235 1.1 christos 7236 1.1 christos assert (mcnt >= 0); 7237 1.1 christos /* Originally, this is how many times we HAVE to succeed. */ 7238 1.1 christos if (mcnt > 0) 7239 1.1 christos { 7240 1.1 christos mcnt--; 7241 1.1 christos p += OFFSET_ADDRESS_SIZE; 7242 1.1 christos STORE_NUMBER_AND_INCR (p, mcnt); 7243 1.1 christos #ifdef _LIBC 7244 1.1 christos DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE 7245 1.1 christos , mcnt); 7246 1.1 christos #else 7247 1.1 christos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE 7248 1.1 christos , mcnt); 7249 1.1 christos #endif 7250 1.1 christos } 7251 1.1 christos else if (mcnt == 0) 7252 1.1 christos { 7253 1.1 christos #ifdef _LIBC 7254 1.1 christos DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", 7255 1.1 christos p + OFFSET_ADDRESS_SIZE); 7256 1.1 christos #else 7257 1.1 christos DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", 7258 1.1 christos p + OFFSET_ADDRESS_SIZE); 7259 1.1 christos #endif /* _LIBC */ 7260 1.1 christos 7261 1.1 christos #ifdef WCHAR 7262 1.1 christos p[1] = (UCHAR_T) no_op; 7263 1.1 christos #else 7264 1.1 christos p[2] = (UCHAR_T) no_op; 7265 1.1 christos p[3] = (UCHAR_T) no_op; 7266 1.1 christos #endif /* WCHAR */ 7267 1.1 
christos goto on_failure; 7268 1.1 christos } 7269 1.1 christos break; 7270 1.1 christos 7271 1.1 christos case jump_n: 7272 1.1 christos EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 7273 1.1 christos DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 7274 1.1 christos 7275 1.1 christos /* Originally, this is how many times we CAN jump. */ 7276 1.1 christos if (mcnt) 7277 1.1 christos { 7278 1.1 christos mcnt--; 7279 1.1 christos STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt); 7280 1.1 christos 7281 1.1 christos #ifdef _LIBC 7282 1.1 christos DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE, 7283 1.1 christos mcnt); 7284 1.1 christos #else 7285 1.1 christos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE, 7286 1.1 christos mcnt); 7287 1.1 christos #endif /* _LIBC */ 7288 1.1 christos goto unconditional_jump; 7289 1.1 christos } 7290 1.1 christos /* If don't have to jump any more, skip over the rest of command. */ 7291 1.1 christos else 7292 1.1 christos p += 2 * OFFSET_ADDRESS_SIZE; 7293 1.1 christos break; 7294 1.1 christos 7295 1.1 christos case set_number_at: 7296 1.1 christos { 7297 1.1 christos DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 7298 1.1 christos 7299 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7300 1.1 christos p1 = p + mcnt; 7301 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7302 1.1 christos #ifdef _LIBC 7303 1.1 christos DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 7304 1.1 christos #else 7305 1.1 christos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 7306 1.1 christos #endif 7307 1.1 christos STORE_NUMBER (p1, mcnt); 7308 1.1 christos break; 7309 1.1 christos } 7310 1.1 christos 7311 1.1 christos #if 0 7312 1.1 christos /* The DEC Alpha C compiler 3.x generates incorrect code for the 7313 1.1 christos test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 7314 1.1 christos AT_WORD_BOUNDARY, so this code is disabled. 
Expanding the 7315 1.1 christos macro and introducing temporary variables works around the bug. */ 7316 1.1 christos 7317 1.1 christos case wordbound: 7318 1.1 christos DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 7319 1.1 christos if (AT_WORD_BOUNDARY (d)) 7320 1.1 christos break; 7321 1.1 christos goto fail; 7322 1.1 christos 7323 1.1 christos case notwordbound: 7324 1.1 christos DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 7325 1.1 christos if (AT_WORD_BOUNDARY (d)) 7326 1.1 christos goto fail; 7327 1.1 christos break; 7328 1.1 christos #else 7329 1.1 christos case wordbound: 7330 1.1 christos { 7331 1.1 christos boolean prevchar, thischar; 7332 1.1 christos 7333 1.1 christos DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 7334 1.1 christos if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 7335 1.1 christos break; 7336 1.1 christos 7337 1.1 christos prevchar = WORDCHAR_P (d - 1); 7338 1.1 christos thischar = WORDCHAR_P (d); 7339 1.1 christos if (prevchar != thischar) 7340 1.1 christos break; 7341 1.1 christos goto fail; 7342 1.1 christos } 7343 1.1 christos 7344 1.1 christos case notwordbound: 7345 1.1 christos { 7346 1.1 christos boolean prevchar, thischar; 7347 1.1 christos 7348 1.1 christos DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 7349 1.1 christos if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 7350 1.1 christos goto fail; 7351 1.1 christos 7352 1.1 christos prevchar = WORDCHAR_P (d - 1); 7353 1.1 christos thischar = WORDCHAR_P (d); 7354 1.1 christos if (prevchar != thischar) 7355 1.1 christos goto fail; 7356 1.1 christos break; 7357 1.1 christos } 7358 1.1 christos #endif 7359 1.1 christos 7360 1.1 christos case wordbeg: 7361 1.1 christos DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 7362 1.1 christos if (!AT_STRINGS_END (d) && WORDCHAR_P (d) 7363 1.1 christos && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 7364 1.1 christos break; 7365 1.1 christos goto fail; 7366 1.1 christos 7367 1.1 christos case wordend: 7368 1.1 christos DEBUG_PRINT1 ("EXECUTING wordend.\n"); 7369 1.1 
christos if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 7370 1.1 christos && (AT_STRINGS_END (d) || !WORDCHAR_P (d))) 7371 1.1 christos break; 7372 1.1 christos goto fail; 7373 1.1 christos 7374 1.1 christos #ifdef emacs 7375 1.1 christos case before_dot: 7376 1.1 christos DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 7377 1.1 christos if (PTR_CHAR_POS ((unsigned char *) d) >= point) 7378 1.1 christos goto fail; 7379 1.1 christos break; 7380 1.1 christos 7381 1.1 christos case at_dot: 7382 1.1 christos DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 7383 1.1 christos if (PTR_CHAR_POS ((unsigned char *) d) != point) 7384 1.1 christos goto fail; 7385 1.1 christos break; 7386 1.1 christos 7387 1.1 christos case after_dot: 7388 1.1 christos DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 7389 1.1 christos if (PTR_CHAR_POS ((unsigned char *) d) <= point) 7390 1.1 christos goto fail; 7391 1.1 christos break; 7392 1.1 christos 7393 1.1 christos case syntaxspec: 7394 1.1 christos DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 7395 1.1 christos mcnt = *p++; 7396 1.1 christos goto matchsyntax; 7397 1.1 christos 7398 1.1 christos case wordchar: 7399 1.1 christos DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 7400 1.1 christos mcnt = (int) Sword; 7401 1.1 christos matchsyntax: 7402 1.1 christos PREFETCH (); 7403 1.1 christos /* Can't use *d++ here; SYNTAX may be an unsafe macro. 
*/ 7404 1.1 christos d++; 7405 1.1 christos if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 7406 1.1 christos goto fail; 7407 1.1 christos SET_REGS_MATCHED (); 7408 1.1 christos break; 7409 1.1 christos 7410 1.1 christos case notsyntaxspec: 7411 1.1 christos DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 7412 1.1 christos mcnt = *p++; 7413 1.1 christos goto matchnotsyntax; 7414 1.1 christos 7415 1.1 christos case notwordchar: 7416 1.1 christos DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 7417 1.1 christos mcnt = (int) Sword; 7418 1.1 christos matchnotsyntax: 7419 1.1 christos PREFETCH (); 7420 1.1 christos /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 7421 1.1 christos d++; 7422 1.1 christos if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 7423 1.1 christos goto fail; 7424 1.1 christos SET_REGS_MATCHED (); 7425 1.1 christos break; 7426 1.1 christos 7427 1.1 christos #else /* not emacs */ 7428 1.1 christos case wordchar: 7429 1.1 christos DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 7430 1.1 christos PREFETCH (); 7431 1.1 christos if (!WORDCHAR_P (d)) 7432 1.1 christos goto fail; 7433 1.1 christos SET_REGS_MATCHED (); 7434 1.1 christos d++; 7435 1.1 christos break; 7436 1.1 christos 7437 1.1 christos case notwordchar: 7438 1.1 christos DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 7439 1.1 christos PREFETCH (); 7440 1.1 christos if (WORDCHAR_P (d)) 7441 1.1 christos goto fail; 7442 1.1 christos SET_REGS_MATCHED (); 7443 1.1 christos d++; 7444 1.1 christos break; 7445 1.1 christos #endif /* not emacs */ 7446 1.1 christos 7447 1.1 christos default: 7448 1.1 christos abort (); 7449 1.1 christos } 7450 1.1 christos continue; /* Successfully executed one pattern command; keep going. */ 7451 1.1 christos 7452 1.1 christos 7453 1.1 christos /* We goto here if a matching operation fails. */ 7454 1.1 christos fail: 7455 1.1 christos if (!FAIL_STACK_EMPTY ()) 7456 1.1 christos { /* A restart point is known. Restore to that state. 
*/ 7457 1.1 christos DEBUG_PRINT1 ("\nFAIL:\n"); 7458 1.1 christos POP_FAILURE_POINT (d, p, 7459 1.1 christos lowest_active_reg, highest_active_reg, 7460 1.1 christos regstart, regend, reg_info); 7461 1.1.1.6 christos 7462 1.1 christos /* If this failure point is a dummy, try the next one. */ 7463 1.1 christos if (!p) 7464 1.1 christos goto fail; 7465 1.1 christos 7466 1.1 christos /* If we failed to the end of the pattern, don't examine *p. */ 7467 1.1 christos assert (p <= pend); 7468 1.1 christos if (p < pend) 7469 1.1 christos { 7470 1.1 christos boolean is_a_jump_n = false; 7471 1.1 christos 7472 1.1 christos /* If failed to a backwards jump that's part of a repetition 7473 1.1 christos loop, need to pop this failure point and use the next one. */ 7474 1.1 christos switch ((re_opcode_t) *p) 7475 1.1 christos { 7476 1.1 christos case jump_n: 7477 1.1 christos is_a_jump_n = true; 7478 1.1 christos /* Fall through. */ 7479 1.1 christos case maybe_pop_jump: 7480 1.1 christos case pop_failure_jump: 7481 1.1 christos case jump: 7482 1.1 christos p1 = p + 1; 7483 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7484 1.1 christos p1 += mcnt; 7485 1.1 christos 7486 1.1 christos if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 7487 1.1 christos || (!is_a_jump_n 7488 1.1 christos && (re_opcode_t) *p1 == on_failure_jump)) 7489 1.1 christos goto fail; 7490 1.1 christos break; 7491 1.1 christos default: 7492 1.1 christos /* do nothing */ ; 7493 1.1 christos } 7494 1.1 christos } 7495 1.1 christos 7496 1.1 christos if (d >= string1 && d <= end1) 7497 1.1 christos dend = end_match_1; 7498 1.1 christos } 7499 1.1 christos else 7500 1.1 christos break; /* Matching at this starting point really fails. */ 7501 1.1 christos } /* for (;;) */ 7502 1.1 christos 7503 1.1 christos if (best_regs_set) 7504 1.1 christos goto restore_best_regs; 7505 1.1 christos 7506 1.1 christos FREE_VARIABLES (); 7507 1.1 christos 7508 1.1 christos return -1; /* Failure to match. 
*/
} /* re_match_2 */

/* Subroutine definitions for re_match_2.  */


/* We are passed P pointing to a register number after a start_memory.

   Return true if the pattern up to the corresponding stop_memory can
   match the empty string, and false otherwise.

   If we find the matching stop_memory, sets P to point to one past its number.
   Otherwise, sets P to an undefined byte less than or equal to END.

   REG_INFO is not examined in this function; it is only handed on to
   alt_match_null_string_p and common_op_match_null_string_p.

   We don't handle duplicates properly (yet).  */

static boolean
PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
                                   PREFIX(register_info_type) *reg_info)
{
  int mcnt;
  /* Point to after the args to the start_memory.  */
  UCHAR_T *p1 = *p + 2;

  while (p1 < end)
    {
      /* Skip over opcodes that can match nothing, and return true or
	 false, as appropriate, when we get to one that can't, or to the
	 matching stop_memory.  */

      switch ((re_opcode_t) *p1)
        {
        /* Could be either a loop or a series of alternatives.  */
        case on_failure_jump:
          p1++;
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);

          /* If the next operation is not a jump backwards in the
	     pattern.  (For a negative offset nothing more is done here:
	     p1 already sits just past the operand and scanning simply
	     continues with the next opcode.)  */

	  if (mcnt >= 0)
	    {
              /* Go through the on_failure_jumps of the alternatives,
                 seeing if any of the alternatives cannot match nothing.
                 The last alternative starts with only a jump,
                 whereas the rest start with on_failure_jump and end
                 with a jump, e.g., here is the pattern for `a|b|c':

                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
                 /exactn/1/c

                 So, we have to first go through the first (n-1)
                 alternatives and then deal with the last one separately.  */


              /* Deal with the first (n-1) alternatives, which start
                 with an on_failure_jump (see above) that jumps to right
                 past a jump_past_alt.  */

              while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
		     jump_past_alt)
                {
                  /* `mcnt' holds how many bytes long the alternative
                     is, including the ending `jump_past_alt' and
                     its number.  */

                  if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
						(1 + OFFSET_ADDRESS_SIZE),
						reg_info))
                    return false;

                  /* Move to right after this alternative, including the
		     jump_past_alt.  */
                  p1 += mcnt;

                  /* Break if it's the beginning of an n-th alternative
                     that doesn't begin with an on_failure_jump.  */
                  if ((re_opcode_t) *p1 != on_failure_jump)
                    break;

		  /* Still have to check that it's not an n-th
		     alternative that starts with an on_failure_jump.  */
		  p1++;
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                  if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
		      jump_past_alt)
                    {
		      /* Get to the beginning of the n-th alternative.
		         This undoes the opcode byte and offset operand
		         that were just consumed above.  */
                      p1 -= 1 + OFFSET_ADDRESS_SIZE;
                      break;
                    }
                }

              /* Deal with the last alternative: go back and get number
                 of the `jump_past_alt' just before it.  `mcnt' contains
                 the length of the alternative.  */
              EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);

              if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
                return false;

              p1 += mcnt;	/* Get past the n-th alternative.  */
            } /* if mcnt >= 0 */
          break;


        case stop_memory:
	  /* The operand following stop_memory must name the same register
	     that *P points at.  Leave *P one past that operand.  */
	  assert (p1[1] == **p);
          *p = p1 + 2;
          return true;


        default:
	  /* Every other opcode is judged by the shared helper, which
	     advances p1 itself when it returns true.  */
          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
            return false;
        }
    } /* while p1 < end */

  /* Ran off END without meeting the group's stop_memory.  */
  return false;
} /* group_match_null_string_p */


/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   It expects P to be the first byte of a single alternative and END one
   byte past the last.  The alternative can contain groups.
*/

/* Walk the single alternative [P, END) and report whether every opcode
   in it can match the empty string.  Returns true if the scan reaches
   END, false as soon as common_op_match_null_string_p rejects an
   opcode.  REG_INFO is only forwarded to that helper.  */

static boolean
PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
                                 PREFIX(register_info_type) *reg_info)
{
  UCHAR_T *scan;
  int jump_len;

  for (scan = p; scan < end; )
    {
      if ((re_opcode_t) *scan == on_failure_jump)
        {
          /* It's a loop: step over the opcode, read its offset operand,
             and resume scanning at the jump target.  */
          scan++;
          EXTRACT_NUMBER_AND_INCR (jump_len, scan);
          scan += jump_len;
          continue;
        }

      /* Any other opcode is handled by the shared helper, which moves
         SCAN past the op and its arguments when it returns true.  */
      if (!PREFIX(common_op_match_null_string_p) (&scan, end, reg_info))
        return false;
    }

  return true;
} /* alt_match_null_string_p */


/* Deals with the ops common to group_match_null_string_p and
   alt_match_null_string_p.

   Sets P to one after the op and its arguments, if any.
*/

/* Returns true when the opcode at *P can match the empty string, and only
   then stores the advanced position back into *P.  Every `return false'
   below happens before that store, so *P is left untouched on failure.  */

static boolean
PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
                                       PREFIX(register_info_type) *reg_info)
{
  int mcnt;
  boolean ret;
  int reg_no;
  UCHAR_T *p1 = *p;

  /* p1 is moved past the opcode byte as part of the dispatch, so in every
     case below it already points at the first operand (if any).  */
  switch ((re_opcode_t) *p1++)
    {
    /* These opcodes consume no operand bytes here and never prevent an
       empty match.  */
    case no_op:
    case begline:
    case endline:
    case begbuf:
    case endbuf:
    case wordbeg:
    case wordend:
    case wordbound:
    case notwordbound:
#ifdef emacs
    case before_dot:
    case at_dot:
    case after_dot:
#endif
      break;

    case start_memory:
      reg_no = *p1;
      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
      /* On success the callee leaves p1 one past the matching
         stop_memory's register number.  */
      ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);

      /* Have to set this here in case we're checking a group which
         contains a group and a back reference to it.  */

      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;

      if (!ret)
        return false;
      break;

    /* If this is an optimized succeed_n for zero times, make the jump.
       A backward (negative) jump is treated as unable to match nothing.  */
    case jump:
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
      if (mcnt >= 0)
        p1 += mcnt;
      else
        return false;
      break;

    case succeed_n:
      /* Get to the number of times to succeed.  */
      p1 += OFFSET_ADDRESS_SIZE;
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);

      if (mcnt == 0)
        {
          /* Zero required repetitions: step back over both operands to
             the first one, read it as a jump offset, and take the jump.  */
          p1 -= 2 * OFFSET_ADDRESS_SIZE;
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
          p1 += mcnt;
        }
      else
        return false;
      break;

    case duplicate:
      /* A back reference can match nothing only if the group it names
         was recorded as able to.
         NOTE(review): p1 is not advanced past the register-number operand
         here, so on success *P ends up pointing at that operand rather
         than at the next opcode -- confirm whether this is intended (the
         callers' header comments say duplicates are not handled
         properly yet).  */
      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
        return false;
      break;

    case set_number_at:
      /* The adjustment of p1 has no visible effect: we return false
         without storing p1 back into *P.  */
      p1 += 2 * OFFSET_ADDRESS_SIZE;
      return false;

    default:
      /* All other opcodes mean we cannot match the empty string.  */
      return false;
  }

  *p = p1;
  return true;
} /* common_op_match_null_string_p */


/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   bytes; nonzero otherwise.
*/ 7759 1.1 christos 7760 1.1 christos static int 7761 1.1 christos PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len, 7762 1.1 christos RE_TRANSLATE_TYPE translate) 7763 1.1 christos { 7764 1.1 christos register const UCHAR_T *p1 = (const UCHAR_T *) s1; 7765 1.1 christos register const UCHAR_T *p2 = (const UCHAR_T *) s2; 7766 1.1 christos while (len) 7767 1.1 christos { 7768 1.1 christos #ifdef WCHAR 7769 1.1 christos if (((*p1<=0xff)?translate[*p1++]:*p1++) 7770 1.1 christos != ((*p2<=0xff)?translate[*p2++]:*p2++)) 7771 1.1 christos return 1; 7772 1.1 christos #else /* BYTE */ 7773 1.1 christos if (translate[*p1++] != translate[*p2++]) return 1; 7774 1.1 christos #endif /* WCHAR */ 7775 1.1 christos len--; 7776 1.1 christos } 7777 1.1 christos return 0; 7778 1.1 christos } 7779 1.1 christos 7780 1.1 christos 7782 1.1 christos #else /* not INSIDE_RECURSION */ 7783 1.1 christos 7784 1.1 christos /* Entry points for GNU code. */ 7785 1.1 christos 7786 1.1 christos /* re_compile_pattern is the GNU regular expression compiler: it 7787 1.1 christos compiles PATTERN (of length SIZE) and puts the result in BUFP. 7788 1.1 christos Returns 0 if the pattern was valid, otherwise an error string. 7789 1.1 christos 7790 1.1 christos Assumes the `allocated' (and perhaps `buffer') and `translate' fields 7791 1.1 christos are set in BUFP on entry. 7792 1.1 christos 7793 1.1 christos We call regex_compile to do the actual compilation. */ 7794 1.1 christos 7795 1.1 christos const char * 7796 1.1 christos re_compile_pattern (const char *pattern, size_t length, 7797 1.1 christos struct re_pattern_buffer *bufp) 7798 1.1 christos { 7799 1.1 christos reg_errcode_t ret; 7800 1.1 christos 7801 1.1 christos /* GNU code is written to assume at least RE_NREGS registers will be set 7802 1.1 christos (and at least one extra will be -1). 
*/ 7803 1.1 christos bufp->regs_allocated = REGS_UNALLOCATED; 7804 1.1 christos 7805 1.1 christos /* And GNU code determines whether or not to get register information 7806 1.1 christos by passing null for the REGS argument to re_match, etc., not by 7807 1.1 christos setting no_sub. */ 7808 1.1 christos bufp->no_sub = 0; 7809 1.1 christos 7810 1.1 christos /* Match anchors at newline. */ 7811 1.1 christos bufp->newline_anchor = 1; 7812 1.1 christos 7813 1.1 christos # ifdef MBS_SUPPORT 7814 1.1 christos if (MB_CUR_MAX != 1) 7815 1.1 christos ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp); 7816 1.1 christos else 7817 1.1 christos # endif 7818 1.1 christos ret = byte_regex_compile (pattern, length, re_syntax_options, bufp); 7819 1.1 christos 7820 1.1 christos if (!ret) 7821 1.1 christos return NULL; 7822 1.1 christos return gettext (re_error_msgid[(int) ret]); 7823 1.1 christos } 7824 1.1 christos #ifdef _LIBC 7825 1.1 christos weak_alias (__re_compile_pattern, re_compile_pattern) 7826 1.1 christos #endif 7827 1.1 christos 7828 1.1 christos /* Entry points compatible with 4.2 BSD regex library. We don't define 7830 1.1 christos them unless specifically requested. */ 7831 1.1 christos 7832 1.1 christos #if defined _REGEX_RE_COMP || defined _LIBC 7833 1.1 christos 7834 1.1 christos /* BSD has one and only one pattern buffer. */ 7835 1.1 christos static struct re_pattern_buffer re_comp_buf; 7836 1.1 christos 7837 1.1 christos char * 7838 1.1 christos #ifdef _LIBC 7839 1.1 christos /* Make these definitions weak in libc, so POSIX programs can redefine 7840 1.1 christos these names if they don't use our functions, and still use 7841 1.1 christos regcomp/regexec below without link errors. 
*/ 7842 1.1 christos weak_function 7843 1.1 christos #endif 7844 1.1 christos re_comp (const char *s) 7845 1.1 christos { 7846 1.1 christos reg_errcode_t ret; 7847 1.1 christos 7848 1.1 christos if (!s) 7849 1.1 christos { 7850 1.1 christos if (!re_comp_buf.buffer) 7851 1.1 christos return (char *) gettext ("No previous regular expression"); 7852 1.1 christos return 0; 7853 1.1 christos } 7854 1.1 christos 7855 1.1 christos if (!re_comp_buf.buffer) 7856 1.1 christos { 7857 1.1 christos re_comp_buf.buffer = (unsigned char *) malloc (200); 7858 1.1 christos if (re_comp_buf.buffer == NULL) 7859 1.1 christos return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 7860 1.1 christos re_comp_buf.allocated = 200; 7861 1.1 christos 7862 1.1 christos re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 7863 1.1 christos if (re_comp_buf.fastmap == NULL) 7864 1.1 christos return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 7865 1.1 christos } 7866 1.1 christos 7867 1.1 christos /* Since `re_exec' always passes NULL for the `regs' argument, we 7868 1.1 christos don't need to initialize the pattern buffer fields which affect it. */ 7869 1.1 christos 7870 1.1 christos /* Match anchors at newlines. */ 7871 1.1 christos re_comp_buf.newline_anchor = 1; 7872 1.1 christos 7873 1.1 christos # ifdef MBS_SUPPORT 7874 1.1 christos if (MB_CUR_MAX != 1) 7875 1.1 christos ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 7876 1.1 christos else 7877 1.1 christos # endif 7878 1.1 christos ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 7879 1.1 christos 7880 1.1 christos if (!ret) 7881 1.1 christos return NULL; 7882 1.1 christos 7883 1.1 christos /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ 7884 1.1 christos return (char *) gettext (re_error_msgid[(int) ret]); 7885 1.1 christos } 7886 1.1 christos 7887 1.1 christos 7888 1.1 christos int 7889 1.1 christos #ifdef _LIBC 7890 1.1 christos weak_function 7891 1.1 christos #endif 7892 1.1 christos re_exec (const char *s) 7893 1.1 christos { 7894 1.1 christos const int len = strlen (s); 7895 1.1 christos return 7896 1.1 christos 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); 7897 1.1 christos } 7898 1.1 christos 7899 1.1 christos #endif /* _REGEX_RE_COMP */ 7900 1.1 christos 7901 1.1 christos /* POSIX.2 functions. Don't define these for Emacs. */ 7903 1.1 christos 7904 1.1 christos #ifndef emacs 7905 1.1 christos 7906 1.1 christos /* regcomp takes a regular expression as a string and compiles it. 7907 1.1 christos 7908 1.1 christos PREG is a regex_t *. We do not expect any fields to be initialized, 7909 1.1 christos since POSIX says we shouldn't. Thus, we set 7910 1.1 christos 7911 1.1 christos `buffer' to the compiled pattern; 7912 1.1 christos `used' to the length of the compiled pattern; 7913 1.1 christos `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 7914 1.1 christos REG_EXTENDED bit in CFLAGS is set; otherwise, to 7915 1.1 christos RE_SYNTAX_POSIX_BASIC; 7916 1.1 christos `newline_anchor' to REG_NEWLINE being set in CFLAGS; 7917 1.1 christos `fastmap' to an allocated space for the fastmap; 7918 1.1 christos `fastmap_accurate' to zero; 7919 1.1 christos `re_nsub' to the number of subexpressions in PATTERN. 7920 1.1 christos 7921 1.1 christos PATTERN is the address of the pattern string. 7922 1.1 christos 7923 1.1 christos CFLAGS is a series of bits which affect compilation. 7924 1.1 christos 7925 1.1 christos If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 7926 1.1 christos use POSIX basic syntax. 7927 1.1 christos 7928 1.1 christos If REG_NEWLINE is set, then . and [^...] don't match newline. 
7929 1.1 christos Also, regexec will try a match beginning after every newline. 7930 1.1 christos 7931 1.1 christos If REG_ICASE is set, then we considers upper- and lowercase 7932 1.1 christos versions of letters to be equivalent when matching. 7933 1.1 christos 7934 1.1 christos If REG_NOSUB is set, then when PREG is passed to regexec, that 7935 1.1 christos routine will report only success or failure, and nothing about the 7936 1.1 christos registers. 7937 1.1 christos 7938 1.1 christos It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 7939 1.1 christos the return codes and their meanings.) */ 7940 1.1 christos 7941 1.1 christos int 7942 1.1 christos regcomp (regex_t *preg, const char *pattern, int cflags) 7943 1.1 christos { 7944 1.1 christos reg_errcode_t ret; 7945 1.1 christos reg_syntax_t syntax 7946 1.1 christos = (cflags & REG_EXTENDED) ? 7947 1.1 christos RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 7948 1.1 christos 7949 1.1 christos /* regex_compile will allocate the space for the compiled pattern. */ 7950 1.1 christos preg->buffer = 0; 7951 1.1 christos preg->allocated = 0; 7952 1.1 christos preg->used = 0; 7953 1.1 christos 7954 1.1 christos /* Try to allocate space for the fastmap. */ 7955 1.1 christos preg->fastmap = (char *) malloc (1 << BYTEWIDTH); 7956 1.1 christos 7957 1.1 christos if (cflags & REG_ICASE) 7958 1.1 christos { 7959 1.1 christos int i; 7960 1.1 christos 7961 1.1 christos preg->translate 7962 1.1 christos = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 7963 1.1 christos * sizeof (*(RE_TRANSLATE_TYPE)0)); 7964 1.1 christos if (preg->translate == NULL) 7965 1.1 christos return (int) REG_ESPACE; 7966 1.1 christos 7967 1.1 christos /* Map uppercase characters to corresponding lowercase ones. */ 7968 1.1 christos for (i = 0; i < CHAR_SET_SIZE; i++) 7969 1.1 christos preg->translate[i] = ISUPPER (i) ? 
TOLOWER (i) : i; 7970 1.1 christos } 7971 1.1 christos else 7972 1.1 christos preg->translate = NULL; 7973 1.1 christos 7974 1.1 christos /* If REG_NEWLINE is set, newlines are treated differently. */ 7975 1.1 christos if (cflags & REG_NEWLINE) 7976 1.1 christos { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 7977 1.1 christos syntax &= ~RE_DOT_NEWLINE; 7978 1.1 christos syntax |= RE_HAT_LISTS_NOT_NEWLINE; 7979 1.1 christos /* It also changes the matching behavior. */ 7980 1.1 christos preg->newline_anchor = 1; 7981 1.1 christos } 7982 1.1 christos else 7983 1.1 christos preg->newline_anchor = 0; 7984 1.1 christos 7985 1.1 christos preg->no_sub = !!(cflags & REG_NOSUB); 7986 1.1 christos 7987 1.1 christos /* POSIX says a null character in the pattern terminates it, so we 7988 1.1 christos can use strlen here in compiling the pattern. */ 7989 1.1 christos # ifdef MBS_SUPPORT 7990 1.1 christos if (MB_CUR_MAX != 1) 7991 1.1 christos ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg); 7992 1.1 christos else 7993 1.1 christos # endif 7994 1.1 christos ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg); 7995 1.1 christos 7996 1.1 christos /* POSIX doesn't distinguish between an unmatched open-group and an 7997 1.1 christos unmatched close-group: both are REG_EPAREN. */ 7998 1.1 christos if (ret == REG_ERPAREN) ret = REG_EPAREN; 7999 1.1 christos 8000 1.1 christos if (ret == REG_NOERROR && preg->fastmap) 8001 1.1 christos { 8002 1.1 christos /* Compute the fastmap now, since regexec cannot modify the pattern 8003 1.1 christos buffer. */ 8004 1.1 christos if (re_compile_fastmap (preg) == -2) 8005 1.1 christos { 8006 1.1 christos /* Some error occurred while computing the fastmap, just forget 8007 1.1 christos about it. 
*/ 8008 1.1 christos free (preg->fastmap); 8009 1.1 christos preg->fastmap = NULL; 8010 1.1 christos } 8011 1.1 christos } 8012 1.1 christos 8013 1.1 christos return (int) ret; 8014 1.1 christos } 8015 1.1 christos #ifdef _LIBC 8016 1.1 christos weak_alias (__regcomp, regcomp) 8017 1.1 christos #endif 8018 1.1 christos 8019 1.1 christos 8020 1.1 christos /* regexec searches for a given pattern, specified by PREG, in the 8021 1.1 christos string STRING. 8022 1.1 christos 8023 1.1 christos If NMATCH is zero or REG_NOSUB was set in the cflags argument to 8024 1.1 christos `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 8025 1.1 christos least NMATCH elements, and we set them to the offsets of the 8026 1.1 christos corresponding matched substrings. 8027 1.1 christos 8028 1.1 christos EFLAGS specifies `execution flags' which affect matching: if 8029 1.1 christos REG_NOTBOL is set, then ^ does not match at the beginning of the 8030 1.1 christos string; if REG_NOTEOL is set, then $ does not match at the end. 8031 1.1 christos 8032 1.1 christos We return 0 if we find a match and REG_NOMATCH if not. */ 8033 1.1 christos 8034 1.1 christos int 8035 1.1 christos regexec (const regex_t *preg, const char *string, size_t nmatch, 8036 1.1 christos regmatch_t pmatch[], int eflags) 8037 1.1 christos { 8038 1.1 christos int ret; 8039 1.1 christos struct re_registers regs; 8040 1.1 christos regex_t private_preg; 8041 1.1 christos int len = strlen (string); 8042 1.1 christos boolean want_reg_info = !preg->no_sub && nmatch > 0; 8043 1.1 christos 8044 1.1 christos private_preg = *preg; 8045 1.1 christos 8046 1.1 christos private_preg.not_bol = !!(eflags & REG_NOTBOL); 8047 1.1 christos private_preg.not_eol = !!(eflags & REG_NOTEOL); 8048 1.1 christos 8049 1.1 christos /* The user has told us exactly how many registers to return 8050 1.1 christos information about, via `nmatch'. We have to pass that on to the 8051 1.1 christos matching routines. 
*/ 8052 1.1 christos private_preg.regs_allocated = REGS_FIXED; 8053 1.1 christos 8054 1.1 christos if (want_reg_info) 8055 1.1 christos { 8056 1.1 christos regs.num_regs = nmatch; 8057 1.1 christos regs.start = TALLOC (nmatch * 2, regoff_t); 8058 1.1 christos if (regs.start == NULL) 8059 1.1 christos return (int) REG_NOMATCH; 8060 1.1 christos regs.end = regs.start + nmatch; 8061 1.1 christos } 8062 1.1 christos 8063 1.1 christos /* Perform the searching operation. */ 8064 1.1 christos ret = re_search (&private_preg, string, len, 8065 1.1 christos /* start: */ 0, /* range: */ len, 8066 1.1 christos want_reg_info ? ®s : (struct re_registers *) 0); 8067 1.1 christos 8068 1.1 christos /* Copy the register information to the POSIX structure. */ 8069 1.1 christos if (want_reg_info) 8070 1.1 christos { 8071 1.1 christos if (ret >= 0) 8072 1.1 christos { 8073 1.1 christos unsigned r; 8074 1.1 christos 8075 1.1 christos for (r = 0; r < nmatch; r++) 8076 1.1 christos { 8077 1.1 christos pmatch[r].rm_so = regs.start[r]; 8078 1.1 christos pmatch[r].rm_eo = regs.end[r]; 8079 1.1 christos } 8080 1.1 christos } 8081 1.1 christos 8082 1.1 christos /* If we needed the temporary register info, free the space now. */ 8083 1.1 christos free (regs.start); 8084 1.1 christos } 8085 1.1 christos 8086 1.1 christos /* We want zero return to mean success, unlike `re_search'. */ 8087 1.1 christos return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 8088 1.1 christos } 8089 1.1 christos #ifdef _LIBC 8090 1.1 christos weak_alias (__regexec, regexec) 8091 1.1 christos #endif 8092 1.1 christos 8093 1.1 christos 8094 1.1 christos /* Returns a message corresponding to an error code, ERRCODE, returned 8095 1.1 christos from either regcomp or regexec. We don't use PREG here. 
*/ 8096 1.1 christos 8097 1.1 christos size_t 8098 1.1 christos regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED, 8099 1.1 christos char *errbuf, size_t errbuf_size) 8100 1.1 christos { 8101 1.1 christos const char *msg; 8102 1.1 christos size_t msg_size; 8103 1.1.1.4 christos 8104 1.1 christos if (errcode < 0 8105 1.1 christos || errcode >= (int) (sizeof (re_error_msgid) 8106 1.1 christos / sizeof (re_error_msgid[0]))) 8107 1.1 christos /* Only error codes returned by the rest of the code should be passed 8108 1.1.1.4 christos to this routine. If we are given anything else, or if other regex 8109 1.1 christos code generates an invalid error code, then the program has a bug. 8110 1.1 christos Dump core so we can fix it. */ 8111 1.1 christos abort (); 8112 1.1 christos 8113 1.1 christos msg = gettext (re_error_msgid[errcode]); 8114 1.1 christos 8115 1.1 christos msg_size = strlen (msg) + 1; /* Includes the null. */ 8116 1.1 christos 8117 1.1 christos if (errbuf_size != 0) 8118 1.1 christos { 8119 1.1 christos if (msg_size > errbuf_size) 8120 1.1 christos { 8121 1.1 christos #if defined HAVE_MEMPCPY || defined _LIBC 8122 1.1 christos *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; 8123 1.1.1.2 christos #else 8124 1.1 christos (void) memcpy (errbuf, msg, errbuf_size - 1); 8125 1.1 christos errbuf[errbuf_size - 1] = 0; 8126 1.1 christos #endif 8127 1.1 christos } 8128 1.1 christos else 8129 1.1.1.2 christos (void) memcpy (errbuf, msg, msg_size); 8130 1.1 christos } 8131 1.1 christos 8132 1.1 christos return msg_size; 8133 1.1.1.2 christos } 8134 1.1 christos #ifdef _LIBC 8135 1.1 christos weak_alias (__regerror, regerror) 8136 1.1 christos #endif 8137 1.1 christos 8138 1.1 christos 8139 1.1 christos /* Free dynamically allocated space used by PREG. 
*/ 8140 1.1 christos 8141 1.1 christos void 8142 1.1 christos regfree (regex_t *preg) 8143 1.1 christos { 8144 1.1 christos free (preg->buffer); 8145 1.1 christos preg->buffer = NULL; 8146 1.1 christos 8147 1.1 christos preg->allocated = 0; 8148 1.1 christos preg->used = 0; 8149 1.1 christos 8150 1.1 christos free (preg->fastmap); 8151 1.1 christos preg->fastmap = NULL; 8152 1.1 christos preg->fastmap_accurate = 0; 8153 1.1 christos 8154 1.1 christos free (preg->translate); 8155 1.1 christos preg->translate = NULL; 8156 1.1 christos } 8157 1.1 christos #ifdef _LIBC 8158 1.1 christos weak_alias (__regfree, regfree) 8159 1.1 christos #endif 8160 1.1 christos 8161 1.1 christos #endif /* not emacs */ 8162 1.1 christos 8163 1.1 christos #endif /* not INSIDE_RECURSION */ 8164 1.1 christos 8165 1.1 christos 8166 1.1 christos #undef STORE_NUMBER 8168 1.1 christos #undef STORE_NUMBER_AND_INCR 8169 1.1 christos #undef EXTRACT_NUMBER 8170 1.1 christos #undef EXTRACT_NUMBER_AND_INCR 8171 1.1 christos 8172 1.1 christos #undef DEBUG_PRINT_COMPILED_PATTERN 8173 1.1 christos #undef DEBUG_PRINT_DOUBLE_STRING 8174 1.1 christos 8175 1.1 christos #undef INIT_FAIL_STACK 8176 1.1 christos #undef RESET_FAIL_STACK 8177 1.1 christos #undef DOUBLE_FAIL_STACK 8178 1.1 christos #undef PUSH_PATTERN_OP 8179 1.1 christos #undef PUSH_FAILURE_POINTER 8180 1.1 christos #undef PUSH_FAILURE_INT 8181 1.1 christos #undef PUSH_FAILURE_ELT 8182 1.1 christos #undef POP_FAILURE_POINTER 8183 1.1 christos #undef POP_FAILURE_INT 8184 1.1 christos #undef POP_FAILURE_ELT 8185 1.1 christos #undef DEBUG_PUSH 8186 1.1 christos #undef DEBUG_POP 8187 1.1 christos #undef PUSH_FAILURE_POINT 8188 1.1 christos #undef POP_FAILURE_POINT 8189 1.1 christos 8190 1.1 christos #undef REG_UNSET_VALUE 8191 1.1 christos #undef REG_UNSET 8192 1.1 christos 8193 1.1 christos #undef PATFETCH 8194 1.1 christos #undef PATFETCH_RAW 8195 1.1 christos #undef PATUNFETCH 8196 1.1 christos #undef TRANSLATE 8197 1.1 christos 8198 1.1 christos 
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
/* NOTE(review): this line used to read MATCHING_IN_FRST_STRING, which
   undefines nothing.  Corrected on the assumption that the macro defined
   earlier in this file is spelled MATCHING_IN_FIRST_STRING -- confirm
   against its #define.  */
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

# define DEFINED_ONCE