1 1.1 christos /* $NetBSD: regex.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* Extended regular expression matching and search library, 4 1.1 christos version 0.12. 5 1.1 christos (Implements POSIX draft P1003.2/D11.2, except for some of the 6 1.1 christos internationalization features.) 7 1.1 christos Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc. 8 1.1 christos 9 1.1 christos This program is free software; you can redistribute it and/or modify 10 1.1 christos it under the terms of the GNU General Public License as published by 11 1.1 christos the Free Software Foundation; either version 2, or (at your option) 12 1.1 christos any later version. 13 1.1 christos 14 1.1 christos This program is distributed in the hope that it will be useful, 15 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 16 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 1.1 christos GNU General Public License for more details. 18 1.1 christos 19 1.1 christos You should have received a copy of the GNU General Public License 20 1.1 christos along with this program; if not, write to the Free Software Foundation, 21 1.1 christos Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 1.1 christos 23 1.1 christos /* AIX requires this to be the first thing in the file. */ 24 1.1 christos #if defined _AIX && !defined REGEX_MALLOC 25 1.1 christos #pragma alloca 26 1.1 christos #endif 27 1.1 christos 28 1.1 christos #undef _GNU_SOURCE 29 1.1 christos #define _GNU_SOURCE 30 1.1 christos 31 1.1 christos #ifdef HAVE_CONFIG_H 32 1.1 christos # include <config.h> 33 1.1 christos #endif 34 1.1 christos 35 1.1 christos #ifndef PARAMS 36 1.1 christos # if defined __GNUC__ || (defined __STDC__ && __STDC__) 37 1.1 christos # define PARAMS(args) args 38 1.1 christos # else 39 1.1 christos # define PARAMS(args) () 40 1.1 christos # endif /* GCC. */ 41 1.1 christos #endif /* Not PARAMS. 
*/ 42 1.1 christos 43 1.1 christos #ifndef INSIDE_RECURSION 44 1.1 christos 45 1.1 christos # if defined STDC_HEADERS && !defined emacs 46 1.1 christos # include <stddef.h> 47 1.1 christos # else 48 1.1 christos /* We need this for `regex.h', and perhaps for the Emacs include files. */ 49 1.1 christos # include <sys/types.h> 50 1.1 christos # endif 51 1.1 christos 52 1.1 christos # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) 53 1.1 christos 54 1.1 christos /* For platforms which support the ISO C amendment 1 functionality we 55 1.1 christos support user defined character classes. */ 56 1.1 christos # if defined _LIBC || WIDE_CHAR_SUPPORT 57 1.1 christos /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 58 1.1 christos # include <wchar.h> 59 1.1 christos # include <wctype.h> 60 1.1 christos # endif 61 1.1 christos 62 1.1 christos # ifdef _LIBC 63 1.1 christos /* We have to keep the namespace clean. */ 64 1.1 christos # define regfree(preg) __regfree (preg) 65 1.1 christos # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 66 1.1 christos # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 67 1.1 christos # define regerror(errcode, preg, errbuf, errbuf_size) \ 68 1.1 christos __regerror(errcode, preg, errbuf, errbuf_size) 69 1.1 christos # define re_set_registers(bu, re, nu, st, en) \ 70 1.1 christos __re_set_registers (bu, re, nu, st, en) 71 1.1 christos # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 72 1.1 christos __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 73 1.1 christos # define re_match(bufp, string, size, pos, regs) \ 74 1.1 christos __re_match (bufp, string, size, pos, regs) 75 1.1 christos # define re_search(bufp, string, size, startpos, range, regs) \ 76 1.1 christos __re_search (bufp, string, size, startpos, range, regs) 77 1.1 christos # define re_compile_pattern(pattern, length, bufp) \ 78 1.1 christos 
__re_compile_pattern (pattern, length, bufp) 79 1.1 christos # define re_set_syntax(syntax) __re_set_syntax (syntax) 80 1.1 christos # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ 81 1.1 christos __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) 82 1.1 christos # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) 83 1.1 christos 84 1.1 christos # define btowc __btowc 85 1.1 christos # define iswctype __iswctype 86 1.1 christos # define mbrtowc __mbrtowc 87 1.1 christos # define wcslen __wcslen 88 1.1 christos # define wcscoll __wcscoll 89 1.1 christos # define wcrtomb __wcrtomb 90 1.1 christos 91 1.1 christos /* We are also using some library internals. */ 92 1.1 christos # include <locale/localeinfo.h> 93 1.1 christos # include <locale/elem-hash.h> 94 1.1 christos # include <langinfo.h> 95 1.1 christos # include <locale/coll-lookup.h> 96 1.1 christos # endif 97 1.1 christos 98 1.1 christos /* This is for other GNU distributions with internationalized messages. */ 99 1.1 christos # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC 100 1.1 christos # include <libintl.h> 101 1.1 christos # ifdef _LIBC 102 1.1 christos # undef gettext 103 1.1 christos # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) 104 1.1 christos # endif 105 1.1 christos # else 106 1.1 christos # define gettext(msgid) (msgid) 107 1.1 christos # endif 108 1.1 christos 109 1.1 christos # ifndef gettext_noop 110 1.1 christos /* This define is so xgettext can find the internationalizable 111 1.1 christos strings. */ 112 1.1 christos # define gettext_noop(String) String 113 1.1 christos # endif 114 1.1 christos 115 1.1 christos /* Support for bounded pointers. 
*/ 116 1.1 christos # if !defined _LIBC && !defined __BOUNDED_POINTERS__ 117 1.1 christos # define __bounded /* nothing */ 118 1.1 christos # define __unbounded /* nothing */ 119 1.1 christos # define __ptrvalue /* nothing */ 120 1.1 christos # endif 121 1.1 christos 122 1.1 christos /* The `emacs' switch turns on certain matching commands 123 1.1 christos that make sense only in Emacs. */ 124 1.1 christos # ifdef emacs 125 1.1 christos 126 1.1 christos # include "lisp.h" 127 1.1 christos # include "buffer.h" 128 1.1 christos # include "syntax.h" 129 1.1 christos 130 1.1 christos # else /* not emacs */ 131 1.1 christos 132 1.1 christos /* If we are not linking with Emacs proper, 133 1.1 christos we can't use the relocating allocator 134 1.1 christos even if config.h says that we can. */ 135 1.1 christos # undef REL_ALLOC 136 1.1 christos 137 1.1 christos # if defined STDC_HEADERS || defined _LIBC 138 1.1 christos # include <stdlib.h> 139 1.1 christos # else 140 1.1 christos char *malloc (); 141 1.1 christos char *realloc (); 142 1.1 christos # endif 143 1.1 christos 144 1.1 christos /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 145 1.1 christos If nothing else has been done, use the method below. */ 146 1.1 christos # ifdef INHIBIT_STRING_HEADER 147 1.1 christos # if !(defined HAVE_BZERO && defined HAVE_BCOPY) 148 1.1 christos # if !defined bzero && !defined bcopy 149 1.1 christos # undef INHIBIT_STRING_HEADER 150 1.1 christos # endif 151 1.1 christos # endif 152 1.1 christos # endif 153 1.1 christos 154 1.1 christos /* This is the normal way of making sure we have a bcopy and a bzero. 155 1.1 christos This is used in most programs--a few other programs avoid this 156 1.1 christos by defining INHIBIT_STRING_HEADER. 
*/ 157 1.1 christos # ifndef INHIBIT_STRING_HEADER 158 1.1 christos # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 159 1.1 christos # include <string.h> 160 1.1 christos # ifndef bzero 161 1.1 christos # ifndef _LIBC 162 1.1 christos # define bzero(s, n) (memset (s, '\0', n), (s)) 163 1.1 christos # else 164 1.1 christos # define bzero(s, n) __bzero (s, n) 165 1.1 christos # endif 166 1.1 christos # endif 167 1.1 christos # else 168 1.1 christos # include <strings.h> 169 1.1 christos # ifndef memcmp 170 1.1 christos # define memcmp(s1, s2, n) bcmp (s1, s2, n) 171 1.1 christos # endif 172 1.1 christos # ifndef memcpy 173 1.1 christos # define memcpy(d, s, n) (bcopy (s, d, n), (d)) 174 1.1 christos # endif 175 1.1 christos # endif 176 1.1 christos # endif 177 1.1 christos 178 1.1 christos /* Define the syntax stuff for \<, \>, etc. */ 179 1.1 christos 180 1.1 christos /* This must be nonzero for the wordchar and notwordchar pattern 181 1.1 christos commands in re_match_2. */ 182 1.1 christos # ifndef Sword 183 1.1 christos # define Sword 1 184 1.1 christos # endif 185 1.1 christos 186 1.1 christos # ifdef SWITCH_ENUM_BUG 187 1.1 christos # define SWITCH_ENUM_CAST(x) ((int)(x)) 188 1.1 christos # else 189 1.1 christos # define SWITCH_ENUM_CAST(x) (x) 190 1.1 christos # endif 191 1.1 christos 192 1.1 christos # endif /* not emacs */ 193 1.1 christos 194 1.1 christos # if defined _LIBC || HAVE_LIMITS_H 195 1.1 christos # include <limits.h> 196 1.1 christos # endif 197 1.1 christos 198 1.1 christos # ifndef MB_LEN_MAX 199 1.1 christos # define MB_LEN_MAX 1 200 1.1 christos # endif 201 1.1 christos 202 1.1 christos /* Get the interface, including the syntax bits. */ 204 1.1 christos # include <regex.h> 205 1.1 christos 206 1.1 christos /* isalpha etc. are used for the character classes. */ 207 1.1 christos # include <ctype.h> 208 1.1 christos 209 1.1 christos /* Jim Meyering writes: 210 1.1 christos 211 1.1 christos "... 
Some ctype macros are valid only for character codes that 212 1.1 christos isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 213 1.1 christos using /bin/cc or gcc but without giving an ansi option). So, all 214 1.1 christos ctype uses should be through macros like ISPRINT... If 215 1.1 christos STDC_HEADERS is defined, then autoconf has verified that the ctype 216 1.1 christos macros don't need to be guarded with references to isascii. ... 217 1.1 christos Defining isascii to 1 should let any compiler worth its salt 218 1.1 christos eliminate the && through constant folding." 219 1.1 christos Solaris defines some of these symbols so we must undefine them first. */ 220 1.1 christos 221 1.1 christos # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 222 1.1 christos # define IN_CTYPE_DOMAIN(c) 1 223 1.1 christos # else 224 1.1 christos # define IN_CTYPE_DOMAIN(c) isascii(c) 225 1.1 christos # endif 226 1.1 christos 227 1.1 christos # ifdef isblank 228 1.1 christos # define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c)) 229 1.1 christos # else 230 1.1 christos # define ISBLANK(c) ((c) == ' ' || (c) == '\t') 231 1.1 christos # endif 232 1.1 christos # ifdef isgraph 233 1.1 christos # define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c)) 234 1.1 christos # else 235 1.1 christos # define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c)) 236 1.1 christos # endif 237 1.1 christos 238 1.1 christos # undef ISPRINT 239 1.1 christos # define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) 240 1.1 christos # define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c)) 241 1.1 christos # define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c)) 242 1.1 christos # define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c)) 243 1.1 christos # define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c)) 244 1.1 christos # define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c)) 245 1.1 christos # define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct 
(c)) 246 1.1 christos # define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c)) 247 1.1 christos # define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c)) 248 1.1 christos # define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c)) 249 1.1 christos 250 1.1 christos # ifdef _tolower 251 1.1 christos # define TOLOWER(c) _tolower(c) 252 1.1 christos # else 253 1.1 christos # define TOLOWER(c) tolower(c) 254 1.1 christos # endif 255 1.1 christos 256 1.1 christos # ifndef NULL 257 1.1 christos # define NULL (void *)0 258 1.1 christos # endif 259 1.1 christos 260 1.1 christos /* We remove any previous definition of `SIGN_EXTEND_CHAR', 261 1.1 christos since ours (we hope) works properly with all combinations of 262 1.1 christos machines, compilers, `char' and `unsigned char' argument types. 263 1.1 christos (Per Bothner suggested the basic approach.) */ 264 1.1 christos # undef SIGN_EXTEND_CHAR 265 1.1 christos # if __STDC__ 266 1.1 christos # define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 267 1.1 christos # else /* not __STDC__ */ 268 1.1 christos /* As in Harbison and Steele. */ 269 1.1 christos # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 270 1.1 christos # endif 271 1.1 christos 272 1.1 christos # ifndef emacs 274 1.1 christos /* How many characters in the character set. 
*/ 275 1.1 christos # define CHAR_SET_SIZE 256 276 1.1 christos 277 1.1 christos # ifdef SYNTAX_TABLE 278 1.1 christos 279 1.1 christos extern char *re_syntax_table; 280 1.1 christos 281 1.1 christos # else /* not SYNTAX_TABLE */ 282 1.1 christos 283 1.1 christos static char re_syntax_table[CHAR_SET_SIZE]; 284 1.1 christos 285 1.1 christos static void init_syntax_once PARAMS ((void)); 286 1.1 christos 287 1.1 christos static void 288 1.1 christos init_syntax_once () 289 1.1 christos { 290 1.1 christos register int c; 291 1.1 christos static int done = 0; 292 1.1 christos 293 1.1 christos if (done) 294 1.1 christos return; 295 1.1 christos bzero (re_syntax_table, sizeof re_syntax_table); 296 1.1 christos 297 1.1 christos for (c = 0; c < CHAR_SET_SIZE; ++c) 298 1.1 christos if (ISALNUM (c)) 299 1.1 christos re_syntax_table[c] = Sword; 300 1.1 christos 301 1.1 christos re_syntax_table['_'] = Sword; 302 1.1 christos 303 1.1 christos done = 1; 304 1.1 christos } 305 1.1 christos 306 1.1 christos # endif /* not SYNTAX_TABLE */ 307 1.1 christos 308 1.1 christos # define SYNTAX(c) re_syntax_table[(unsigned char) (c)] 309 1.1 christos 310 1.1 christos # endif /* emacs */ 311 1.1 christos 312 1.1 christos /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 314 1.1 christos use `alloca' instead of `malloc'. This is because using malloc in 315 1.1 christos re_search* or re_match* could cause memory leaks when C-g is used in 316 1.1 christos Emacs; also, malloc is slower and causes storage fragmentation. On 317 1.1 christos the other hand, malloc is more portable, and easier to debug. 318 1.1 christos 319 1.1 christos Because we sometimes use alloca, some routines have to be macros, 320 1.1 christos not functions -- `alloca'-allocated space disappears at the end of the 321 1.1 christos function it is called in. 
*/ 322 1.1 christos 323 1.1 christos # ifdef REGEX_MALLOC 324 1.1 christos 325 1.1 christos # define REGEX_ALLOCATE malloc 326 1.1 christos # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 327 1.1 christos # define REGEX_FREE free 328 1.1 christos 329 1.1 christos # else /* not REGEX_MALLOC */ 330 1.1 christos 331 1.1 christos /* Emacs already defines alloca, sometimes. */ 332 1.1 christos # ifndef alloca 333 1.1 christos 334 1.1 christos /* Make alloca work the best possible way. */ 335 1.1 christos # ifdef __GNUC__ 336 1.1 christos # define alloca __builtin_alloca 337 1.1 christos # else /* not __GNUC__ */ 338 1.1 christos # if HAVE_ALLOCA_H 339 1.1 christos # include <alloca.h> 340 1.1 christos # endif /* HAVE_ALLOCA_H */ 341 1.1 christos # endif /* not __GNUC__ */ 342 1.1 christos 343 1.1 christos # endif /* not alloca */ 344 1.1 christos 345 1.1 christos # define REGEX_ALLOCATE alloca 346 1.1 christos 347 1.1 christos /* Assumes a `char *destination' variable. */ 348 1.1 christos # define REGEX_REALLOCATE(source, osize, nsize) \ 349 1.1 christos (destination = (char *) alloca (nsize), \ 350 1.1 christos memcpy (destination, source, osize)) 351 1.1 christos 352 1.1 christos /* No need to do anything to free, after alloca. */ 353 1.1 christos # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 354 1.1 christos 355 1.1 christos # endif /* not REGEX_MALLOC */ 356 1.1 christos 357 1.1 christos /* Define how to allocate the failure stack. 
*/ 358 1.1 christos 359 1.1 christos # if defined REL_ALLOC && defined REGEX_MALLOC 360 1.1 christos 361 1.1 christos # define REGEX_ALLOCATE_STACK(size) \ 362 1.1 christos r_alloc (&failure_stack_ptr, (size)) 363 1.1 christos # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 364 1.1 christos r_re_alloc (&failure_stack_ptr, (nsize)) 365 1.1 christos # define REGEX_FREE_STACK(ptr) \ 366 1.1 christos r_alloc_free (&failure_stack_ptr) 367 1.1 christos 368 1.1 christos # else /* not using relocating allocator */ 369 1.1 christos 370 1.1 christos # ifdef REGEX_MALLOC 371 1.1 christos 372 1.1 christos # define REGEX_ALLOCATE_STACK malloc 373 1.1 christos # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 374 1.1 christos # define REGEX_FREE_STACK free 375 1.1 christos 376 1.1 christos # else /* not REGEX_MALLOC */ 377 1.1 christos 378 1.1 christos # define REGEX_ALLOCATE_STACK alloca 379 1.1 christos 380 1.1 christos # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 381 1.1 christos REGEX_REALLOCATE (source, osize, nsize) 382 1.1 christos /* No need to explicitly free anything. */ 383 1.1 christos # define REGEX_FREE_STACK(arg) 384 1.1 christos 385 1.1 christos # endif /* not REGEX_MALLOC */ 386 1.1 christos # endif /* not using relocating allocator */ 387 1.1 christos 388 1.1 christos 389 1.1 christos /* True if `size1' is non-NULL and PTR is pointing anywhere inside 390 1.1 christos `string1' or just past its end. This works if PTR is NULL, which is 391 1.1 christos a good thing. */ 392 1.1 christos # define FIRST_STRING_P(ptr) \ 393 1.1 christos (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 394 1.1 christos 395 1.1 christos /* (Re)Allocate N items of type T using malloc, or fail. 
*/ 396 1.1 christos # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 397 1.1 christos # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 398 1.1 christos # define RETALLOC_IF(addr, n, t) \ 399 1.1 christos if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 400 1.1 christos # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 401 1.1 christos 402 1.1 christos # define BYTEWIDTH 8 /* In bits. */ 403 1.1 christos 404 1.1 christos # define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 405 1.1 christos 406 1.1 christos # undef MAX 407 1.1 christos # undef MIN 408 1.1 christos # define MAX(a, b) ((a) > (b) ? (a) : (b)) 409 1.1 christos # define MIN(a, b) ((a) < (b) ? (a) : (b)) 410 1.1 christos 411 1.1 christos typedef char boolean; 412 1.1 christos # define false 0 413 1.1 christos # define true 1 414 1.1 christos 415 1.1 christos static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size, 416 1.1 christos reg_syntax_t syntax, 417 1.1 christos struct re_pattern_buffer *bufp)); 418 1.1 christos 419 1.1 christos static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, 420 1.1 christos const char *string1, int size1, 421 1.1 christos const char *string2, int size2, 422 1.1 christos int pos, 423 1.1 christos struct re_registers *regs, 424 1.1 christos int stop)); 425 1.1 christos static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, 426 1.1 christos const char *string1, int size1, 427 1.1 christos const char *string2, int size2, 428 1.1 christos int startpos, int range, 429 1.1 christos struct re_registers *regs, int stop)); 430 1.1 christos static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); 431 1.1 christos 432 1.1 christos #ifdef MBS_SUPPORT 433 1.1 christos static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size, 434 1.1 christos reg_syntax_t syntax, 435 1.1 christos struct re_pattern_buffer 
*bufp)); 436 1.1 christos 437 1.1 christos 438 1.1 christos static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, 439 1.1 christos const char *cstring1, int csize1, 440 1.1 christos const char *cstring2, int csize2, 441 1.1 christos int pos, 442 1.1 christos struct re_registers *regs, 443 1.1 christos int stop, 444 1.1 christos wchar_t *string1, int size1, 445 1.1 christos wchar_t *string2, int size2, 446 1.1 christos int *mbs_offset1, int *mbs_offset2)); 447 1.1 christos static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, 448 1.1 christos const char *string1, int size1, 449 1.1 christos const char *string2, int size2, 450 1.1 christos int startpos, int range, 451 1.1 christos struct re_registers *regs, int stop)); 452 1.1 christos static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); 453 1.1 christos #endif 454 1.1 christos 455 1.1 christos /* These are the command codes that appear in compiled regular 457 1.1 christos expressions. Some opcodes are followed by argument bytes. A 458 1.1 christos command code can specify any interpretation whatsoever for its 459 1.1 christos arguments. Zero bytes may appear in the compiled regular expression. */ 460 1.1 christos 461 1.1 christos typedef enum 462 1.1 christos { 463 1.1 christos no_op = 0, 464 1.1 christos 465 1.1 christos /* Succeed right away--no more backtracking. */ 466 1.1 christos succeed, 467 1.1 christos 468 1.1 christos /* Followed by one byte giving n, then by n literal bytes. */ 469 1.1 christos exactn, 470 1.1 christos 471 1.1 christos # ifdef MBS_SUPPORT 472 1.1 christos /* Same as exactn, but contains binary data. */ 473 1.1 christos exactn_bin, 474 1.1 christos # endif 475 1.1 christos 476 1.1 christos /* Matches any (more or less) character. */ 477 1.1 christos anychar, 478 1.1 christos 479 1.1 christos /* Matches any one char belonging to specified set. First 480 1.1 christos following byte is number of bitmap bytes. 
Then come bytes 481 1.1 christos for a bitmap saying which chars are in. Bits in each byte 482 1.1 christos are ordered low-bit-first. A character is in the set if its 483 1.1 christos bit is 1. A character too large to have a bit in the map is 484 1.1 christos automatically not in the set. */ 485 1.1 christos /* ifdef MBS_SUPPORT, following element is length of character 486 1.1 christos classes, length of collating symbols, length of equivalence 487 1.1 christos classes, length of character ranges, and length of characters. 488 1.1 christos Next, character class element, collating symbols elements, 489 1.1 christos equivalence class elements, range elements, and character 490 1.1 christos elements follow. 491 1.1 christos See regex_compile function. */ 492 1.1 christos charset, 493 1.1 christos 494 1.1 christos /* Same parameters as charset, but match any character that is 495 1.1 christos not one of those specified. */ 496 1.1 christos charset_not, 497 1.1 christos 498 1.1 christos /* Start remembering the text that is matched, for storing in a 499 1.1 christos register. Followed by one byte with the register number, in 500 1.1 christos the range 0 to one less than the pattern buffer's re_nsub 501 1.1 christos field. Then followed by one byte with the number of groups 502 1.1 christos inner to this one. (This last has to be part of the 503 1.1 christos start_memory only because we need it in the on_failure_jump 504 1.1 christos of re_match_2.) */ 505 1.1 christos start_memory, 506 1.1 christos 507 1.1 christos /* Stop remembering the text that is matched and store it in a 508 1.1 christos memory register. Followed by one byte with the register 509 1.1 christos number, in the range 0 to one less than `re_nsub' in the 510 1.1 christos pattern buffer, and one byte with the number of inner groups, 511 1.1 christos just like `start_memory'. 
(We need the number of inner 512 1.1 christos groups here because we don't have any easy way of finding the 513 1.1 christos corresponding start_memory when we're at a stop_memory.) */ 514 1.1 christos stop_memory, 515 1.1 christos 516 1.1 christos /* Match a duplicate of something remembered. Followed by one 517 1.1 christos byte containing the register number. */ 518 1.1 christos duplicate, 519 1.1 christos 520 1.1 christos /* Fail unless at beginning of line. */ 521 1.1 christos begline, 522 1.1 christos 523 1.1 christos /* Fail unless at end of line. */ 524 1.1 christos endline, 525 1.1 christos 526 1.1 christos /* Succeeds if at beginning of buffer (if emacs) or at beginning 527 1.1 christos of string to be matched (if not). */ 528 1.1 christos begbuf, 529 1.1 christos 530 1.1 christos /* Analogously, for end of buffer/string. */ 531 1.1 christos endbuf, 532 1.1 christos 533 1.1 christos /* Followed by two byte relative address to which to jump. */ 534 1.1 christos jump, 535 1.1 christos 536 1.1 christos /* Same as jump, but marks the end of an alternative. */ 537 1.1 christos jump_past_alt, 538 1.1 christos 539 1.1 christos /* Followed by two-byte relative address of place to resume at 540 1.1 christos in case of failure. */ 541 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 542 1.1 christos on_failure_jump, 543 1.1 christos 544 1.1 christos /* Like on_failure_jump, but pushes a placeholder instead of the 545 1.1 christos current string position when executed. */ 546 1.1 christos on_failure_keep_string_jump, 547 1.1 christos 548 1.1 christos /* Throw away latest failure point and then jump to following 549 1.1 christos two-byte relative address. */ 550 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 551 1.1 christos pop_failure_jump, 552 1.1 christos 553 1.1 christos /* Change to pop_failure_jump if know won't have to backtrack to 554 1.1 christos match; otherwise change to jump. 
This is used to jump 555 1.1 christos back to the beginning of a repeat. If what follows this jump 556 1.1 christos clearly won't match what the repeat does, such that we can be 557 1.1 christos sure that there is no use backtracking out of repetitions 558 1.1 christos already matched, then we change it to a pop_failure_jump. 559 1.1 christos Followed by two-byte address. */ 560 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 561 1.1 christos maybe_pop_jump, 562 1.1 christos 563 1.1 christos /* Jump to following two-byte address, and push a dummy failure 564 1.1 christos point. This failure point will be thrown away if an attempt 565 1.1 christos is made to use it for a failure. A `+' construct makes this 566 1.1 christos before the first repeat. Also used as an intermediary kind 567 1.1 christos of jump when compiling an alternative. */ 568 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 569 1.1 christos dummy_failure_jump, 570 1.1 christos 571 1.1 christos /* Push a dummy failure point and continue. Used at the end of 572 1.1 christos alternatives. */ 573 1.1 christos push_dummy_failure, 574 1.1 christos 575 1.1 christos /* Followed by two-byte relative address and two-byte number n. 576 1.1 christos After matching N times, jump to the address upon failure. */ 577 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 578 1.1 christos succeed_n, 579 1.1 christos 580 1.1 christos /* Followed by two-byte relative address, and two-byte number n. 581 1.1 christos Jump to the address N times, then fail. */ 582 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. */ 583 1.1 christos jump_n, 584 1.1 christos 585 1.1 christos /* Set the following two-byte relative address to the 586 1.1 christos subsequent two-byte number. The address *includes* the two 587 1.1 christos bytes of number. */ 588 1.1 christos /* ifdef MBS_SUPPORT, the size of address is 1. 
*/ 589 1.1 christos set_number_at, 590 1.1 christos 591 1.1 christos wordchar, /* Matches any word-constituent character. */ 592 1.1 christos notwordchar, /* Matches any char that is not a word-constituent. */ 593 1.1 christos 594 1.1 christos wordbeg, /* Succeeds if at word beginning. */ 595 1.1 christos wordend, /* Succeeds if at word end. */ 596 1.1 christos 597 1.1 christos wordbound, /* Succeeds if at a word boundary. */ 598 1.1 christos notwordbound /* Succeeds if not at a word boundary. */ 599 1.1 christos 600 1.1 christos # ifdef emacs 601 1.1 christos ,before_dot, /* Succeeds if before point. */ 602 1.1 christos at_dot, /* Succeeds if at point. */ 603 1.1 christos after_dot, /* Succeeds if after point. */ 604 1.1 christos 605 1.1 christos /* Matches any character whose syntax is specified. Followed by 606 1.1 christos a byte which contains a syntax code, e.g., Sword. */ 607 1.1 christos syntaxspec, 608 1.1 christos 609 1.1 christos /* Matches any character whose syntax is not that specified. 
*/ 610 1.1 christos notsyntaxspec 611 1.1 christos # endif /* emacs */ 612 1.1 christos } re_opcode_t; 613 1.1 christos #endif /* not INSIDE_RECURSION */ 614 1.1 christos 615 1.1 christos 617 1.1 christos #ifdef BYTE 618 1.1 christos # define CHAR_T char 619 1.1 christos # define UCHAR_T unsigned char 620 1.1 christos # define COMPILED_BUFFER_VAR bufp->buffer 621 1.1 christos # define OFFSET_ADDRESS_SIZE 2 622 1.1 christos # define PREFIX(name) byte_##name 623 1.1 christos # define ARG_PREFIX(name) name 624 1.1 christos # define PUT_CHAR(c) putchar (c) 625 1.1 christos #else 626 1.1 christos # ifdef WCHAR 627 1.1 christos # define CHAR_T wchar_t 628 1.1 christos # define UCHAR_T wchar_t 629 1.1 christos # define COMPILED_BUFFER_VAR wc_buffer 630 1.1 christos # define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ 631 1.1 christos # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) 632 1.1 christos # define PREFIX(name) wcs_##name 633 1.1 christos # define ARG_PREFIX(name) c##name 634 1.1 christos /* Should we use wide stream?? */ 635 1.1 christos # define PUT_CHAR(c) printf ("%C", c); 636 1.1 christos # define TRUE 1 637 1.1 christos # define FALSE 0 638 1.1 christos # else 639 1.1 christos # ifdef MBS_SUPPORT 640 1.1 christos # define WCHAR 641 1.1 christos # define INSIDE_RECURSION 642 1.1 christos # include "regex.c" 643 1.1 christos # undef INSIDE_RECURSION 644 1.1 christos # endif 645 1.1 christos # define BYTE 646 1.1 christos # define INSIDE_RECURSION 647 1.1 christos # include "regex.c" 648 1.1 christos # undef INSIDE_RECURSION 649 1.1 christos # endif 650 1.1 christos #endif 651 1.1 christos #include "unlocked-io.h" 652 1.1 christos 653 1.1 christos #ifdef INSIDE_RECURSION 654 1.1 christos /* Common operations on the compiled pattern. */ 655 1.1 christos 656 1.1 christos /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 657 1.1 christos /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. 
*/ 658 1.1 christos 659 1.1 christos # ifdef WCHAR 660 1.1 christos # define STORE_NUMBER(destination, number) \ 661 1.1 christos do { \ 662 1.1 christos *(destination) = (UCHAR_T)(number); \ 663 1.1 christos } while (0) 664 1.1 christos # else /* BYTE */ 665 1.1 christos # define STORE_NUMBER(destination, number) \ 666 1.1 christos do { \ 667 1.1 christos (destination)[0] = (number) & 0377; \ 668 1.1 christos (destination)[1] = (number) >> 8; \ 669 1.1 christos } while (0) 670 1.1 christos # endif /* WCHAR */ 671 1.1 christos 672 1.1 christos /* Same as STORE_NUMBER, except increment DESTINATION to 673 1.1 christos the byte after where the number is stored. Therefore, DESTINATION 674 1.1 christos must be an lvalue. */ 675 1.1 christos /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 676 1.1 christos 677 1.1 christos # define STORE_NUMBER_AND_INCR(destination, number) \ 678 1.1 christos do { \ 679 1.1 christos STORE_NUMBER (destination, number); \ 680 1.1 christos (destination) += OFFSET_ADDRESS_SIZE; \ 681 1.1 christos } while (0) 682 1.1 christos 683 1.1 christos /* Put into DESTINATION a number stored in two contiguous bytes starting 684 1.1 christos at SOURCE. */ 685 1.1 christos /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. 
*/ 686 1.1 christos 687 1.1 christos # ifdef WCHAR 688 1.1 christos # define EXTRACT_NUMBER(destination, source) \ 689 1.1 christos do { \ 690 1.1 christos (destination) = *(source); \ 691 1.1 christos } while (0) 692 1.1 christos # else /* BYTE */ 693 1.1 christos # define EXTRACT_NUMBER(destination, source) \ 694 1.1 christos do { \ 695 1.1 christos (destination) = *(source) & 0377; \ 696 1.1 christos (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 697 1.1 christos } while (0) 698 1.1 christos # endif 699 1.1 christos 700 1.1 christos # ifdef DEBUG 701 1.1 christos static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source)); 702 1.1 christos static void 703 1.1 christos PREFIX(extract_number) (dest, source) 704 1.1 christos int *dest; 705 1.1 christos UCHAR_T *source; 706 1.1 christos { 707 1.1 christos # ifdef WCHAR 708 1.1 christos *dest = *source; 709 1.1 christos # else /* BYTE */ 710 1.1 christos int temp = SIGN_EXTEND_CHAR (*(source + 1)); 711 1.1 christos *dest = *source & 0377; 712 1.1 christos *dest += temp << 8; 713 1.1 christos # endif 714 1.1 christos } 715 1.1 christos 716 1.1 christos # ifndef EXTRACT_MACROS /* To debug the macros. */ 717 1.1 christos # undef EXTRACT_NUMBER 718 1.1 christos # define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) 719 1.1 christos # endif /* not EXTRACT_MACROS */ 720 1.1 christos 721 1.1 christos # endif /* DEBUG */ 722 1.1 christos 723 1.1 christos /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 724 1.1 christos SOURCE must be an lvalue. 
*/ 725 1.1 christos 726 1.1 christos # define EXTRACT_NUMBER_AND_INCR(destination, source) \ 727 1.1 christos do { \ 728 1.1 christos EXTRACT_NUMBER (destination, source); \ 729 1.1 christos (source) += OFFSET_ADDRESS_SIZE; \ 730 1.1 christos } while (0) 731 1.1 christos 732 1.1 christos # ifdef DEBUG 733 1.1 christos static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination, 734 1.1 christos UCHAR_T **source)); 735 1.1 christos static void 736 1.1 christos PREFIX(extract_number_and_incr) (destination, source) 737 1.1 christos int *destination; 738 1.1 christos UCHAR_T **source; 739 1.1 christos { 740 1.1 christos PREFIX(extract_number) (destination, *source); 741 1.1 christos *source += OFFSET_ADDRESS_SIZE; 742 1.1 christos } 743 1.1 christos 744 1.1 christos # ifndef EXTRACT_MACROS 745 1.1 christos # undef EXTRACT_NUMBER_AND_INCR 746 1.1 christos # define EXTRACT_NUMBER_AND_INCR(dest, src) \ 747 1.1 christos PREFIX(extract_number_and_incr) (&dest, &src) 748 1.1 christos # endif /* not EXTRACT_MACROS */ 749 1.1 christos 750 1.1 christos # endif /* DEBUG */ 751 1.1 christos 752 1.1 christos 753 1.1 christos 755 1.1 christos /* If DEBUG is defined, Regex prints many voluminous messages about what 756 1.1 christos it is doing (if the variable `debug' is nonzero). If linked with the 757 1.1 christos main program in `iregex.c', you can enter patterns and strings 758 1.1 christos interactively. And if linked with the main program in `main.c' and 759 1.1 christos the other test files, you can run the already-written tests. */ 760 1.1 christos 761 1.1 christos # ifdef DEBUG 762 1.1 christos 763 1.1 christos # ifndef DEFINED_ONCE 764 1.1 christos 765 1.1 christos /* We use standard I/O for debugging. */ 766 1.1 christos # include <stdio.h> 767 1.1 christos 768 1.1 christos /* It is useful to test things that ``must'' be true when debugging. 
*/ 769 1.1 christos # include <assert.h> 770 1.1 christos 771 1.1 christos static int debug; 772 1.1 christos 773 1.1 christos # define DEBUG_STATEMENT(e) e 774 1.1 christos # define DEBUG_PRINT1(x) if (debug) printf (x) 775 1.1 christos # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 776 1.1 christos # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 777 1.1 christos # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 778 1.1 christos # endif /* not DEFINED_ONCE */ 779 1.1 christos 780 1.1 christos # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 781 1.1 christos if (debug) PREFIX(print_partial_compiled_pattern) (s, e) 782 1.1 christos # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 783 1.1 christos if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) 784 1.1 christos 785 1.1 christos 786 1.1 christos /* Print the fastmap in human-readable form. */ 787 1.1 christos 788 1.1 christos # ifndef DEFINED_ONCE 789 1.1 christos void 790 1.1 christos print_fastmap (fastmap) 791 1.1 christos char *fastmap; 792 1.1 christos { 793 1.1 christos unsigned was_a_range = 0; 794 1.1 christos unsigned i = 0; 795 1.1 christos 796 1.1 christos while (i < (1 << BYTEWIDTH)) 797 1.1 christos { 798 1.1 christos if (fastmap[i++]) 799 1.1 christos { 800 1.1 christos was_a_range = 0; 801 1.1 christos putchar (i - 1); 802 1.1 christos while (i < (1 << BYTEWIDTH) && fastmap[i]) 803 1.1 christos { 804 1.1 christos was_a_range = 1; 805 1.1 christos i++; 806 1.1 christos } 807 1.1 christos if (was_a_range) 808 1.1 christos { 809 1.1 christos printf ("-"); 810 1.1 christos putchar (i - 1); 811 1.1 christos } 812 1.1 christos } 813 1.1 christos } 814 1.1 christos putchar ('\n'); 815 1.1 christos } 816 1.1 christos # endif /* not DEFINED_ONCE */ 817 1.1 christos 818 1.1 christos 819 1.1 christos /* Print a compiled pattern string in human-readable form, starting at 820 1.1 christos the START pointer into it and ending just before the 
pointer END. */ 821 1.1 christos 822 1.1 christos void 823 1.1 christos PREFIX(print_partial_compiled_pattern) (start, end) 824 1.1 christos UCHAR_T *start; 825 1.1 christos UCHAR_T *end; 826 1.1 christos { 827 1.1 christos int mcnt, mcnt2; 828 1.1 christos UCHAR_T *p1; 829 1.1 christos UCHAR_T *p = start; 830 1.1 christos UCHAR_T *pend = end; 831 1.1 christos 832 1.1 christos if (start == NULL) 833 1.1 christos { 834 1.1 christos printf ("(null)\n"); 835 1.1 christos return; 836 1.1 christos } 837 1.1 christos 838 1.1 christos /* Loop over pattern commands. */ 839 1.1 christos while (p < pend) 840 1.1 christos { 841 1.1 christos # ifdef _LIBC 842 1.1 christos printf ("%td:\t", p - start); 843 1.1 christos # else 844 1.1 christos printf ("%ld:\t", (long int) (p - start)); 845 1.1 christos # endif 846 1.1 christos 847 1.1 christos switch ((re_opcode_t) *p++) 848 1.1 christos { 849 1.1 christos case no_op: 850 1.1 christos printf ("/no_op"); 851 1.1 christos break; 852 1.1 christos 853 1.1 christos case exactn: 854 1.1 christos mcnt = *p++; 855 1.1 christos printf ("/exactn/%d", mcnt); 856 1.1 christos do 857 1.1 christos { 858 1.1 christos putchar ('/'); 859 1.1 christos PUT_CHAR (*p++); 860 1.1 christos } 861 1.1 christos while (--mcnt); 862 1.1 christos break; 863 1.1 christos 864 1.1 christos # ifdef MBS_SUPPORT 865 1.1 christos case exactn_bin: 866 1.1 christos mcnt = *p++; 867 1.1 christos printf ("/exactn_bin/%d", mcnt); 868 1.1 christos do 869 1.1 christos { 870 1.1 christos printf("/%lx", (long int) *p++); 871 1.1 christos } 872 1.1 christos while (--mcnt); 873 1.1 christos break; 874 1.1 christos # endif /* MBS_SUPPORT */ 875 1.1 christos 876 1.1 christos case start_memory: 877 1.1 christos mcnt = *p++; 878 1.1 christos printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); 879 1.1 christos break; 880 1.1 christos 881 1.1 christos case stop_memory: 882 1.1 christos mcnt = *p++; 883 1.1 christos printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); 884 1.1 
christos break; 885 1.1 christos 886 1.1 christos case duplicate: 887 1.1 christos printf ("/duplicate/%ld", (long int) *p++); 888 1.1 christos break; 889 1.1 christos 890 1.1 christos case anychar: 891 1.1 christos printf ("/anychar"); 892 1.1 christos break; 893 1.1 christos 894 1.1 christos case charset: 895 1.1 christos case charset_not: 896 1.1 christos { 897 1.1 christos # ifdef WCHAR 898 1.1 christos int i, length; 899 1.1 christos wchar_t *workp = p; 900 1.1 christos printf ("/charset [%s", 901 1.1 christos (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); 902 1.1 christos p += 5; 903 1.1 christos length = *workp++; /* the length of char_classes */ 904 1.1 christos for (i=0 ; i<length ; i++) 905 1.1 christos printf("[:%lx:]", (long int) *p++); 906 1.1 christos length = *workp++; /* the length of collating_symbol */ 907 1.1 christos for (i=0 ; i<length ;) 908 1.1 christos { 909 1.1 christos printf("[."); 910 1.1 christos while(*p != 0) 911 1.1 christos PUT_CHAR((i++,*p++)); 912 1.1 christos i++,p++; 913 1.1 christos printf(".]"); 914 1.1 christos } 915 1.1 christos length = *workp++; /* the length of equivalence_class */ 916 1.1 christos for (i=0 ; i<length ;) 917 1.1 christos { 918 1.1 christos printf("[="); 919 1.1 christos while(*p != 0) 920 1.1 christos PUT_CHAR((i++,*p++)); 921 1.1 christos i++,p++; 922 1.1 christos printf("=]"); 923 1.1 christos } 924 1.1 christos length = *workp++; /* the length of char_range */ 925 1.1 christos for (i=0 ; i<length ; i++) 926 1.1 christos { 927 1.1 christos wchar_t range_start = *p++; 928 1.1 christos wchar_t range_end = *p++; 929 1.1 christos printf("%C-%C", range_start, range_end); 930 1.1 christos } 931 1.1 christos length = *workp++; /* the length of char */ 932 1.1 christos for (i=0 ; i<length ; i++) 933 1.1 christos printf("%C", *p++); 934 1.1 christos putchar (']'); 935 1.1 christos # else 936 1.1 christos register int c, last = -100; 937 1.1 christos register int in_range = 0; 938 1.1 christos 939 1.1 
christos printf ("/charset [%s", 940 1.1 christos (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 941 1.1 christos 942 1.1 christos assert (p + *p < pend); 943 1.1 christos 944 1.1 christos for (c = 0; c < 256; c++) 945 1.1 christos if (c / 8 < *p 946 1.1 christos && (p[1 + (c/8)] & (1 << (c % 8)))) 947 1.1 christos { 948 1.1 christos /* Are we starting a range? */ 949 1.1 christos if (last + 1 == c && ! in_range) 950 1.1 christos { 951 1.1 christos putchar ('-'); 952 1.1 christos in_range = 1; 953 1.1 christos } 954 1.1 christos /* Have we broken a range? */ 955 1.1 christos else if (last + 1 != c && in_range) 956 1.1 christos { 957 1.1 christos putchar (last); 958 1.1 christos in_range = 0; 959 1.1 christos } 960 1.1 christos 961 1.1 christos if (! in_range) 962 1.1 christos putchar (c); 963 1.1 christos 964 1.1 christos last = c; 965 1.1 christos } 966 1.1 christos 967 1.1 christos if (in_range) 968 1.1 christos putchar (last); 969 1.1 christos 970 1.1 christos putchar (']'); 971 1.1 christos 972 1.1 christos p += 1 + *p; 973 1.1 christos # endif /* WCHAR */ 974 1.1 christos } 975 1.1 christos break; 976 1.1 christos 977 1.1 christos case begline: 978 1.1 christos printf ("/begline"); 979 1.1 christos break; 980 1.1 christos 981 1.1 christos case endline: 982 1.1 christos printf ("/endline"); 983 1.1 christos break; 984 1.1 christos 985 1.1 christos case on_failure_jump: 986 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 987 1.1 christos # ifdef _LIBC 988 1.1 christos printf ("/on_failure_jump to %td", p + mcnt - start); 989 1.1 christos # else 990 1.1 christos printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start)); 991 1.1 christos # endif 992 1.1 christos break; 993 1.1 christos 994 1.1 christos case on_failure_keep_string_jump: 995 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 996 1.1 christos # ifdef _LIBC 997 1.1 christos printf ("/on_failure_keep_string_jump to %td", p + mcnt - start); 998 1.1 christos # else 999 1.1 
christos printf ("/on_failure_keep_string_jump to %ld", 1000 1.1 christos (long int) (p + mcnt - start)); 1001 1.1 christos # endif 1002 1.1 christos break; 1003 1.1 christos 1004 1.1 christos case dummy_failure_jump: 1005 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1006 1.1 christos # ifdef _LIBC 1007 1.1 christos printf ("/dummy_failure_jump to %td", p + mcnt - start); 1008 1.1 christos # else 1009 1.1 christos printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); 1010 1.1 christos # endif 1011 1.1 christos break; 1012 1.1 christos 1013 1.1 christos case push_dummy_failure: 1014 1.1 christos printf ("/push_dummy_failure"); 1015 1.1 christos break; 1016 1.1 christos 1017 1.1 christos case maybe_pop_jump: 1018 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1019 1.1 christos # ifdef _LIBC 1020 1.1 christos printf ("/maybe_pop_jump to %td", p + mcnt - start); 1021 1.1 christos # else 1022 1.1 christos printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); 1023 1.1 christos # endif 1024 1.1 christos break; 1025 1.1 christos 1026 1.1 christos case pop_failure_jump: 1027 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1028 1.1 christos # ifdef _LIBC 1029 1.1 christos printf ("/pop_failure_jump to %td", p + mcnt - start); 1030 1.1 christos # else 1031 1.1 christos printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); 1032 1.1 christos # endif 1033 1.1 christos break; 1034 1.1 christos 1035 1.1 christos case jump_past_alt: 1036 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1037 1.1 christos # ifdef _LIBC 1038 1.1 christos printf ("/jump_past_alt to %td", p + mcnt - start); 1039 1.1 christos # else 1040 1.1 christos printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start)); 1041 1.1 christos # endif 1042 1.1 christos break; 1043 1.1 christos 1044 1.1 christos case jump: 1045 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1046 1.1 christos # ifdef _LIBC 1047 1.1 christos 
printf ("/jump to %td", p + mcnt - start); 1048 1.1 christos # else 1049 1.1 christos printf ("/jump to %ld", (long int) (p + mcnt - start)); 1050 1.1 christos # endif 1051 1.1 christos break; 1052 1.1 christos 1053 1.1 christos case succeed_n: 1054 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1055 1.1 christos p1 = p + mcnt; 1056 1.1 christos PREFIX(extract_number_and_incr) (&mcnt2, &p); 1057 1.1 christos # ifdef _LIBC 1058 1.1 christos printf ("/succeed_n to %td, %d times", p1 - start, mcnt2); 1059 1.1 christos # else 1060 1.1 christos printf ("/succeed_n to %ld, %d times", 1061 1.1 christos (long int) (p1 - start), mcnt2); 1062 1.1 christos # endif 1063 1.1 christos break; 1064 1.1 christos 1065 1.1 christos case jump_n: 1066 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1067 1.1 christos p1 = p + mcnt; 1068 1.1 christos PREFIX(extract_number_and_incr) (&mcnt2, &p); 1069 1.1 christos printf ("/jump_n to %d, %d times", p1 - start, mcnt2); 1070 1.1 christos break; 1071 1.1 christos 1072 1.1 christos case set_number_at: 1073 1.1 christos PREFIX(extract_number_and_incr) (&mcnt, &p); 1074 1.1 christos p1 = p + mcnt; 1075 1.1 christos PREFIX(extract_number_and_incr) (&mcnt2, &p); 1076 1.1 christos # ifdef _LIBC 1077 1.1 christos printf ("/set_number_at location %td to %d", p1 - start, mcnt2); 1078 1.1 christos # else 1079 1.1 christos printf ("/set_number_at location %ld to %d", 1080 1.1 christos (long int) (p1 - start), mcnt2); 1081 1.1 christos # endif 1082 1.1 christos break; 1083 1.1 christos 1084 1.1 christos case wordbound: 1085 1.1 christos printf ("/wordbound"); 1086 1.1 christos break; 1087 1.1 christos 1088 1.1 christos case notwordbound: 1089 1.1 christos printf ("/notwordbound"); 1090 1.1 christos break; 1091 1.1 christos 1092 1.1 christos case wordbeg: 1093 1.1 christos printf ("/wordbeg"); 1094 1.1 christos break; 1095 1.1 christos 1096 1.1 christos case wordend: 1097 1.1 christos printf ("/wordend"); 1098 1.1 christos break; 
1099 1.1 christos 1100 1.1 christos # ifdef emacs 1101 1.1 christos case before_dot: 1102 1.1 christos printf ("/before_dot"); 1103 1.1 christos break; 1104 1.1 christos 1105 1.1 christos case at_dot: 1106 1.1 christos printf ("/at_dot"); 1107 1.1 christos break; 1108 1.1 christos 1109 1.1 christos case after_dot: 1110 1.1 christos printf ("/after_dot"); 1111 1.1 christos break; 1112 1.1 christos 1113 1.1 christos case syntaxspec: 1114 1.1 christos printf ("/syntaxspec"); 1115 1.1 christos mcnt = *p++; 1116 1.1 christos printf ("/%d", mcnt); 1117 1.1 christos break; 1118 1.1 christos 1119 1.1 christos case notsyntaxspec: 1120 1.1 christos printf ("/notsyntaxspec"); 1121 1.1 christos mcnt = *p++; 1122 1.1 christos printf ("/%d", mcnt); 1123 1.1 christos break; 1124 1.1 christos # endif /* emacs */ 1125 1.1 christos 1126 1.1 christos case wordchar: 1127 1.1 christos printf ("/wordchar"); 1128 1.1 christos break; 1129 1.1 christos 1130 1.1 christos case notwordchar: 1131 1.1 christos printf ("/notwordchar"); 1132 1.1 christos break; 1133 1.1 christos 1134 1.1 christos case begbuf: 1135 1.1 christos printf ("/begbuf"); 1136 1.1 christos break; 1137 1.1 christos 1138 1.1 christos case endbuf: 1139 1.1 christos printf ("/endbuf"); 1140 1.1 christos break; 1141 1.1 christos 1142 1.1 christos default: 1143 1.1 christos printf ("?%ld", (long int) *(p-1)); 1144 1.1 christos } 1145 1.1 christos 1146 1.1 christos putchar ('\n'); 1147 1.1 christos } 1148 1.1 christos 1149 1.1 christos # ifdef _LIBC 1150 1.1 christos printf ("%td:\tend of pattern.\n", p - start); 1151 1.1 christos # else 1152 1.1 christos printf ("%ld:\tend of pattern.\n", (long int) (p - start)); 1153 1.1 christos # endif 1154 1.1 christos } 1155 1.1 christos 1156 1.1 christos 1157 1.1 christos void 1158 1.1 christos PREFIX(print_compiled_pattern) (bufp) 1159 1.1 christos struct re_pattern_buffer *bufp; 1160 1.1 christos { 1161 1.1 christos UCHAR_T *buffer = (UCHAR_T*) bufp->buffer; 1162 1.1 christos 1163 1.1 
christos PREFIX(print_partial_compiled_pattern) (buffer, buffer 1164 1.1 christos + bufp->used / sizeof(UCHAR_T)); 1165 1.1 christos printf ("%ld bytes used/%ld bytes allocated.\n", 1166 1.1 christos bufp->used, bufp->allocated); 1167 1.1 christos 1168 1.1 christos if (bufp->fastmap_accurate && bufp->fastmap) 1169 1.1 christos { 1170 1.1 christos printf ("fastmap: "); 1171 1.1 christos print_fastmap (bufp->fastmap); 1172 1.1 christos } 1173 1.1 christos 1174 1.1 christos # ifdef _LIBC 1175 1.1 christos printf ("re_nsub: %Zd\t", bufp->re_nsub); 1176 1.1 christos # else 1177 1.1 christos printf ("re_nsub: %ld\t", (long int) bufp->re_nsub); 1178 1.1 christos # endif 1179 1.1 christos printf ("regs_alloc: %d\t", bufp->regs_allocated); 1180 1.1 christos printf ("can_be_null: %d\t", bufp->can_be_null); 1181 1.1 christos printf ("newline_anchor: %d\n", bufp->newline_anchor); 1182 1.1 christos printf ("no_sub: %d\t", bufp->no_sub); 1183 1.1 christos printf ("not_bol: %d\t", bufp->not_bol); 1184 1.1 christos printf ("not_eol: %d\t", bufp->not_eol); 1185 1.1 christos printf ("syntax: %lx\n", bufp->syntax); 1186 1.1 christos /* Perhaps we should print the translate table? 
*/ 1187 1.1 christos } 1188 1.1 christos 1189 1.1 christos 1190 1.1 christos void 1191 1.1 christos PREFIX(print_double_string) (where, string1, size1, string2, size2) 1192 1.1 christos const CHAR_T *where; 1193 1.1 christos const CHAR_T *string1; 1194 1.1 christos const CHAR_T *string2; 1195 1.1 christos int size1; 1196 1.1 christos int size2; 1197 1.1 christos { 1198 1.1 christos int this_char; 1199 1.1 christos 1200 1.1 christos if (where == NULL) 1201 1.1 christos printf ("(null)"); 1202 1.1 christos else 1203 1.1 christos { 1204 1.1 christos int cnt; 1205 1.1 christos 1206 1.1 christos if (FIRST_STRING_P (where)) 1207 1.1 christos { 1208 1.1 christos for (this_char = where - string1; this_char < size1; this_char++) 1209 1.1 christos PUT_CHAR (string1[this_char]); 1210 1.1 christos 1211 1.1 christos where = string2; 1212 1.1 christos } 1213 1.1 christos 1214 1.1 christos cnt = 0; 1215 1.1 christos for (this_char = where - string2; this_char < size2; this_char++) 1216 1.1 christos { 1217 1.1 christos PUT_CHAR (string2[this_char]); 1218 1.1 christos if (++cnt > 100) 1219 1.1 christos { 1220 1.1 christos fputs ("...", stdout); 1221 1.1 christos break; 1222 1.1 christos } 1223 1.1 christos } 1224 1.1 christos } 1225 1.1 christos } 1226 1.1 christos 1227 1.1 christos # ifndef DEFINED_ONCE 1228 1.1 christos void 1229 1.1 christos printchar (c) 1230 1.1 christos int c; 1231 1.1 christos { 1232 1.1 christos putc (c, stderr); 1233 1.1 christos } 1234 1.1 christos # endif 1235 1.1 christos 1236 1.1 christos # else /* not DEBUG */ 1237 1.1 christos 1238 1.1 christos # ifndef DEFINED_ONCE 1239 1.1 christos # undef assert 1240 1.1 christos # define assert(e) 1241 1.1 christos 1242 1.1 christos # define DEBUG_STATEMENT(e) 1243 1.1 christos # define DEBUG_PRINT1(x) 1244 1.1 christos # define DEBUG_PRINT2(x1, x2) 1245 1.1 christos # define DEBUG_PRINT3(x1, x2, x3) 1246 1.1 christos # define DEBUG_PRINT4(x1, x2, x3, x4) 1247 1.1 christos # endif /* not DEFINED_ONCE */ 1248 1.1 
christos # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 1249 1.1 christos # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 1250 1.1 christos 1251 1.1 christos # endif /* not DEBUG */ 1252 1.1 christos 1253 1.1 christos 1254 1.1 christos 1256 1.1 christos # ifdef WCHAR 1257 1.1 christos /* This convert a multibyte string to a wide character string. 1258 1.1 christos And write their correspondances to offset_buffer(see below) 1259 1.1 christos and write whether each wchar_t is binary data to is_binary. 1260 1.1 christos This assume invalid multibyte sequences as binary data. 1261 1.1 christos We assume offset_buffer and is_binary is already allocated 1262 1.1 christos enough space. */ 1263 1.1 christos 1264 1.1 christos static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src, 1265 1.1 christos size_t len, int *offset_buffer, 1266 1.1 christos char *is_binary); 1267 1.1 christos static size_t 1268 1.1 christos convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary) 1269 1.1 christos CHAR_T *dest; 1270 1.1 christos const unsigned char* src; 1271 1.1 christos size_t len; /* the length of multibyte string. */ 1272 1.1 christos 1273 1.1 christos /* It hold correspondances between src(char string) and 1274 1.1 christos dest(wchar_t string) for optimization. 1275 1.1 christos e.g. src = "xxxyzz" 1276 1.1 christos dest = {'X', 'Y', 'Z'} 1277 1.1 christos (each "xxx", "y" and "zz" represent one multibyte character 1278 1.1 christos corresponding to 'X', 'Y' and 'Z'.) 
1279 1.1 christos offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")} 1280 1.1 christos = {0, 3, 4, 6} 1281 1.1 christos */ 1282 1.1 christos int *offset_buffer; 1283 1.1 christos char *is_binary; 1284 1.1 christos { 1285 1.1 christos wchar_t *pdest = dest; 1286 1.1 christos const unsigned char *psrc = src; 1287 1.1 christos size_t wc_count = 0; 1288 1.1 christos 1289 1.1 christos mbstate_t mbs; 1290 1.1 christos int i, consumed; 1291 1.1 christos size_t mb_remain = len; 1292 1.1 christos size_t mb_count = 0; 1293 1.1 christos 1294 1.1 christos /* Initialize the conversion state. */ 1295 1.1 christos memset (&mbs, 0, sizeof (mbstate_t)); 1296 1.1 christos 1297 1.1 christos offset_buffer[0] = 0; 1298 1.1 christos for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, 1299 1.1 christos psrc += consumed) 1300 1.1 christos { 1301 1.1 christos consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); 1302 1.1 christos 1303 1.1 christos if (consumed <= 0) 1304 1.1 christos /* failed to convert. maybe src contains binary data. 1305 1.1 christos So we consume 1 byte manualy. */ 1306 1.1 christos { 1307 1.1 christos *pdest = *psrc; 1308 1.1 christos consumed = 1; 1309 1.1 christos is_binary[wc_count] = TRUE; 1310 1.1 christos } 1311 1.1 christos else 1312 1.1 christos is_binary[wc_count] = FALSE; 1313 1.1 christos /* In sjis encoding, we use yen sign as escape character in 1314 1.1 christos place of reverse solidus. So we convert 0x5c(yen sign in 1315 1.1 christos sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse 1316 1.1 christos solidus in UCS2). */ 1317 1.1 christos if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) 1318 1.1 christos *pdest = (wchar_t) *psrc; 1319 1.1 christos 1320 1.1 christos offset_buffer[wc_count + 1] = mb_count += consumed; 1321 1.1 christos } 1322 1.1 christos 1323 1.1 christos /* Fill remain of the buffer with sentinel. 
*/ 1324 1.1 christos for (i = wc_count + 1 ; i <= len ; i++) 1325 1.1 christos offset_buffer[i] = mb_count + 1; 1326 1.1 christos 1327 1.1 christos return wc_count; 1328 1.1 christos } 1329 1.1 christos 1330 1.1 christos # endif /* WCHAR */ 1331 1.1 christos 1332 1.1 christos #else /* not INSIDE_RECURSION */ 1333 1.1 christos 1334 1.1 christos /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 1335 1.1 christos also be assigned to arbitrarily: each pattern buffer stores its own 1336 1.1 christos syntax, so it can be changed between regex compilations. */ 1337 1.1 christos /* This has no initializer because initialized variables in Emacs 1338 1.1 christos become read-only after dumping. */ 1339 1.1 christos reg_syntax_t re_syntax_options; 1340 1.1 christos 1341 1.1 christos 1342 1.1 christos /* Specify the precise syntax of regexps for compilation. This provides 1343 1.1 christos for compatibility for various utilities which historically have 1344 1.1 christos different, incompatible syntaxes. 1345 1.1 christos 1346 1.1 christos The argument SYNTAX is a bit mask comprised of the various bits 1347 1.1 christos defined in regex.h. We return the old syntax. 
*/ 1348 1.1 christos 1349 1.1 christos reg_syntax_t 1350 1.1 christos re_set_syntax (syntax) 1351 1.1 christos reg_syntax_t syntax; 1352 1.1 christos { 1353 1.1 christos reg_syntax_t ret = re_syntax_options; 1354 1.1 christos 1355 1.1 christos re_syntax_options = syntax; 1356 1.1 christos # ifdef DEBUG 1357 1.1 christos if (syntax & RE_DEBUG) 1358 1.1 christos debug = 1; 1359 1.1 christos else if (debug) /* was on but now is not */ 1360 1.1 christos debug = 0; 1361 1.1 christos # endif /* DEBUG */ 1362 1.1 christos return ret; 1363 1.1 christos } 1364 1.1 christos # ifdef _LIBC 1365 1.1 christos weak_alias (__re_set_syntax, re_set_syntax) 1366 1.1 christos # endif 1367 1.1 christos 1368 1.1 christos /* This table gives an error message for each of the error codes listed 1370 1.1 christos in regex.h. Obviously the order here has to be same as there. 1371 1.1 christos POSIX doesn't require that we do anything for REG_NOERROR, 1372 1.1 christos but why not be nice? */ 1373 1.1 christos 1374 1.1 christos static const char re_error_msgid[] = 1375 1.1 christos { 1376 1.1 christos # define REG_NOERROR_IDX 0 1377 1.1 christos gettext_noop ("Success") /* REG_NOERROR */ 1378 1.1 christos "\0" 1379 1.1 christos # define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") 1380 1.1 christos gettext_noop ("No match") /* REG_NOMATCH */ 1381 1.1 christos "\0" 1382 1.1 christos # define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") 1383 1.1 christos gettext_noop ("Invalid regular expression") /* REG_BADPAT */ 1384 1.1 christos "\0" 1385 1.1 christos # define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") 1386 1.1 christos gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ 1387 1.1 christos "\0" 1388 1.1 christos # define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") 1389 1.1 christos gettext_noop ("Invalid character class name") /* REG_ECTYPE */ 1390 1.1 christos "\0" 1391 1.1 christos # define REG_EESCAPE_IDX 
(REG_ECTYPE_IDX + sizeof "Invalid character class name") 1392 1.1 christos gettext_noop ("Trailing backslash") /* REG_EESCAPE */ 1393 1.1 christos "\0" 1394 1.1 christos # define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") 1395 1.1 christos gettext_noop ("Invalid back reference") /* REG_ESUBREG */ 1396 1.1 christos "\0" 1397 1.1 christos # define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") 1398 1.1 christos gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ 1399 1.1 christos "\0" 1400 1.1 christos # define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") 1401 1.1 christos gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ 1402 1.1 christos "\0" 1403 1.1 christos # define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") 1404 1.1 christos gettext_noop ("Unmatched \\{") /* REG_EBRACE */ 1405 1.1 christos "\0" 1406 1.1 christos # define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") 1407 1.1 christos gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ 1408 1.1 christos "\0" 1409 1.1 christos # define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") 1410 1.1 christos gettext_noop ("Invalid range end") /* REG_ERANGE */ 1411 1.1 christos "\0" 1412 1.1 christos # define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") 1413 1.1 christos gettext_noop ("Memory exhausted") /* REG_ESPACE */ 1414 1.1 christos "\0" 1415 1.1 christos # define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") 1416 1.1 christos gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ 1417 1.1 christos "\0" 1418 1.1 christos # define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") 1419 1.1 christos gettext_noop ("Premature end of regular expression") /* REG_EEND */ 1420 1.1 christos "\0" 1421 1.1 christos # define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") 1422 1.1 christos gettext_noop ("Regular 
expression too big") /* REG_ESIZE */ 1423 1.1 christos "\0" 1424 1.1 christos # define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") 1425 1.1 christos gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ 1426 1.1 christos }; 1427 1.1 christos 1428 1.1 christos static const size_t re_error_msgid_idx[] = 1429 1.1 christos { 1430 1.1 christos REG_NOERROR_IDX, 1431 1.1 christos REG_NOMATCH_IDX, 1432 1.1 christos REG_BADPAT_IDX, 1433 1.1 christos REG_ECOLLATE_IDX, 1434 1.1 christos REG_ECTYPE_IDX, 1435 1.1 christos REG_EESCAPE_IDX, 1436 1.1 christos REG_ESUBREG_IDX, 1437 1.1 christos REG_EBRACK_IDX, 1438 1.1 christos REG_EPAREN_IDX, 1439 1.1 christos REG_EBRACE_IDX, 1440 1.1 christos REG_BADBR_IDX, 1441 1.1 christos REG_ERANGE_IDX, 1442 1.1 christos REG_ESPACE_IDX, 1443 1.1 christos REG_BADRPT_IDX, 1444 1.1 christos REG_EEND_IDX, 1445 1.1 christos REG_ESIZE_IDX, 1446 1.1 christos REG_ERPAREN_IDX 1447 1.1 christos }; 1448 1.1 christos 1449 1.1 christos #endif /* INSIDE_RECURSION */ 1451 1.1 christos 1452 1.1 christos #ifndef DEFINED_ONCE 1453 1.1 christos /* Avoiding alloca during matching, to placate r_alloc. */ 1454 1.1 christos 1455 1.1 christos /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1456 1.1 christos searching and matching functions should not call alloca. On some 1457 1.1 christos systems, alloca is implemented in terms of malloc, and if we're 1458 1.1 christos using the relocating allocator routines, then malloc could cause a 1459 1.1 christos relocation, which might (if the strings being searched are in the 1460 1.1 christos ralloc heap) shift the data out from underneath the regexp 1461 1.1 christos routines. 1462 1.1 christos 1463 1.1 christos Here's another reason to avoid allocation: Emacs 1464 1.1 christos processes input from X in a signal handler; processing X input may 1465 1.1 christos call malloc; if input arrives while a matching routine is calling 1466 1.1 christos malloc, then we're scrod. 
But Emacs can't just block input while 1467 1.1 christos calling matching routines; then we don't notice interrupts when 1468 1.1 christos they come in. So, Emacs blocks input around all regexp calls 1469 1.1 christos except the matching calls, which it leaves unprotected, in the 1470 1.1 christos faith that they will not malloc. */ 1471 1.1 christos 1472 1.1 christos /* Normally, this is fine. */ 1473 1.1 christos # define MATCH_MAY_ALLOCATE 1474 1.1 christos 1475 1.1 christos /* When using GNU C, we are not REALLY using the C alloca, no matter 1476 1.1 christos what config.h may say. So don't take precautions for it. */ 1477 1.1 christos # ifdef __GNUC__ 1478 1.1 christos # undef C_ALLOCA 1479 1.1 christos # endif 1480 1.1 christos 1481 1.1 christos /* The match routines may not allocate if (1) they would do it with malloc 1482 1.1 christos and (2) it's not safe for them to use malloc. 1483 1.1 christos Note that if REL_ALLOC is defined, matching would not use malloc for the 1484 1.1 christos failure stack, but we would still use it for the register vectors; 1485 1.1 christos so REL_ALLOC should not affect this. */ 1486 1.1 christos # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 1487 1.1 christos # undef MATCH_MAY_ALLOCATE 1488 1.1 christos # endif 1489 1.1 christos #endif /* not DEFINED_ONCE */ 1490 1.1 christos 1491 1.1 christos #ifdef INSIDE_RECURSION 1493 1.1 christos /* Failure stack declarations and macros; both re_compile_fastmap and 1494 1.1 christos re_match_2 use a failure stack. These have to be macros because of 1495 1.1 christos REGEX_ALLOCATE_STACK. */ 1496 1.1 christos 1497 1.1 christos 1498 1.1 christos /* Number of failure points for which to initially allocate space 1499 1.1 christos when matching. If this number is exceeded, we allocate more 1500 1.1 christos space, so it is not a hard limit. 
*/ 1501 1.1 christos # ifndef INIT_FAILURE_ALLOC 1502 1.1 christos # define INIT_FAILURE_ALLOC 5 1503 1.1 christos # endif 1504 1.1 christos 1505 1.1 christos /* Roughly the maximum number of failure points on the stack. Would be 1506 1.1 christos exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 1507 1.1 christos This is a variable only so users of regex can assign to it; we never 1508 1.1 christos change it ourselves. */ 1509 1.1 christos 1510 1.1 christos # ifdef INT_IS_16BIT 1511 1.1 christos 1512 1.1 christos # ifndef DEFINED_ONCE 1513 1.1 christos # if defined MATCH_MAY_ALLOCATE 1514 1.1 christos /* 4400 was enough to cause a crash on Alpha OSF/1, 1515 1.1 christos whose default stack limit is 2mb. */ 1516 1.1 christos long int re_max_failures = 4000; 1517 1.1 christos # else 1518 1.1 christos long int re_max_failures = 2000; 1519 1.1 christos # endif 1520 1.1 christos # endif 1521 1.1 christos 1522 1.1 christos union PREFIX(fail_stack_elt) 1523 1.1 christos { 1524 1.1 christos UCHAR_T *pointer; 1525 1.1 christos long int integer; 1526 1.1 christos }; 1527 1.1 christos 1528 1.1 christos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 1529 1.1 christos 1530 1.1 christos typedef struct 1531 1.1 christos { 1532 1.1 christos PREFIX(fail_stack_elt_t) *stack; 1533 1.1 christos unsigned long int size; 1534 1.1 christos unsigned long int avail; /* Offset of next open position. */ 1535 1.1 christos } PREFIX(fail_stack_type); 1536 1.1 christos 1537 1.1 christos # else /* not INT_IS_16BIT */ 1538 1.1 christos 1539 1.1 christos # ifndef DEFINED_ONCE 1540 1.1 christos # if defined MATCH_MAY_ALLOCATE 1541 1.1 christos /* 4400 was enough to cause a crash on Alpha OSF/1, 1542 1.1 christos whose default stack limit is 2mb. 
*/ 1543 1.1 christos int re_max_failures = 4000; 1544 1.1 christos # else 1545 1.1 christos int re_max_failures = 2000; 1546 1.1 christos # endif 1547 1.1 christos # endif 1548 1.1 christos 1549 1.1 christos union PREFIX(fail_stack_elt) 1550 1.1 christos { 1551 1.1 christos UCHAR_T *pointer; 1552 1.1 christos int integer; 1553 1.1 christos }; 1554 1.1 christos 1555 1.1 christos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 1556 1.1 christos 1557 1.1 christos typedef struct 1558 1.1 christos { 1559 1.1 christos PREFIX(fail_stack_elt_t) *stack; 1560 1.1 christos unsigned size; 1561 1.1 christos unsigned avail; /* Offset of next open position. */ 1562 1.1 christos } PREFIX(fail_stack_type); 1563 1.1 christos 1564 1.1 christos # endif /* INT_IS_16BIT */ 1565 1.1 christos 1566 1.1 christos # ifndef DEFINED_ONCE 1567 1.1 christos # define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 1568 1.1 christos # define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 1569 1.1 christos # define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1570 1.1 christos # endif 1571 1.1 christos 1572 1.1 christos 1573 1.1 christos /* Define macros to initialize and free the failure stack. 1574 1.1 christos Do `return -2' if the alloc fails. 
*/ 1575 1.1 christos 1576 1.1 christos # ifdef MATCH_MAY_ALLOCATE 1577 1.1 christos # define INIT_FAIL_STACK() \ 1578 1.1 christos do { \ 1579 1.1 christos fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \ 1580 1.1 christos REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \ 1581 1.1 christos \ 1582 1.1 christos if (fail_stack.stack == NULL) \ 1583 1.1 christos return -2; \ 1584 1.1 christos \ 1585 1.1 christos fail_stack.size = INIT_FAILURE_ALLOC; \ 1586 1.1 christos fail_stack.avail = 0; \ 1587 1.1 christos } while (0) 1588 1.1 christos 1589 1.1 christos # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 1590 1.1 christos # else 1591 1.1 christos # define INIT_FAIL_STACK() \ 1592 1.1 christos do { \ 1593 1.1 christos fail_stack.avail = 0; \ 1594 1.1 christos } while (0) 1595 1.1 christos 1596 1.1 christos # define RESET_FAIL_STACK() 1597 1.1 christos # endif 1598 1.1 christos 1599 1.1 christos 1600 1.1 christos /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 1601 1.1 christos 1602 1.1 christos Return 1 if succeeds, and 0 if either ran out of memory 1603 1.1 christos allocating space for it or it was already too large. 1604 1.1 christos 1605 1.1 christos REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1606 1.1 christos 1607 1.1 christos # define DOUBLE_FAIL_STACK(fail_stack) \ 1608 1.1 christos ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ 1609 1.1 christos ? 0 \ 1610 1.1 christos : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \ 1611 1.1 christos REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1612 1.1 christos (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \ 1613 1.1 christos ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\ 1614 1.1 christos \ 1615 1.1 christos (fail_stack).stack == NULL \ 1616 1.1 christos ? 
0 \ 1617 1.1 christos : ((fail_stack).size <<= 1, \ 1618 1.1 christos 1))) 1619 1.1 christos 1620 1.1 christos 1621 1.1 christos /* Push pointer POINTER on FAIL_STACK. 1622 1.1 christos Return 1 if was able to do so and 0 if ran out of memory allocating 1623 1.1 christos space to do so. */ 1624 1.1 christos # define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 1625 1.1 christos ((FAIL_STACK_FULL () \ 1626 1.1 christos && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ 1627 1.1 christos ? 0 \ 1628 1.1 christos : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1629 1.1 christos 1)) 1630 1.1 christos 1631 1.1 christos /* Push a pointer value onto the failure stack. 1632 1.1 christos Assumes the variable `fail_stack'. Probably should only 1633 1.1 christos be called from within `PUSH_FAILURE_POINT'. */ 1634 1.1 christos # define PUSH_FAILURE_POINTER(item) \ 1635 1.1 christos fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item) 1636 1.1 christos 1637 1.1 christos /* This pushes an integer-valued item onto the failure stack. 1638 1.1 christos Assumes the variable `fail_stack'. Probably should only 1639 1.1 christos be called from within `PUSH_FAILURE_POINT'. */ 1640 1.1 christos # define PUSH_FAILURE_INT(item) \ 1641 1.1 christos fail_stack.stack[fail_stack.avail++].integer = (item) 1642 1.1 christos 1643 1.1 christos /* Push a fail_stack_elt_t value onto the failure stack. 1644 1.1 christos Assumes the variable `fail_stack'. Probably should only 1645 1.1 christos be called from within `PUSH_FAILURE_POINT'. */ 1646 1.1 christos # define PUSH_FAILURE_ELT(item) \ 1647 1.1 christos fail_stack.stack[fail_stack.avail++] = (item) 1648 1.1 christos 1649 1.1 christos /* These three POP... operations complement the three PUSH... operations. 1650 1.1 christos All assume that `fail_stack' is nonempty. 
*/ 1651 1.1 christos # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 1652 1.1 christos # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 1653 1.1 christos # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 1654 1.1 christos 1655 1.1 christos /* Used to omit pushing failure point id's when we're not debugging. */ 1656 1.1 christos # ifdef DEBUG 1657 1.1 christos # define DEBUG_PUSH PUSH_FAILURE_INT 1658 1.1 christos # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () 1659 1.1 christos # else 1660 1.1 christos # define DEBUG_PUSH(item) 1661 1.1 christos # define DEBUG_POP(item_addr) 1662 1.1 christos # endif 1663 1.1 christos 1664 1.1 christos 1665 1.1 christos /* Push the information about the state we will need 1666 1.1 christos if we ever fail back to it. 1667 1.1 christos 1668 1.1 christos Requires variables fail_stack, regstart, regend, reg_info, and 1669 1.1 christos num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' 1670 1.1 christos be declared. 1671 1.1 christos 1672 1.1 christos Does `return FAILURE_CODE' if runs out of memory. */ 1673 1.1 christos 1674 1.1 christos # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ 1675 1.1 christos do { \ 1676 1.1 christos char *destination; \ 1677 1.1 christos /* Must be int, so when we don't save any registers, the arithmetic \ 1678 1.1 christos of 0 + -1 isn't done as unsigned. 
*/ \ 1679 1.1 christos /* Can't be int, since there is not a shred of a guarantee that int \ 1680 1.1 christos is wide enough to hold a value of something to which pointer can \ 1681 1.1 christos be assigned */ \ 1682 1.1 christos active_reg_t this_reg; \ 1683 1.1 christos \ 1684 1.1 christos DEBUG_STATEMENT (failure_id++); \ 1685 1.1 christos DEBUG_STATEMENT (nfailure_points_pushed++); \ 1686 1.1 christos DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1687 1.1 christos DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 1688 1.1 christos DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1689 1.1 christos \ 1690 1.1 christos DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ 1691 1.1 christos DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 1692 1.1 christos \ 1693 1.1 christos /* Ensure we have enough space allocated for what we will push. */ \ 1694 1.1 christos while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 1695 1.1 christos { \ 1696 1.1 christos if (!DOUBLE_FAIL_STACK (fail_stack)) \ 1697 1.1 christos return failure_code; \ 1698 1.1 christos \ 1699 1.1 christos DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 1700 1.1 christos (fail_stack).size); \ 1701 1.1 christos DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 1702 1.1 christos } \ 1703 1.1 christos \ 1704 1.1 christos /* Push the info, starting with the registers. 
*/ \ 1705 1.1 christos DEBUG_PRINT1 ("\n"); \ 1706 1.1 christos \ 1707 1.1 christos if (1) \ 1708 1.1 christos for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ 1709 1.1 christos this_reg++) \ 1710 1.1 christos { \ 1711 1.1 christos DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ 1712 1.1 christos DEBUG_STATEMENT (num_regs_pushed++); \ 1713 1.1 christos \ 1714 1.1 christos DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 1715 1.1 christos PUSH_FAILURE_POINTER (regstart[this_reg]); \ 1716 1.1 christos \ 1717 1.1 christos DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 1718 1.1 christos PUSH_FAILURE_POINTER (regend[this_reg]); \ 1719 1.1 christos \ 1720 1.1 christos DEBUG_PRINT2 (" info: %p\n ", \ 1721 1.1 christos reg_info[this_reg].word.pointer); \ 1722 1.1 christos DEBUG_PRINT2 (" match_null=%d", \ 1723 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ 1724 1.1 christos DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ 1725 1.1 christos DEBUG_PRINT2 (" matched_something=%d", \ 1726 1.1 christos MATCHED_SOMETHING (reg_info[this_reg])); \ 1727 1.1 christos DEBUG_PRINT2 (" ever_matched=%d", \ 1728 1.1 christos EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ 1729 1.1 christos DEBUG_PRINT1 ("\n"); \ 1730 1.1 christos PUSH_FAILURE_ELT (reg_info[this_reg].word); \ 1731 1.1 christos } \ 1732 1.1 christos \ 1733 1.1 christos DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ 1734 1.1 christos PUSH_FAILURE_INT (lowest_active_reg); \ 1735 1.1 christos \ 1736 1.1 christos DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ 1737 1.1 christos PUSH_FAILURE_INT (highest_active_reg); \ 1738 1.1 christos \ 1739 1.1 christos DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ 1740 1.1 christos DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 1741 1.1 christos PUSH_FAILURE_POINTER (pattern_place); \ 1742 1.1 christos \ 1743 1.1 christos DEBUG_PRINT2 (" Pushing string %p: `", 
string_place); \ 1744 1.1 christos DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 1745 1.1 christos size2); \ 1746 1.1 christos DEBUG_PRINT1 ("'\n"); \ 1747 1.1 christos PUSH_FAILURE_POINTER (string_place); \ 1748 1.1 christos \ 1749 1.1 christos DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 1750 1.1 christos DEBUG_PUSH (failure_id); \ 1751 1.1 christos } while (0) 1752 1.1 christos 1753 1.1 christos # ifndef DEFINED_ONCE 1754 1.1 christos /* This is the number of items that are pushed and popped on the stack 1755 1.1 christos for each register. */ 1756 1.1 christos # define NUM_REG_ITEMS 3 1757 1.1 christos 1758 1.1 christos /* Individual items aside from the registers. */ 1759 1.1 christos # ifdef DEBUG 1760 1.1 christos # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ 1761 1.1 christos # else 1762 1.1 christos # define NUM_NONREG_ITEMS 4 1763 1.1 christos # endif 1764 1.1 christos 1765 1.1 christos /* We push at most this many items on the stack. */ 1766 1.1 christos /* We used to use (num_regs - 1), which is the number of registers 1767 1.1 christos this regexp will save; but that was changed to 5 1768 1.1 christos to avoid stack overflow for a regexp with lots of parens. */ 1769 1.1 christos # define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) 1770 1.1 christos 1771 1.1 christos /* We actually push this many items. */ 1772 1.1 christos # define NUM_FAILURE_ITEMS \ 1773 1.1 christos (((0 \ 1774 1.1 christos ? 0 : highest_active_reg - lowest_active_reg + 1) \ 1775 1.1 christos * NUM_REG_ITEMS) \ 1776 1.1 christos + NUM_NONREG_ITEMS) 1777 1.1 christos 1778 1.1 christos /* How many items can still be added to the stack without overflowing it. */ 1779 1.1 christos # define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) 1780 1.1 christos # endif /* not DEFINED_ONCE */ 1781 1.1 christos 1782 1.1 christos 1783 1.1 christos /* Pops what PUSH_FAIL_STACK pushes. 
1784 1.1 christos 1785 1.1 christos We restore into the parameters, all of which should be lvalues: 1786 1.1 christos STR -- the saved data position. 1787 1.1 christos PAT -- the saved pattern position. 1788 1.1 christos LOW_REG, HIGH_REG -- the highest and lowest active registers. 1789 1.1 christos REGSTART, REGEND -- arrays of string positions. 1790 1.1 christos REG_INFO -- array of information about each subexpression. 1791 1.1 christos 1792 1.1 christos Also assumes the variables `fail_stack' and (if debugging), `bufp', 1793 1.1 christos `pend', `string1', `size1', `string2', and `size2'. */ 1794 1.1 christos # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 1795 1.1 christos { \ 1796 1.1 christos DEBUG_STATEMENT (unsigned failure_id;) \ 1797 1.1 christos active_reg_t this_reg; \ 1798 1.1 christos const UCHAR_T *string_temp; \ 1799 1.1 christos \ 1800 1.1 christos assert (!FAIL_STACK_EMPTY ()); \ 1801 1.1 christos \ 1802 1.1 christos /* Remove failure points and point to how many regs pushed. */ \ 1803 1.1 christos DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 1804 1.1 christos DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 1805 1.1 christos DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 1806 1.1 christos \ 1807 1.1 christos assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 1808 1.1 christos \ 1809 1.1 christos DEBUG_POP (&failure_id); \ 1810 1.1 christos DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 1811 1.1 christos \ 1812 1.1 christos /* If the saved string location is NULL, it came from an \ 1813 1.1 christos on_failure_keep_string_jump opcode, and we want to throw away the \ 1814 1.1 christos saved NULL, thus retaining our current position in the string. 
*/ \ 1815 1.1 christos string_temp = POP_FAILURE_POINTER (); \ 1816 1.1 christos if (string_temp != NULL) \ 1817 1.1 christos str = (const CHAR_T *) string_temp; \ 1818 1.1 christos \ 1819 1.1 christos DEBUG_PRINT2 (" Popping string %p: `", str); \ 1820 1.1 christos DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 1821 1.1 christos DEBUG_PRINT1 ("'\n"); \ 1822 1.1 christos \ 1823 1.1 christos pat = (UCHAR_T *) POP_FAILURE_POINTER (); \ 1824 1.1 christos DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ 1825 1.1 christos DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ 1826 1.1 christos \ 1827 1.1 christos /* Restore register info. */ \ 1828 1.1 christos high_reg = (active_reg_t) POP_FAILURE_INT (); \ 1829 1.1 christos DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ 1830 1.1 christos \ 1831 1.1 christos low_reg = (active_reg_t) POP_FAILURE_INT (); \ 1832 1.1 christos DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ 1833 1.1 christos \ 1834 1.1 christos if (1) \ 1835 1.1 christos for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 1836 1.1 christos { \ 1837 1.1 christos DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ 1838 1.1 christos \ 1839 1.1 christos reg_info[this_reg].word = POP_FAILURE_ELT (); \ 1840 1.1 christos DEBUG_PRINT2 (" info: %p\n", \ 1841 1.1 christos reg_info[this_reg].word.pointer); \ 1842 1.1 christos \ 1843 1.1 christos regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ 1844 1.1 christos DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 1845 1.1 christos \ 1846 1.1 christos regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ 1847 1.1 christos DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 1848 1.1 christos } \ 1849 1.1 christos else \ 1850 1.1 christos { \ 1851 1.1 christos for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 1852 1.1 christos { \ 1853 1.1 christos reg_info[this_reg].word.integer = 0; \ 1854 1.1 christos regend[this_reg] = 0; \ 1855 
1.1 christos regstart[this_reg] = 0; \ 1856 1.1 christos } \ 1857 1.1 christos highest_active_reg = high_reg; \ 1858 1.1 christos } \ 1859 1.1 christos \ 1860 1.1 christos set_regs_matched_done = 0; \ 1861 1.1 christos DEBUG_STATEMENT (nfailure_points_popped++); \ 1862 1.1 christos } /* POP_FAILURE_POINT */ 1863 1.1 christos 1864 1.1 christos /* Structure for per-register (a.k.a. per-group) information. 1866 1.1 christos Other register information, such as the 1867 1.1 christos starting and ending positions (which are addresses), and the list of 1868 1.1 christos inner groups (which is a bits list) are maintained in separate 1869 1.1 christos variables. 1870 1.1 christos 1871 1.1 christos We are making a (strictly speaking) nonportable assumption here: that 1872 1.1 christos the compiler will pack our bit fields into something that fits into 1873 1.1 christos the type of `word', i.e., is something that fits into one item on the 1874 1.1 christos failure stack. */ 1875 1.1 christos 1876 1.1 christos 1877 1.1 christos /* Declarations and macros for re_match_2. */ 1878 1.1 christos 1879 1.1 christos typedef union 1880 1.1 christos { 1881 1.1 christos PREFIX(fail_stack_elt_t) word; 1882 1.1 christos struct 1883 1.1 christos { 1884 1.1 christos /* This field is one if this group can match the empty string, 1885 1.1 christos zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. 
*/ 1886 1.1 christos # define MATCH_NULL_UNSET_VALUE 3 1887 1.1 christos unsigned match_null_string_p : 2; 1888 1.1 christos unsigned is_active : 1; 1889 1.1 christos unsigned matched_something : 1; 1890 1.1 christos unsigned ever_matched_something : 1; 1891 1.1 christos } bits; 1892 1.1 christos } PREFIX(register_info_type); 1893 1.1 christos 1894 1.1 christos # ifndef DEFINED_ONCE 1895 1.1 christos # define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) 1896 1.1 christos # define IS_ACTIVE(R) ((R).bits.is_active) 1897 1.1 christos # define MATCHED_SOMETHING(R) ((R).bits.matched_something) 1898 1.1 christos # define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) 1899 1.1 christos 1900 1.1 christos 1901 1.1 christos /* Call this when have matched a real character; it sets `matched' flags 1902 1.1 christos for the subexpressions which we are currently inside. Also records 1903 1.1 christos that those subexprs have matched. */ 1904 1.1 christos # define SET_REGS_MATCHED() \ 1905 1.1 christos do \ 1906 1.1 christos { \ 1907 1.1 christos if (!set_regs_matched_done) \ 1908 1.1 christos { \ 1909 1.1 christos active_reg_t r; \ 1910 1.1 christos set_regs_matched_done = 1; \ 1911 1.1 christos for (r = lowest_active_reg; r <= highest_active_reg; r++) \ 1912 1.1 christos { \ 1913 1.1 christos MATCHED_SOMETHING (reg_info[r]) \ 1914 1.1 christos = EVER_MATCHED_SOMETHING (reg_info[r]) \ 1915 1.1 christos = 1; \ 1916 1.1 christos } \ 1917 1.1 christos } \ 1918 1.1 christos } \ 1919 1.1 christos while (0) 1920 1.1 christos # endif /* not DEFINED_ONCE */ 1921 1.1 christos 1922 1.1 christos /* Registers are set to a sentinel when they haven't yet matched. */ 1923 1.1 christos static CHAR_T PREFIX(reg_unset_dummy); 1924 1.1 christos # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy)) 1925 1.1 christos # define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 1926 1.1 christos 1927 1.1 christos /* Subroutine declarations and macros for regex_compile. 
*/ 1928 1.1 christos static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg)); 1929 1.1 christos static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, 1930 1.1 christos int arg1, int arg2)); 1931 1.1 christos static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, 1932 1.1 christos int arg, UCHAR_T *end)); 1933 1.1 christos static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, 1934 1.1 christos int arg1, int arg2, UCHAR_T *end)); 1935 1.1 christos static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern, 1936 1.1 christos const CHAR_T *p, 1937 1.1 christos reg_syntax_t syntax)); 1938 1.1 christos static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p, 1939 1.1 christos const CHAR_T *pend, 1940 1.1 christos reg_syntax_t syntax)); 1941 1.1 christos # ifdef WCHAR 1942 1.1 christos static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start, 1943 1.1 christos const CHAR_T **p_ptr, 1944 1.1 christos const CHAR_T *pend, 1945 1.1 christos char *translate, 1946 1.1 christos reg_syntax_t syntax, 1947 1.1 christos UCHAR_T *b, 1948 1.1 christos CHAR_T *char_set)); 1949 1.1 christos static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end)); 1950 1.1 christos # else /* BYTE */ 1951 1.1 christos static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start, 1952 1.1 christos const char **p_ptr, 1953 1.1 christos const char *pend, 1954 1.1 christos char *translate, 1955 1.1 christos reg_syntax_t syntax, 1956 1.1 christos unsigned char *b)); 1957 1.1 christos # endif /* WCHAR */ 1958 1.1 christos 1959 1.1 christos /* Fetch the next character in the uncompiled pattern---translating it 1960 1.1 christos if necessary. Also cast from a signed character in the constant 1961 1.1 christos string passed to us by the user to an unsigned char that we can use 1962 1.1 christos as an array index (in, e.g., `translate'). 
*/ 1963 1.1 christos /* ifdef MBS_SUPPORT, we translate only if character <= 0xff, 1964 1.1 christos because it is impossible to allocate 4GB array for some encodings 1965 1.1 christos which have 4 byte character_set like UCS4. */ 1966 1.1 christos # ifndef PATFETCH 1967 1.1 christos # ifdef WCHAR 1968 1.1 christos # define PATFETCH(c) \ 1969 1.1 christos do {if (p == pend) return REG_EEND; \ 1970 1.1 christos c = (UCHAR_T) *p++; \ 1971 1.1 christos if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \ 1972 1.1 christos } while (0) 1973 1.1 christos # else /* BYTE */ 1974 1.1 christos # define PATFETCH(c) \ 1975 1.1 christos do {if (p == pend) return REG_EEND; \ 1976 1.1 christos c = (unsigned char) *p++; \ 1977 1.1 christos if (translate) c = (unsigned char) translate[c]; \ 1978 1.1 christos } while (0) 1979 1.1 christos # endif /* WCHAR */ 1980 1.1 christos # endif 1981 1.1 christos 1982 1.1 christos /* Fetch the next character in the uncompiled pattern, with no 1983 1.1 christos translation. */ 1984 1.1 christos # define PATFETCH_RAW(c) \ 1985 1.1 christos do {if (p == pend) return REG_EEND; \ 1986 1.1 christos c = (UCHAR_T) *p++; \ 1987 1.1 christos } while (0) 1988 1.1 christos 1989 1.1 christos /* Go backwards one character in the pattern. */ 1990 1.1 christos # define PATUNFETCH p-- 1991 1.1 christos 1992 1.1 christos 1993 1.1 christos /* If `translate' is non-null, return translate[D], else just D. We 1994 1.1 christos cast the subscript to translate because some data is declared as 1995 1.1 christos `char *', to avoid warnings when a string constant is passed. But 1996 1.1 christos when we use a character as a subscript we must make it unsigned. */ 1997 1.1 christos /* ifdef MBS_SUPPORT, we translate only if character <= 0xff, 1998 1.1 christos because it is impossible to allocate 4GB array for some encodings 1999 1.1 christos which have 4 byte character_set like UCS4. 
*/ 2000 1.1 christos 2001 1.1 christos # ifndef TRANSLATE 2002 1.1 christos # ifdef WCHAR 2003 1.1 christos # define TRANSLATE(d) \ 2004 1.1 christos ((translate && ((UCHAR_T) (d)) <= 0xff) \ 2005 1.1 christos ? (char) translate[(unsigned char) (d)] : (d)) 2006 1.1 christos # else /* BYTE */ 2007 1.1 christos # define TRANSLATE(d) \ 2008 1.1 christos (translate ? (char) translate[(unsigned char) (d)] : (d)) 2009 1.1 christos # endif /* WCHAR */ 2010 1.1 christos # endif 2011 1.1 christos 2012 1.1 christos 2013 1.1 christos /* Macros for outputting the compiled pattern into `buffer'. */ 2014 1.1 christos 2015 1.1 christos /* If the buffer isn't allocated when it comes in, use this. */ 2016 1.1 christos # define INIT_BUF_SIZE (32 * sizeof(UCHAR_T)) 2017 1.1 christos 2018 1.1 christos /* Make sure we have at least N more bytes of space in buffer. */ 2019 1.1 christos # ifdef WCHAR 2020 1.1 christos # define GET_BUFFER_SPACE(n) \ 2021 1.1 christos while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \ 2022 1.1 christos + (n)*sizeof(CHAR_T)) > bufp->allocated) \ 2023 1.1 christos EXTEND_BUFFER () 2024 1.1 christos # else /* BYTE */ 2025 1.1 christos # define GET_BUFFER_SPACE(n) \ 2026 1.1 christos while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ 2027 1.1 christos EXTEND_BUFFER () 2028 1.1 christos # endif /* WCHAR */ 2029 1.1 christos 2030 1.1 christos /* Make sure we have one more byte of buffer space and then add C to it. */ 2031 1.1 christos # define BUF_PUSH(c) \ 2032 1.1 christos do { \ 2033 1.1 christos GET_BUFFER_SPACE (1); \ 2034 1.1 christos *b++ = (UCHAR_T) (c); \ 2035 1.1 christos } while (0) 2036 1.1 christos 2037 1.1 christos 2038 1.1 christos /* Ensure we have two more bytes of buffer space and then append C1 and C2. 
*/ 2039 1.1 christos # define BUF_PUSH_2(c1, c2) \ 2040 1.1 christos do { \ 2041 1.1 christos GET_BUFFER_SPACE (2); \ 2042 1.1 christos *b++ = (UCHAR_T) (c1); \ 2043 1.1 christos *b++ = (UCHAR_T) (c2); \ 2044 1.1 christos } while (0) 2045 1.1 christos 2046 1.1 christos 2047 1.1 christos /* As with BUF_PUSH_2, except for three bytes. */ 2048 1.1 christos # define BUF_PUSH_3(c1, c2, c3) \ 2049 1.1 christos do { \ 2050 1.1 christos GET_BUFFER_SPACE (3); \ 2051 1.1 christos *b++ = (UCHAR_T) (c1); \ 2052 1.1 christos *b++ = (UCHAR_T) (c2); \ 2053 1.1 christos *b++ = (UCHAR_T) (c3); \ 2054 1.1 christos } while (0) 2055 1.1 christos 2056 1.1 christos /* Store a jump with opcode OP at LOC to location TO. We store a 2057 1.1 christos relative address offset by the three bytes the jump itself occupies. */ 2058 1.1 christos # define STORE_JUMP(op, loc, to) \ 2059 1.1 christos PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE))) 2060 1.1 christos 2061 1.1 christos /* Likewise, for a two-argument jump. */ 2062 1.1 christos # define STORE_JUMP2(op, loc, to, arg) \ 2063 1.1 christos PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg) 2064 1.1 christos 2065 1.1 christos /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 2066 1.1 christos # define INSERT_JUMP(op, loc, to) \ 2067 1.1 christos PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b) 2068 1.1 christos 2069 1.1 christos /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 2070 1.1 christos # define INSERT_JUMP2(op, loc, to, arg) \ 2071 1.1 christos PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\ 2072 1.1 christos arg, b) 2073 1.1 christos 2074 1.1 christos /* This is not an arbitrary limit: the arguments which represent offsets 2075 1.1 christos into the pattern are two bytes long. 
So if 2^16 bytes turns out to 2076 1.1 christos be too small, many things would have to change. */ 2077 1.1 christos /* Any other compiler which, like MSC, has allocation limit below 2^16 2078 1.1 christos bytes will have to use approach similar to what was done below for 2079 1.1 christos MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up 2080 1.1 christos reallocating to 0 bytes. Such thing is not going to work too well. 2081 1.1 christos You have been warned!! */ 2082 1.1 christos # ifndef DEFINED_ONCE 2083 1.1 christos # if defined _MSC_VER && !defined WIN32 2084 1.1 christos /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. 2085 1.1 christos The REALLOC define eliminates a flurry of conversion warnings, 2086 1.1 christos but is not required. */ 2087 1.1 christos # define MAX_BUF_SIZE 65500L 2088 1.1 christos # define REALLOC(p,s) realloc ((p), (size_t) (s)) 2089 1.1 christos # else 2090 1.1 christos # define MAX_BUF_SIZE (1L << 16) 2091 1.1 christos # define REALLOC(p,s) realloc ((p), (s)) 2092 1.1 christos # endif 2093 1.1 christos 2094 1.1 christos /* Extend the buffer by twice its current size via realloc and 2095 1.1 christos reset the pointers that pointed into the old block to point to the 2096 1.1 christos correct places in the new one. If extending the buffer results in it 2097 1.1 christos being larger than MAX_BUF_SIZE, then flag memory exhausted. 
*/ 2098 1.1 christos # if __BOUNDED_POINTERS__ 2099 1.1 christos # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) 2100 1.1 christos # define MOVE_BUFFER_POINTER(P) \ 2101 1.1 christos (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) 2102 1.1 christos # define ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2103 1.1 christos else \ 2104 1.1 christos { \ 2105 1.1 christos SET_HIGH_BOUND (b); \ 2106 1.1 christos SET_HIGH_BOUND (begalt); \ 2107 1.1 christos if (fixup_alt_jump) \ 2108 1.1 christos SET_HIGH_BOUND (fixup_alt_jump); \ 2109 1.1 christos if (laststart) \ 2110 1.1 christos SET_HIGH_BOUND (laststart); \ 2111 1.1 christos if (pending_exact) \ 2112 1.1 christos SET_HIGH_BOUND (pending_exact); \ 2113 1.1 christos } 2114 1.1 christos # else 2115 1.1 christos # define MOVE_BUFFER_POINTER(P) (P) += incr 2116 1.1 christos # define ELSE_EXTEND_BUFFER_HIGH_BOUND 2117 1.1 christos # endif 2118 1.1 christos # endif /* not DEFINED_ONCE */ 2119 1.1 christos 2120 1.1 christos # ifdef WCHAR 2121 1.1 christos # define EXTEND_BUFFER() \ 2122 1.1 christos do { \ 2123 1.1 christos UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 2124 1.1 christos int wchar_count; \ 2125 1.1 christos if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \ 2126 1.1 christos return REG_ESIZE; \ 2127 1.1 christos bufp->allocated <<= 1; \ 2128 1.1 christos if (bufp->allocated > MAX_BUF_SIZE) \ 2129 1.1 christos bufp->allocated = MAX_BUF_SIZE; \ 2130 1.1 christos /* How many characters the new buffer can have? */ \ 2131 1.1 christos wchar_count = bufp->allocated / sizeof(UCHAR_T); \ 2132 1.1 christos if (wchar_count == 0) wchar_count = 1; \ 2133 1.1 christos /* Truncate the buffer to CHAR_T align. 
*/ \ 2134 1.1 christos bufp->allocated = wchar_count * sizeof(UCHAR_T); \ 2135 1.1 christos RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \ 2136 1.1 christos bufp->buffer = (char*)COMPILED_BUFFER_VAR; \ 2137 1.1 christos if (COMPILED_BUFFER_VAR == NULL) \ 2138 1.1 christos return REG_ESPACE; \ 2139 1.1 christos /* If the buffer moved, move all the pointers into it. */ \ 2140 1.1 christos if (old_buffer != COMPILED_BUFFER_VAR) \ 2141 1.1 christos { \ 2142 1.1 christos int incr = COMPILED_BUFFER_VAR - old_buffer; \ 2143 1.1 christos MOVE_BUFFER_POINTER (b); \ 2144 1.1 christos MOVE_BUFFER_POINTER (begalt); \ 2145 1.1 christos if (fixup_alt_jump) \ 2146 1.1 christos MOVE_BUFFER_POINTER (fixup_alt_jump); \ 2147 1.1 christos if (laststart) \ 2148 1.1 christos MOVE_BUFFER_POINTER (laststart); \ 2149 1.1 christos if (pending_exact) \ 2150 1.1 christos MOVE_BUFFER_POINTER (pending_exact); \ 2151 1.1 christos } \ 2152 1.1 christos ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2153 1.1 christos } while (0) 2154 1.1 christos # else /* BYTE */ 2155 1.1 christos # define EXTEND_BUFFER() \ 2156 1.1 christos do { \ 2157 1.1 christos UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 2158 1.1 christos if (bufp->allocated == MAX_BUF_SIZE) \ 2159 1.1 christos return REG_ESIZE; \ 2160 1.1 christos bufp->allocated <<= 1; \ 2161 1.1 christos if (bufp->allocated > MAX_BUF_SIZE) \ 2162 1.1 christos bufp->allocated = MAX_BUF_SIZE; \ 2163 1.1 christos bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \ 2164 1.1 christos bufp->allocated); \ 2165 1.1 christos if (COMPILED_BUFFER_VAR == NULL) \ 2166 1.1 christos return REG_ESPACE; \ 2167 1.1 christos /* If the buffer moved, move all the pointers into it. 
*/ \ 2168 1.1 christos if (old_buffer != COMPILED_BUFFER_VAR) \ 2169 1.1 christos { \ 2170 1.1 christos int incr = COMPILED_BUFFER_VAR - old_buffer; \ 2171 1.1 christos MOVE_BUFFER_POINTER (b); \ 2172 1.1 christos MOVE_BUFFER_POINTER (begalt); \ 2173 1.1 christos if (fixup_alt_jump) \ 2174 1.1 christos MOVE_BUFFER_POINTER (fixup_alt_jump); \ 2175 1.1 christos if (laststart) \ 2176 1.1 christos MOVE_BUFFER_POINTER (laststart); \ 2177 1.1 christos if (pending_exact) \ 2178 1.1 christos MOVE_BUFFER_POINTER (pending_exact); \ 2179 1.1 christos } \ 2180 1.1 christos ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2181 1.1 christos } while (0) 2182 1.1 christos # endif /* WCHAR */ 2183 1.1 christos 2184 1.1 christos # ifndef DEFINED_ONCE 2185 1.1 christos /* Since we have one byte reserved for the register number argument to 2186 1.1 christos {start,stop}_memory, the maximum number of groups we can report 2187 1.1 christos things about is what fits in that byte. */ 2188 1.1 christos # define MAX_REGNUM 255 2189 1.1 christos 2190 1.1 christos /* But patterns can have more than `MAX_REGNUM' registers. We just 2191 1.1 christos ignore the excess. */ 2192 1.1 christos typedef unsigned regnum_t; 2193 1.1 christos 2194 1.1 christos 2195 1.1 christos /* Macros for the compile stack. */ 2196 1.1 christos 2197 1.1 christos /* Since offsets can go either forwards or backwards, this type needs to 2198 1.1 christos be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 2199 1.1 christos /* int may be not enough when sizeof(int) == 2. 
*/ 2200 1.1 christos typedef long pattern_offset_t; 2201 1.1 christos 2202 1.1 christos typedef struct 2203 1.1 christos { 2204 1.1 christos pattern_offset_t begalt_offset; 2205 1.1 christos pattern_offset_t fixup_alt_jump; 2206 1.1 christos pattern_offset_t inner_group_offset; 2207 1.1 christos pattern_offset_t laststart_offset; 2208 1.1 christos regnum_t regnum; 2209 1.1 christos } compile_stack_elt_t; 2210 1.1 christos 2211 1.1 christos 2212 1.1 christos typedef struct 2213 1.1 christos { 2214 1.1 christos compile_stack_elt_t *stack; 2215 1.1 christos unsigned size; 2216 1.1 christos unsigned avail; /* Offset of next open position. */ 2217 1.1 christos } compile_stack_type; 2218 1.1 christos 2219 1.1 christos 2220 1.1 christos # define INIT_COMPILE_STACK_SIZE 32 2221 1.1 christos 2222 1.1 christos # define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 2223 1.1 christos # define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 2224 1.1 christos 2225 1.1 christos /* The next available element. */ 2226 1.1 christos # define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 2227 1.1 christos 2228 1.1 christos # endif /* not DEFINED_ONCE */ 2229 1.1 christos 2230 1.1 christos /* Set the bit for character C in a list. */ 2231 1.1 christos # ifndef DEFINED_ONCE 2232 1.1 christos # define SET_LIST_BIT(c) \ 2233 1.1 christos (b[((unsigned char) (c)) / BYTEWIDTH] \ 2234 1.1 christos |= 1 << (((unsigned char) c) % BYTEWIDTH)) 2235 1.1 christos # endif /* DEFINED_ONCE */ 2236 1.1 christos 2237 1.1 christos /* Get the next unsigned number in the uncompiled pattern. 
*/ 2238 1.1 christos # define GET_UNSIGNED_NUMBER(num) \ 2239 1.1 christos { \ 2240 1.1 christos while (p != pend) \ 2241 1.1 christos { \ 2242 1.1 christos PATFETCH (c); \ 2243 1.1 christos if (c < '0' || c > '9') \ 2244 1.1 christos break; \ 2245 1.1 christos if (num <= RE_DUP_MAX) \ 2246 1.1 christos { \ 2247 1.1 christos if (num < 0) \ 2248 1.1 christos num = 0; \ 2249 1.1 christos num = num * 10 + c - '0'; \ 2250 1.1 christos } \ 2251 1.1 christos } \ 2252 1.1 christos } 2253 1.1 christos 2254 1.1 christos # ifndef DEFINED_ONCE 2255 1.1 christos # if defined _LIBC || WIDE_CHAR_SUPPORT 2256 1.1 christos /* The GNU C library provides support for user-defined character classes 2257 1.1 christos and the functions from ISO C amendement 1. */ 2258 1.1 christos # ifdef CHARCLASS_NAME_MAX 2259 1.1 christos # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX 2260 1.1 christos # else 2261 1.1 christos /* This shouldn't happen but some implementation might still have this 2262 1.1 christos problem. Use a reasonable default value. */ 2263 1.1 christos # define CHAR_CLASS_MAX_LENGTH 256 2264 1.1 christos # endif 2265 1.1 christos 2266 1.1 christos # ifdef _LIBC 2267 1.1 christos # define IS_CHAR_CLASS(string) __wctype (string) 2268 1.1 christos # else 2269 1.1 christos # define IS_CHAR_CLASS(string) wctype (string) 2270 1.1 christos # endif 2271 1.1 christos # else 2272 1.1 christos # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. 
*/ 2273 1.1 christos 2274 1.1 christos # define IS_CHAR_CLASS(string) \ 2275 1.1 christos (STREQ (string, "alpha") || STREQ (string, "upper") \ 2276 1.1 christos || STREQ (string, "lower") || STREQ (string, "digit") \ 2277 1.1 christos || STREQ (string, "alnum") || STREQ (string, "xdigit") \ 2278 1.1 christos || STREQ (string, "space") || STREQ (string, "print") \ 2279 1.1 christos || STREQ (string, "punct") || STREQ (string, "graph") \ 2280 1.1 christos || STREQ (string, "cntrl") || STREQ (string, "blank")) 2281 1.1 christos # endif 2282 1.1 christos # endif /* DEFINED_ONCE */ 2283 1.1 christos 2284 1.1 christos # ifndef MATCH_MAY_ALLOCATE 2286 1.1 christos 2287 1.1 christos /* If we cannot allocate large objects within re_match_2_internal, 2288 1.1 christos we make the fail stack and register vectors global. 2289 1.1 christos The fail stack, we grow to the maximum size when a regexp 2290 1.1 christos is compiled. 2291 1.1 christos The register vectors, we adjust in size each time we 2292 1.1 christos compile a regexp, according to the number of registers it needs. */ 2293 1.1 christos 2294 1.1 christos static PREFIX(fail_stack_type) fail_stack; 2295 1.1 christos 2296 1.1 christos /* Size with which the following vectors are currently allocated. 2297 1.1 christos That is so we can make them bigger as needed, 2298 1.1 christos but never make them smaller. 
*/ 2299 1.1 christos # ifdef DEFINED_ONCE 2300 1.1 christos static int regs_allocated_size; 2301 1.1 christos 2302 1.1 christos static const char ** regstart, ** regend; 2303 1.1 christos static const char ** old_regstart, ** old_regend; 2304 1.1 christos static const char **best_regstart, **best_regend; 2305 1.1 christos static const char **reg_dummy; 2306 1.1 christos # endif /* DEFINED_ONCE */ 2307 1.1 christos 2308 1.1 christos static PREFIX(register_info_type) *PREFIX(reg_info); 2309 1.1 christos static PREFIX(register_info_type) *PREFIX(reg_info_dummy); 2310 1.1 christos 2311 1.1 christos /* Make the register vectors big enough for NUM_REGS registers, 2312 1.1 christos but don't make them smaller. */ 2313 1.1 christos 2314 1.1 christos static void 2315 1.1 christos PREFIX(regex_grow_registers) (num_regs) 2316 1.1 christos int num_regs; 2317 1.1 christos { 2318 1.1 christos if (num_regs > regs_allocated_size) 2319 1.1 christos { 2320 1.1 christos RETALLOC_IF (regstart, num_regs, const char *); 2321 1.1 christos RETALLOC_IF (regend, num_regs, const char *); 2322 1.1 christos RETALLOC_IF (old_regstart, num_regs, const char *); 2323 1.1 christos RETALLOC_IF (old_regend, num_regs, const char *); 2324 1.1 christos RETALLOC_IF (best_regstart, num_regs, const char *); 2325 1.1 christos RETALLOC_IF (best_regend, num_regs, const char *); 2326 1.1 christos RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type)); 2327 1.1 christos RETALLOC_IF (reg_dummy, num_regs, const char *); 2328 1.1 christos RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type)); 2329 1.1 christos 2330 1.1 christos regs_allocated_size = num_regs; 2331 1.1 christos } 2332 1.1 christos } 2333 1.1 christos 2334 1.1 christos # endif /* not MATCH_MAY_ALLOCATE */ 2335 1.1 christos 2336 1.1 christos # ifndef DEFINED_ONCE 2338 1.1 christos static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type 2339 1.1 christos compile_stack, 2340 1.1 christos regnum_t regnum)); 
2341 1.1 christos # endif /* not DEFINED_ONCE */ 2342 1.1 christos 2343 1.1 christos /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. 2344 1.1 christos Returns one of error codes defined in `regex.h', or zero for success. 2345 1.1 christos 2346 1.1 christos Assumes the `allocated' (and perhaps `buffer') and `translate' 2347 1.1 christos fields are set in BUFP on entry. 2348 1.1 christos 2349 1.1 christos If it succeeds, results are put in BUFP (if it returns an error, the 2350 1.1 christos contents of BUFP are undefined): 2351 1.1 christos `buffer' is the compiled pattern; 2352 1.1 christos `syntax' is set to SYNTAX; 2353 1.1 christos `used' is set to the length of the compiled pattern; 2354 1.1 christos `fastmap_accurate' is zero; 2355 1.1 christos `re_nsub' is the number of subexpressions in PATTERN; 2356 1.1 christos `not_bol' and `not_eol' are zero; 2357 1.1 christos 2358 1.1 christos The `fastmap' and `newline_anchor' fields are neither 2359 1.1 christos examined nor set. */ 2360 1.1 christos 2361 1.1 christos /* Return, freeing storage we allocated. */ 2362 1.1 christos # ifdef WCHAR 2363 1.1 christos # define FREE_STACK_RETURN(value) \ 2364 1.1 christos return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value) 2365 1.1 christos # else 2366 1.1 christos # define FREE_STACK_RETURN(value) \ 2367 1.1 christos return (free (compile_stack.stack), value) 2368 1.1 christos # endif /* WCHAR */ 2369 1.1 christos 2370 1.1 christos static reg_errcode_t 2371 1.1 christos PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp) 2372 1.1 christos const char *ARG_PREFIX(pattern); 2373 1.1 christos size_t ARG_PREFIX(size); 2374 1.1 christos reg_syntax_t syntax; 2375 1.1 christos struct re_pattern_buffer *bufp; 2376 1.1 christos { 2377 1.1 christos /* We fetch characters from PATTERN here. 
Even though PATTERN is 2378 1.1 christos `char *' (i.e., signed), we declare these variables as unsigned, so 2379 1.1 christos they can be reliably used as array indices. */ 2380 1.1 christos register UCHAR_T c, c1; 2381 1.1 christos 2382 1.1 christos #ifdef WCHAR 2383 1.1 christos /* A temporary space to keep wchar_t pattern and compiled pattern. */ 2384 1.1 christos CHAR_T *pattern, *COMPILED_BUFFER_VAR; 2385 1.1 christos size_t size; 2386 1.1 christos /* offset buffer for optimization. See convert_mbs_to_wc. */ 2387 1.1 christos int *mbs_offset = NULL; 2388 1.1 christos /* It hold whether each wchar_t is binary data or not. */ 2389 1.1 christos char *is_binary = NULL; 2390 1.1 christos /* A flag whether exactn is handling binary data or not. */ 2391 1.1 christos char is_exactn_bin = FALSE; 2392 1.1 christos #endif /* WCHAR */ 2393 1.1 christos 2394 1.1 christos /* A random temporary spot in PATTERN. */ 2395 1.1 christos const CHAR_T *p1; 2396 1.1 christos 2397 1.1 christos /* Points to the end of the buffer, where we should append. */ 2398 1.1 christos register UCHAR_T *b; 2399 1.1 christos 2400 1.1 christos /* Keeps track of unclosed groups. */ 2401 1.1 christos compile_stack_type compile_stack; 2402 1.1 christos 2403 1.1 christos /* Points to the current (ending) position in the pattern. */ 2404 1.1 christos #ifdef WCHAR 2405 1.1 christos const CHAR_T *p; 2406 1.1 christos const CHAR_T *pend; 2407 1.1 christos #else /* BYTE */ 2408 1.1 christos const CHAR_T *p = pattern; 2409 1.1 christos const CHAR_T *pend = pattern + size; 2410 1.1 christos #endif /* WCHAR */ 2411 1.1 christos 2412 1.1 christos /* How to translate the characters in the pattern. */ 2413 1.1 christos RE_TRANSLATE_TYPE translate = bufp->translate; 2414 1.1 christos 2415 1.1 christos /* Address of the count-byte of the most recently inserted `exactn' 2416 1.1 christos command. 
This makes it possible to tell if a new exact-match 2417 1.1 christos character can be added to that command or if the character requires 2418 1.1 christos a new `exactn' command. */ 2419 1.1 christos UCHAR_T *pending_exact = 0; 2420 1.1 christos 2421 1.1 christos /* Address of start of the most recently finished expression. 2422 1.1 christos This tells, e.g., postfix * where to find the start of its 2423 1.1 christos operand. Reset at the beginning of groups and alternatives. */ 2424 1.1 christos UCHAR_T *laststart = 0; 2425 1.1 christos 2426 1.1 christos /* Address of beginning of regexp, or inside of last group. */ 2427 1.1 christos UCHAR_T *begalt; 2428 1.1 christos 2429 1.1 christos /* Address of the place where a forward jump should go to the end of 2430 1.1 christos the containing expression. Each alternative of an `or' -- except the 2431 1.1 christos last -- ends with a forward jump of this sort. */ 2432 1.1 christos UCHAR_T *fixup_alt_jump = 0; 2433 1.1 christos 2434 1.1 christos /* Counts open-groups as they are encountered. Remembered for the 2435 1.1 christos matching close-group on the compile stack, so the same register 2436 1.1 christos number is put in the stop_memory as the start_memory. */ 2437 1.1 christos regnum_t regnum = 0; 2438 1.1 christos 2439 1.1 christos #ifdef WCHAR 2440 1.1 christos /* Initialize the wchar_t PATTERN and offset_buffer. 
*/ 2441 1.1 christos p = pend = pattern = TALLOC(csize + 1, CHAR_T); 2442 1.1 christos mbs_offset = TALLOC(csize + 1, int); 2443 1.1 christos is_binary = TALLOC(csize + 1, char); 2444 1.1 christos if (pattern == NULL || mbs_offset == NULL || is_binary == NULL) 2445 1.1 christos { 2446 1.1 christos free(pattern); 2447 1.1 christos free(mbs_offset); 2448 1.1 christos free(is_binary); 2449 1.1 christos return REG_ESPACE; 2450 1.1 christos } 2451 1.1 christos pattern[csize] = L'\0'; /* sentinel */ 2452 1.1 christos size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary); 2453 1.1 christos pend = p + size; 2454 1.1 christos if (size < 0) 2455 1.1 christos { 2456 1.1 christos free(pattern); 2457 1.1 christos free(mbs_offset); 2458 1.1 christos free(is_binary); 2459 1.1 christos return REG_BADPAT; 2460 1.1 christos } 2461 1.1 christos #endif 2462 1.1 christos 2463 1.1 christos #ifdef DEBUG 2464 1.1 christos DEBUG_PRINT1 ("\nCompiling pattern: "); 2465 1.1 christos if (debug) 2466 1.1 christos { 2467 1.1 christos unsigned debug_count; 2468 1.1 christos 2469 1.1 christos for (debug_count = 0; debug_count < size; debug_count++) 2470 1.1 christos PUT_CHAR (pattern[debug_count]); 2471 1.1 christos putchar ('\n'); 2472 1.1 christos } 2473 1.1 christos #endif /* DEBUG */ 2474 1.1 christos 2475 1.1 christos /* Initialize the compile stack. */ 2476 1.1 christos compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 2477 1.1 christos if (compile_stack.stack == NULL) 2478 1.1 christos { 2479 1.1 christos #ifdef WCHAR 2480 1.1 christos free(pattern); 2481 1.1 christos free(mbs_offset); 2482 1.1 christos free(is_binary); 2483 1.1 christos #endif 2484 1.1 christos return REG_ESPACE; 2485 1.1 christos } 2486 1.1 christos 2487 1.1 christos compile_stack.size = INIT_COMPILE_STACK_SIZE; 2488 1.1 christos compile_stack.avail = 0; 2489 1.1 christos 2490 1.1 christos /* Initialize the pattern buffer. 
*/ 2491 1.1 christos bufp->syntax = syntax; 2492 1.1 christos bufp->fastmap_accurate = 0; 2493 1.1 christos bufp->not_bol = bufp->not_eol = 0; 2494 1.1 christos 2495 1.1 christos /* Set `used' to zero, so that if we return an error, the pattern 2496 1.1 christos printer (for debugging) will think there's no pattern. We reset it 2497 1.1 christos at the end. */ 2498 1.1 christos bufp->used = 0; 2499 1.1 christos 2500 1.1 christos /* Always count groups, whether or not bufp->no_sub is set. */ 2501 1.1 christos bufp->re_nsub = 0; 2502 1.1 christos 2503 1.1 christos #if !defined emacs && !defined SYNTAX_TABLE 2504 1.1 christos /* Initialize the syntax table. */ 2505 1.1 christos init_syntax_once (); 2506 1.1 christos #endif 2507 1.1 christos 2508 1.1 christos if (bufp->allocated == 0) 2509 1.1 christos { 2510 1.1 christos if (bufp->buffer) 2511 1.1 christos { /* If zero allocated, but buffer is non-null, try to realloc 2512 1.1 christos enough space. This loses if buffer's address is bogus, but 2513 1.1 christos that is the user's responsibility. */ 2514 1.1 christos #ifdef WCHAR 2515 1.1 christos /* Free bufp->buffer and allocate an array for wchar_t pattern 2516 1.1 christos buffer. */ 2517 1.1 christos free(bufp->buffer); 2518 1.1 christos COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T), 2519 1.1 christos UCHAR_T); 2520 1.1 christos #else 2521 1.1 christos RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T); 2522 1.1 christos #endif /* WCHAR */ 2523 1.1 christos } 2524 1.1 christos else 2525 1.1 christos { /* Caller did not allocate a buffer. Do it for them. 
*/ 2526 1.1 christos COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T), 2527 1.1 christos UCHAR_T); 2528 1.1 christos } 2529 1.1 christos 2530 1.1 christos if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE); 2531 1.1 christos #ifdef WCHAR 2532 1.1 christos bufp->buffer = (char*)COMPILED_BUFFER_VAR; 2533 1.1 christos #endif /* WCHAR */ 2534 1.1 christos bufp->allocated = INIT_BUF_SIZE; 2535 1.1 christos } 2536 1.1 christos #ifdef WCHAR 2537 1.1 christos else 2538 1.1 christos COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer; 2539 1.1 christos #endif 2540 1.1 christos 2541 1.1 christos begalt = b = COMPILED_BUFFER_VAR; 2542 1.1 christos 2543 1.1 christos /* Loop through the uncompiled pattern until we're at the end. */ 2544 1.1 christos while (p != pend) 2545 1.1 christos { 2546 1.1 christos PATFETCH (c); 2547 1.1 christos 2548 1.1 christos switch (c) 2549 1.1 christos { 2550 1.1 christos case '^': 2551 1.1 christos { 2552 1.1 christos if ( /* If at start of pattern, it's an operator. */ 2553 1.1 christos p == pattern + 1 2554 1.1 christos /* If context independent, it's an operator. */ 2555 1.1 christos || syntax & RE_CONTEXT_INDEP_ANCHORS 2556 1.1 christos /* Otherwise, depends on what's come before. */ 2557 1.1 christos || PREFIX(at_begline_loc_p) (pattern, p, syntax)) 2558 1.1 christos BUF_PUSH (begline); 2559 1.1 christos else 2560 1.1 christos goto normal_char; 2561 1.1 christos } 2562 1.1 christos break; 2563 1.1 christos 2564 1.1 christos 2565 1.1 christos case '$': 2566 1.1 christos { 2567 1.1 christos if ( /* If at end of pattern, it's an operator. */ 2568 1.1 christos p == pend 2569 1.1 christos /* If context independent, it's an operator. */ 2570 1.1 christos || syntax & RE_CONTEXT_INDEP_ANCHORS 2571 1.1 christos /* Otherwise, depends on what's next. 
*/ 2572 1.1 christos || PREFIX(at_endline_loc_p) (p, pend, syntax)) 2573 1.1 christos BUF_PUSH (endline); 2574 1.1 christos else 2575 1.1 christos goto normal_char; 2576 1.1 christos } 2577 1.1 christos break; 2578 1.1 christos 2579 1.1 christos 2580 1.1 christos case '+': 2581 1.1 christos case '?': 2582 1.1 christos if ((syntax & RE_BK_PLUS_QM) 2583 1.1 christos || (syntax & RE_LIMITED_OPS)) 2584 1.1 christos goto normal_char; 2585 1.1 christos handle_plus: 2586 1.1 christos case '*': 2587 1.1 christos /* If there is no previous pattern... */ 2588 1.1 christos if (!laststart) 2589 1.1 christos { 2590 1.1 christos if (syntax & RE_CONTEXT_INVALID_OPS) 2591 1.1 christos FREE_STACK_RETURN (REG_BADRPT); 2592 1.1 christos else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 2593 1.1 christos goto normal_char; 2594 1.1 christos } 2595 1.1 christos 2596 1.1 christos { 2597 1.1 christos /* Are we optimizing this jump? */ 2598 1.1 christos boolean keep_string_p = false; 2599 1.1 christos 2600 1.1 christos /* 1 means zero (many) matches is allowed. */ 2601 1.1 christos char zero_times_ok = 0, many_times_ok = 0; 2602 1.1 christos 2603 1.1 christos /* If there is a sequence of repetition chars, collapse it 2604 1.1 christos down to just one (the right one). We can't combine 2605 1.1 christos interval operators with these because of, e.g., `a{2}*', 2606 1.1 christos which should only match an even number of `a's. 
*/ 2607 1.1 christos 2608 1.1 christos for (;;) 2609 1.1 christos { 2610 1.1 christos zero_times_ok |= c != '+'; 2611 1.1 christos many_times_ok |= c != '?'; 2612 1.1 christos 2613 1.1 christos if (p == pend) 2614 1.1 christos break; 2615 1.1 christos 2616 1.1 christos PATFETCH (c); 2617 1.1 christos 2618 1.1 christos if (c == '*' 2619 1.1 christos || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 2620 1.1 christos ; 2621 1.1 christos 2622 1.1 christos else if (syntax & RE_BK_PLUS_QM && c == '\\') 2623 1.1 christos { 2624 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2625 1.1 christos 2626 1.1 christos PATFETCH (c1); 2627 1.1 christos if (!(c1 == '+' || c1 == '?')) 2628 1.1 christos { 2629 1.1 christos PATUNFETCH; 2630 1.1 christos PATUNFETCH; 2631 1.1 christos break; 2632 1.1 christos } 2633 1.1 christos 2634 1.1 christos c = c1; 2635 1.1 christos } 2636 1.1 christos else 2637 1.1 christos { 2638 1.1 christos PATUNFETCH; 2639 1.1 christos break; 2640 1.1 christos } 2641 1.1 christos 2642 1.1 christos /* If we get here, we found another repeat character. */ 2643 1.1 christos } 2644 1.1 christos 2645 1.1 christos /* Star, etc. applied to an empty pattern is equivalent 2646 1.1 christos to an empty pattern. */ 2647 1.1 christos if (!laststart) 2648 1.1 christos break; 2649 1.1 christos 2650 1.1 christos /* Now we know whether or not zero matches is allowed 2651 1.1 christos and also whether or not two or more matches is allowed. */ 2652 1.1 christos if (many_times_ok) 2653 1.1 christos { /* More than one repetition is allowed, so put in at the 2654 1.1 christos end a backward relative jump from `b' to before the next 2655 1.1 christos jump we're going to put in below (which jumps from 2656 1.1 christos laststart to after this jump). 2657 1.1 christos 2658 1.1 christos But if we are at the `*' in the exact sequence `.*\n', 2659 1.1 christos insert an unconditional jump backwards to the ., 2660 1.1 christos instead of the beginning of the loop. 
This way we only 2661 1.1 christos push a failure point once, instead of every time 2662 1.1 christos through the loop. */ 2663 1.1 christos assert (p - 1 > pattern); 2664 1.1 christos 2665 1.1 christos /* Allocate the space for the jump. */ 2666 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2667 1.1 christos 2668 1.1 christos /* We know we are not at the first character of the pattern, 2669 1.1 christos because laststart was nonzero. And we've already 2670 1.1 christos incremented `p', by the way, to be the character after 2671 1.1 christos the `*'. Do we have to do something analogous here 2672 1.1 christos for null bytes, because of RE_DOT_NOT_NULL? */ 2673 1.1 christos if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 2674 1.1 christos && zero_times_ok 2675 1.1 christos && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 2676 1.1 christos && !(syntax & RE_DOT_NEWLINE)) 2677 1.1 christos { /* We have .*\n. */ 2678 1.1 christos STORE_JUMP (jump, b, laststart); 2679 1.1 christos keep_string_p = true; 2680 1.1 christos } 2681 1.1 christos else 2682 1.1 christos /* Anything else. */ 2683 1.1 christos STORE_JUMP (maybe_pop_jump, b, laststart - 2684 1.1 christos (1 + OFFSET_ADDRESS_SIZE)); 2685 1.1 christos 2686 1.1 christos /* We've added more stuff to the buffer. */ 2687 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 2688 1.1 christos } 2689 1.1 christos 2690 1.1 christos /* On failure, jump from laststart to b + 3, which will be the 2691 1.1 christos end of the buffer after this jump is inserted. */ 2692 1.1 christos /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of 2693 1.1 christos 'b + 3'. */ 2694 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2695 1.1 christos INSERT_JUMP (keep_string_p ? 
on_failure_keep_string_jump 2696 1.1 christos : on_failure_jump, 2697 1.1 christos laststart, b + 1 + OFFSET_ADDRESS_SIZE); 2698 1.1 christos pending_exact = 0; 2699 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 2700 1.1 christos 2701 1.1 christos if (!zero_times_ok) 2702 1.1 christos { 2703 1.1 christos /* At least one repetition is required, so insert a 2704 1.1 christos `dummy_failure_jump' before the initial 2705 1.1 christos `on_failure_jump' instruction of the loop. This 2706 1.1 christos effects a skip over that instruction the first time 2707 1.1 christos we hit that loop. */ 2708 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2709 1.1 christos INSERT_JUMP (dummy_failure_jump, laststart, laststart + 2710 1.1 christos 2 + 2 * OFFSET_ADDRESS_SIZE); 2711 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 2712 1.1 christos } 2713 1.1 christos } 2714 1.1 christos break; 2715 1.1 christos 2716 1.1 christos 2717 1.1 christos case '.': 2718 1.1 christos laststart = b; 2719 1.1 christos BUF_PUSH (anychar); 2720 1.1 christos break; 2721 1.1 christos 2722 1.1 christos 2723 1.1 christos case '[': 2724 1.1 christos { 2725 1.1 christos boolean had_char_class = false; 2726 1.1 christos #ifdef WCHAR 2727 1.1 christos CHAR_T range_start = 0xffffffff; 2728 1.1 christos #else 2729 1.1 christos unsigned int range_start = 0xffffffff; 2730 1.1 christos #endif 2731 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2732 1.1 christos 2733 1.1 christos #ifdef WCHAR 2734 1.1 christos /* We assume a charset(_not) structure as a wchar_t array. 
2735 1.1 christos charset[0] = (re_opcode_t) charset(_not) 2736 1.1 christos charset[1] = l (= length of char_classes) 2737 1.1 christos charset[2] = m (= length of collating_symbols) 2738 1.1 christos charset[3] = n (= length of equivalence_classes) 2739 1.1 christos charset[4] = o (= length of char_ranges) 2740 1.1 christos charset[5] = p (= length of chars) 2741 1.1 christos 2742 1.1 christos charset[6] = char_class (wctype_t) 2743 1.1 christos charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t) 2744 1.1 christos ... 2745 1.1 christos charset[l+5] = char_class (wctype_t) 2746 1.1 christos 2747 1.1 christos charset[l+6] = collating_symbol (wchar_t) 2748 1.1 christos ... 2749 1.1 christos charset[l+m+5] = collating_symbol (wchar_t) 2750 1.1 christos ifdef _LIBC we use the index if 2751 1.1 christos _NL_COLLATE_SYMB_EXTRAMB instead of 2752 1.1 christos wchar_t string. 2753 1.1 christos 2754 1.1 christos charset[l+m+6] = equivalence_classes (wchar_t) 2755 1.1 christos ... 2756 1.1 christos charset[l+m+n+5] = equivalence_classes (wchar_t) 2757 1.1 christos ifdef _LIBC we use the index in 2758 1.1 christos _NL_COLLATE_WEIGHT instead of 2759 1.1 christos wchar_t string. 2760 1.1 christos 2761 1.1 christos charset[l+m+n+6] = range_start 2762 1.1 christos charset[l+m+n+7] = range_end 2763 1.1 christos ... 2764 1.1 christos charset[l+m+n+2o+4] = range_start 2765 1.1 christos charset[l+m+n+2o+5] = range_end 2766 1.1 christos ifdef _LIBC we use the value looked up 2767 1.1 christos in _NL_COLLATE_COLLSEQ instead of 2768 1.1 christos wchar_t character. 2769 1.1 christos 2770 1.1 christos charset[l+m+n+2o+6] = char 2771 1.1 christos ... 
2772 1.1 christos charset[l+m+n+2o+p+5] = char 2773 1.1 christos 2774 1.1 christos */ 2775 1.1 christos 2776 1.1 christos /* We need at least 6 spaces: the opcode, the length of 2777 1.1 christos char_classes, the length of collating_symbols, the length of 2778 1.1 christos equivalence_classes, the length of char_ranges, the length of 2779 1.1 christos chars. */ 2780 1.1 christos GET_BUFFER_SPACE (6); 2781 1.1 christos 2782 1.1 christos /* Save b as laststart. And We use laststart as the pointer 2783 1.1 christos to the first element of the charset here. 2784 1.1 christos In other words, laststart[i] indicates charset[i]. */ 2785 1.1 christos laststart = b; 2786 1.1 christos 2787 1.1 christos /* We test `*p == '^' twice, instead of using an if 2788 1.1 christos statement, so we only need one BUF_PUSH. */ 2789 1.1 christos BUF_PUSH (*p == '^' ? charset_not : charset); 2790 1.1 christos if (*p == '^') 2791 1.1 christos p++; 2792 1.1 christos 2793 1.1 christos /* Push the length of char_classes, the length of 2794 1.1 christos collating_symbols, the length of equivalence_classes, the 2795 1.1 christos length of char_ranges and the length of chars. */ 2796 1.1 christos BUF_PUSH_3 (0, 0, 0); 2797 1.1 christos BUF_PUSH_2 (0, 0); 2798 1.1 christos 2799 1.1 christos /* Remember the first position in the bracket expression. */ 2800 1.1 christos p1 = p; 2801 1.1 christos 2802 1.1 christos /* charset_not matches newline according to a syntax bit. */ 2803 1.1 christos if ((re_opcode_t) b[-6] == charset_not 2804 1.1 christos && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2805 1.1 christos { 2806 1.1 christos BUF_PUSH('\n'); 2807 1.1 christos laststart[5]++; /* Update the length of characters */ 2808 1.1 christos } 2809 1.1 christos 2810 1.1 christos /* Read in characters and ranges, setting map bits. 
*/ 2811 1.1 christos for (;;) 2812 1.1 christos { 2813 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2814 1.1 christos 2815 1.1 christos PATFETCH (c); 2816 1.1 christos 2817 1.1 christos /* \ might escape characters inside [...] and [^...]. */ 2818 1.1 christos if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 2819 1.1 christos { 2820 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2821 1.1 christos 2822 1.1 christos PATFETCH (c1); 2823 1.1 christos BUF_PUSH(c1); 2824 1.1 christos laststart[5]++; /* Update the length of chars */ 2825 1.1 christos range_start = c1; 2826 1.1 christos continue; 2827 1.1 christos } 2828 1.1 christos 2829 1.1 christos /* Could be the end of the bracket expression. If it's 2830 1.1 christos not (i.e., when the bracket expression is `[]' so 2831 1.1 christos far), the ']' character bit gets set way below. */ 2832 1.1 christos if (c == ']' && p != p1 + 1) 2833 1.1 christos break; 2834 1.1 christos 2835 1.1 christos /* Look ahead to see if it's a range when the last thing 2836 1.1 christos was a character class. */ 2837 1.1 christos if (had_char_class && c == '-' && *p != ']') 2838 1.1 christos FREE_STACK_RETURN (REG_ERANGE); 2839 1.1 christos 2840 1.1 christos /* Look ahead to see if it's a range when the last thing 2841 1.1 christos was a character: if this is a hyphen not at the 2842 1.1 christos beginning or the end of a list, then it's the range 2843 1.1 christos operator. */ 2844 1.1 christos if (c == '-' 2845 1.1 christos && !(p - 2 >= pattern && p[-2] == '[') 2846 1.1 christos && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 2847 1.1 christos && *p != ']') 2848 1.1 christos { 2849 1.1 christos reg_errcode_t ret; 2850 1.1 christos /* Allocate the space for range_start and range_end. */ 2851 1.1 christos GET_BUFFER_SPACE (2); 2852 1.1 christos /* Update the pointer to indicate end of buffer. 
*/ 2853 1.1 christos b += 2; 2854 1.1 christos ret = wcs_compile_range (range_start, &p, pend, translate, 2855 1.1 christos syntax, b, laststart); 2856 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2857 1.1 christos range_start = 0xffffffff; 2858 1.1 christos } 2859 1.1 christos else if (p[0] == '-' && p[1] != ']') 2860 1.1 christos { /* This handles ranges made up of characters only. */ 2861 1.1 christos reg_errcode_t ret; 2862 1.1 christos 2863 1.1 christos /* Move past the `-'. */ 2864 1.1 christos PATFETCH (c1); 2865 1.1 christos /* Allocate the space for range_start and range_end. */ 2866 1.1 christos GET_BUFFER_SPACE (2); 2867 1.1 christos /* Update the pointer to indicate end of buffer. */ 2868 1.1 christos b += 2; 2869 1.1 christos ret = wcs_compile_range (c, &p, pend, translate, syntax, b, 2870 1.1 christos laststart); 2871 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2872 1.1 christos range_start = 0xffffffff; 2873 1.1 christos } 2874 1.1 christos 2875 1.1 christos /* See if we're at the beginning of a possible character 2876 1.1 christos class. */ 2877 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2878 1.1 christos { /* Leave room for the null. */ 2879 1.1 christos char str[CHAR_CLASS_MAX_LENGTH + 1]; 2880 1.1 christos 2881 1.1 christos PATFETCH (c); 2882 1.1 christos c1 = 0; 2883 1.1 christos 2884 1.1 christos /* If pattern is `[[:'. */ 2885 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2886 1.1 christos 2887 1.1 christos for (;;) 2888 1.1 christos { 2889 1.1 christos PATFETCH (c); 2890 1.1 christos if ((c == ':' && *p == ']') || p == pend) 2891 1.1 christos break; 2892 1.1 christos if (c1 < CHAR_CLASS_MAX_LENGTH) 2893 1.1 christos str[c1++] = c; 2894 1.1 christos else 2895 1.1 christos /* This is in any case an invalid class name. 
*/ 2896 1.1 christos str[0] = '\0'; 2897 1.1 christos } 2898 1.1 christos str[c1] = '\0'; 2899 1.1 christos 2900 1.1 christos /* If isn't a word bracketed by `[:' and `:]': 2901 1.1 christos undo the ending character, the letters, and leave 2902 1.1 christos the leading `:' and `[' (but store them as character). */ 2903 1.1 christos if (c == ':' && *p == ']') 2904 1.1 christos { 2905 1.1 christos wctype_t wt; 2906 1.1 christos uintptr_t alignedp; 2907 1.1 christos 2908 1.1 christos /* Query the character class as wctype_t. */ 2909 1.1 christos wt = IS_CHAR_CLASS (str); 2910 1.1 christos if (wt == 0) 2911 1.1 christos FREE_STACK_RETURN (REG_ECTYPE); 2912 1.1 christos 2913 1.1 christos /* Throw away the ] at the end of the character 2914 1.1 christos class. */ 2915 1.1 christos PATFETCH (c); 2916 1.1 christos 2917 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2918 1.1 christos 2919 1.1 christos /* Allocate the space for character class. */ 2920 1.1 christos GET_BUFFER_SPACE(CHAR_CLASS_SIZE); 2921 1.1 christos /* Update the pointer to indicate end of buffer. */ 2922 1.1 christos b += CHAR_CLASS_SIZE; 2923 1.1 christos /* Move data which follow character classes 2924 1.1 christos not to violate the data. */ 2925 1.1 christos insert_space(CHAR_CLASS_SIZE, 2926 1.1 christos laststart + 6 + laststart[1], 2927 1.1 christos b - 1); 2928 1.1 christos alignedp = ((uintptr_t)(laststart + 6 + laststart[1]) 2929 1.1 christos + __alignof__(wctype_t) - 1) 2930 1.1 christos & ~(uintptr_t)(__alignof__(wctype_t) - 1); 2931 1.1 christos /* Store the character class. 
*/ 2932 1.1 christos *((wctype_t*)alignedp) = wt; 2933 1.1 christos /* Update length of char_classes */ 2934 1.1 christos laststart[1] += CHAR_CLASS_SIZE; 2935 1.1 christos 2936 1.1 christos had_char_class = true; 2937 1.1 christos } 2938 1.1 christos else 2939 1.1 christos { 2940 1.1 christos c1++; 2941 1.1 christos while (c1--) 2942 1.1 christos PATUNFETCH; 2943 1.1 christos BUF_PUSH ('['); 2944 1.1 christos BUF_PUSH (':'); 2945 1.1 christos laststart[5] += 2; /* Update the length of characters */ 2946 1.1 christos range_start = ':'; 2947 1.1 christos had_char_class = false; 2948 1.1 christos } 2949 1.1 christos } 2950 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '=' 2951 1.1 christos || *p == '.')) 2952 1.1 christos { 2953 1.1 christos CHAR_T str[128]; /* Should be large enough. */ 2954 1.1 christos CHAR_T delim = *p; /* '=' or '.' */ 2955 1.1 christos # ifdef _LIBC 2956 1.1 christos uint32_t nrules = 2957 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 2958 1.1 christos # endif 2959 1.1 christos PATFETCH (c); 2960 1.1 christos c1 = 0; 2961 1.1 christos 2962 1.1 christos /* If pattern is `[[=' or '[[.'. */ 2963 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2964 1.1 christos 2965 1.1 christos for (;;) 2966 1.1 christos { 2967 1.1 christos PATFETCH (c); 2968 1.1 christos if ((c == delim && *p == ']') || p == pend) 2969 1.1 christos break; 2970 1.1 christos if (c1 < sizeof (str) - 1) 2971 1.1 christos str[c1++] = c; 2972 1.1 christos else 2973 1.1 christos /* This is in any case an invalid class name. */ 2974 1.1 christos str[0] = '\0'; 2975 1.1 christos } 2976 1.1 christos str[c1] = '\0'; 2977 1.1 christos 2978 1.1 christos if (c == delim && *p == ']' && str[0] != '\0') 2979 1.1 christos { 2980 1.1 christos unsigned int i, offset; 2981 1.1 christos /* If we have no collation data we use the default 2982 1.1 christos collation in which each character is in a class 2983 1.1 christos by itself. 
It also means that ASCII is the 2984 1.1 christos character set and therefore we cannot have character 2985 1.1 christos with more than one byte in the multibyte 2986 1.1 christos representation. */ 2987 1.1 christos 2988 1.1 christos /* If not defined _LIBC, we push the name and 2989 1.1 christos `\0' for the sake of matching performance. */ 2990 1.1 christos int datasize = c1 + 1; 2991 1.1 christos 2992 1.1 christos # ifdef _LIBC 2993 1.1 christos int32_t idx = 0; 2994 1.1 christos if (nrules == 0) 2995 1.1 christos # endif 2996 1.1 christos { 2997 1.1 christos if (c1 != 1) 2998 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 2999 1.1 christos } 3000 1.1 christos # ifdef _LIBC 3001 1.1 christos else 3002 1.1 christos { 3003 1.1 christos const int32_t *table; 3004 1.1 christos const int32_t *weights; 3005 1.1 christos const int32_t *extra; 3006 1.1 christos const int32_t *indirect; 3007 1.1 christos wint_t *cp; 3008 1.1 christos 3009 1.1 christos /* This #include defines a local function! */ 3010 1.1 christos # include <locale/weightwc.h> 3011 1.1 christos 3012 1.1 christos if(delim == '=') 3013 1.1 christos { 3014 1.1 christos /* We push the index for equivalence class. */ 3015 1.1 christos cp = (wint_t*)str; 3016 1.1 christos 3017 1.1 christos table = (const int32_t *) 3018 1.1 christos _NL_CURRENT (LC_COLLATE, 3019 1.1 christos _NL_COLLATE_TABLEWC); 3020 1.1 christos weights = (const int32_t *) 3021 1.1 christos _NL_CURRENT (LC_COLLATE, 3022 1.1 christos _NL_COLLATE_WEIGHTWC); 3023 1.1 christos extra = (const int32_t *) 3024 1.1 christos _NL_CURRENT (LC_COLLATE, 3025 1.1 christos _NL_COLLATE_EXTRAWC); 3026 1.1 christos indirect = (const int32_t *) 3027 1.1 christos _NL_CURRENT (LC_COLLATE, 3028 1.1 christos _NL_COLLATE_INDIRECTWC); 3029 1.1 christos 3030 1.1 christos idx = findidx ((const wint_t**)&cp); 3031 1.1 christos if (idx == 0 || cp < (wint_t*) str + c1) 3032 1.1 christos /* This is no valid character. 
*/ 3033 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3034 1.1 christos 3035 1.1 christos str[0] = (wchar_t)idx; 3036 1.1 christos } 3037 1.1 christos else /* delim == '.' */ 3038 1.1 christos { 3039 1.1 christos /* We push collation sequence value 3040 1.1 christos for collating symbol. */ 3041 1.1 christos int32_t table_size; 3042 1.1 christos const int32_t *symb_table; 3043 1.1 christos const unsigned char *extra; 3044 1.1 christos int32_t idx; 3045 1.1 christos int32_t elem; 3046 1.1 christos int32_t second; 3047 1.1 christos int32_t hash; 3048 1.1 christos char char_str[c1]; 3049 1.1 christos 3050 1.1 christos /* We have to convert the name to a single-byte 3051 1.1 christos string. This is possible since the names 3052 1.1 christos consist of ASCII characters and the internal 3053 1.1 christos representation is UCS4. */ 3054 1.1 christos for (i = 0; i < c1; ++i) 3055 1.1 christos char_str[i] = str[i]; 3056 1.1 christos 3057 1.1 christos table_size = 3058 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, 3059 1.1 christos _NL_COLLATE_SYMB_HASH_SIZEMB); 3060 1.1 christos symb_table = (const int32_t *) 3061 1.1 christos _NL_CURRENT (LC_COLLATE, 3062 1.1 christos _NL_COLLATE_SYMB_TABLEMB); 3063 1.1 christos extra = (const unsigned char *) 3064 1.1 christos _NL_CURRENT (LC_COLLATE, 3065 1.1 christos _NL_COLLATE_SYMB_EXTRAMB); 3066 1.1 christos 3067 1.1 christos /* Locate the character in the hashing table. */ 3068 1.1 christos hash = elem_hash (char_str, c1); 3069 1.1 christos 3070 1.1 christos idx = 0; 3071 1.1 christos elem = hash % table_size; 3072 1.1 christos second = hash % (table_size - 2); 3073 1.1 christos while (symb_table[2 * elem] != 0) 3074 1.1 christos { 3075 1.1 christos /* First compare the hashing value. 
*/ 3076 1.1 christos if (symb_table[2 * elem] == hash 3077 1.1 christos && c1 == extra[symb_table[2 * elem + 1]] 3078 1.1 christos && memcmp (char_str, 3079 1.1 christos &extra[symb_table[2 * elem + 1] 3080 1.1 christos + 1], c1) == 0) 3081 1.1 christos { 3082 1.1 christos /* Yep, this is the entry. */ 3083 1.1 christos idx = symb_table[2 * elem + 1]; 3084 1.1 christos idx += 1 + extra[idx]; 3085 1.1 christos break; 3086 1.1 christos } 3087 1.1 christos 3088 1.1 christos /* Next entry. */ 3089 1.1 christos elem += second; 3090 1.1 christos } 3091 1.1 christos 3092 1.1 christos if (symb_table[2 * elem] != 0) 3093 1.1 christos { 3094 1.1 christos /* Compute the index of the byte sequence 3095 1.1 christos in the table. */ 3096 1.1 christos idx += 1 + extra[idx]; 3097 1.1 christos /* Adjust for the alignment. */ 3098 1.1 christos idx = (idx + 3) & ~3; 3099 1.1 christos 3100 1.1 christos str[0] = (wchar_t) idx + 4; 3101 1.1 christos } 3102 1.1 christos else if (symb_table[2 * elem] == 0 && c1 == 1) 3103 1.1 christos { 3104 1.1 christos /* No valid character. Match it as a 3105 1.1 christos single byte character. */ 3106 1.1 christos had_char_class = false; 3107 1.1 christos BUF_PUSH(str[0]); 3108 1.1 christos /* Update the length of characters */ 3109 1.1 christos laststart[5]++; 3110 1.1 christos range_start = str[0]; 3111 1.1 christos 3112 1.1 christos /* Throw away the ] at the end of the 3113 1.1 christos collating symbol. */ 3114 1.1 christos PATFETCH (c); 3115 1.1 christos /* exit from the switch block. */ 3116 1.1 christos continue; 3117 1.1 christos } 3118 1.1 christos else 3119 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3120 1.1 christos } 3121 1.1 christos datasize = 1; 3122 1.1 christos } 3123 1.1 christos # endif 3124 1.1 christos /* Throw away the ] at the end of the equivalence 3125 1.1 christos class (or collating symbol). 
*/ 3126 1.1 christos PATFETCH (c); 3127 1.1 christos 3128 1.1 christos /* Allocate the space for the equivalence class 3129 1.1 christos (or collating symbol) (and '\0' if needed). */ 3130 1.1 christos GET_BUFFER_SPACE(datasize); 3131 1.1 christos /* Update the pointer to indicate end of buffer. */ 3132 1.1 christos b += datasize; 3133 1.1 christos 3134 1.1 christos if (delim == '=') 3135 1.1 christos { /* equivalence class */ 3136 1.1 christos /* Calculate the offset of char_ranges, 3137 1.1 christos which is next to equivalence_classes. */ 3138 1.1 christos offset = laststart[1] + laststart[2] 3139 1.1 christos + laststart[3] +6; 3140 1.1 christos /* Insert space. */ 3141 1.1 christos insert_space(datasize, laststart + offset, b - 1); 3142 1.1 christos 3143 1.1 christos /* Write the equivalence_class and \0. */ 3144 1.1 christos for (i = 0 ; i < datasize ; i++) 3145 1.1 christos laststart[offset + i] = str[i]; 3146 1.1 christos 3147 1.1 christos /* Update the length of equivalence_classes. */ 3148 1.1 christos laststart[3] += datasize; 3149 1.1 christos had_char_class = true; 3150 1.1 christos } 3151 1.1 christos else /* delim == '.' */ 3152 1.1 christos { /* collating symbol */ 3153 1.1 christos /* Calculate the offset of the equivalence_classes, 3154 1.1 christos which is next to collating_symbols. */ 3155 1.1 christos offset = laststart[1] + laststart[2] + 6; 3156 1.1 christos /* Insert space and write the collationg_symbol 3157 1.1 christos and \0. */ 3158 1.1 christos insert_space(datasize, laststart + offset, b-1); 3159 1.1 christos for (i = 0 ; i < datasize ; i++) 3160 1.1 christos laststart[offset + i] = str[i]; 3161 1.1 christos 3162 1.1 christos /* In re_match_2_internal if range_start < -1, we 3163 1.1 christos assume -range_start is the offset of the 3164 1.1 christos collating symbol which is specified as 3165 1.1 christos the character of the range start. 
So we assign 3166 1.1 christos -(laststart[1] + laststart[2] + 6) to 3167 1.1 christos range_start. */ 3168 1.1 christos range_start = -(laststart[1] + laststart[2] + 6); 3169 1.1 christos /* Update the length of collating_symbol. */ 3170 1.1 christos laststart[2] += datasize; 3171 1.1 christos had_char_class = false; 3172 1.1 christos } 3173 1.1 christos } 3174 1.1 christos else 3175 1.1 christos { 3176 1.1 christos c1++; 3177 1.1 christos while (c1--) 3178 1.1 christos PATUNFETCH; 3179 1.1 christos BUF_PUSH ('['); 3180 1.1 christos BUF_PUSH (delim); 3181 1.1 christos laststart[5] += 2; /* Update the length of characters */ 3182 1.1 christos range_start = delim; 3183 1.1 christos had_char_class = false; 3184 1.1 christos } 3185 1.1 christos } 3186 1.1 christos else 3187 1.1 christos { 3188 1.1 christos had_char_class = false; 3189 1.1 christos BUF_PUSH(c); 3190 1.1 christos laststart[5]++; /* Update the length of characters */ 3191 1.1 christos range_start = c; 3192 1.1 christos } 3193 1.1 christos } 3194 1.1 christos 3195 1.1 christos #else /* BYTE */ 3196 1.1 christos /* Ensure that we have enough space to push a charset: the 3197 1.1 christos opcode, the length count, and the bitset; 34 bytes in all. */ 3198 1.1 christos GET_BUFFER_SPACE (34); 3199 1.1 christos 3200 1.1 christos laststart = b; 3201 1.1 christos 3202 1.1 christos /* We test `*p == '^' twice, instead of using an if 3203 1.1 christos statement, so we only need one BUF_PUSH. */ 3204 1.1 christos BUF_PUSH (*p == '^' ? charset_not : charset); 3205 1.1 christos if (*p == '^') 3206 1.1 christos p++; 3207 1.1 christos 3208 1.1 christos /* Remember the first position in the bracket expression. */ 3209 1.1 christos p1 = p; 3210 1.1 christos 3211 1.1 christos /* Push the number of bytes in the bitmap. */ 3212 1.1 christos BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 3213 1.1 christos 3214 1.1 christos /* Clear the whole map. 
*/ 3215 1.1 christos bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 3216 1.1 christos 3217 1.1 christos /* charset_not matches newline according to a syntax bit. */ 3218 1.1 christos if ((re_opcode_t) b[-2] == charset_not 3219 1.1 christos && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 3220 1.1 christos SET_LIST_BIT ('\n'); 3221 1.1 christos 3222 1.1 christos /* Read in characters and ranges, setting map bits. */ 3223 1.1 christos for (;;) 3224 1.1 christos { 3225 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3226 1.1 christos 3227 1.1 christos PATFETCH (c); 3228 1.1 christos 3229 1.1 christos /* \ might escape characters inside [...] and [^...]. */ 3230 1.1 christos if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 3231 1.1 christos { 3232 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 3233 1.1 christos 3234 1.1 christos PATFETCH (c1); 3235 1.1 christos SET_LIST_BIT (c1); 3236 1.1 christos range_start = c1; 3237 1.1 christos continue; 3238 1.1 christos } 3239 1.1 christos 3240 1.1 christos /* Could be the end of the bracket expression. If it's 3241 1.1 christos not (i.e., when the bracket expression is `[]' so 3242 1.1 christos far), the ']' character bit gets set way below. */ 3243 1.1 christos if (c == ']' && p != p1 + 1) 3244 1.1 christos break; 3245 1.1 christos 3246 1.1 christos /* Look ahead to see if it's a range when the last thing 3247 1.1 christos was a character class. */ 3248 1.1 christos if (had_char_class && c == '-' && *p != ']') 3249 1.1 christos FREE_STACK_RETURN (REG_ERANGE); 3250 1.1 christos 3251 1.1 christos /* Look ahead to see if it's a range when the last thing 3252 1.1 christos was a character: if this is a hyphen not at the 3253 1.1 christos beginning or the end of a list, then it's the range 3254 1.1 christos operator. 
*/ 3255 1.1 christos if (c == '-' 3256 1.1 christos && !(p - 2 >= pattern && p[-2] == '[') 3257 1.1 christos && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 3258 1.1 christos && *p != ']') 3259 1.1 christos { 3260 1.1 christos reg_errcode_t ret 3261 1.1 christos = byte_compile_range (range_start, &p, pend, translate, 3262 1.1 christos syntax, b); 3263 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 3264 1.1 christos range_start = 0xffffffff; 3265 1.1 christos } 3266 1.1 christos 3267 1.1 christos else if (p[0] == '-' && p[1] != ']') 3268 1.1 christos { /* This handles ranges made up of characters only. */ 3269 1.1 christos reg_errcode_t ret; 3270 1.1 christos 3271 1.1 christos /* Move past the `-'. */ 3272 1.1 christos PATFETCH (c1); 3273 1.1 christos 3274 1.1 christos ret = byte_compile_range (c, &p, pend, translate, syntax, b); 3275 1.1 christos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 3276 1.1 christos range_start = 0xffffffff; 3277 1.1 christos } 3278 1.1 christos 3279 1.1 christos /* See if we're at the beginning of a possible character 3280 1.1 christos class. */ 3281 1.1 christos 3282 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 3283 1.1 christos { /* Leave room for the null. */ 3284 1.1 christos char str[CHAR_CLASS_MAX_LENGTH + 1]; 3285 1.1 christos 3286 1.1 christos PATFETCH (c); 3287 1.1 christos c1 = 0; 3288 1.1 christos 3289 1.1 christos /* If pattern is `[[:'. */ 3290 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3291 1.1 christos 3292 1.1 christos for (;;) 3293 1.1 christos { 3294 1.1 christos PATFETCH (c); 3295 1.1 christos if ((c == ':' && *p == ']') || p == pend) 3296 1.1 christos break; 3297 1.1 christos if (c1 < CHAR_CLASS_MAX_LENGTH) 3298 1.1 christos str[c1++] = c; 3299 1.1 christos else 3300 1.1 christos /* This is in any case an invalid class name. 
*/ 3301 1.1 christos str[0] = '\0'; 3302 1.1 christos } 3303 1.1 christos str[c1] = '\0'; 3304 1.1 christos 3305 1.1 christos /* If isn't a word bracketed by `[:' and `:]': 3306 1.1 christos undo the ending character, the letters, and leave 3307 1.1 christos the leading `:' and `[' (but set bits for them). */ 3308 1.1 christos if (c == ':' && *p == ']') 3309 1.1 christos { 3310 1.1 christos # if defined _LIBC || WIDE_CHAR_SUPPORT 3311 1.1 christos boolean is_lower = STREQ (str, "lower"); 3312 1.1 christos boolean is_upper = STREQ (str, "upper"); 3313 1.1 christos wctype_t wt; 3314 1.1 christos int ch; 3315 1.1 christos 3316 1.1 christos wt = IS_CHAR_CLASS (str); 3317 1.1 christos if (wt == 0) 3318 1.1 christos FREE_STACK_RETURN (REG_ECTYPE); 3319 1.1 christos 3320 1.1 christos /* Throw away the ] at the end of the character 3321 1.1 christos class. */ 3322 1.1 christos PATFETCH (c); 3323 1.1 christos 3324 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3325 1.1 christos 3326 1.1 christos for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 3327 1.1 christos { 3328 1.1 christos if (iswctype (btowc (ch), wt)) 3329 1.1 christos SET_LIST_BIT (ch); 3330 1.1 christos 3331 1.1 christos if (translate && (is_upper || is_lower) 3332 1.1 christos && (ISUPPER (ch) || ISLOWER (ch))) 3333 1.1 christos SET_LIST_BIT (ch); 3334 1.1 christos } 3335 1.1 christos 3336 1.1 christos had_char_class = true; 3337 1.1 christos # else 3338 1.1 christos int ch; 3339 1.1 christos boolean is_alnum = STREQ (str, "alnum"); 3340 1.1 christos boolean is_alpha = STREQ (str, "alpha"); 3341 1.1 christos boolean is_blank = STREQ (str, "blank"); 3342 1.1 christos boolean is_cntrl = STREQ (str, "cntrl"); 3343 1.1 christos boolean is_digit = STREQ (str, "digit"); 3344 1.1 christos boolean is_graph = STREQ (str, "graph"); 3345 1.1 christos boolean is_lower = STREQ (str, "lower"); 3346 1.1 christos boolean is_print = STREQ (str, "print"); 3347 1.1 christos boolean is_punct = STREQ (str, "punct"); 3348 1.1 
christos boolean is_space = STREQ (str, "space"); 3349 1.1 christos boolean is_upper = STREQ (str, "upper"); 3350 1.1 christos boolean is_xdigit = STREQ (str, "xdigit"); 3351 1.1 christos 3352 1.1 christos if (!IS_CHAR_CLASS (str)) 3353 1.1 christos FREE_STACK_RETURN (REG_ECTYPE); 3354 1.1 christos 3355 1.1 christos /* Throw away the ] at the end of the character 3356 1.1 christos class. */ 3357 1.1 christos PATFETCH (c); 3358 1.1 christos 3359 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3360 1.1 christos 3361 1.1 christos for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 3362 1.1 christos { 3363 1.1 christos /* This was split into 3 if's to 3364 1.1 christos avoid an arbitrary limit in some compiler. */ 3365 1.1 christos if ( (is_alnum && ISALNUM (ch)) 3366 1.1 christos || (is_alpha && ISALPHA (ch)) 3367 1.1 christos || (is_blank && ISBLANK (ch)) 3368 1.1 christos || (is_cntrl && ISCNTRL (ch))) 3369 1.1 christos SET_LIST_BIT (ch); 3370 1.1 christos if ( (is_digit && ISDIGIT (ch)) 3371 1.1 christos || (is_graph && ISGRAPH (ch)) 3372 1.1 christos || (is_lower && ISLOWER (ch)) 3373 1.1 christos || (is_print && ISPRINT (ch))) 3374 1.1 christos SET_LIST_BIT (ch); 3375 1.1 christos if ( (is_punct && ISPUNCT (ch)) 3376 1.1 christos || (is_space && ISSPACE (ch)) 3377 1.1 christos || (is_upper && ISUPPER (ch)) 3378 1.1 christos || (is_xdigit && ISXDIGIT (ch))) 3379 1.1 christos SET_LIST_BIT (ch); 3380 1.1 christos if ( translate && (is_upper || is_lower) 3381 1.1 christos && (ISUPPER (ch) || ISLOWER (ch))) 3382 1.1 christos SET_LIST_BIT (ch); 3383 1.1 christos } 3384 1.1 christos had_char_class = true; 3385 1.1 christos # endif /* libc || wctype.h */ 3386 1.1 christos } 3387 1.1 christos else 3388 1.1 christos { 3389 1.1 christos c1++; 3390 1.1 christos while (c1--) 3391 1.1 christos PATUNFETCH; 3392 1.1 christos SET_LIST_BIT ('['); 3393 1.1 christos SET_LIST_BIT (':'); 3394 1.1 christos range_start = ':'; 3395 1.1 christos had_char_class = false; 3396 1.1 christos } 
3397 1.1 christos } 3398 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=') 3399 1.1 christos { 3400 1.1 christos unsigned char str[MB_LEN_MAX + 1]; 3401 1.1 christos # ifdef _LIBC 3402 1.1 christos uint32_t nrules = 3403 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3404 1.1 christos # endif 3405 1.1 christos 3406 1.1 christos PATFETCH (c); 3407 1.1 christos c1 = 0; 3408 1.1 christos 3409 1.1 christos /* If pattern is `[[='. */ 3410 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3411 1.1 christos 3412 1.1 christos for (;;) 3413 1.1 christos { 3414 1.1 christos PATFETCH (c); 3415 1.1 christos if ((c == '=' && *p == ']') || p == pend) 3416 1.1 christos break; 3417 1.1 christos if (c1 < MB_LEN_MAX) 3418 1.1 christos str[c1++] = c; 3419 1.1 christos else 3420 1.1 christos /* This is in any case an invalid class name. */ 3421 1.1 christos str[0] = '\0'; 3422 1.1 christos } 3423 1.1 christos str[c1] = '\0'; 3424 1.1 christos 3425 1.1 christos if (c == '=' && *p == ']' && str[0] != '\0') 3426 1.1 christos { 3427 1.1 christos /* If we have no collation data we use the default 3428 1.1 christos collation in which each character is in a class 3429 1.1 christos by itself. It also means that ASCII is the 3430 1.1 christos character set and therefore we cannot have character 3431 1.1 christos with more than one byte in the multibyte 3432 1.1 christos representation. */ 3433 1.1 christos # ifdef _LIBC 3434 1.1 christos if (nrules == 0) 3435 1.1 christos # endif 3436 1.1 christos { 3437 1.1 christos if (c1 != 1) 3438 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3439 1.1 christos 3440 1.1 christos /* Throw away the ] at the end of the equivalence 3441 1.1 christos class. */ 3442 1.1 christos PATFETCH (c); 3443 1.1 christos 3444 1.1 christos /* Set the bit for the character. 
*/ 3445 1.1 christos SET_LIST_BIT (str[0]); 3446 1.1 christos } 3447 1.1 christos # ifdef _LIBC 3448 1.1 christos else 3449 1.1 christos { 3450 1.1 christos /* Try to match the byte sequence in `str' against 3451 1.1 christos those known to the collate implementation. 3452 1.1 christos First find out whether the bytes in `str' are 3453 1.1 christos actually from exactly one character. */ 3454 1.1 christos const int32_t *table; 3455 1.1 christos const unsigned char *weights; 3456 1.1 christos const unsigned char *extra; 3457 1.1 christos const int32_t *indirect; 3458 1.1 christos int32_t idx; 3459 1.1 christos const unsigned char *cp = str; 3460 1.1 christos int ch; 3461 1.1 christos 3462 1.1 christos /* This #include defines a local function! */ 3463 1.1 christos # include <locale/weight.h> 3464 1.1 christos 3465 1.1 christos table = (const int32_t *) 3466 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3467 1.1 christos weights = (const unsigned char *) 3468 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 3469 1.1 christos extra = (const unsigned char *) 3470 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3471 1.1 christos indirect = (const int32_t *) 3472 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3473 1.1 christos 3474 1.1 christos idx = findidx (&cp); 3475 1.1 christos if (idx == 0 || cp < str + c1) 3476 1.1 christos /* This is no valid character. */ 3477 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3478 1.1 christos 3479 1.1 christos /* Throw away the ] at the end of the equivalence 3480 1.1 christos class. */ 3481 1.1 christos PATFETCH (c); 3482 1.1 christos 3483 1.1 christos /* Now we have to go through the whole table 3484 1.1 christos and find all characters which have the same 3485 1.1 christos first level weight. 3486 1.1 christos 3487 1.1 christos XXX Note that this is not entirely correct.
3488 1.1 christos we would have to match multibyte sequences 3489 1.1 christos but this is not possible with the current 3490 1.1 christos implementation. */ 3491 1.1 christos for (ch = 1; ch < 256; ++ch) 3492 1.1 christos /* XXX This test would have to be changed if we 3493 1.1 christos would allow matching multibyte sequences. */ 3494 1.1 christos if (table[ch] > 0) 3495 1.1 christos { 3496 1.1 christos int32_t idx2 = table[ch]; 3497 1.1 christos size_t len = weights[idx2]; 3498 1.1 christos 3499 1.1 christos /* Test whether the lengths match. */ 3500 1.1 christos if (weights[idx] == len) 3501 1.1 christos { 3502 1.1 christos /* They do. Now compare the bytes of 3503 1.1 christos the weight. */ 3504 1.1 christos size_t cnt = 0; 3505 1.1 christos 3506 1.1 christos while (cnt < len 3507 1.1 christos && (weights[idx + 1 + cnt] 3508 1.1 christos == weights[idx2 + 1 + cnt])) 3509 1.1 christos ++cnt; 3510 1.1 christos 3511 1.1 christos if (cnt == len) 3512 1.1 christos /* They match. Mark the character as 3513 1.1 christos acceptable. */ 3514 1.1 christos SET_LIST_BIT (ch); 3515 1.1 christos } 3516 1.1 christos } 3517 1.1 christos } 3518 1.1 christos # endif 3519 1.1 christos had_char_class = true; 3520 1.1 christos } 3521 1.1 christos else 3522 1.1 christos { 3523 1.1 christos c1++; 3524 1.1 christos while (c1--) 3525 1.1 christos PATUNFETCH; 3526 1.1 christos SET_LIST_BIT ('['); 3527 1.1 christos SET_LIST_BIT ('='); 3528 1.1 christos range_start = '='; 3529 1.1 christos had_char_class = false; 3530 1.1 christos } 3531 1.1 christos } 3532 1.1 christos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.') 3533 1.1 christos { 3534 1.1 christos unsigned char str[128]; /* Should be large enough.
*/ 3535 1.1 christos # ifdef _LIBC 3536 1.1 christos uint32_t nrules = 3537 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3538 1.1 christos # endif 3539 1.1 christos 3540 1.1 christos PATFETCH (c); 3541 1.1 christos c1 = 0; 3542 1.1 christos 3543 1.1 christos /* If pattern is `[[.'. */ 3544 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3545 1.1 christos 3546 1.1 christos for (;;) 3547 1.1 christos { 3548 1.1 christos PATFETCH (c); 3549 1.1 christos if ((c == '.' && *p == ']') || p == pend) 3550 1.1 christos break; 3551 1.1 christos if (c1 < sizeof (str)) 3552 1.1 christos str[c1++] = c; 3553 1.1 christos else 3554 1.1 christos /* This is in any case an invalid class name. */ 3555 1.1 christos str[0] = '\0'; 3556 1.1 christos } 3557 1.1 christos str[c1] = '\0'; 3558 1.1 christos 3559 1.1 christos if (c == '.' && *p == ']' && str[0] != '\0') 3560 1.1 christos { 3561 1.1 christos /* If we have no collation data we use the default 3562 1.1 christos collation in which each character is the name 3563 1.1 christos for its own class which contains only the one 3564 1.1 christos character. It also means that ASCII is the 3565 1.1 christos character set and therefore we cannot have character 3566 1.1 christos with more than one byte in the multibyte 3567 1.1 christos representation. */ 3568 1.1 christos # ifdef _LIBC 3569 1.1 christos if (nrules == 0) 3570 1.1 christos # endif 3571 1.1 christos { 3572 1.1 christos if (c1 != 1) 3573 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3574 1.1 christos 3575 1.1 christos /* Throw away the ] at the end of the equivalence 3576 1.1 christos class. */ 3577 1.1 christos PATFETCH (c); 3578 1.1 christos 3579 1.1 christos /* Set the bit for the character. 
*/ 3580 1.1 christos SET_LIST_BIT (str[0]); 3581 1.1 christos range_start = ((const unsigned char *) str)[0]; 3582 1.1 christos } 3583 1.1 christos # ifdef _LIBC 3584 1.1 christos else 3585 1.1 christos { 3586 1.1 christos /* Try to match the byte sequence in `str' against 3587 1.1 christos those known to the collate implementation. 3588 1.1 christos First find out whether the bytes in `str' are 3589 1.1 christos actually from exactly one character. */ 3590 1.1 christos int32_t table_size; 3591 1.1 christos const int32_t *symb_table; 3592 1.1 christos const unsigned char *extra; 3593 1.1 christos int32_t idx; 3594 1.1 christos int32_t elem; 3595 1.1 christos int32_t second; 3596 1.1 christos int32_t hash; 3597 1.1 christos 3598 1.1 christos table_size = 3599 1.1 christos _NL_CURRENT_WORD (LC_COLLATE, 3600 1.1 christos _NL_COLLATE_SYMB_HASH_SIZEMB); 3601 1.1 christos symb_table = (const int32_t *) 3602 1.1 christos _NL_CURRENT (LC_COLLATE, 3603 1.1 christos _NL_COLLATE_SYMB_TABLEMB); 3604 1.1 christos extra = (const unsigned char *) 3605 1.1 christos _NL_CURRENT (LC_COLLATE, 3606 1.1 christos _NL_COLLATE_SYMB_EXTRAMB); 3607 1.1 christos 3608 1.1 christos /* Locate the character in the hashing table. */ 3609 1.1 christos hash = elem_hash (str, c1); 3610 1.1 christos 3611 1.1 christos idx = 0; 3612 1.1 christos elem = hash % table_size; 3613 1.1 christos second = hash % (table_size - 2); 3614 1.1 christos while (symb_table[2 * elem] != 0) 3615 1.1 christos { 3616 1.1 christos /* First compare the hashing value. */ 3617 1.1 christos if (symb_table[2 * elem] == hash 3618 1.1 christos && c1 == extra[symb_table[2 * elem + 1]] 3619 1.1 christos && memcmp (str, 3620 1.1 christos &extra[symb_table[2 * elem + 1] 3621 1.1 christos + 1], 3622 1.1 christos c1) == 0) 3623 1.1 christos { 3624 1.1 christos /* Yep, this is the entry. 
*/ 3625 1.1 christos idx = symb_table[2 * elem + 1]; 3626 1.1 christos idx += 1 + extra[idx]; 3627 1.1 christos break; 3628 1.1 christos } 3629 1.1 christos 3630 1.1 christos /* Next entry. */ 3631 1.1 christos elem += second; 3632 1.1 christos } 3633 1.1 christos 3634 1.1 christos if (symb_table[2 * elem] == 0) 3635 1.1 christos /* This is no valid character. */ 3636 1.1 christos FREE_STACK_RETURN (REG_ECOLLATE); 3637 1.1 christos 3638 1.1 christos /* Throw away the ] at the end of the equivalence 3639 1.1 christos class. */ 3640 1.1 christos PATFETCH (c); 3641 1.1 christos 3642 1.1 christos /* Now add the multibyte character(s) we found 3643 1.1 christos to the accept list. 3644 1.1 christos 3645 1.1 christos XXX Note that this is not entirely correct. 3646 1.1 christos we would have to match multibyte sequences 3647 1.1 christos but this is not possible with the current 3648 1.1 christos implementation. Also, we have to match 3649 1.1 christos collating symbols, which expand to more than 3650 1.1 christos one file, as a whole and not allow the 3651 1.1 christos individual bytes. 
*/ 3652 1.1 christos c1 = extra[idx++]; 3653 1.1 christos if (c1 == 1) 3654 1.1 christos range_start = extra[idx]; 3655 1.1 christos while (c1-- > 0) 3656 1.1 christos { 3657 1.1 christos SET_LIST_BIT (extra[idx]); 3658 1.1 christos ++idx; 3659 1.1 christos } 3660 1.1 christos } 3661 1.1 christos # endif 3662 1.1 christos had_char_class = false; 3663 1.1 christos } 3664 1.1 christos else 3665 1.1 christos { 3666 1.1 christos c1++; 3667 1.1 christos while (c1--) 3668 1.1 christos PATUNFETCH; 3669 1.1 christos SET_LIST_BIT ('['); 3670 1.1 christos SET_LIST_BIT ('.'); 3671 1.1 christos range_start = '.'; 3672 1.1 christos had_char_class = false; 3673 1.1 christos } 3674 1.1 christos } 3675 1.1 christos else 3676 1.1 christos { 3677 1.1 christos had_char_class = false; 3678 1.1 christos SET_LIST_BIT (c); 3679 1.1 christos range_start = c; 3680 1.1 christos } 3681 1.1 christos } 3682 1.1 christos 3683 1.1 christos /* Discard any (non)matching list bytes that are all 0 at the 3684 1.1 christos end of the map. Decrease the map-length byte too. 
*/ 3685 1.1 christos while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 3686 1.1 christos b[-1]--; 3687 1.1 christos b += b[-1]; 3688 1.1 christos #endif /* WCHAR */ 3689 1.1 christos } 3690 1.1 christos break; 3691 1.1 christos 3692 1.1 christos 3693 1.1 christos case '(': 3694 1.1 christos if (syntax & RE_NO_BK_PARENS) 3695 1.1 christos goto handle_open; 3696 1.1 christos else 3697 1.1 christos goto normal_char; 3698 1.1 christos 3699 1.1 christos 3700 1.1 christos case ')': 3701 1.1 christos if (syntax & RE_NO_BK_PARENS) 3702 1.1 christos goto handle_close; 3703 1.1 christos else 3704 1.1 christos goto normal_char; 3705 1.1 christos 3706 1.1 christos 3707 1.1 christos case '\n': 3708 1.1 christos if (syntax & RE_NEWLINE_ALT) 3709 1.1 christos goto handle_alt; 3710 1.1 christos else 3711 1.1 christos goto normal_char; 3712 1.1 christos 3713 1.1 christos 3714 1.1 christos case '|': 3715 1.1 christos if (syntax & RE_NO_BK_VBAR) 3716 1.1 christos goto handle_alt; 3717 1.1 christos else 3718 1.1 christos goto normal_char; 3719 1.1 christos 3720 1.1 christos 3721 1.1 christos case '{': 3722 1.1 christos if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 3723 1.1 christos goto handle_interval; 3724 1.1 christos else 3725 1.1 christos goto normal_char; 3726 1.1 christos 3727 1.1 christos 3728 1.1 christos case '\\': 3729 1.1 christos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 3730 1.1 christos 3731 1.1 christos /* Do not translate the character after the \, so that we can 3732 1.1 christos distinguish, e.g., \B from \b, even if we normally would 3733 1.1 christos translate, e.g., B to b. 
*/ 3734 1.1 christos PATFETCH_RAW (c); 3735 1.1 christos 3736 1.1 christos switch (c) 3737 1.1 christos { 3738 1.1 christos case '(': 3739 1.1 christos if (syntax & RE_NO_BK_PARENS) 3740 1.1 christos goto normal_backslash; 3741 1.1 christos 3742 1.1 christos handle_open: 3743 1.1 christos bufp->re_nsub++; 3744 1.1 christos regnum++; 3745 1.1 christos 3746 1.1 christos if (COMPILE_STACK_FULL) 3747 1.1 christos { 3748 1.1 christos RETALLOC (compile_stack.stack, compile_stack.size << 1, 3749 1.1 christos compile_stack_elt_t); 3750 1.1 christos if (compile_stack.stack == NULL) return REG_ESPACE; 3751 1.1 christos 3752 1.1 christos compile_stack.size <<= 1; 3753 1.1 christos } 3754 1.1 christos 3755 1.1 christos /* These are the values to restore when we hit end of this 3756 1.1 christos group. They are all relative offsets, so that if the 3757 1.1 christos whole pattern moves because of realloc, they will still 3758 1.1 christos be valid. */ 3759 1.1 christos COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR; 3760 1.1 christos COMPILE_STACK_TOP.fixup_alt_jump 3761 1.1 christos = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0; 3762 1.1 christos COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR; 3763 1.1 christos COMPILE_STACK_TOP.regnum = regnum; 3764 1.1 christos 3765 1.1 christos /* We will eventually replace the 0 with the number of 3766 1.1 christos groups inner to this one. But do not push a 3767 1.1 christos start_memory for groups beyond the last one we can 3768 1.1 christos represent in the compiled pattern. 
*/ 3769 1.1 christos if (regnum <= MAX_REGNUM) 3770 1.1 christos { 3771 1.1 christos COMPILE_STACK_TOP.inner_group_offset = b 3772 1.1 christos - COMPILED_BUFFER_VAR + 2; 3773 1.1 christos BUF_PUSH_3 (start_memory, regnum, 0); 3774 1.1 christos } 3775 1.1 christos 3776 1.1 christos compile_stack.avail++; 3777 1.1 christos 3778 1.1 christos fixup_alt_jump = 0; 3779 1.1 christos laststart = 0; 3780 1.1 christos begalt = b; 3781 1.1 christos /* If we've reached MAX_REGNUM groups, then this open 3782 1.1 christos won't actually generate any code, so we'll have to 3783 1.1 christos clear pending_exact explicitly. */ 3784 1.1 christos pending_exact = 0; 3785 1.1 christos break; 3786 1.1 christos 3787 1.1 christos 3788 1.1 christos case ')': 3789 1.1 christos if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 3790 1.1 christos 3791 1.1 christos if (COMPILE_STACK_EMPTY) 3792 1.1 christos { 3793 1.1 christos if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 3794 1.1 christos goto normal_backslash; 3795 1.1 christos else 3796 1.1 christos FREE_STACK_RETURN (REG_ERPAREN); 3797 1.1 christos } 3798 1.1 christos 3799 1.1 christos handle_close: 3800 1.1 christos if (fixup_alt_jump) 3801 1.1 christos { /* Push a dummy failure point at the end of the 3802 1.1 christos alternative for a possible future 3803 1.1 christos `pop_failure_jump' to pop. See comments at 3804 1.1 christos `push_dummy_failure' in `re_match_2'. */ 3805 1.1 christos BUF_PUSH (push_dummy_failure); 3806 1.1 christos 3807 1.1 christos /* We allocated space for this jump when we assigned 3808 1.1 christos to `fixup_alt_jump', in the `handle_alt' case below. */ 3809 1.1 christos STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 3810 1.1 christos } 3811 1.1 christos 3812 1.1 christos /* See similar code for backslashed left paren above. 
*/ 3813 1.1 christos if (COMPILE_STACK_EMPTY) 3814 1.1 christos { 3815 1.1 christos if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 3816 1.1 christos goto normal_char; 3817 1.1 christos else 3818 1.1 christos FREE_STACK_RETURN (REG_ERPAREN); 3819 1.1 christos } 3820 1.1 christos 3821 1.1 christos /* Since we just checked for an empty stack above, this 3822 1.1 christos ``can't happen''. */ 3823 1.1 christos assert (compile_stack.avail != 0); 3824 1.1 christos { 3825 1.1 christos /* We don't just want to restore into `regnum', because 3826 1.1 christos later groups should continue to be numbered higher, 3827 1.1 christos as in `(ab)c(de)' -- the second group is #2. */ 3828 1.1 christos regnum_t this_group_regnum; 3829 1.1 christos 3830 1.1 christos compile_stack.avail--; 3831 1.1 christos begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset; 3832 1.1 christos fixup_alt_jump 3833 1.1 christos = COMPILE_STACK_TOP.fixup_alt_jump 3834 1.1 christos ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1 3835 1.1 christos : 0; 3836 1.1 christos laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset; 3837 1.1 christos this_group_regnum = COMPILE_STACK_TOP.regnum; 3838 1.1 christos /* If we've reached MAX_REGNUM groups, then this open 3839 1.1 christos won't actually generate any code, so we'll have to 3840 1.1 christos clear pending_exact explicitly. */ 3841 1.1 christos pending_exact = 0; 3842 1.1 christos 3843 1.1 christos /* We're at the end of the group, so now we know how many 3844 1.1 christos groups were inside this one. 
*/ 3845 1.1 christos if (this_group_regnum <= MAX_REGNUM) 3846 1.1 christos { 3847 1.1 christos UCHAR_T *inner_group_loc 3848 1.1 christos = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset; 3849 1.1 christos 3850 1.1 christos *inner_group_loc = regnum - this_group_regnum; 3851 1.1 christos BUF_PUSH_3 (stop_memory, this_group_regnum, 3852 1.1 christos regnum - this_group_regnum); 3853 1.1 christos } 3854 1.1 christos } 3855 1.1 christos break; 3856 1.1 christos 3857 1.1 christos 3858 1.1 christos case '|': /* `\|'. */ 3859 1.1 christos if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 3860 1.1 christos goto normal_backslash; 3861 1.1 christos handle_alt: 3862 1.1 christos if (syntax & RE_LIMITED_OPS) 3863 1.1 christos goto normal_char; 3864 1.1 christos 3865 1.1 christos /* Insert before the previous alternative a jump which 3866 1.1 christos jumps to this alternative if the former fails. */ 3867 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3868 1.1 christos INSERT_JUMP (on_failure_jump, begalt, 3869 1.1 christos b + 2 + 2 * OFFSET_ADDRESS_SIZE); 3870 1.1 christos pending_exact = 0; 3871 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 3872 1.1 christos 3873 1.1 christos /* The alternative before this one has a jump after it 3874 1.1 christos which gets executed if it gets matched. Adjust that 3875 1.1 christos jump so it will jump to this alternative's analogous 3876 1.1 christos jump (put in below, which in turn will jump to the next 3877 1.1 christos (if any) alternative's such jump, etc.). The last such 3878 1.1 christos jump jumps to the correct final destination. A picture: 3879 1.1 christos _____ _____ 3880 1.1 christos | | | | 3881 1.1 christos | v | v 3882 1.1 christos a | b | c 3883 1.1 christos 3884 1.1 christos If we are at `b', then fixup_alt_jump right now points to a 3885 1.1 christos three-byte space after `a'. 
We'll put in the jump, set 3886 1.1 christos fixup_alt_jump to right after `b', and leave behind three 3887 1.1 christos bytes which we'll fill in when we get to after `c'. */ 3888 1.1 christos 3889 1.1 christos if (fixup_alt_jump) 3890 1.1 christos STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 3891 1.1 christos 3892 1.1 christos /* Mark and leave space for a jump after this alternative, 3893 1.1 christos to be filled in later either by next alternative or 3894 1.1 christos when know we're at the end of a series of alternatives. */ 3895 1.1 christos fixup_alt_jump = b; 3896 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3897 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 3898 1.1 christos 3899 1.1 christos laststart = 0; 3900 1.1 christos begalt = b; 3901 1.1 christos break; 3902 1.1 christos 3903 1.1 christos 3904 1.1 christos case '{': 3905 1.1 christos /* If \{ is a literal. */ 3906 1.1 christos if (!(syntax & RE_INTERVALS) 3907 1.1 christos /* If we're at `\{' and it's not the open-interval 3908 1.1 christos operator. */ 3909 1.1 christos || (syntax & RE_NO_BK_BRACES)) 3910 1.1 christos goto normal_backslash; 3911 1.1 christos 3912 1.1 christos handle_interval: 3913 1.1 christos { 3914 1.1 christos /* If got here, then the syntax allows intervals. */ 3915 1.1 christos 3916 1.1 christos /* At least (most) this many matches must be made. */ 3917 1.1 christos int lower_bound = -1, upper_bound = -1; 3918 1.1 christos 3919 1.1 christos /* Place in the uncompiled pattern (i.e., just after 3920 1.1 christos the '{') to go back to if the interval is invalid. 
*/ 3921 1.1 christos const CHAR_T *beg_interval = p; 3922 1.1 christos 3923 1.1 christos if (p == pend) 3924 1.1 christos goto invalid_interval; 3925 1.1 christos 3926 1.1 christos GET_UNSIGNED_NUMBER (lower_bound); 3927 1.1 christos 3928 1.1 christos if (c == ',') 3929 1.1 christos { 3930 1.1 christos GET_UNSIGNED_NUMBER (upper_bound); 3931 1.1 christos if (upper_bound < 0) 3932 1.1 christos upper_bound = RE_DUP_MAX; 3933 1.1 christos } 3934 1.1 christos else 3935 1.1 christos /* Interval such as `{1}' => match exactly once. */ 3936 1.1 christos upper_bound = lower_bound; 3937 1.1 christos 3938 1.1 christos if (! (0 <= lower_bound && lower_bound <= upper_bound)) 3939 1.1 christos goto invalid_interval; 3940 1.1 christos 3941 1.1 christos if (!(syntax & RE_NO_BK_BRACES)) 3942 1.1 christos { 3943 1.1 christos if (c != '\\' || p == pend) 3944 1.1 christos goto invalid_interval; 3945 1.1 christos PATFETCH (c); 3946 1.1 christos } 3947 1.1 christos 3948 1.1 christos if (c != '}') 3949 1.1 christos goto invalid_interval; 3950 1.1 christos 3951 1.1 christos /* If it's invalid to have no preceding re. */ 3952 1.1 christos if (!laststart) 3953 1.1 christos { 3954 1.1 christos if (syntax & RE_CONTEXT_INVALID_OPS 3955 1.1 christos && !(syntax & RE_INVALID_INTERVAL_ORD)) 3956 1.1 christos FREE_STACK_RETURN (REG_BADRPT); 3957 1.1 christos else if (syntax & RE_CONTEXT_INDEP_OPS) 3958 1.1 christos laststart = b; 3959 1.1 christos else 3960 1.1 christos goto unfetch_interval; 3961 1.1 christos } 3962 1.1 christos 3963 1.1 christos /* We just parsed a valid interval. */ 3964 1.1 christos 3965 1.1 christos if (RE_DUP_MAX < upper_bound) 3966 1.1 christos FREE_STACK_RETURN (REG_BADBR); 3967 1.1 christos 3968 1.1 christos /* If the upper bound is zero, don't want to succeed at 3969 1.1 christos all; jump from `laststart' to `b + 3', which will be 3970 1.1 christos the end of the buffer after we insert the jump. 
*/ 3971 1.1 christos /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' 3972 1.1 christos instead of 'b + 3'. */ 3973 1.1 christos if (upper_bound == 0) 3974 1.1 christos { 3975 1.1 christos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3976 1.1 christos INSERT_JUMP (jump, laststart, b + 1 3977 1.1 christos + OFFSET_ADDRESS_SIZE); 3978 1.1 christos b += 1 + OFFSET_ADDRESS_SIZE; 3979 1.1 christos } 3980 1.1 christos 3981 1.1 christos /* Otherwise, we have a nontrivial interval. When 3982 1.1 christos we're all done, the pattern will look like: 3983 1.1 christos set_number_at <jump count> <upper bound> 3984 1.1 christos set_number_at <succeed_n count> <lower bound> 3985 1.1 christos succeed_n <after jump addr> <succeed_n count> 3986 1.1 christos <body of loop> 3987 1.1 christos jump_n <succeed_n addr> <jump count> 3988 1.1 christos (The upper bound and `jump_n' are omitted if 3989 1.1 christos `upper_bound' is 1, though.) */ 3990 1.1 christos else 3991 1.1 christos { /* If the upper bound is > 1, we need to insert 3992 1.1 christos more at the end of the loop. */ 3993 1.1 christos unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE + 3994 1.1 christos (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE); 3995 1.1 christos 3996 1.1 christos GET_BUFFER_SPACE (nbytes); 3997 1.1 christos 3998 1.1 christos /* Initialize lower bound of the `succeed_n', even 3999 1.1 christos though it will be set during matching by its 4000 1.1 christos attendant `set_number_at' (inserted next), 4001 1.1 christos because `re_compile_fastmap' needs to know. 4002 1.1 christos Jump to the `jump_n' we might insert below. */ 4003 1.1 christos INSERT_JUMP2 (succeed_n, laststart, 4004 1.1 christos b + 1 + 2 * OFFSET_ADDRESS_SIZE 4005 1.1 christos + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE) 4006 1.1 christos , lower_bound); 4007 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 4008 1.1 christos 4009 1.1 christos /* Code to initialize the lower bound. Insert 4010 1.1 christos before the `succeed_n'. 
The `5' is the last two 4011 1.1 christos bytes of this `set_number_at', plus 3 bytes of 4012 1.1 christos the following `succeed_n'. */ 4013 1.1 christos /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE' 4014 1.1 christos is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE' 4015 1.1 christos of the following `succeed_n'. */ 4016 1.1 christos PREFIX(insert_op2) (set_number_at, laststart, 1 4017 1.1 christos + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b); 4018 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 4019 1.1 christos 4020 1.1 christos if (upper_bound > 1) 4021 1.1 christos { /* More than one repetition is allowed, so 4022 1.1 christos append a backward jump to the `succeed_n' 4023 1.1 christos that starts this interval. 4024 1.1 christos 4025 1.1 christos When we've reached this during matching, 4026 1.1 christos we'll have matched the interval once, so 4027 1.1 christos jump back only `upper_bound - 1' times. */ 4028 1.1 christos STORE_JUMP2 (jump_n, b, laststart 4029 1.1 christos + 2 * OFFSET_ADDRESS_SIZE + 1, 4030 1.1 christos upper_bound - 1); 4031 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 4032 1.1 christos 4033 1.1 christos /* The location we want to set is the second 4034 1.1 christos parameter of the `jump_n'; that is `b-2' as 4035 1.1 christos an absolute address. `laststart' will be 4036 1.1 christos the `set_number_at' we're about to insert; 4037 1.1 christos `laststart+3' the number to set, the source 4038 1.1 christos for the relative address. But we are 4039 1.1 christos inserting into the middle of the pattern -- 4040 1.1 christos so everything is getting moved up by 5. 4041 1.1 christos Conclusion: (b - 2) - (laststart + 3) + 5, 4042 1.1 christos i.e., b - laststart. 4043 1.1 christos 4044 1.1 christos We insert this at the beginning of the loop 4045 1.1 christos so that if we fail during matching, we'll 4046 1.1 christos reinitialize the bounds. 
*/ 4047 1.1 christos PREFIX(insert_op2) (set_number_at, laststart, 4048 1.1 christos b - laststart, 4049 1.1 christos upper_bound - 1, b); 4050 1.1 christos b += 1 + 2 * OFFSET_ADDRESS_SIZE; 4051 1.1 christos } 4052 1.1 christos } 4053 1.1 christos pending_exact = 0; 4054 1.1 christos break; 4055 1.1 christos 4056 1.1 christos invalid_interval: 4057 1.1 christos if (!(syntax & RE_INVALID_INTERVAL_ORD)) 4058 1.1 christos FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR); 4059 1.1 christos unfetch_interval: 4060 1.1 christos /* Match the characters as literals. */ 4061 1.1 christos p = beg_interval; 4062 1.1 christos c = '{'; 4063 1.1 christos if (syntax & RE_NO_BK_BRACES) 4064 1.1 christos goto normal_char; 4065 1.1 christos else 4066 1.1 christos goto normal_backslash; 4067 1.1 christos } 4068 1.1 christos 4069 1.1 christos #ifdef emacs 4070 1.1 christos /* There is no way to specify the before_dot and after_dot 4071 1.1 christos operators. rms says this is ok. --karl */ 4072 1.1 christos case '=': 4073 1.1 christos BUF_PUSH (at_dot); 4074 1.1 christos break; 4075 1.1 christos 4076 1.1 christos case 's': 4077 1.1 christos laststart = b; 4078 1.1 christos PATFETCH (c); 4079 1.1 christos BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 4080 1.1 christos break; 4081 1.1 christos 4082 1.1 christos case 'S': 4083 1.1 christos laststart = b; 4084 1.1 christos PATFETCH (c); 4085 1.1 christos BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 4086 1.1 christos break; 4087 1.1 christos #endif /* emacs */ 4088 1.1 christos 4089 1.1 christos 4090 1.1 christos case 'w': 4091 1.1 christos if (syntax & RE_NO_GNU_OPS) 4092 1.1 christos goto normal_char; 4093 1.1 christos laststart = b; 4094 1.1 christos BUF_PUSH (wordchar); 4095 1.1 christos break; 4096 1.1 christos 4097 1.1 christos 4098 1.1 christos case 'W': 4099 1.1 christos if (syntax & RE_NO_GNU_OPS) 4100 1.1 christos goto normal_char; 4101 1.1 christos laststart = b; 4102 1.1 christos BUF_PUSH (notwordchar); 4103 1.1 
christos break; 4104 1.1 christos 4105 1.1 christos 4106 1.1 christos case '<': 4107 1.1 christos if (syntax & RE_NO_GNU_OPS) 4108 1.1 christos goto normal_char; 4109 1.1 christos BUF_PUSH (wordbeg); 4110 1.1 christos break; 4111 1.1 christos 4112 1.1 christos case '>': 4113 1.1 christos if (syntax & RE_NO_GNU_OPS) 4114 1.1 christos goto normal_char; 4115 1.1 christos BUF_PUSH (wordend); 4116 1.1 christos break; 4117 1.1 christos 4118 1.1 christos case 'b': 4119 1.1 christos if (syntax & RE_NO_GNU_OPS) 4120 1.1 christos goto normal_char; 4121 1.1 christos BUF_PUSH (wordbound); 4122 1.1 christos break; 4123 1.1 christos 4124 1.1 christos case 'B': 4125 1.1 christos if (syntax & RE_NO_GNU_OPS) 4126 1.1 christos goto normal_char; 4127 1.1 christos BUF_PUSH (notwordbound); 4128 1.1 christos break; 4129 1.1 christos 4130 1.1 christos case '`': 4131 1.1 christos if (syntax & RE_NO_GNU_OPS) 4132 1.1 christos goto normal_char; 4133 1.1 christos BUF_PUSH (begbuf); 4134 1.1 christos break; 4135 1.1 christos 4136 1.1 christos case '\'': 4137 1.1 christos if (syntax & RE_NO_GNU_OPS) 4138 1.1 christos goto normal_char; 4139 1.1 christos BUF_PUSH (endbuf); 4140 1.1 christos break; 4141 1.1 christos 4142 1.1 christos case '1': case '2': case '3': case '4': case '5': 4143 1.1 christos case '6': case '7': case '8': case '9': 4144 1.1 christos if (syntax & RE_NO_BK_REFS) 4145 1.1 christos goto normal_char; 4146 1.1 christos 4147 1.1 christos c1 = c - '0'; 4148 1.1 christos 4149 1.1 christos if (c1 > regnum) 4150 1.1 christos FREE_STACK_RETURN (REG_ESUBREG); 4151 1.1 christos 4152 1.1 christos /* Can't back reference to a subexpression if inside of it. 
*/ 4153 1.1 christos if (group_in_compile_stack (compile_stack, (regnum_t) c1)) 4154 1.1 christos goto normal_char; 4155 1.1 christos 4156 1.1 christos laststart = b; 4157 1.1 christos BUF_PUSH_2 (duplicate, c1); 4158 1.1 christos break; 4159 1.1 christos 4160 1.1 christos 4161 1.1 christos case '+': 4162 1.1 christos case '?': 4163 1.1 christos if (syntax & RE_BK_PLUS_QM) 4164 1.1 christos goto handle_plus; 4165 1.1 christos else 4166 1.1 christos goto normal_backslash; 4167 1.1 christos 4168 1.1 christos default: 4169 1.1 christos normal_backslash: 4170 1.1 christos /* You might think it would be useful for \ to mean 4171 1.1 christos not to translate; but if we don't translate it 4172 1.1 christos it will never match anything. */ 4173 1.1 christos c = TRANSLATE (c); 4174 1.1 christos goto normal_char; 4175 1.1 christos } 4176 1.1 christos break; 4177 1.1 christos 4178 1.1 christos 4179 1.1 christos default: 4180 1.1 christos /* Expects the character in `c'. */ 4181 1.1 christos normal_char: 4182 1.1 christos /* If no exactn currently being built. */ 4183 1.1 christos if (!pending_exact 4184 1.1 christos #ifdef WCHAR 4185 1.1 christos /* If last exactn handle binary(or character) and 4186 1.1 christos new exactn handle character(or binary). */ 4187 1.1 christos || is_exactn_bin != is_binary[p - 1 - pattern] 4188 1.1 christos #endif /* WCHAR */ 4189 1.1 christos 4190 1.1 christos /* If last exactn not at current position. */ 4191 1.1 christos || pending_exact + *pending_exact + 1 != b 4192 1.1 christos 4193 1.1 christos /* We have only one byte following the exactn for the count. */ 4194 1.1 christos || *pending_exact == (1 << BYTEWIDTH) - 1 4195 1.1 christos 4196 1.1 christos /* If followed by a repetition operator. */ 4197 1.1 christos || *p == '*' || *p == '^' 4198 1.1 christos || ((syntax & RE_BK_PLUS_QM) 4199 1.1 christos ? 
*p == '\\' && (p[1] == '+' || p[1] == '?') 4200 1.1 christos : (*p == '+' || *p == '?')) 4201 1.1 christos || ((syntax & RE_INTERVALS) 4202 1.1 christos && ((syntax & RE_NO_BK_BRACES) 4203 1.1 christos ? *p == '{' 4204 1.1 christos : (p[0] == '\\' && p[1] == '{')))) 4205 1.1 christos { 4206 1.1 christos /* Start building a new exactn. */ 4207 1.1 christos 4208 1.1 christos laststart = b; 4209 1.1 christos 4210 1.1 christos #ifdef WCHAR 4211 1.1 christos /* Is this exactn binary data or character? */ 4212 1.1 christos is_exactn_bin = is_binary[p - 1 - pattern]; 4213 1.1 christos if (is_exactn_bin) 4214 1.1 christos BUF_PUSH_2 (exactn_bin, 0); 4215 1.1 christos else 4216 1.1 christos BUF_PUSH_2 (exactn, 0); 4217 1.1 christos #else 4218 1.1 christos BUF_PUSH_2 (exactn, 0); 4219 1.1 christos #endif /* WCHAR */ 4220 1.1 christos pending_exact = b - 1; 4221 1.1 christos } 4222 1.1 christos 4223 1.1 christos BUF_PUSH (c); 4224 1.1 christos (*pending_exact)++; 4225 1.1 christos break; 4226 1.1 christos } /* switch (c) */ 4227 1.1 christos } /* while p != pend */ 4228 1.1 christos 4229 1.1 christos 4230 1.1 christos /* Through the pattern now. */ 4231 1.1 christos 4232 1.1 christos if (fixup_alt_jump) 4233 1.1 christos STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 4234 1.1 christos 4235 1.1 christos if (!COMPILE_STACK_EMPTY) 4236 1.1 christos FREE_STACK_RETURN (REG_EPAREN); 4237 1.1 christos 4238 1.1 christos /* If we don't want backtracking, force success 4239 1.1 christos the first time we reach the end of the compiled pattern. */ 4240 1.1 christos if (syntax & RE_NO_POSIX_BACKTRACKING) 4241 1.1 christos BUF_PUSH (succeed); 4242 1.1 christos 4243 1.1 christos #ifdef WCHAR 4244 1.1 christos free (pattern); 4245 1.1 christos free (mbs_offset); 4246 1.1 christos free (is_binary); 4247 1.1 christos #endif 4248 1.1 christos free (compile_stack.stack); 4249 1.1 christos 4250 1.1 christos /* We have succeeded; set the length of the buffer. 
*/ 4251 1.1 christos #ifdef WCHAR 4252 1.1 christos bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR; 4253 1.1 christos #else 4254 1.1 christos bufp->used = b - bufp->buffer; 4255 1.1 christos #endif 4256 1.1 christos 4257 1.1 christos #ifdef DEBUG 4258 1.1 christos if (debug) 4259 1.1 christos { 4260 1.1 christos DEBUG_PRINT1 ("\nCompiled pattern: \n"); 4261 1.1 christos PREFIX(print_compiled_pattern) (bufp); 4262 1.1 christos } 4263 1.1 christos #endif /* DEBUG */ 4264 1.1 christos 4265 1.1 christos #ifndef MATCH_MAY_ALLOCATE 4266 1.1 christos /* Initialize the failure stack to the largest possible stack. This 4267 1.1 christos isn't necessary unless we're trying to avoid calling alloca in 4268 1.1 christos the search and match routines. */ 4269 1.1 christos { 4270 1.1 christos int num_regs = bufp->re_nsub + 1; 4271 1.1 christos 4272 1.1 christos /* Since DOUBLE_FAIL_STACK refuses to double only if the current size 4273 1.1 christos is strictly greater than re_max_failures, the largest possible stack 4274 1.1 christos is 2 * re_max_failures failure points. */ 4275 1.1 christos if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) 4276 1.1 christos { 4277 1.1 christos fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 4278 1.1 christos 4279 1.1 christos # ifdef emacs 4280 1.1 christos if (! fail_stack.stack) 4281 1.1 christos fail_stack.stack 4282 1.1 christos = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size 4283 1.1 christos * sizeof (PREFIX(fail_stack_elt_t))); 4284 1.1 christos else 4285 1.1 christos fail_stack.stack 4286 1.1 christos = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack, 4287 1.1 christos (fail_stack.size 4288 1.1 christos * sizeof (PREFIX(fail_stack_elt_t)))); 4289 1.1 christos # else /* not emacs */ 4290 1.1 christos if (! 
fail_stack.stack) 4291 1.1 christos fail_stack.stack 4292 1.1 christos = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size 4293 1.1 christos * sizeof (PREFIX(fail_stack_elt_t))); 4294 1.1 christos else 4295 1.1 christos fail_stack.stack 4296 1.1 christos = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack, 4297 1.1 christos (fail_stack.size 4298 1.1 christos * sizeof (PREFIX(fail_stack_elt_t)))); 4299 1.1 christos # endif /* not emacs */ 4300 1.1 christos } 4301 1.1 christos 4302 1.1 christos PREFIX(regex_grow_registers) (num_regs); 4303 1.1 christos } 4304 1.1 christos #endif /* not MATCH_MAY_ALLOCATE */ 4305 1.1 christos 4306 1.1 christos return REG_NOERROR; 4307 1.1 christos } /* regex_compile */ 4308 1.1 christos 4309 1.1 christos /* Subroutines for `regex_compile'. */ 4310 1.1 christos 4311 1.1 christos /* Store OP at LOC followed by two-byte integer parameter ARG. OP is cast to UCHAR_T and written to *LOC; ARG is then stored starting at LOC + 1 by STORE_NUMBER. Nothing is returned, and no bounds are checked here, so the caller must already own the cells being written. */ 4312 1.1 christos /* ifdef WCHAR, the integer parameter is 1 wchar_t rather than two bytes. */ 4313 1.1 christos 4314 1.1 christos static void 4315 1.1 christos PREFIX(store_op1) (op, loc, arg) 4316 1.1 christos re_opcode_t op; 4317 1.1 christos UCHAR_T *loc; 4318 1.1 christos int arg; 4319 1.1 christos { 4320 1.1 christos *loc = (UCHAR_T) op; /* The opcode takes the first cell. */ 4321 1.1 christos STORE_NUMBER (loc + 1, arg); /* Its operand follows directly behind it. */ 4322 1.1 christos } 4323 1.1 christos 4324 1.1 christos 4325 1.1 christos /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ 4326 1.1 christos /* ifdef WCHAR, integer parameter is 1 wchar_t.
*/ 4327 1.1 christos 4328 1.1 christos static void 4329 1.1 christos PREFIX(store_op2) (op, loc, arg1, arg2) 4330 1.1 christos re_opcode_t op; 4331 1.1 christos UCHAR_T *loc; 4332 1.1 christos int arg1, arg2; 4333 1.1 christos { 4334 1.1 christos *loc = (UCHAR_T) op; 4335 1.1 christos STORE_NUMBER (loc + 1, arg1); 4336 1.1 christos STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2); 4337 1.1 christos } 4338 1.1 christos 4339 1.1 christos 4340 1.1 christos /* Copy the bytes from LOC to END to open up three bytes of space at LOC 4341 1.1 christos for OP followed by two-byte integer parameter ARG. */ 4342 1.1 christos /* ifdef WCHAR, integer parameter is 1 wchar_t. */ 4343 1.1 christos 4344 1.1 christos static void 4345 1.1 christos PREFIX(insert_op1) (op, loc, arg, end) 4346 1.1 christos re_opcode_t op; 4347 1.1 christos UCHAR_T *loc; 4348 1.1 christos int arg; 4349 1.1 christos UCHAR_T *end; 4350 1.1 christos { 4351 1.1 christos register UCHAR_T *pfrom = end; 4352 1.1 christos register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE; 4353 1.1 christos 4354 1.1 christos while (pfrom != loc) 4355 1.1 christos *--pto = *--pfrom; 4356 1.1 christos 4357 1.1 christos PREFIX(store_op1) (op, loc, arg); 4358 1.1 christos } 4359 1.1 christos 4360 1.1 christos 4361 1.1 christos /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 4362 1.1 christos /* ifdef WCHAR, integer parameter is 1 wchar_t. 
*/ 4363 1.1 christos 4364 1.1 christos static void 4365 1.1 christos PREFIX(insert_op2) (op, loc, arg1, arg2, end) 4366 1.1 christos re_opcode_t op; 4367 1.1 christos UCHAR_T *loc; 4368 1.1 christos int arg1, arg2; 4369 1.1 christos UCHAR_T *end; 4370 1.1 christos { 4371 1.1 christos register UCHAR_T *pfrom = end; 4372 1.1 christos register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE; 4373 1.1 christos 4374 1.1 christos while (pfrom != loc) 4375 1.1 christos *--pto = *--pfrom; 4376 1.1 christos 4377 1.1 christos PREFIX(store_op2) (op, loc, arg1, arg2); 4378 1.1 christos } 4379 1.1 christos 4380 1.1 christos 4381 1.1 christos /* P points to just after a ^ in PATTERN. Return true if that ^ comes 4382 1.1 christos after an alternative or a begin-subexpression. We assume there is at 4383 1.1 christos least one character before the ^. */ 4384 1.1 christos 4385 1.1 christos static boolean 4386 1.1 christos PREFIX(at_begline_loc_p) (pattern, p, syntax) 4387 1.1 christos const CHAR_T *pattern, *p; 4388 1.1 christos reg_syntax_t syntax; 4389 1.1 christos { 4390 1.1 christos const CHAR_T *prev = p - 2; 4391 1.1 christos boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 4392 1.1 christos 4393 1.1 christos return 4394 1.1 christos /* After a subexpression? */ 4395 1.1 christos (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 4396 1.1 christos /* After an alternative? */ 4397 1.1 christos || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 4398 1.1 christos } 4399 1.1 christos 4400 1.1 christos 4401 1.1 christos /* The dual of at_begline_loc_p. This one is for $. We assume there is 4402 1.1 christos at least one character after the $, i.e., `P < PEND'. 
*/ 4403 1.1 christos 4404 1.1 christos static boolean 4405 1.1 christos PREFIX(at_endline_loc_p) (p, pend, syntax) 4406 1.1 christos const CHAR_T *p, *pend; 4407 1.1 christos reg_syntax_t syntax; 4408 1.1 christos { 4409 1.1 christos const CHAR_T *next = p; 4410 1.1 christos boolean next_backslash = *next == '\\'; 4411 1.1 christos const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0; 4412 1.1 christos 4413 1.1 christos return 4414 1.1 christos /* Before a subexpression? */ 4415 1.1 christos (syntax & RE_NO_BK_PARENS ? *next == ')' 4416 1.1 christos : next_backslash && next_next && *next_next == ')') 4417 1.1 christos /* Before an alternative? */ 4418 1.1 christos || (syntax & RE_NO_BK_VBAR ? *next == '|' 4419 1.1 christos : next_backslash && next_next && *next_next == '|'); 4420 1.1 christos } 4421 1.1 christos 4422 1.1 christos #else /* not INSIDE_RECURSION */ 4423 1.1 christos 4424 1.1 christos /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 4425 1.1 christos false if it's not. */ 4426 1.1 christos 4427 1.1 christos static boolean 4428 1.1 christos group_in_compile_stack (compile_stack, regnum) 4429 1.1 christos compile_stack_type compile_stack; 4430 1.1 christos regnum_t regnum; 4431 1.1 christos { 4432 1.1 christos int this_element; 4433 1.1 christos 4434 1.1 christos for (this_element = compile_stack.avail - 1; 4435 1.1 christos this_element >= 0; 4436 1.1 christos this_element--) 4437 1.1 christos if (compile_stack.stack[this_element].regnum == regnum) 4438 1.1 christos return true; 4439 1.1 christos 4440 1.1 christos return false; 4441 1.1 christos } 4442 1.1 christos #endif /* not INSIDE_RECURSION */ 4443 1.1 christos 4444 1.1 christos #ifdef INSIDE_RECURSION 4445 1.1 christos 4446 1.1 christos #ifdef WCHAR 4447 1.1 christos /* This insert space, which size is "num", into the pattern at "loc". 4448 1.1 christos "end" must point the end of the allocated buffer. 
*/ /* insert_space copies backwards: the element at END - NUM is written to END, and so on downwards until the element at LOC has been written to LOC + NUM. The NUM elements starting at LOC are not written by this function; the caller fills them in afterwards. Nothing is returned. */ 4449 1.1 christos static void 4450 1.1 christos insert_space (num, loc, end) 4451 1.1 christos int num; 4452 1.1 christos CHAR_T *loc; 4453 1.1 christos CHAR_T *end; 4454 1.1 christos { 4455 1.1 christos register CHAR_T *pto = end; 4456 1.1 christos register CHAR_T *pfrom = end - num; 4457 1.1 christos 4458 1.1 christos while (pfrom >= loc) 4459 1.1 christos *pto-- = *pfrom--; 4460 1.1 christos } 4461 1.1 christos #endif /* WCHAR */ 4462 1.1 christos 4463 1.1 christos #ifdef WCHAR 4464 1.1 christos /* Wide-character range compiler. RANGE_START_CHAR is the start of the range and (*P_PTR)[0] supplies its end. Returns REG_ERANGE at once when *P_PTR == PEND. Otherwise a start/end pair is inserted into the compiled pattern at B - CHAR_SET[5] - 2 (after insert_space has opened two cells there), CHAR_SET[4] is incremented, and *P_PTR is advanced past the end character. Under _LIBC with a nonzero _NL_COLLATE_NRULES the stored pair holds collation sequence values; otherwise it holds the characters themselves, passed through TRANSLATE except that a negative RANGE_START_CHAR is stored as is. The result is REG_ERANGE when RE_NO_EMPTY_RANGES is set in SYNTAX and start > end, else REG_NOERROR; note that the pair is stored in the pattern in both cases. NOTE(review): TRANSLATE presumably picks up the parameter named `translate' -- confirm against the macro definition before renaming anything. */ static reg_errcode_t 4465 1.1 christos wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b, 4466 1.1 christos char_set) 4467 1.1 christos CHAR_T range_start_char; 4468 1.1 christos const CHAR_T **p_ptr, *pend; 4469 1.1 christos CHAR_T *char_set, *b; 4470 1.1 christos RE_TRANSLATE_TYPE translate; 4471 1.1 christos reg_syntax_t syntax; 4472 1.1 christos { 4473 1.1 christos const CHAR_T *p = *p_ptr; 4474 1.1 christos CHAR_T range_start, range_end; 4475 1.1 christos reg_errcode_t ret; 4476 1.1 christos # ifdef _LIBC 4477 1.1 christos uint32_t nrules; 4478 1.1 christos uint32_t start_val, end_val; 4479 1.1 christos # endif 4480 1.1 christos if (p == pend) 4481 1.1 christos return REG_ERANGE; 4482 1.1 christos 4483 1.1 christos # ifdef _LIBC 4484 1.1 christos nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 4485 1.1 christos if (nrules != 0) 4486 1.1 christos { 4487 1.1 christos const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE, 4488 1.1 christos _NL_COLLATE_COLLSEQWC); 4489 1.1 christos const unsigned char *extra = (const unsigned char *) 4490 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 4491 1.1 christos 4492 1.1 christos if (range_start_char < -1) 4493 1.1 christos { 4494 1.1 christos /* range_start is a collating symbol. */ 4495 1.1 christos int32_t *wextra; 4496 1.1 christos /* Retrieve the index and get collation sequence value.
*/ 4497 1.1 christos wextra = (int32_t*)(extra + char_set[-range_start_char]); 4498 1.1 christos start_val = wextra[1 + *wextra]; 4499 1.1 christos } 4500 1.1 christos else 4501 1.1 christos start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char)); 4502 1.1 christos 4503 1.1 christos end_val = collseq_table_lookup (collseq, TRANSLATE (p[0])); 4504 1.1 christos 4505 1.1 christos /* Report an error if the range is empty and the syntax prohibits this. The range is still stored below either way. */ 4507 1.1 christos ret = ((syntax & RE_NO_EMPTY_RANGES) 4508 1.1 christos && (start_val > end_val))? REG_ERANGE : REG_NOERROR; 4509 1.1 christos 4510 1.1 christos /* Insert space to the end of the char_ranges. */ 4511 1.1 christos insert_space(2, b - char_set[5] - 2, b - 1); 4512 1.1 christos *(b - char_set[5] - 2) = (wchar_t)start_val; 4513 1.1 christos *(b - char_set[5] - 1) = (wchar_t)end_val; 4514 1.1 christos char_set[4]++; /* ranges_index */ 4515 1.1 christos } 4516 1.1 christos else 4517 1.1 christos # endif 4518 1.1 christos { 4519 1.1 christos range_start = (range_start_char >= 0)? TRANSLATE (range_start_char): 4520 1.1 christos range_start_char; 4521 1.1 christos range_end = TRANSLATE (p[0]); 4522 1.1 christos /* Report an error if the range is empty and the syntax prohibits this. The range is still stored below either way. */ 4524 1.1 christos ret = ((syntax & RE_NO_EMPTY_RANGES) 4525 1.1 christos && (range_start > range_end))? REG_ERANGE : REG_NOERROR; 4526 1.1 christos 4527 1.1 christos /* Insert space to the end of the char_ranges. */ 4528 1.1 christos insert_space(2, b - char_set[5] - 2, b - 1); 4529 1.1 christos *(b - char_set[5] - 2) = range_start; 4530 1.1 christos *(b - char_set[5] - 1) = range_end; 4531 1.1 christos char_set[4]++; /* ranges_index */ 4532 1.1 christos } 4533 1.1 christos /* Have to increment the pointer into the pattern string, so the caller isn't still at the ending character.
*/ 4535 1.1 christos (*p_ptr)++; 4536 1.1 christos 4537 1.1 christos return ret; 4538 1.1 christos } 4539 1.1 christos #else /* BYTE */ 4540 1.1 christos /* Read the ending character of a range (in a bracket expression) from the 4541 1.1 christos uncompiled pattern *P_PTR (which ends at PEND). We assume the 4542 1.1 christos starting character is in `P[-2]'. (`P[-1]' is the character `-'.) 4543 1.1 christos Then we set the translation of all bits between the starting and 4544 1.1 christos ending characters (inclusive) in the compiled pattern B. 4545 1.1 christos 4546 1.1 christos Return an error code. 4547 1.1 christos 4548 1.1 christos We use these short variable names so we can use the same macros as 4549 1.1 christos `regex_compile' itself. */ 4550 1.1 christos 4551 1.1 christos static reg_errcode_t 4552 1.1 christos byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b) 4553 1.1 christos unsigned int range_start_char; 4554 1.1 christos const char **p_ptr, *pend; 4555 1.1 christos RE_TRANSLATE_TYPE translate; 4556 1.1 christos reg_syntax_t syntax; 4557 1.1 christos unsigned char *b; 4558 1.1 christos { 4559 1.1 christos unsigned this_char; 4560 1.1 christos const char *p = *p_ptr; 4561 1.1 christos reg_errcode_t ret; 4562 1.1 christos # if _LIBC 4563 1.1 christos const unsigned char *collseq; 4564 1.1 christos unsigned int start_colseq; 4565 1.1 christos unsigned int end_colseq; 4566 1.1 christos # else 4567 1.1 christos unsigned end_char; 4568 1.1 christos # endif 4569 1.1 christos 4570 1.1 christos if (p == pend) 4571 1.1 christos return REG_ERANGE; 4572 1.1 christos 4573 1.1 christos /* Have to increment the pointer into the pattern string, so the 4574 1.1 christos caller isn't still at the ending character. */ 4575 1.1 christos (*p_ptr)++; 4576 1.1 christos 4577 1.1 christos /* Report an error if the range is empty and the syntax prohibits this. */ 4578 1.1 christos ret = syntax & RE_NO_EMPTY_RANGES ?
REG_ERANGE : REG_NOERROR; 4579 1.1 christos 4580 1.1 christos # if _LIBC 4581 1.1 christos collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 4582 1.1 christos _NL_COLLATE_COLLSEQMB); 4583 1.1 christos 4584 1.1 christos start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)]; 4585 1.1 christos end_colseq = collseq[(unsigned char) TRANSLATE (p[0])]; 4586 1.1 christos for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) 4587 1.1 christos { 4588 1.1 christos unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)]; 4589 1.1 christos 4590 1.1 christos if (start_colseq <= this_colseq && this_colseq <= end_colseq) 4591 1.1 christos { 4592 1.1 christos SET_LIST_BIT (TRANSLATE (this_char)); 4593 1.1 christos ret = REG_NOERROR; 4594 1.1 christos } 4595 1.1 christos } 4596 1.1 christos # else 4597 1.1 christos /* Here we see why `this_char' has to be larger than an `unsigned 4598 1.1 christos char' -- we would otherwise go into an infinite loop, since all 4599 1.1 christos characters <= 0xff. */ 4600 1.1 christos range_start_char = TRANSLATE (range_start_char); 4601 1.1 christos /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE, 4602 1.1 christos and some compilers cast it to int implicitly, so following for_loop 4603 1.1 christos may fall to (almost) infinite loop. 4604 1.1 christos e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff. 4605 1.1 christos To avoid this, we cast p[0] to unsigned int and truncate it. 
*/ 4606 1.1 christos end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1)); 4607 1.1 christos 4608 1.1 christos for (this_char = range_start_char; this_char <= end_char; ++this_char) 4609 1.1 christos { 4610 1.1 christos SET_LIST_BIT (TRANSLATE (this_char)); 4611 1.1 christos ret = REG_NOERROR; 4612 1.1 christos } 4613 1.1 christos # endif 4614 1.1 christos 4615 1.1 christos return ret; 4616 1.1 christos } 4617 1.1 christos #endif /* WCHAR */ 4618 1.1 christos 4619 1.1 christos /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 4621 1.1 christos BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 4622 1.1 christos characters can start a string that matches the pattern. This fastmap 4623 1.1 christos is used by re_search to skip quickly over impossible starting points. 4624 1.1 christos 4625 1.1 christos The caller must supply the address of a (1 << BYTEWIDTH)-byte data 4626 1.1 christos area as BUFP->fastmap. 4627 1.1 christos 4628 1.1 christos We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 4629 1.1 christos the pattern buffer. 4630 1.1 christos 4631 1.1 christos Returns 0 if we succeed, -2 if an internal error. */ 4632 1.1 christos 4633 1.1 christos #ifdef WCHAR 4634 1.1 christos /* local function for re_compile_fastmap. 4635 1.1 christos truncate wchar_t character to char. */ 4636 1.1 christos static unsigned char truncate_wchar (CHAR_T c); 4637 1.1 christos 4638 1.1 christos static unsigned char 4639 1.1 christos truncate_wchar (c) 4640 1.1 christos CHAR_T c; 4641 1.1 christos { 4642 1.1 christos unsigned char buf[MB_CUR_MAX]; 4643 1.1 christos mbstate_t state; 4644 1.1 christos int retval; 4645 1.1 christos memset (&state, '\0', sizeof (state)); 4646 1.1 christos retval = wcrtomb (buf, c, &state); 4647 1.1 christos return retval > 0 ? 
buf[0] : (unsigned char) c; 4648 1.1 christos } 4649 1.1 christos #endif /* WCHAR */ 4650 1.1 christos 4651 1.1 christos static int 4652 1.1 christos PREFIX(re_compile_fastmap) (bufp) 4653 1.1 christos struct re_pattern_buffer *bufp; 4654 1.1 christos { 4655 1.1 christos int j, k; 4656 1.1 christos #ifdef MATCH_MAY_ALLOCATE 4657 1.1 christos PREFIX(fail_stack_type) fail_stack; 4658 1.1 christos #endif 4659 1.1 christos #ifndef REGEX_MALLOC 4660 1.1 christos char *destination; 4661 1.1 christos #endif 4662 1.1 christos 4663 1.1 christos register char *fastmap = bufp->fastmap; 4664 1.1 christos 4665 1.1 christos #ifdef WCHAR 4666 1.1 christos /* We need to cast pattern to (wchar_t*), because we casted this compiled 4667 1.1 christos pattern to (char*) in regex_compile. */ 4668 1.1 christos UCHAR_T *pattern = (UCHAR_T*)bufp->buffer; 4669 1.1 christos register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used); 4670 1.1 christos #else /* BYTE */ 4671 1.1 christos UCHAR_T *pattern = bufp->buffer; 4672 1.1 christos register UCHAR_T *pend = pattern + bufp->used; 4673 1.1 christos #endif /* WCHAR */ 4674 1.1 christos UCHAR_T *p = pattern; 4675 1.1 christos 4676 1.1 christos #ifdef REL_ALLOC 4677 1.1 christos /* This holds the pointer to the failure stack, when 4678 1.1 christos it is allocated relocatably. */ 4679 1.1 christos fail_stack_elt_t *failure_stack_ptr; 4680 1.1 christos #endif 4681 1.1 christos 4682 1.1 christos /* Assume that each path through the pattern can be null until 4683 1.1 christos proven otherwise. We set this false at the bottom of switch 4684 1.1 christos statement, to which we get only if a particular path doesn't 4685 1.1 christos match the empty string. */ 4686 1.1 christos boolean path_can_be_null = true; 4687 1.1 christos 4688 1.1 christos /* We aren't doing a `succeed_n' to begin with. 
*/ 4689 1.1 christos boolean succeed_n_p = false; 4690 1.1 christos 4691 1.1 christos assert (fastmap != NULL && p != NULL); 4692 1.1 christos 4693 1.1 christos INIT_FAIL_STACK (); 4694 1.1 christos bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 4695 1.1 christos bufp->fastmap_accurate = 1; /* It will be when we're done. */ 4696 1.1 christos bufp->can_be_null = 0; 4697 1.1 christos 4698 1.1 christos while (1) 4699 1.1 christos { 4700 1.1 christos if (p == pend || *p == succeed) 4701 1.1 christos { 4702 1.1 christos /* We have reached the (effective) end of pattern. */ 4703 1.1 christos if (!FAIL_STACK_EMPTY ()) 4704 1.1 christos { 4705 1.1 christos bufp->can_be_null |= path_can_be_null; 4706 1.1 christos 4707 1.1 christos /* Reset for next path. */ 4708 1.1 christos path_can_be_null = true; 4709 1.1 christos 4710 1.1 christos p = fail_stack.stack[--fail_stack.avail].pointer; 4711 1.1 christos 4712 1.1 christos continue; 4713 1.1 christos } 4714 1.1 christos else 4715 1.1 christos break; 4716 1.1 christos } 4717 1.1 christos 4718 1.1 christos /* We should never be about to go beyond the end of the pattern. */ 4719 1.1 christos assert (p < pend); 4720 1.1 christos 4721 1.1 christos switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 4722 1.1 christos { 4723 1.1 christos 4724 1.1 christos /* I guess the idea here is to simply not bother with a fastmap 4725 1.1 christos if a backreference is used, since it's too hard to figure out 4726 1.1 christos the fastmap for the corresponding group. Setting 4727 1.1 christos `can_be_null' stops `re_search_2' from using the fastmap, so 4728 1.1 christos that is all we do. */ 4729 1.1 christos case duplicate: 4730 1.1 christos bufp->can_be_null = 1; 4731 1.1 christos goto done; 4732 1.1 christos 4733 1.1 christos 4734 1.1 christos /* Following are the cases which match a character. These end 4735 1.1 christos with `break'. 
*/ 4736 1.1 christos 4737 1.1 christos #ifdef WCHAR 4738 1.1 christos case exactn: 4739 1.1 christos fastmap[truncate_wchar(p[1])] = 1; 4740 1.1 christos break; 4741 1.1 christos #else /* BYTE */ 4742 1.1 christos case exactn: 4743 1.1 christos fastmap[p[1]] = 1; 4744 1.1 christos break; 4745 1.1 christos #endif /* WCHAR */ 4746 1.1 christos #ifdef MBS_SUPPORT 4747 1.1 christos case exactn_bin: 4748 1.1 christos fastmap[p[1]] = 1; 4749 1.1 christos break; 4750 1.1 christos #endif 4751 1.1 christos 4752 1.1 christos #ifdef WCHAR 4753 1.1 christos /* It is hard to distinguish fastmap from (multi byte) characters 4754 1.1 christos which depends on current locale. */ 4755 1.1 christos case charset: 4756 1.1 christos case charset_not: 4757 1.1 christos case wordchar: 4758 1.1 christos case notwordchar: 4759 1.1 christos bufp->can_be_null = 1; 4760 1.1 christos goto done; 4761 1.1 christos #else /* BYTE */ 4762 1.1 christos case charset: 4763 1.1 christos for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 4764 1.1 christos if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 4765 1.1 christos fastmap[j] = 1; 4766 1.1 christos break; 4767 1.1 christos 4768 1.1 christos 4769 1.1 christos case charset_not: 4770 1.1 christos /* Chars beyond end of map must be allowed. 
*/ 4771 1.1 christos for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 4772 1.1 christos fastmap[j] = 1; 4773 1.1 christos 4774 1.1 christos for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 4775 1.1 christos if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 4776 1.1 christos fastmap[j] = 1; 4777 1.1 christos break; 4778 1.1 christos 4779 1.1 christos 4780 1.1 christos case wordchar: 4781 1.1 christos for (j = 0; j < (1 << BYTEWIDTH); j++) 4782 1.1 christos if (SYNTAX (j) == Sword) 4783 1.1 christos fastmap[j] = 1; 4784 1.1 christos break; 4785 1.1 christos 4786 1.1 christos 4787 1.1 christos case notwordchar: 4788 1.1 christos for (j = 0; j < (1 << BYTEWIDTH); j++) 4789 1.1 christos if (SYNTAX (j) != Sword) 4790 1.1 christos fastmap[j] = 1; 4791 1.1 christos break; 4792 1.1 christos #endif /* WCHAR */ 4793 1.1 christos 4794 1.1 christos case anychar: 4795 1.1 christos { 4796 1.1 christos int fastmap_newline = fastmap['\n']; 4797 1.1 christos 4798 1.1 christos /* `.' matches anything ... */ 4799 1.1 christos for (j = 0; j < (1 << BYTEWIDTH); j++) 4800 1.1 christos fastmap[j] = 1; 4801 1.1 christos 4802 1.1 christos /* ... except perhaps newline. */ 4803 1.1 christos if (!(bufp->syntax & RE_DOT_NEWLINE)) 4804 1.1 christos fastmap['\n'] = fastmap_newline; 4805 1.1 christos 4806 1.1 christos /* Return if we have already set `can_be_null'; if we have, 4807 1.1 christos then the fastmap is irrelevant. Something's wrong here. */ 4808 1.1 christos else if (bufp->can_be_null) 4809 1.1 christos goto done; 4810 1.1 christos 4811 1.1 christos /* Otherwise, have to check alternative paths. 
*/ 4812 1.1 christos break; 4813 1.1 christos } 4814 1.1 christos 4815 1.1 christos #ifdef emacs 4816 1.1 christos case syntaxspec: 4817 1.1 christos k = *p++; 4818 1.1 christos for (j = 0; j < (1 << BYTEWIDTH); j++) 4819 1.1 christos if (SYNTAX (j) == (enum syntaxcode) k) 4820 1.1 christos fastmap[j] = 1; 4821 1.1 christos break; 4822 1.1 christos 4823 1.1 christos 4824 1.1 christos case notsyntaxspec: 4825 1.1 christos k = *p++; 4826 1.1 christos for (j = 0; j < (1 << BYTEWIDTH); j++) 4827 1.1 christos if (SYNTAX (j) != (enum syntaxcode) k) 4828 1.1 christos fastmap[j] = 1; 4829 1.1 christos break; 4830 1.1 christos 4831 1.1 christos 4832 1.1 christos /* All cases after this match the empty string. These end with 4833 1.1 christos `continue'. */ 4834 1.1 christos 4835 1.1 christos 4836 1.1 christos case before_dot: 4837 1.1 christos case at_dot: 4838 1.1 christos case after_dot: 4839 1.1 christos continue; 4840 1.1 christos #endif /* emacs */ 4841 1.1 christos 4842 1.1 christos 4843 1.1 christos case no_op: 4844 1.1 christos case begline: 4845 1.1 christos case endline: 4846 1.1 christos case begbuf: 4847 1.1 christos case endbuf: 4848 1.1 christos case wordbound: 4849 1.1 christos case notwordbound: 4850 1.1 christos case wordbeg: 4851 1.1 christos case wordend: 4852 1.1 christos case push_dummy_failure: 4853 1.1 christos continue; 4854 1.1 christos 4855 1.1 christos 4856 1.1 christos case jump_n: 4857 1.1 christos case pop_failure_jump: 4858 1.1 christos case maybe_pop_jump: 4859 1.1 christos case jump: 4860 1.1 christos case jump_past_alt: 4861 1.1 christos case dummy_failure_jump: 4862 1.1 christos EXTRACT_NUMBER_AND_INCR (j, p); 4863 1.1 christos p += j; 4864 1.1 christos if (j > 0) 4865 1.1 christos continue; 4866 1.1 christos 4867 1.1 christos /* Jump backward implies we just went through the body of a 4868 1.1 christos loop and matched nothing. Opcode jumped to should be 4869 1.1 christos `on_failure_jump' or `succeed_n'. 
Just treat it like an 4870 1.1 christos ordinary jump. For a * loop, it has pushed its failure 4871 1.1 christos point already; if so, discard that as redundant. */ 4872 1.1 christos if ((re_opcode_t) *p != on_failure_jump 4873 1.1 christos && (re_opcode_t) *p != succeed_n) 4874 1.1 christos continue; 4875 1.1 christos 4876 1.1 christos p++; 4877 1.1 christos EXTRACT_NUMBER_AND_INCR (j, p); 4878 1.1 christos p += j; 4879 1.1 christos 4880 1.1 christos /* If what's on the stack is where we are now, pop it. */ 4881 1.1 christos if (!FAIL_STACK_EMPTY () 4882 1.1 christos && fail_stack.stack[fail_stack.avail - 1].pointer == p) 4883 1.1 christos fail_stack.avail--; 4884 1.1 christos 4885 1.1 christos continue; 4886 1.1 christos 4887 1.1 christos 4888 1.1 christos case on_failure_jump: 4889 1.1 christos case on_failure_keep_string_jump: 4890 1.1 christos handle_on_failure_jump: 4891 1.1 christos EXTRACT_NUMBER_AND_INCR (j, p); 4892 1.1 christos 4893 1.1 christos /* For some patterns, e.g., `(a?)?', `p+j' here points to the 4894 1.1 christos end of the pattern. We don't want to push such a point, 4895 1.1 christos since when we restore it above, entering the switch will 4896 1.1 christos increment `p' past the end of the pattern. We don't need 4897 1.1 christos to push such a point since we obviously won't find any more 4898 1.1 christos fastmap entries beyond `pend'. Such a pattern can match 4899 1.1 christos the null string, though. */ 4900 1.1 christos if (p + j < pend) 4901 1.1 christos { 4902 1.1 christos if (!PUSH_PATTERN_OP (p + j, fail_stack)) 4903 1.1 christos { 4904 1.1 christos RESET_FAIL_STACK (); 4905 1.1 christos return -2; 4906 1.1 christos } 4907 1.1 christos } 4908 1.1 christos else 4909 1.1 christos bufp->can_be_null = 1; 4910 1.1 christos 4911 1.1 christos if (succeed_n_p) 4912 1.1 christos { 4913 1.1 christos EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. 
*/ 4914 1.1 christos succeed_n_p = false; 4915 1.1 christos } 4916 1.1 christos 4917 1.1 christos continue; 4918 1.1 christos 4919 1.1 christos 4920 1.1 christos case succeed_n: 4921 1.1 christos /* Get to the number of times to succeed. */ 4922 1.1 christos p += OFFSET_ADDRESS_SIZE; 4923 1.1 christos 4924 1.1 christos /* Increment p past the n for when k != 0. */ 4925 1.1 christos EXTRACT_NUMBER_AND_INCR (k, p); 4926 1.1 christos if (k == 0) 4927 1.1 christos { 4928 1.1 christos p -= 2 * OFFSET_ADDRESS_SIZE; 4929 1.1 christos succeed_n_p = true; /* Spaghetti code alert. */ 4930 1.1 christos goto handle_on_failure_jump; 4931 1.1 christos } 4932 1.1 christos continue; 4933 1.1 christos 4934 1.1 christos 4935 1.1 christos case set_number_at: 4936 1.1 christos p += 2 * OFFSET_ADDRESS_SIZE; 4937 1.1 christos continue; 4938 1.1 christos 4939 1.1 christos 4940 1.1 christos case start_memory: 4941 1.1 christos case stop_memory: 4942 1.1 christos p += 2; 4943 1.1 christos continue; 4944 1.1 christos 4945 1.1 christos 4946 1.1 christos default: 4947 1.1 christos abort (); /* We have listed all the cases. */ 4948 1.1 christos } /* switch *p++ */ 4949 1.1 christos 4950 1.1 christos /* Getting here means we have found the possible starting 4951 1.1 christos characters for one path of the pattern -- and that the empty 4952 1.1 christos string does not match. We need not follow this path further. 4953 1.1 christos Instead, look at the next alternative (remembered on the 4954 1.1 christos stack), or quit if no more. The test at the top of the loop 4955 1.1 christos does these things. */ 4956 1.1 christos path_can_be_null = false; 4957 1.1 christos p = pend; 4958 1.1 christos } /* while p */ 4959 1.1 christos 4960 1.1 christos /* Set `can_be_null' for the last path (also the first path, if the 4961 1.1 christos pattern is empty). 
*/ 4962 1.1 christos bufp->can_be_null |= path_can_be_null; 4963 1.1 christos 4964 1.1 christos done: 4965 1.1 christos RESET_FAIL_STACK (); 4966 1.1 christos return 0; 4967 1.1 christos } 4968 1.1 christos 4969 1.1 christos #else /* not INSIDE_RECURSION */ 4970 1.1 christos 4971 1.1 christos int 4972 1.1 christos re_compile_fastmap (bufp) 4973 1.1 christos struct re_pattern_buffer *bufp; 4974 1.1 christos { 4975 1.1 christos # ifdef MBS_SUPPORT 4976 1.1 christos if (MB_CUR_MAX != 1) 4977 1.1 christos return wcs_re_compile_fastmap(bufp); 4978 1.1 christos else 4979 1.1 christos # endif 4980 1.1 christos return byte_re_compile_fastmap(bufp); 4981 1.1 christos } /* re_compile_fastmap */ 4982 1.1 christos #ifdef _LIBC 4983 1.1 christos weak_alias (__re_compile_fastmap, re_compile_fastmap) 4984 1.1 christos #endif 4985 1.1 christos 4986 1.1 christos 4988 1.1 christos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and 4989 1.1 christos ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 4990 1.1 christos this memory for recording register information. STARTS and ENDS 4991 1.1 christos must be allocated using the malloc library routine, and must each 4992 1.1 christos be at least NUM_REGS * sizeof (regoff_t) bytes long. 4993 1.1 christos 4994 1.1 christos If NUM_REGS == 0, then subsequent matches should allocate their own 4995 1.1 christos register data. 4996 1.1 christos 4997 1.1 christos Unless this function is called, the first search or match using 4998 1.1 christos PATTERN_BUFFER will allocate its own register data, without 4999 1.1 christos freeing the old data. 
*/ 5000 1.1 christos 5001 1.1 christos void 5002 1.1 christos re_set_registers (bufp, regs, num_regs, starts, ends) 5003 1.1 christos struct re_pattern_buffer *bufp; 5004 1.1 christos struct re_registers *regs; 5005 1.1 christos unsigned num_regs; 5006 1.1 christos regoff_t *starts, *ends; 5007 1.1 christos { 5008 1.1 christos if (num_regs) 5009 1.1 christos { 5010 1.1 christos bufp->regs_allocated = REGS_REALLOCATE; 5011 1.1 christos regs->num_regs = num_regs; 5012 1.1 christos regs->start = starts; 5013 1.1 christos regs->end = ends; 5014 1.1 christos } 5015 1.1 christos else 5016 1.1 christos { 5017 1.1 christos bufp->regs_allocated = REGS_UNALLOCATED; 5018 1.1 christos regs->num_regs = 0; 5019 1.1 christos regs->start = regs->end = (regoff_t *) 0; 5020 1.1 christos } 5021 1.1 christos } 5022 1.1 christos #ifdef _LIBC 5023 1.1 christos weak_alias (__re_set_registers, re_set_registers) 5024 1.1 christos #endif 5025 1.1 christos 5026 1.1 christos /* Searching routines. */ 5028 1.1 christos 5029 1.1 christos /* Like re_search_2, below, but only one string is specified, and 5030 1.1 christos doesn't let you say where to stop matching. */ 5031 1.1 christos 5032 1.1 christos int 5033 1.1 christos re_search (bufp, string, size, startpos, range, regs) 5034 1.1 christos struct re_pattern_buffer *bufp; 5035 1.1 christos const char *string; 5036 1.1 christos int size, startpos, range; 5037 1.1 christos struct re_registers *regs; 5038 1.1 christos { 5039 1.1 christos return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 5040 1.1 christos regs, size); 5041 1.1 christos } 5042 1.1 christos #ifdef _LIBC 5043 1.1 christos weak_alias (__re_search, re_search) 5044 1.1 christos #endif 5045 1.1 christos 5046 1.1 christos 5047 1.1 christos /* Using the compiled pattern in BUFP->buffer, first tries to match the 5048 1.1 christos virtual concatenation of STRING1 and STRING2, starting first at index 5049 1.1 christos STARTPOS, then at STARTPOS + 1, and so on. 
5050 1.1 christos 5051 1.1 christos STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 5052 1.1 christos 5053 1.1 christos RANGE is how far to scan while trying to match. RANGE = 0 means try 5054 1.1 christos only at STARTPOS; in general, the last start tried is STARTPOS + 5055 1.1 christos RANGE. 5056 1.1 christos 5057 1.1 christos In REGS, return the indices of the virtual concatenation of STRING1 5058 1.1 christos and STRING2 that matched the entire BUFP->buffer and its contained 5059 1.1 christos subexpressions. 5060 1.1 christos 5061 1.1 christos Do not consider matching one past the index STOP in the virtual 5062 1.1 christos concatenation of STRING1 and STRING2. 5063 1.1 christos 5064 1.1 christos We return either the position in the strings at which the match was 5065 1.1 christos found, -1 if no match, or -2 if error (such as failure 5066 1.1 christos stack overflow). */ 5067 1.1 christos 5068 1.1 christos int 5069 1.1 christos re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) 5070 1.1 christos struct re_pattern_buffer *bufp; 5071 1.1 christos const char *string1, *string2; 5072 1.1 christos int size1, size2; 5073 1.1 christos int startpos; 5074 1.1 christos int range; 5075 1.1 christos struct re_registers *regs; 5076 1.1 christos int stop; 5077 1.1 christos { 5078 1.1 christos # ifdef MBS_SUPPORT 5079 1.1 christos if (MB_CUR_MAX != 1) 5080 1.1 christos return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos, 5081 1.1 christos range, regs, stop); 5082 1.1 christos else 5083 1.1 christos # endif 5084 1.1 christos return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos, 5085 1.1 christos range, regs, stop); 5086 1.1 christos } /* re_search_2 */ 5087 1.1 christos #ifdef _LIBC 5088 1.1 christos weak_alias (__re_search_2, re_search_2) 5089 1.1 christos #endif 5090 1.1 christos 5091 1.1 christos #endif /* not INSIDE_RECURSION */ 5092 1.1 christos 5093 1.1 christos #ifdef INSIDE_RECURSION 
5094 1.1 christos 5095 1.1 christos #ifdef MATCH_MAY_ALLOCATE 5096 1.1 christos # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL 5097 1.1 christos #else 5098 1.1 christos # define FREE_VAR(var) if (var) free (var); var = NULL 5099 1.1 christos #endif 5100 1.1 christos 5101 1.1 christos #ifdef WCHAR 5102 1.1 christos # define MAX_ALLOCA_SIZE 2000 5103 1.1 christos 5104 1.1 christos # define FREE_WCS_BUFFERS() \ 5105 1.1 christos do { \ 5106 1.1 christos if (size1 > MAX_ALLOCA_SIZE) \ 5107 1.1 christos { \ 5108 1.1 christos free (wcs_string1); \ 5109 1.1 christos free (mbs_offset1); \ 5110 1.1 christos } \ 5111 1.1 christos else \ 5112 1.1 christos { \ 5113 1.1 christos FREE_VAR (wcs_string1); \ 5114 1.1 christos FREE_VAR (mbs_offset1); \ 5115 1.1 christos } \ 5116 1.1 christos if (size2 > MAX_ALLOCA_SIZE) \ 5117 1.1 christos { \ 5118 1.1 christos free (wcs_string2); \ 5119 1.1 christos free (mbs_offset2); \ 5120 1.1 christos } \ 5121 1.1 christos else \ 5122 1.1 christos { \ 5123 1.1 christos FREE_VAR (wcs_string2); \ 5124 1.1 christos FREE_VAR (mbs_offset2); \ 5125 1.1 christos } \ 5126 1.1 christos } while (0) 5127 1.1 christos 5128 1.1 christos #endif 5129 1.1 christos 5130 1.1 christos 5131 1.1 christos static int 5132 1.1 christos PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range, 5133 1.1 christos regs, stop) 5134 1.1 christos struct re_pattern_buffer *bufp; 5135 1.1 christos const char *string1, *string2; 5136 1.1 christos int size1, size2; 5137 1.1 christos int startpos; 5138 1.1 christos int range; 5139 1.1 christos struct re_registers *regs; 5140 1.1 christos int stop; 5141 1.1 christos { 5142 1.1 christos int val; 5143 1.1 christos register char *fastmap = bufp->fastmap; 5144 1.1 christos register RE_TRANSLATE_TYPE translate = bufp->translate; 5145 1.1 christos int total_size = size1 + size2; 5146 1.1 christos int endpos = startpos + range; 5147 1.1 christos #ifdef WCHAR 5148 1.1 christos /* We need wchar_t* buffers 
correspond to cstring1, cstring2. */ 5149 1.1 christos wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL; 5150 1.1 christos /* We need the size of wchar_t buffers correspond to csize1, csize2. */ 5151 1.1 christos int wcs_size1 = 0, wcs_size2 = 0; 5152 1.1 christos /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ 5153 1.1 christos int *mbs_offset1 = NULL, *mbs_offset2 = NULL; 5154 1.1 christos /* They hold whether each wchar_t is binary data or not. */ 5155 1.1 christos char *is_binary = NULL; 5156 1.1 christos #endif /* WCHAR */ 5157 1.1 christos 5158 1.1 christos /* Check for out-of-range STARTPOS. */ 5159 1.1 christos if (startpos < 0 || startpos > total_size) 5160 1.1 christos return -1; 5161 1.1 christos 5162 1.1 christos /* Fix up RANGE if it might eventually take us outside 5163 1.1 christos the virtual concatenation of STRING1 and STRING2. 5164 1.1 christos Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ 5165 1.1 christos if (endpos < 0) 5166 1.1 christos range = 0 - startpos; 5167 1.1 christos else if (endpos > total_size) 5168 1.1 christos range = total_size - startpos; 5169 1.1 christos 5170 1.1 christos /* If the search isn't to be a backwards one, don't waste time in a 5171 1.1 christos search for a pattern that must be anchored. */ 5172 1.1 christos if (bufp->used > 0 && range > 0 5173 1.1 christos && ((re_opcode_t) bufp->buffer[0] == begbuf 5174 1.1 christos /* `begline' is like `begbuf' if it cannot match at newlines. */ 5175 1.1 christos || ((re_opcode_t) bufp->buffer[0] == begline 5176 1.1 christos && !bufp->newline_anchor))) 5177 1.1 christos { 5178 1.1 christos if (startpos > 0) 5179 1.1 christos return -1; 5180 1.1 christos else 5181 1.1 christos range = 1; 5182 1.1 christos } 5183 1.1 christos 5184 1.1 christos #ifdef emacs 5185 1.1 christos /* In a forward search for something that starts with \=. 5186 1.1 christos don't keep searching past point. 
*/ 5187 1.1 christos if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 5188 1.1 christos { 5189 1.1 christos range = PT - startpos; 5190 1.1 christos if (range <= 0) 5191 1.1 christos return -1; 5192 1.1 christos } 5193 1.1 christos #endif /* emacs */ 5194 1.1 christos 5195 1.1 christos /* Update the fastmap now if not correct already. */ 5196 1.1 christos if (fastmap && !bufp->fastmap_accurate) 5197 1.1 christos if (re_compile_fastmap (bufp) == -2) 5198 1.1 christos return -2; 5199 1.1 christos 5200 1.1 christos #ifdef WCHAR 5201 1.1 christos /* Allocate wchar_t array for wcs_string1 and wcs_string2 and 5202 1.1 christos fill them with converted string. */ 5203 1.1 christos if (size1 != 0) 5204 1.1 christos { 5205 1.1 christos if (size1 > MAX_ALLOCA_SIZE) 5206 1.1 christos { 5207 1.1 christos wcs_string1 = TALLOC (size1 + 1, CHAR_T); 5208 1.1 christos mbs_offset1 = TALLOC (size1 + 1, int); 5209 1.1 christos is_binary = TALLOC (size1 + 1, char); 5210 1.1 christos } 5211 1.1 christos else 5212 1.1 christos { 5213 1.1 christos wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T); 5214 1.1 christos mbs_offset1 = REGEX_TALLOC (size1 + 1, int); 5215 1.1 christos is_binary = REGEX_TALLOC (size1 + 1, char); 5216 1.1 christos } 5217 1.1 christos if (!wcs_string1 || !mbs_offset1 || !is_binary) 5218 1.1 christos { 5219 1.1 christos if (size1 > MAX_ALLOCA_SIZE) 5220 1.1 christos { 5221 1.1 christos free (wcs_string1); 5222 1.1 christos free (mbs_offset1); 5223 1.1 christos free (is_binary); 5224 1.1 christos } 5225 1.1 christos else 5226 1.1 christos { 5227 1.1 christos FREE_VAR (wcs_string1); 5228 1.1 christos FREE_VAR (mbs_offset1); 5229 1.1 christos FREE_VAR (is_binary); 5230 1.1 christos } 5231 1.1 christos return -2; 5232 1.1 christos } 5233 1.1 christos wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1, 5234 1.1 christos mbs_offset1, is_binary); 5235 1.1 christos wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */ 5236 1.1 christos if 
(size1 > MAX_ALLOCA_SIZE) 5237 1.1 christos free (is_binary); 5238 1.1 christos else 5239 1.1 christos FREE_VAR (is_binary); 5240 1.1 christos } 5241 1.1 christos if (size2 != 0) 5242 1.1 christos { 5243 1.1 christos if (size2 > MAX_ALLOCA_SIZE) 5244 1.1 christos { 5245 1.1 christos wcs_string2 = TALLOC (size2 + 1, CHAR_T); 5246 1.1 christos mbs_offset2 = TALLOC (size2 + 1, int); 5247 1.1 christos is_binary = TALLOC (size2 + 1, char); 5248 1.1 christos } 5249 1.1 christos else 5250 1.1 christos { 5251 1.1 christos wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T); 5252 1.1 christos mbs_offset2 = REGEX_TALLOC (size2 + 1, int); 5253 1.1 christos is_binary = REGEX_TALLOC (size2 + 1, char); 5254 1.1 christos } 5255 1.1 christos if (!wcs_string2 || !mbs_offset2 || !is_binary) 5256 1.1 christos { 5257 1.1 christos FREE_WCS_BUFFERS (); 5258 1.1 christos if (size2 > MAX_ALLOCA_SIZE) 5259 1.1 christos free (is_binary); 5260 1.1 christos else 5261 1.1 christos FREE_VAR (is_binary); 5262 1.1 christos return -2; 5263 1.1 christos } 5264 1.1 christos wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2, 5265 1.1 christos mbs_offset2, is_binary); 5266 1.1 christos wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */ 5267 1.1 christos if (size2 > MAX_ALLOCA_SIZE) 5268 1.1 christos free (is_binary); 5269 1.1 christos else 5270 1.1 christos FREE_VAR (is_binary); 5271 1.1 christos } 5272 1.1 christos #endif /* WCHAR */ 5273 1.1 christos 5274 1.1 christos 5275 1.1 christos /* Loop through the string, looking for a place to start matching. */ 5276 1.1 christos for (;;) 5277 1.1 christos { 5278 1.1 christos /* If a fastmap is supplied, skip quickly over characters that 5279 1.1 christos cannot be the start of a match. If the pattern can match the 5280 1.1 christos null string, however, we don't need to skip characters; we want 5281 1.1 christos the first null string. 
*/ 5282 1.1 christos if (fastmap && startpos < total_size && !bufp->can_be_null) 5283 1.1 christos { 5284 1.1 christos if (range > 0) /* Searching forwards. */ 5285 1.1 christos { 5286 1.1 christos register const char *d; 5287 1.1 christos register int lim = 0; 5288 1.1 christos int irange = range; 5289 1.1 christos 5290 1.1 christos if (startpos < size1 && startpos + range >= size1) 5291 1.1 christos lim = range - (size1 - startpos); 5292 1.1 christos 5293 1.1 christos d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 5294 1.1 christos 5295 1.1 christos /* Written out as an if-else to avoid testing `translate' 5296 1.1 christos inside the loop. */ 5297 1.1 christos if (translate) 5298 1.1 christos while (range > lim 5299 1.1 christos && !fastmap[(unsigned char) 5300 1.1 christos translate[(unsigned char) *d++]]) 5301 1.1 christos range--; 5302 1.1 christos else 5303 1.1 christos while (range > lim && !fastmap[(unsigned char) *d++]) 5304 1.1 christos range--; 5305 1.1 christos 5306 1.1 christos startpos += irange - range; 5307 1.1 christos } 5308 1.1 christos else /* Searching backwards. */ 5309 1.1 christos { 5310 1.1 christos register CHAR_T c = (size1 == 0 || startpos >= size1 5311 1.1 christos ? string2[startpos - size1] 5312 1.1 christos : string1[startpos]); 5313 1.1 christos 5314 1.1 christos if (!fastmap[(unsigned char) TRANSLATE (c)]) 5315 1.1 christos goto advance; 5316 1.1 christos } 5317 1.1 christos } 5318 1.1 christos 5319 1.1 christos /* If can't match the null string, and that's all we have left, fail. 
*/ 5320 1.1 christos if (range >= 0 && startpos == total_size && fastmap 5321 1.1 christos && !bufp->can_be_null) 5322 1.1 christos { 5323 1.1 christos #ifdef WCHAR 5324 1.1 christos FREE_WCS_BUFFERS (); 5325 1.1 christos #endif 5326 1.1 christos return -1; 5327 1.1 christos } 5328 1.1 christos 5329 1.1 christos #ifdef WCHAR 5330 1.1 christos val = wcs_re_match_2_internal (bufp, string1, size1, string2, 5331 1.1 christos size2, startpos, regs, stop, 5332 1.1 christos wcs_string1, wcs_size1, 5333 1.1 christos wcs_string2, wcs_size2, 5334 1.1 christos mbs_offset1, mbs_offset2); 5335 1.1 christos #else /* BYTE */ 5336 1.1 christos val = byte_re_match_2_internal (bufp, string1, size1, string2, 5337 1.1 christos size2, startpos, regs, stop); 5338 1.1 christos #endif /* BYTE */ 5339 1.1 christos 5340 1.1 christos #ifndef REGEX_MALLOC 5341 1.1 christos # ifdef C_ALLOCA 5342 1.1 christos alloca (0); 5343 1.1 christos # endif 5344 1.1 christos #endif 5345 1.1 christos 5346 1.1 christos if (val >= 0) 5347 1.1 christos { 5348 1.1 christos #ifdef WCHAR 5349 1.1 christos FREE_WCS_BUFFERS (); 5350 1.1 christos #endif 5351 1.1 christos return startpos; 5352 1.1 christos } 5353 1.1 christos 5354 1.1 christos if (val == -2) 5355 1.1 christos { 5356 1.1 christos #ifdef WCHAR 5357 1.1 christos FREE_WCS_BUFFERS (); 5358 1.1 christos #endif 5359 1.1 christos return -2; 5360 1.1 christos } 5361 1.1 christos 5362 1.1 christos advance: 5363 1.1 christos if (!range) 5364 1.1 christos break; 5365 1.1 christos else if (range > 0) 5366 1.1 christos { 5367 1.1 christos range--; 5368 1.1 christos startpos++; 5369 1.1 christos } 5370 1.1 christos else 5371 1.1 christos { 5372 1.1 christos range++; 5373 1.1 christos startpos--; 5374 1.1 christos } 5375 1.1 christos } 5376 1.1 christos #ifdef WCHAR 5377 1.1 christos FREE_WCS_BUFFERS (); 5378 1.1 christos #endif 5379 1.1 christos return -1; 5380 1.1 christos } 5381 1.1 christos 5382 1.1 christos #ifdef WCHAR 5383 1.1 christos /* This converts PTR, 
a pointer into one of the search wchar_t strings 5384 1.1 christos `string1' and `string2' into an multibyte string offset from the 5385 1.1 christos beginning of that string. We use mbs_offset to optimize. 5386 1.1 christos See convert_mbs_to_wcs. */ 5387 1.1 christos # define POINTER_TO_OFFSET(ptr) \ 5388 1.1 christos (FIRST_STRING_P (ptr) \ 5389 1.1 christos ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0)) \ 5390 1.1 christos : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0) \ 5391 1.1 christos + csize1))) 5392 1.1 christos #else /* BYTE */ 5393 1.1 christos /* This converts PTR, a pointer into one of the search strings `string1' 5394 1.1 christos and `string2' into an offset from the beginning of that string. */ 5395 1.1 christos # define POINTER_TO_OFFSET(ptr) \ 5396 1.1 christos (FIRST_STRING_P (ptr) \ 5397 1.1 christos ? ((regoff_t) ((ptr) - string1)) \ 5398 1.1 christos : ((regoff_t) ((ptr) - string2 + size1))) 5399 1.1 christos #endif /* WCHAR */ 5400 1.1 christos 5401 1.1 christos /* Macros for dealing with the split strings in re_match_2. */ 5402 1.1 christos 5403 1.1 christos #define MATCHING_IN_FIRST_STRING (dend == end_match_1) 5404 1.1 christos 5405 1.1 christos /* Call before fetching a character with *d. This switches over to 5406 1.1 christos string2 if necessary. */ 5407 1.1 christos #define PREFETCH() \ 5408 1.1 christos while (d == dend) \ 5409 1.1 christos { \ 5410 1.1 christos /* End of string2 => fail. */ \ 5411 1.1 christos if (dend == end_match_2) \ 5412 1.1 christos goto fail; \ 5413 1.1 christos /* End of string1 => advance to string2. */ \ 5414 1.1 christos d = string2; \ 5415 1.1 christos dend = end_match_2; \ 5416 1.1 christos } 5417 1.1 christos 5418 1.1 christos /* Test if at very beginning or at very end of the virtual concatenation 5419 1.1 christos of `string1' and `string2'. If only one string, it's `string2'. */ 5420 1.1 christos #define AT_STRINGS_BEG(d) ((d) == (size1 ? 
string1 : string2) || !size2) 5421 1.1 christos #define AT_STRINGS_END(d) ((d) == end2) 5422 1.1 christos 5423 1.1 christos 5424 1.1 christos /* Test if D points to a character which is word-constituent. We have 5425 1.1 christos two special cases to check for: if past the end of string1, look at 5426 1.1 christos the first character in string2; and if before the beginning of 5427 1.1 christos string2, look at the last character in string1. */ 5428 1.1 christos #ifdef WCHAR 5429 1.1 christos /* Use internationalized API instead of SYNTAX. */ 5430 1.1 christos # define WORDCHAR_P(d) \ 5431 1.1 christos (iswalnum ((wint_t)((d) == end1 ? *string2 \ 5432 1.1 christos : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0 \ 5433 1.1 christos || ((d) == end1 ? *string2 \ 5434 1.1 christos : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_') 5435 1.1 christos #else /* BYTE */ 5436 1.1 christos # define WORDCHAR_P(d) \ 5437 1.1 christos (SYNTAX ((d) == end1 ? *string2 \ 5438 1.1 christos : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ 5439 1.1 christos == Sword) 5440 1.1 christos #endif /* WCHAR */ 5441 1.1 christos 5442 1.1 christos /* Disabled due to a compiler bug -- see comment at case wordbound */ 5443 1.1 christos #if 0 5444 1.1 christos /* Test if the character before D and the one at D differ with respect 5445 1.1 christos to being word-constituent. */ 5446 1.1 christos #define AT_WORD_BOUNDARY(d) \ 5447 1.1 christos (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ 5448 1.1 christos || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) 5449 1.1 christos #endif 5450 1.1 christos 5451 1.1 christos /* Free everything we malloc. 
*/ 5452 1.1 christos #ifdef MATCH_MAY_ALLOCATE 5453 1.1 christos # ifdef WCHAR 5454 1.1 christos # define FREE_VARIABLES() \ 5455 1.1 christos do { \ 5456 1.1 christos REGEX_FREE_STACK (fail_stack.stack); \ 5457 1.1 christos FREE_VAR (regstart); \ 5458 1.1 christos FREE_VAR (regend); \ 5459 1.1 christos FREE_VAR (old_regstart); \ 5460 1.1 christos FREE_VAR (old_regend); \ 5461 1.1 christos FREE_VAR (best_regstart); \ 5462 1.1 christos FREE_VAR (best_regend); \ 5463 1.1 christos FREE_VAR (reg_info); \ 5464 1.1 christos FREE_VAR (reg_dummy); \ 5465 1.1 christos FREE_VAR (reg_info_dummy); \ 5466 1.1 christos if (!cant_free_wcs_buf) \ 5467 1.1 christos { \ 5468 1.1 christos FREE_VAR (string1); \ 5469 1.1 christos FREE_VAR (string2); \ 5470 1.1 christos FREE_VAR (mbs_offset1); \ 5471 1.1 christos FREE_VAR (mbs_offset2); \ 5472 1.1 christos } \ 5473 1.1 christos } while (0) 5474 1.1 christos # else /* BYTE */ 5475 1.1 christos # define FREE_VARIABLES() \ 5476 1.1 christos do { \ 5477 1.1 christos REGEX_FREE_STACK (fail_stack.stack); \ 5478 1.1 christos FREE_VAR (regstart); \ 5479 1.1 christos FREE_VAR (regend); \ 5480 1.1 christos FREE_VAR (old_regstart); \ 5481 1.1 christos FREE_VAR (old_regend); \ 5482 1.1 christos FREE_VAR (best_regstart); \ 5483 1.1 christos FREE_VAR (best_regend); \ 5484 1.1 christos FREE_VAR (reg_info); \ 5485 1.1 christos FREE_VAR (reg_dummy); \ 5486 1.1 christos FREE_VAR (reg_info_dummy); \ 5487 1.1 christos } while (0) 5488 1.1 christos # endif /* WCHAR */ 5489 1.1 christos #else 5490 1.1 christos # ifdef WCHAR 5491 1.1 christos # define FREE_VARIABLES() \ 5492 1.1 christos do { \ 5493 1.1 christos if (!cant_free_wcs_buf) \ 5494 1.1 christos { \ 5495 1.1 christos FREE_VAR (string1); \ 5496 1.1 christos FREE_VAR (string2); \ 5497 1.1 christos FREE_VAR (mbs_offset1); \ 5498 1.1 christos FREE_VAR (mbs_offset2); \ 5499 1.1 christos } \ 5500 1.1 christos } while (0) 5501 1.1 christos # else /* BYTE */ 5502 1.1 christos # define FREE_VARIABLES() 
((void)0) /* Do nothing! But inhibit gcc warning. */ 5503 1.1 christos # endif /* WCHAR */ 5504 1.1 christos #endif /* not MATCH_MAY_ALLOCATE */ 5505 1.1 christos 5506 1.1 christos /* These values must meet several constraints. They must not be valid 5507 1.1 christos register values; since we have a limit of 255 registers (because 5508 1.1 christos we use only one byte in the pattern for the register number), we can 5509 1.1 christos use numbers larger than 255. They must differ by 1, because of 5510 1.1 christos NUM_FAILURE_ITEMS above. And the value for the lowest register must 5511 1.1 christos be larger than the value for the highest register, so we do not try 5512 1.1 christos to actually save any registers when none are active. */ 5513 1.1 christos #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) 5514 1.1 christos #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) 5515 1.1 christos 5516 1.1 christos #else /* not INSIDE_RECURSION */ 5518 1.1 christos /* Matching routines. */ 5519 1.1 christos 5520 1.1 christos #ifndef emacs /* Emacs never uses this. */ 5521 1.1 christos /* re_match is like re_match_2 except it takes only a single string. 
*/ 5522 1.1 christos 5523 1.1 christos int 5524 1.1 christos re_match (bufp, string, size, pos, regs) 5525 1.1 christos struct re_pattern_buffer *bufp; 5526 1.1 christos const char *string; 5527 1.1 christos int size, pos; 5528 1.1 christos struct re_registers *regs; 5529 1.1 christos { 5530 1.1 christos int result; 5531 1.1 christos # ifdef MBS_SUPPORT 5532 1.1 christos if (MB_CUR_MAX != 1) 5533 1.1 christos result = wcs_re_match_2_internal (bufp, NULL, 0, string, size, 5534 1.1 christos pos, regs, size, 5535 1.1 christos NULL, 0, NULL, 0, NULL, NULL); 5536 1.1 christos else 5537 1.1 christos # endif 5538 1.1 christos result = byte_re_match_2_internal (bufp, NULL, 0, string, size, 5539 1.1 christos pos, regs, size); 5540 1.1 christos # ifndef REGEX_MALLOC 5541 1.1 christos # ifdef C_ALLOCA 5542 1.1 christos alloca (0); 5543 1.1 christos # endif 5544 1.1 christos # endif 5545 1.1 christos return result; 5546 1.1 christos } 5547 1.1 christos # ifdef _LIBC 5548 1.1 christos weak_alias (__re_match, re_match) 5549 1.1 christos # endif 5550 1.1 christos #endif /* not emacs */ 5551 1.1 christos 5552 1.1 christos #endif /* not INSIDE_RECURSION */ 5553 1.1 christos 5554 1.1 christos #ifdef INSIDE_RECURSION 5555 1.1 christos static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p, 5556 1.1 christos UCHAR_T *end, 5557 1.1 christos PREFIX(register_info_type) *reg_info)); 5558 1.1 christos static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p, 5559 1.1 christos UCHAR_T *end, 5560 1.1 christos PREFIX(register_info_type) *reg_info)); 5561 1.1 christos static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p, 5562 1.1 christos UCHAR_T *end, 5563 1.1 christos PREFIX(register_info_type) *reg_info)); 5564 1.1 christos static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2, 5565 1.1 christos int len, char *translate)); 5566 1.1 christos #else /* not INSIDE_RECURSION */ 5567 1.1 christos 5568 1.1 christos /* 
re_match_2 matches the compiled pattern in BUFP against the 5569 1.1 christos the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 5570 1.1 christos and SIZE2, respectively). We start matching at POS, and stop 5571 1.1 christos matching at STOP. 5572 1.1 christos 5573 1.1 christos If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 5574 1.1 christos store offsets for the substring each group matched in REGS. See the 5575 1.1 christos documentation for exactly how many groups we fill. 5576 1.1 christos 5577 1.1 christos We return -1 if no match, -2 if an internal error (such as the 5578 1.1 christos failure stack overflowing). Otherwise, we return the length of the 5579 1.1 christos matched substring. */ 5580 1.1 christos 5581 1.1 christos int 5582 1.1 christos re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 5583 1.1 christos struct re_pattern_buffer *bufp; 5584 1.1 christos const char *string1, *string2; 5585 1.1 christos int size1, size2; 5586 1.1 christos int pos; 5587 1.1 christos struct re_registers *regs; 5588 1.1 christos int stop; 5589 1.1 christos { 5590 1.1 christos int result; 5591 1.1 christos # ifdef MBS_SUPPORT 5592 1.1 christos if (MB_CUR_MAX != 1) 5593 1.1 christos result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2, 5594 1.1 christos pos, regs, stop, 5595 1.1 christos NULL, 0, NULL, 0, NULL, NULL); 5596 1.1 christos else 5597 1.1 christos # endif 5598 1.1 christos result = byte_re_match_2_internal (bufp, string1, size1, string2, size2, 5599 1.1 christos pos, regs, stop); 5600 1.1 christos 5601 1.1 christos #ifndef REGEX_MALLOC 5602 1.1 christos # ifdef C_ALLOCA 5603 1.1 christos alloca (0); 5604 1.1 christos # endif 5605 1.1 christos #endif 5606 1.1 christos return result; 5607 1.1 christos } 5608 1.1 christos #ifdef _LIBC 5609 1.1 christos weak_alias (__re_match_2, re_match_2) 5610 1.1 christos #endif 5611 1.1 christos 5612 1.1 christos #endif /* not INSIDE_RECURSION */ 5613 1.1 
christos 5614 1.1 christos #ifdef INSIDE_RECURSION 5615 1.1 christos 5616 1.1 christos #ifdef WCHAR 5617 1.1 christos static int count_mbs_length PARAMS ((int *, int)); 5618 1.1 christos 5619 1.1 christos /* This check the substring (from 0, to length) of the multibyte string, 5620 1.1 christos to which offset_buffer correspond. And count how many wchar_t_characters 5621 1.1 christos the substring occupy. We use offset_buffer to optimization. 5622 1.1 christos See convert_mbs_to_wcs. */ 5623 1.1 christos 5624 1.1 christos static int 5625 1.1 christos count_mbs_length(offset_buffer, length) 5626 1.1 christos int *offset_buffer; 5627 1.1 christos int length; 5628 1.1 christos { 5629 1.1 christos int upper, lower; 5630 1.1 christos 5631 1.1 christos /* Check whether the size is valid. */ 5632 1.1 christos if (length < 0) 5633 1.1 christos return -1; 5634 1.1 christos 5635 1.1 christos if (offset_buffer == NULL) 5636 1.1 christos return 0; 5637 1.1 christos 5638 1.1 christos /* If there are no multibyte character, offset_buffer[i] == i. 5639 1.1 christos Optmize for this case. */ 5640 1.1 christos if (offset_buffer[length] == length) 5641 1.1 christos return length; 5642 1.1 christos 5643 1.1 christos /* Set up upper with length. 
(because for all i, offset_buffer[i] >= i) */ 5644 1.1 christos upper = length; 5645 1.1 christos lower = 0; 5646 1.1 christos 5647 1.1 christos while (true) 5648 1.1 christos { 5649 1.1 christos int middle = (lower + upper) / 2; 5650 1.1 christos if (middle == lower || middle == upper) 5651 1.1 christos break; 5652 1.1 christos if (offset_buffer[middle] > length) 5653 1.1 christos upper = middle; 5654 1.1 christos else if (offset_buffer[middle] < length) 5655 1.1 christos lower = middle; 5656 1.1 christos else 5657 1.1 christos return middle; 5658 1.1 christos } 5659 1.1 christos 5660 1.1 christos return -1; 5661 1.1 christos } 5662 1.1 christos #endif /* WCHAR */ 5663 1.1 christos 5664 1.1 christos /* This is a separate function so that we can force an alloca cleanup 5665 1.1 christos afterwards. */ 5666 1.1 christos #ifdef WCHAR 5667 1.1 christos static int 5668 1.1 christos wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos, 5669 1.1 christos regs, stop, string1, size1, string2, size2, 5670 1.1 christos mbs_offset1, mbs_offset2) 5671 1.1 christos struct re_pattern_buffer *bufp; 5672 1.1 christos const char *cstring1, *cstring2; 5673 1.1 christos int csize1, csize2; 5674 1.1 christos int pos; 5675 1.1 christos struct re_registers *regs; 5676 1.1 christos int stop; 5677 1.1 christos /* string1 == string2 == NULL means string1/2, size1/2 and 5678 1.1 christos mbs_offset1/2 need seting up in this function. */ 5679 1.1 christos /* We need wchar_t* buffers correspond to cstring1, cstring2. */ 5680 1.1 christos wchar_t *string1, *string2; 5681 1.1 christos /* We need the size of wchar_t buffers correspond to csize1, csize2. */ 5682 1.1 christos int size1, size2; 5683 1.1 christos /* offset buffer for optimizatoin. See convert_mbs_to_wc. 
*/ 5684 1.1 christos int *mbs_offset1, *mbs_offset2; 5685 1.1 christos #else /* BYTE */ 5686 1.1 christos static int 5687 1.1 christos byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos, 5688 1.1 christos regs, stop) 5689 1.1 christos struct re_pattern_buffer *bufp; 5690 1.1 christos const char *string1, *string2; 5691 1.1 christos int size1, size2; 5692 1.1 christos int pos; 5693 1.1 christos struct re_registers *regs; 5694 1.1 christos int stop; 5695 1.1 christos #endif /* BYTE */ 5696 1.1 christos { 5697 1.1 christos /* General temporaries. */ 5698 1.1 christos int mcnt; 5699 1.1 christos UCHAR_T *p1; 5700 1.1 christos #ifdef WCHAR 5701 1.1 christos /* They hold whether each wchar_t is binary data or not. */ 5702 1.1 christos char *is_binary = NULL; 5703 1.1 christos /* If true, we can't free string1/2, mbs_offset1/2. */ 5704 1.1 christos int cant_free_wcs_buf = 1; 5705 1.1 christos #endif /* WCHAR */ 5706 1.1 christos 5707 1.1 christos /* Just past the end of the corresponding string. */ 5708 1.1 christos const CHAR_T *end1, *end2; 5709 1.1 christos 5710 1.1 christos /* Pointers into string1 and string2, just past the last characters in 5711 1.1 christos each to consider matching. */ 5712 1.1 christos const CHAR_T *end_match_1, *end_match_2; 5713 1.1 christos 5714 1.1 christos /* Where we are in the data, and the end of the current string. */ 5715 1.1 christos const CHAR_T *d, *dend; 5716 1.1 christos 5717 1.1 christos /* Where we are in the pattern, and the end of the pattern. */ 5718 1.1 christos #ifdef WCHAR 5719 1.1 christos UCHAR_T *pattern, *p; 5720 1.1 christos register UCHAR_T *pend; 5721 1.1 christos #else /* BYTE */ 5722 1.1 christos UCHAR_T *p = bufp->buffer; 5723 1.1 christos register UCHAR_T *pend = p + bufp->used; 5724 1.1 christos #endif /* WCHAR */ 5725 1.1 christos 5726 1.1 christos /* Mark the opcode just after a start_memory, so we can test for an 5727 1.1 christos empty subpattern when we get to the stop_memory. 
*/ 5728 1.1 christos UCHAR_T *just_past_start_mem = 0; 5729 1.1 christos 5730 1.1 christos /* We use this to map every character in the string. */ 5731 1.1 christos RE_TRANSLATE_TYPE translate = bufp->translate; 5732 1.1 christos 5733 1.1 christos /* Failure point stack. Each place that can handle a failure further 5734 1.1 christos down the line pushes a failure point on this stack. It consists of 5735 1.1 christos restart, regend, and reg_info for all registers corresponding to 5736 1.1 christos the subexpressions we're currently inside, plus the number of such 5737 1.1 christos registers, and, finally, two char *'s. The first char * is where 5738 1.1 christos to resume scanning the pattern; the second one is where to resume 5739 1.1 christos scanning the strings. If the latter is zero, the failure point is 5740 1.1 christos a ``dummy''; if a failure happens and the failure point is a dummy, 5741 1.1 christos it gets discarded and the next next one is tried. */ 5742 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 5743 1.1 christos PREFIX(fail_stack_type) fail_stack; 5744 1.1 christos #endif 5745 1.1 christos #ifdef DEBUG 5746 1.1 christos static unsigned failure_id; 5747 1.1 christos unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 5748 1.1 christos #endif 5749 1.1 christos 5750 1.1 christos #ifdef REL_ALLOC 5751 1.1 christos /* This holds the pointer to the failure stack, when 5752 1.1 christos it is allocated relocatably. */ 5753 1.1 christos fail_stack_elt_t *failure_stack_ptr; 5754 1.1 christos #endif 5755 1.1 christos 5756 1.1 christos /* We fill all the registers internally, independent of what we 5757 1.1 christos return, for use in backreferences. The number here includes 5758 1.1 christos an element for register zero. */ 5759 1.1 christos size_t num_regs = bufp->re_nsub + 1; 5760 1.1 christos 5761 1.1 christos /* The currently active registers. 
*/ 5762 1.1 christos active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; 5763 1.1 christos active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; 5764 1.1 christos 5765 1.1 christos /* Information on the contents of registers. These are pointers into 5766 1.1 christos the input strings; they record just what was matched (on this 5767 1.1 christos attempt) by a subexpression part of the pattern, that is, the 5768 1.1 christos regnum-th regstart pointer points to where in the pattern we began 5769 1.1 christos matching and the regnum-th regend points to right after where we 5770 1.1 christos stopped matching the regnum-th subexpression. (The zeroth register 5771 1.1 christos keeps track of what the whole pattern matches.) */ 5772 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5773 1.1 christos const CHAR_T **regstart, **regend; 5774 1.1 christos #endif 5775 1.1 christos 5776 1.1 christos /* If a group that's operated upon by a repetition operator fails to 5777 1.1 christos match anything, then the register for its start will need to be 5778 1.1 christos restored because it will have been set to wherever in the string we 5779 1.1 christos are when we last see its open-group operator. Similarly for a 5780 1.1 christos register's end. */ 5781 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5782 1.1 christos const CHAR_T **old_regstart, **old_regend; 5783 1.1 christos #endif 5784 1.1 christos 5785 1.1 christos /* The is_active field of reg_info helps us keep track of which (possibly 5786 1.1 christos nested) subexpressions we are currently in. The matched_something 5787 1.1 christos field of reg_info[reg_num] helps us tell whether or not we have 5788 1.1 christos matched any of the pattern so far this time through the reg_num-th 5789 1.1 christos subexpression. These two fields get reset each time through any 5790 1.1 christos loop their register is in. 
*/ 5791 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 5792 1.1 christos PREFIX(register_info_type) *reg_info; 5793 1.1 christos #endif 5794 1.1 christos 5795 1.1 christos /* The following record the register info as found in the above 5796 1.1 christos variables when we find a match better than any we've seen before. 5797 1.1 christos This happens as we backtrack through the failure points, which in 5798 1.1 christos turn happens only if we have not yet matched the entire string. */ 5799 1.1 christos unsigned best_regs_set = false; 5800 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5801 1.1 christos const CHAR_T **best_regstart, **best_regend; 5802 1.1 christos #endif 5803 1.1 christos 5804 1.1 christos /* Logically, this is `best_regend[0]'. But we don't want to have to 5805 1.1 christos allocate space for that if we're not allocating space for anything 5806 1.1 christos else (see below). Also, we never need info about register 0 for 5807 1.1 christos any of the other register vectors, and it seems rather a kludge to 5808 1.1 christos treat `best_regend' differently than the rest. So we keep track of 5809 1.1 christos the end of the best match so far in a separate variable. We 5810 1.1 christos initialize this to NULL so that when we backtrack the first time 5811 1.1 christos and need to test it, it's not garbage. */ 5812 1.1 christos const CHAR_T *match_end = NULL; 5813 1.1 christos 5814 1.1 christos /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 5815 1.1 christos int set_regs_matched_done = 0; 5816 1.1 christos 5817 1.1 christos /* Used when we pop values we don't care about. */ 5818 1.1 christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ 5819 1.1 christos const CHAR_T **reg_dummy; 5820 1.1 christos PREFIX(register_info_type) *reg_info_dummy; 5821 1.1 christos #endif 5822 1.1 christos 5823 1.1 christos #ifdef DEBUG 5824 1.1 christos /* Counts the total number of registers pushed. */ 5825 1.1 christos unsigned num_regs_pushed = 0; 5826 1.1 christos #endif 5827 1.1 christos 5828 1.1 christos /* Definitions for state transitions. More efficiently for gcc. */ 5829 1.1 christos #ifdef __GNUC__ 5830 1.1 christos # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED 5831 1.1 christos # define NEXT \ 5832 1.1 christos do \ 5833 1.1 christos { \ 5834 1.1 christos int offset; \ 5835 1.1 christos const void *__unbounded ptr; \ 5836 1.1 christos offset = (p == pend \ 5837 1.1 christos ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]); \ 5838 1.1 christos ptr = &&end_of_pattern + offset; \ 5839 1.1 christos goto *ptr; \ 5840 1.1 christos } \ 5841 1.1 christos while (0) 5842 1.1 christos # define REF(x) \ 5843 1.1 christos &&label_##x - &&end_of_pattern 5844 1.1 christos # define JUMP_TABLE_TYPE const int 5845 1.1 christos # else 5846 1.1 christos # define NEXT \ 5847 1.1 christos do \ 5848 1.1 christos { \ 5849 1.1 christos const void *__unbounded ptr; \ 5850 1.1 christos ptr = (p == pend ? 
&&end_of_pattern \ 5851 1.1 christos : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]); \ 5852 1.1 christos goto *ptr; \ 5853 1.1 christos } \ 5854 1.1 christos while (0) 5855 1.1 christos # define REF(x) \ 5856 1.1 christos &&label_##x 5857 1.1 christos # define JUMP_TABLE_TYPE const void *const 5858 1.1 christos # endif 5859 1.1 christos # define CASE(x) label_##x 5860 1.1 christos static JUMP_TABLE_TYPE jmptable[] = 5861 1.1 christos { 5862 1.1 christos REF (no_op), 5863 1.1 christos REF (succeed), 5864 1.1 christos REF (exactn), 5865 1.1 christos # ifdef MBS_SUPPORT 5866 1.1 christos REF (exactn_bin), 5867 1.1 christos # endif 5868 1.1 christos REF (anychar), 5869 1.1 christos REF (charset), 5870 1.1 christos REF (charset_not), 5871 1.1 christos REF (start_memory), 5872 1.1 christos REF (stop_memory), 5873 1.1 christos REF (duplicate), 5874 1.1 christos REF (begline), 5875 1.1 christos REF (endline), 5876 1.1 christos REF (begbuf), 5877 1.1 christos REF (endbuf), 5878 1.1 christos REF (jump), 5879 1.1 christos REF (jump_past_alt), 5880 1.1 christos REF (on_failure_jump), 5881 1.1 christos REF (on_failure_keep_string_jump), 5882 1.1 christos REF (pop_failure_jump), 5883 1.1 christos REF (maybe_pop_jump), 5884 1.1 christos REF (dummy_failure_jump), 5885 1.1 christos REF (push_dummy_failure), 5886 1.1 christos REF (succeed_n), 5887 1.1 christos REF (jump_n), 5888 1.1 christos REF (set_number_at), 5889 1.1 christos REF (wordchar), 5890 1.1 christos REF (notwordchar), 5891 1.1 christos REF (wordbeg), 5892 1.1 christos REF (wordend), 5893 1.1 christos REF (wordbound), 5894 1.1 christos REF (notwordbound) 5895 1.1 christos # ifdef emacs 5896 1.1 christos ,REF (before_dot), 5897 1.1 christos REF (at_dot), 5898 1.1 christos REF (after_dot), 5899 1.1 christos REF (syntaxspec), 5900 1.1 christos REF (notsyntaxspec) 5901 1.1 christos # endif 5902 1.1 christos }; 5903 1.1 christos #else 5904 1.1 christos # define NEXT \ 5905 1.1 christos break 5906 1.1 christos # define 
CASE(x) \ 5907 1.1 christos case x 5908 1.1 christos #endif 5909 1.1 christos 5910 1.1 christos DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 5911 1.1 christos 5912 1.1 christos INIT_FAIL_STACK (); 5913 1.1 christos 5914 1.1 christos #ifdef MATCH_MAY_ALLOCATE 5915 1.1 christos /* Do not bother to initialize all the register variables if there are 5916 1.1 christos no groups in the pattern, as it takes a fair amount of time. If 5917 1.1 christos there are groups, we include space for register 0 (the whole 5918 1.1 christos pattern), even though we never use it, since it simplifies the 5919 1.1 christos array indexing. We should fix this. */ 5920 1.1 christos if (bufp->re_nsub) 5921 1.1 christos { 5922 1.1 christos regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5923 1.1 christos regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5924 1.1 christos old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5925 1.1 christos old_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5926 1.1 christos best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5927 1.1 christos best_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5928 1.1 christos reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 5929 1.1 christos reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *); 5930 1.1 christos reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 5931 1.1 christos 5932 1.1 christos if (!(regstart && regend && old_regstart && old_regend && reg_info 5933 1.1 christos && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 5934 1.1 christos { 5935 1.1 christos FREE_VARIABLES (); 5936 1.1 christos return -2; 5937 1.1 christos } 5938 1.1 christos } 5939 1.1 christos else 5940 1.1 christos { 5941 1.1 christos /* We must initialize all our variables to NULL, so that 5942 1.1 christos `FREE_VARIABLES' doesn't try to free them. 
*/ 5943 1.1 christos regstart = regend = old_regstart = old_regend = best_regstart 5944 1.1 christos = best_regend = reg_dummy = NULL; 5945 1.1 christos reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL; 5946 1.1 christos } 5947 1.1 christos #endif /* MATCH_MAY_ALLOCATE */ 5948 1.1 christos 5949 1.1 christos /* The starting position is bogus. */ 5950 1.1 christos #ifdef WCHAR 5951 1.1 christos if (pos < 0 || pos > csize1 + csize2) 5952 1.1 christos #else /* BYTE */ 5953 1.1 christos if (pos < 0 || pos > size1 + size2) 5954 1.1 christos #endif 5955 1.1 christos { 5956 1.1 christos FREE_VARIABLES (); 5957 1.1 christos return -1; 5958 1.1 christos } 5959 1.1 christos 5960 1.1 christos #ifdef WCHAR 5961 1.1 christos /* Allocate wchar_t array for string1 and string2 and 5962 1.1 christos fill them with converted string. */ 5963 1.1 christos if (string1 == NULL && string2 == NULL) 5964 1.1 christos { 5965 1.1 christos /* We need seting up buffers here. */ 5966 1.1 christos 5967 1.1 christos /* We must free wcs buffers in this function. 
*/ 5968 1.1 christos cant_free_wcs_buf = 0; 5969 1.1 christos 5970 1.1 christos if (csize1 != 0) 5971 1.1 christos { 5972 1.1 christos string1 = REGEX_TALLOC (csize1 + 1, CHAR_T); 5973 1.1 christos mbs_offset1 = REGEX_TALLOC (csize1 + 1, int); 5974 1.1 christos is_binary = REGEX_TALLOC (csize1 + 1, char); 5975 1.1 christos if (!string1 || !mbs_offset1 || !is_binary) 5976 1.1 christos { 5977 1.1 christos FREE_VAR (string1); 5978 1.1 christos FREE_VAR (mbs_offset1); 5979 1.1 christos FREE_VAR (is_binary); 5980 1.1 christos return -2; 5981 1.1 christos } 5982 1.1 christos } 5983 1.1 christos if (csize2 != 0) 5984 1.1 christos { 5985 1.1 christos string2 = REGEX_TALLOC (csize2 + 1, CHAR_T); 5986 1.1 christos mbs_offset2 = REGEX_TALLOC (csize2 + 1, int); 5987 1.1 christos is_binary = REGEX_TALLOC (csize2 + 1, char); 5988 1.1 christos if (!string2 || !mbs_offset2 || !is_binary) 5989 1.1 christos { 5990 1.1 christos FREE_VAR (string1); 5991 1.1 christos FREE_VAR (mbs_offset1); 5992 1.1 christos FREE_VAR (string2); 5993 1.1 christos FREE_VAR (mbs_offset2); 5994 1.1 christos FREE_VAR (is_binary); 5995 1.1 christos return -2; 5996 1.1 christos } 5997 1.1 christos size2 = convert_mbs_to_wcs(string2, cstring2, csize2, 5998 1.1 christos mbs_offset2, is_binary); 5999 1.1 christos string2[size2] = L'\0'; /* for a sentinel */ 6000 1.1 christos FREE_VAR (is_binary); 6001 1.1 christos } 6002 1.1 christos } 6003 1.1 christos 6004 1.1 christos /* We need to cast pattern to (wchar_t*), because we casted this compiled 6005 1.1 christos pattern to (char*) in regex_compile. */ 6006 1.1 christos p = pattern = (CHAR_T*)bufp->buffer; 6007 1.1 christos pend = (CHAR_T*)(bufp->buffer + bufp->used); 6008 1.1 christos 6009 1.1 christos #endif /* WCHAR */ 6010 1.1 christos 6011 1.1 christos /* Initialize subexpression text positions to -1 to mark ones that no 6012 1.1 christos start_memory/stop_memory has been seen for. Also initialize the 6013 1.1 christos register information struct. 
*/ 6014 1.1 christos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 6015 1.1 christos { 6016 1.1 christos regstart[mcnt] = regend[mcnt] 6017 1.1 christos = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 6018 1.1 christos 6019 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 6020 1.1 christos IS_ACTIVE (reg_info[mcnt]) = 0; 6021 1.1 christos MATCHED_SOMETHING (reg_info[mcnt]) = 0; 6022 1.1 christos EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 6023 1.1 christos } 6024 1.1 christos 6025 1.1 christos /* We move `string1' into `string2' if the latter's empty -- but not if 6026 1.1 christos `string1' is null. */ 6027 1.1 christos if (size2 == 0 && string1 != NULL) 6028 1.1 christos { 6029 1.1 christos string2 = string1; 6030 1.1 christos size2 = size1; 6031 1.1 christos string1 = 0; 6032 1.1 christos size1 = 0; 6033 1.1 christos #ifdef WCHAR 6034 1.1 christos mbs_offset2 = mbs_offset1; 6035 1.1 christos csize2 = csize1; 6036 1.1 christos mbs_offset1 = NULL; 6037 1.1 christos csize1 = 0; 6038 1.1 christos #endif 6039 1.1 christos } 6040 1.1 christos end1 = string1 + size1; 6041 1.1 christos end2 = string2 + size2; 6042 1.1 christos 6043 1.1 christos /* Compute where to stop matching, within the two strings. */ 6044 1.1 christos #ifdef WCHAR 6045 1.1 christos if (stop <= csize1) 6046 1.1 christos { 6047 1.1 christos mcnt = count_mbs_length(mbs_offset1, stop); 6048 1.1 christos end_match_1 = string1 + mcnt; 6049 1.1 christos end_match_2 = string2; 6050 1.1 christos } 6051 1.1 christos else 6052 1.1 christos { 6053 1.1 christos if (stop > csize1 + csize2) 6054 1.1 christos stop = csize1 + csize2; 6055 1.1 christos end_match_1 = end1; 6056 1.1 christos mcnt = count_mbs_length(mbs_offset2, stop-csize1); 6057 1.1 christos end_match_2 = string2 + mcnt; 6058 1.1 christos } 6059 1.1 christos if (mcnt < 0) 6060 1.1 christos { /* count_mbs_length return error. 
*/ 6061 1.1 christos FREE_VARIABLES (); 6062 1.1 christos return -1; 6063 1.1 christos } 6064 1.1 christos #else 6065 1.1 christos if (stop <= size1) 6066 1.1 christos { 6067 1.1 christos end_match_1 = string1 + stop; 6068 1.1 christos end_match_2 = string2; 6069 1.1 christos } 6070 1.1 christos else 6071 1.1 christos { 6072 1.1 christos end_match_1 = end1; 6073 1.1 christos end_match_2 = string2 + stop - size1; 6074 1.1 christos } 6075 1.1 christos #endif /* WCHAR */ 6076 1.1 christos 6077 1.1 christos /* `p' scans through the pattern as `d' scans through the data. 6078 1.1 christos `dend' is the end of the input string that `d' points within. `d' 6079 1.1 christos is advanced into the following input string whenever necessary, but 6080 1.1 christos this happens before fetching; therefore, at the beginning of the 6081 1.1 christos loop, `d' can be pointing at the end of a string, but it cannot 6082 1.1 christos equal `string2'. */ 6083 1.1 christos #ifdef WCHAR 6084 1.1 christos if (size1 > 0 && pos <= csize1) 6085 1.1 christos { 6086 1.1 christos mcnt = count_mbs_length(mbs_offset1, pos); 6087 1.1 christos d = string1 + mcnt; 6088 1.1 christos dend = end_match_1; 6089 1.1 christos } 6090 1.1 christos else 6091 1.1 christos { 6092 1.1 christos mcnt = count_mbs_length(mbs_offset2, pos-csize1); 6093 1.1 christos d = string2 + mcnt; 6094 1.1 christos dend = end_match_2; 6095 1.1 christos } 6096 1.1 christos 6097 1.1 christos if (mcnt < 0) 6098 1.1 christos { /* count_mbs_length return error. 
*/ 6099 1.1 christos FREE_VARIABLES (); 6100 1.1 christos return -1; 6101 1.1 christos } 6102 1.1 christos #else 6103 1.1 christos if (size1 > 0 && pos <= size1) 6104 1.1 christos { 6105 1.1 christos d = string1 + pos; 6106 1.1 christos dend = end_match_1; 6107 1.1 christos } 6108 1.1 christos else 6109 1.1 christos { 6110 1.1 christos d = string2 + pos - size1; 6111 1.1 christos dend = end_match_2; 6112 1.1 christos } 6113 1.1 christos #endif /* WCHAR */ 6114 1.1 christos 6115 1.1 christos DEBUG_PRINT1 ("The compiled pattern is:\n"); 6116 1.1 christos DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 6117 1.1 christos DEBUG_PRINT1 ("The string to match is: `"); 6118 1.1 christos DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 6119 1.1 christos DEBUG_PRINT1 ("'\n"); 6120 1.1 christos 6121 1.1 christos /* This loops over pattern commands. It exits by returning from the 6122 1.1 christos function if the match is complete, or it drops through if the match 6123 1.1 christos fails at this starting point in the input data. */ 6124 1.1 christos for (;;) 6125 1.1 christos { 6126 1.1 christos #ifdef _LIBC 6127 1.1 christos DEBUG_PRINT2 ("\n%p: ", p); 6128 1.1 christos #else 6129 1.1 christos DEBUG_PRINT2 ("\n0x%x: ", p); 6130 1.1 christos #endif 6131 1.1 christos 6132 1.1 christos #ifdef __GNUC__ 6133 1.1 christos NEXT; 6134 1.1 christos #else 6135 1.1 christos if (p == pend) 6136 1.1 christos #endif 6137 1.1 christos { 6138 1.1 christos #ifdef __GNUC__ 6139 1.1 christos end_of_pattern: 6140 1.1 christos #endif 6141 1.1 christos /* End of pattern means we might have succeeded. */ 6142 1.1 christos DEBUG_PRINT1 ("end of pattern ... "); 6143 1.1 christos 6144 1.1 christos /* If we haven't matched the entire string, and we want the 6145 1.1 christos longest match, try backtracking. 
*/ 6146 1.1 christos if (d != end_match_2) 6147 1.1 christos { 6148 1.1 christos /* 1 if this match ends in the same string (string1 or string2) 6149 1.1 christos as the best previous match. */ 6150 1.1 christos boolean same_str_p = (FIRST_STRING_P (match_end) 6151 1.1 christos == MATCHING_IN_FIRST_STRING); 6152 1.1 christos /* 1 if this match is the best seen so far. */ 6153 1.1 christos boolean best_match_p; 6154 1.1 christos 6155 1.1 christos /* AIX compiler got confused when this was combined 6156 1.1 christos with the previous declaration. */ 6157 1.1 christos if (same_str_p) 6158 1.1 christos best_match_p = d > match_end; 6159 1.1 christos else 6160 1.1 christos best_match_p = !MATCHING_IN_FIRST_STRING; 6161 1.1 christos 6162 1.1 christos DEBUG_PRINT1 ("backtracking.\n"); 6163 1.1 christos 6164 1.1 christos if (!FAIL_STACK_EMPTY ()) 6165 1.1 christos { /* More failure points to try. */ 6166 1.1 christos 6167 1.1 christos /* If exceeds best match so far, save it. */ 6168 1.1 christos if (!best_regs_set || best_match_p) 6169 1.1 christos { 6170 1.1 christos best_regs_set = true; 6171 1.1 christos match_end = d; 6172 1.1 christos 6173 1.1 christos DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 6174 1.1 christos 6175 1.1 christos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 6176 1.1 christos { 6177 1.1 christos best_regstart[mcnt] = regstart[mcnt]; 6178 1.1 christos best_regend[mcnt] = regend[mcnt]; 6179 1.1 christos } 6180 1.1 christos } 6181 1.1 christos goto fail; 6182 1.1 christos } 6183 1.1 christos 6184 1.1 christos /* If no failure points, don't restore garbage. And if 6185 1.1 christos last match is real best match, don't restore second 6186 1.1 christos best one. */ 6187 1.1 christos else if (best_regs_set && !best_match_p) 6188 1.1 christos { 6189 1.1 christos restore_best_regs: 6190 1.1 christos /* Restore best match. It may happen that `dend == 6191 1.1 christos end_match_1' while the restored d is in string2. 
6192 1.1 christos For example, the pattern `x.*y.*z' against the 6193 1.1 christos strings `x-' and `y-z-', if the two strings are 6194 1.1 christos not consecutive in memory. */ 6195 1.1 christos DEBUG_PRINT1 ("Restoring best registers.\n"); 6196 1.1 christos 6197 1.1 christos d = match_end; 6198 1.1 christos dend = ((d >= string1 && d <= end1) 6199 1.1 christos ? end_match_1 : end_match_2); 6200 1.1 christos 6201 1.1 christos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 6202 1.1 christos { 6203 1.1 christos regstart[mcnt] = best_regstart[mcnt]; 6204 1.1 christos regend[mcnt] = best_regend[mcnt]; 6205 1.1 christos } 6206 1.1 christos } 6207 1.1 christos } /* d != end_match_2 */ 6208 1.1 christos 6209 1.1 christos succeed_label: 6210 1.1 christos DEBUG_PRINT1 ("Accepting match.\n"); 6211 1.1 christos /* If caller wants register contents data back, do it. */ 6212 1.1 christos if (regs && !bufp->no_sub) 6213 1.1 christos { 6214 1.1 christos /* Have the register data arrays been allocated? */ 6215 1.1 christos if (bufp->regs_allocated == REGS_UNALLOCATED) 6216 1.1 christos { /* No. So allocate them with malloc. We need one 6217 1.1 christos extra element beyond `num_regs' for the `-1' marker 6218 1.1 christos GNU code uses. */ 6219 1.1 christos regs->num_regs = MAX (RE_NREGS, num_regs + 1); 6220 1.1 christos regs->start = TALLOC (regs->num_regs, regoff_t); 6221 1.1 christos regs->end = TALLOC (regs->num_regs, regoff_t); 6222 1.1 christos if (regs->start == NULL || regs->end == NULL) 6223 1.1 christos { 6224 1.1 christos FREE_VARIABLES (); 6225 1.1 christos return -2; 6226 1.1 christos } 6227 1.1 christos bufp->regs_allocated = REGS_REALLOCATE; 6228 1.1 christos } 6229 1.1 christos else if (bufp->regs_allocated == REGS_REALLOCATE) 6230 1.1 christos { /* Yes. If we need more elements than were already 6231 1.1 christos allocated, reallocate them. If we need fewer, just 6232 1.1 christos leave it alone. 
*/ 6233 1.1 christos if (regs->num_regs < num_regs + 1) 6234 1.1 christos { 6235 1.1 christos regs->num_regs = num_regs + 1; 6236 1.1 christos RETALLOC (regs->start, regs->num_regs, regoff_t); 6237 1.1 christos RETALLOC (regs->end, regs->num_regs, regoff_t); 6238 1.1 christos if (regs->start == NULL || regs->end == NULL) 6239 1.1 christos { 6240 1.1 christos FREE_VARIABLES (); 6241 1.1 christos return -2; 6242 1.1 christos } 6243 1.1 christos } 6244 1.1 christos } 6245 1.1 christos else 6246 1.1 christos { 6247 1.1 christos /* These braces fend off a "empty body in an else-statement" 6248 1.1 christos warning under GCC when assert expands to nothing. */ 6249 1.1 christos assert (bufp->regs_allocated == REGS_FIXED); 6250 1.1 christos } 6251 1.1 christos 6252 1.1 christos /* Convert the pointer data in `regstart' and `regend' to 6253 1.1 christos indices. Register zero has to be set differently, 6254 1.1 christos since we haven't kept track of any info for it. */ 6255 1.1 christos if (regs->num_regs > 0) 6256 1.1 christos { 6257 1.1 christos regs->start[0] = pos; 6258 1.1 christos #ifdef WCHAR 6259 1.1 christos if (MATCHING_IN_FIRST_STRING) 6260 1.1 christos regs->end[0] = (mbs_offset1 != NULL ? 6261 1.1 christos mbs_offset1[d-string1] : 0); 6262 1.1 christos else 6263 1.1 christos regs->end[0] = csize1 + (mbs_offset2 != NULL 6264 1.1 christos ? mbs_offset2[d-string2] : 0); 6265 1.1 christos #else 6266 1.1 christos regs->end[0] = (MATCHING_IN_FIRST_STRING 6267 1.1 christos ? ((regoff_t) (d - string1)) 6268 1.1 christos : ((regoff_t) (d - string2 + size1))); 6269 1.1 christos #endif /* WCHAR */ 6270 1.1 christos } 6271 1.1 christos 6272 1.1 christos /* Go through the first `min (num_regs, regs->num_regs)' 6273 1.1 christos registers, since that is all we initialized. 
*/ 6274 1.1 christos for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); 6275 1.1 christos mcnt++) 6276 1.1 christos { 6277 1.1 christos if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 6278 1.1 christos regs->start[mcnt] = regs->end[mcnt] = -1; 6279 1.1 christos else 6280 1.1 christos { 6281 1.1 christos regs->start[mcnt] 6282 1.1 christos = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 6283 1.1 christos regs->end[mcnt] 6284 1.1 christos = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 6285 1.1 christos } 6286 1.1 christos } 6287 1.1 christos 6288 1.1 christos /* If the regs structure we return has more elements than 6289 1.1 christos were in the pattern, set the extra elements to -1. If 6290 1.1 christos we (re)allocated the registers, this is the case, 6291 1.1 christos because we always allocate enough to have at least one 6292 1.1 christos -1 at the end. */ 6293 1.1 christos for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) 6294 1.1 christos regs->start[mcnt] = regs->end[mcnt] = -1; 6295 1.1 christos } /* regs && !bufp->no_sub */ 6296 1.1 christos 6297 1.1 christos DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 6298 1.1 christos nfailure_points_pushed, nfailure_points_popped, 6299 1.1 christos nfailure_points_pushed - nfailure_points_popped); 6300 1.1 christos DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 6301 1.1 christos 6302 1.1 christos #ifdef WCHAR 6303 1.1 christos if (MATCHING_IN_FIRST_STRING) 6304 1.1 christos mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0; 6305 1.1 christos else 6306 1.1 christos mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) + 6307 1.1 christos csize1; 6308 1.1 christos mcnt -= pos; 6309 1.1 christos #else 6310 1.1 christos mcnt = d - pos - (MATCHING_IN_FIRST_STRING 6311 1.1 christos ? 
string1 : string2 - size1); 6312 1.1 christos #endif /* WCHAR */ 6313 1.1 christos 6314 1.1 christos DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 6315 1.1 christos 6316 1.1 christos FREE_VARIABLES (); 6317 1.1 christos return mcnt; 6318 1.1 christos } 6319 1.1 christos 6320 1.1 christos #ifndef __GNUC__ 6321 1.1 christos /* Otherwise match next pattern command. */ 6322 1.1 christos switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 6323 1.1 christos { 6324 1.1 christos #endif 6325 1.1 christos /* Ignore these. Used to ignore the n of succeed_n's which 6326 1.1 christos currently have n == 0. */ 6327 1.1 christos CASE (no_op): 6328 1.1 christos DEBUG_PRINT1 ("EXECUTING no_op.\n"); 6329 1.1 christos NEXT; 6330 1.1 christos 6331 1.1 christos CASE (succeed): 6332 1.1 christos DEBUG_PRINT1 ("EXECUTING succeed.\n"); 6333 1.1 christos goto succeed_label; 6334 1.1 christos 6335 1.1 christos /* Match the next n pattern characters exactly. The following 6336 1.1 christos byte in the pattern defines n, and the n bytes after that 6337 1.1 christos are the characters to match. */ 6338 1.1 christos CASE (exactn): 6339 1.1 christos #ifdef MBS_SUPPORT 6340 1.1 christos CASE (exactn_bin): 6341 1.1 christos #endif 6342 1.1 christos mcnt = *p++; 6343 1.1 christos DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 6344 1.1 christos 6345 1.1 christos /* This is written out as an if-else so we don't waste time 6346 1.1 christos testing `translate' inside the loop. 
*/ 6347 1.1 christos if (translate) 6348 1.1 christos { 6349 1.1 christos do 6350 1.1 christos { 6351 1.1 christos PREFETCH (); 6352 1.1 christos #ifdef WCHAR 6353 1.1 christos if (*d <= 0xff) 6354 1.1 christos { 6355 1.1 christos if ((UCHAR_T) translate[(unsigned char) *d++] 6356 1.1 christos != (UCHAR_T) *p++) 6357 1.1 christos goto fail; 6358 1.1 christos } 6359 1.1 christos else 6360 1.1 christos { 6361 1.1 christos if (*d++ != (CHAR_T) *p++) 6362 1.1 christos goto fail; 6363 1.1 christos } 6364 1.1 christos #else 6365 1.1 christos if ((UCHAR_T) translate[(unsigned char) *d++] 6366 1.1 christos != (UCHAR_T) *p++) 6367 1.1 christos goto fail; 6368 1.1 christos #endif /* WCHAR */ 6369 1.1 christos } 6370 1.1 christos while (--mcnt); 6371 1.1 christos } 6372 1.1 christos else 6373 1.1 christos { 6374 1.1 christos do 6375 1.1 christos { 6376 1.1 christos PREFETCH (); 6377 1.1 christos if (*d++ != (CHAR_T) *p++) goto fail; 6378 1.1 christos } 6379 1.1 christos while (--mcnt); 6380 1.1 christos } 6381 1.1 christos SET_REGS_MATCHED (); 6382 1.1 christos NEXT; 6383 1.1 christos 6384 1.1 christos 6385 1.1 christos /* Match any character except possibly a newline or a null. 
*/ 6386 1.1 christos CASE (anychar): 6387 1.1 christos DEBUG_PRINT1 ("EXECUTING anychar.\n"); 6388 1.1 christos 6389 1.1 christos PREFETCH (); 6390 1.1 christos 6391 1.1 christos if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 6392 1.1 christos || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 6393 1.1 christos goto fail; 6394 1.1 christos 6395 1.1 christos SET_REGS_MATCHED (); 6396 1.1 christos DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d); 6397 1.1 christos d++; 6398 1.1 christos NEXT; 6399 1.1 christos 6400 1.1 christos 6401 1.1 christos CASE (charset): 6402 1.1 christos CASE (charset_not): 6403 1.1 christos { 6404 1.1 christos register UCHAR_T c; 6405 1.1 christos #ifdef WCHAR 6406 1.1 christos unsigned int i, char_class_length, coll_symbol_length, 6407 1.1 christos equiv_class_length, ranges_length, chars_length, length; 6408 1.1 christos CHAR_T *workp, *workp2, *charset_top; 6409 1.1 christos #define WORK_BUFFER_SIZE 128 6410 1.1 christos CHAR_T str_buf[WORK_BUFFER_SIZE]; 6411 1.1 christos # ifdef _LIBC 6412 1.1 christos uint32_t nrules; 6413 1.1 christos # endif /* _LIBC */ 6414 1.1 christos #endif /* WCHAR */ 6415 1.1 christos boolean not = (re_opcode_t) *(p - 1) == charset_not; 6416 1.1 christos 6417 1.1 christos DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 6418 1.1 christos PREFETCH (); 6419 1.1 christos c = TRANSLATE (*d); /* The character to match. 
*/ 6420 1.1 christos #ifdef WCHAR 6421 1.1 christos # ifdef _LIBC 6422 1.1 christos nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 6423 1.1 christos # endif /* _LIBC */ 6424 1.1 christos charset_top = p - 1; 6425 1.1 christos char_class_length = *p++; 6426 1.1 christos coll_symbol_length = *p++; 6427 1.1 christos equiv_class_length = *p++; 6428 1.1 christos ranges_length = *p++; 6429 1.1 christos chars_length = *p++; 6430 1.1 christos /* p points charset[6], so the address of the next instruction 6431 1.1 christos (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'], 6432 1.1 christos where l=length of char_classes, m=length of collating_symbol, 6433 1.1 christos n=equivalence_class, o=length of char_range, 6434 1.1 christos p'=length of character. */ 6435 1.1 christos workp = p; 6436 1.1 christos /* Update p to indicate the next instruction. */ 6437 1.1 christos p += char_class_length + coll_symbol_length+ equiv_class_length + 6438 1.1 christos 2*ranges_length + chars_length; 6439 1.1 christos 6440 1.1 christos /* match with char_class? */ 6441 1.1 christos for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE) 6442 1.1 christos { 6443 1.1 christos wctype_t wctype; 6444 1.1 christos uintptr_t alignedp = ((uintptr_t)workp 6445 1.1 christos + __alignof__(wctype_t) - 1) 6446 1.1 christos & ~(uintptr_t)(__alignof__(wctype_t) - 1); 6447 1.1 christos wctype = *((wctype_t*)alignedp); 6448 1.1 christos workp += CHAR_CLASS_SIZE; 6449 1.1 christos if (iswctype((wint_t)c, wctype)) 6450 1.1 christos goto char_set_matched; 6451 1.1 christos } 6452 1.1 christos 6453 1.1 christos /* match with collating_symbol? 
*/ 6454 1.1 christos # ifdef _LIBC 6455 1.1 christos if (nrules != 0) 6456 1.1 christos { 6457 1.1 christos const unsigned char *extra = (const unsigned char *) 6458 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 6459 1.1 christos 6460 1.1 christos for (workp2 = workp + coll_symbol_length ; workp < workp2 ; 6461 1.1 christos workp++) 6462 1.1 christos { 6463 1.1 christos int32_t *wextra; 6464 1.1 christos wextra = (int32_t*)(extra + *workp++); 6465 1.1 christos for (i = 0; i < *wextra; ++i) 6466 1.1 christos if (TRANSLATE(d[i]) != wextra[1 + i]) 6467 1.1 christos break; 6468 1.1 christos 6469 1.1 christos if (i == *wextra) 6470 1.1 christos { 6471 1.1 christos /* Update d, however d will be incremented at 6472 1.1 christos char_set_matched:, we decrement d here. */ 6473 1.1 christos d += i - 1; 6474 1.1 christos goto char_set_matched; 6475 1.1 christos } 6476 1.1 christos } 6477 1.1 christos } 6478 1.1 christos else /* (nrules == 0) */ 6479 1.1 christos # endif 6480 1.1 christos /* If we can't look up collation data, we use wcscoll 6481 1.1 christos instead. */ 6482 1.1 christos { 6483 1.1 christos for (workp2 = workp + coll_symbol_length ; workp < workp2 ;) 6484 1.1 christos { 6485 1.1 christos const CHAR_T *backup_d = d, *backup_dend = dend; 6486 1.1 christos length = wcslen (workp); 6487 1.1 christos 6488 1.1 christos /* If wcscoll(the collating symbol, whole string) > 0, 6489 1.1 christos any substring of the string never match with the 6490 1.1 christos collating symbol. */ 6491 1.1 christos if (wcscoll (workp, d) > 0) 6492 1.1 christos { 6493 1.1 christos workp += length + 1; 6494 1.1 christos continue; 6495 1.1 christos } 6496 1.1 christos 6497 1.1 christos /* First, we compare the collating symbol with 6498 1.1 christos the first character of the string. 6499 1.1 christos If it don't match, we add the next character to 6500 1.1 christos the compare buffer in turn. 
*/ 6501 1.1 christos for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++) 6502 1.1 christos { 6503 1.1 christos int match; 6504 1.1 christos if (d == dend) 6505 1.1 christos { 6506 1.1 christos if (dend == end_match_2) 6507 1.1 christos break; 6508 1.1 christos d = string2; 6509 1.1 christos dend = end_match_2; 6510 1.1 christos } 6511 1.1 christos 6512 1.1 christos /* add next character to the compare buffer. */ 6513 1.1 christos str_buf[i] = TRANSLATE(*d); 6514 1.1 christos str_buf[i+1] = '\0'; 6515 1.1 christos 6516 1.1 christos match = wcscoll (workp, str_buf); 6517 1.1 christos if (match == 0) 6518 1.1 christos goto char_set_matched; 6519 1.1 christos 6520 1.1 christos if (match < 0) 6521 1.1 christos /* (str_buf > workp) indicate (str_buf + X > workp), 6522 1.1 christos because for all X (str_buf + X > str_buf). 6523 1.1 christos So we don't need continue this loop. */ 6524 1.1 christos break; 6525 1.1 christos 6526 1.1 christos /* Otherwise(str_buf < workp), 6527 1.1 christos (str_buf+next_character) may equals (workp). 6528 1.1 christos So we continue this loop. */ 6529 1.1 christos } 6530 1.1 christos /* not matched */ 6531 1.1 christos d = backup_d; 6532 1.1 christos dend = backup_dend; 6533 1.1 christos workp += length + 1; 6534 1.1 christos } 6535 1.1 christos } 6536 1.1 christos /* match with equivalence_class? */ 6537 1.1 christos # ifdef _LIBC 6538 1.1 christos if (nrules != 0) 6539 1.1 christos { 6540 1.1 christos const CHAR_T *backup_d = d, *backup_dend = dend; 6541 1.1 christos /* Try to match the equivalence class against 6542 1.1 christos those known to the collate implementation. */ 6543 1.1 christos const int32_t *table; 6544 1.1 christos const int32_t *weights; 6545 1.1 christos const int32_t *extra; 6546 1.1 christos const int32_t *indirect; 6547 1.1 christos int32_t idx, idx2; 6548 1.1 christos wint_t *cp; 6549 1.1 christos size_t len; 6550 1.1 christos 6551 1.1 christos /* This #include defines a local function! 
*/ 6552 1.1 christos # include <locale/weightwc.h> 6553 1.1 christos 6554 1.1 christos table = (const int32_t *) 6555 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 6556 1.1 christos weights = (const wint_t *) 6557 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 6558 1.1 christos extra = (const wint_t *) 6559 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 6560 1.1 christos indirect = (const int32_t *) 6561 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 6562 1.1 christos 6563 1.1 christos /* Write 1 collating element to str_buf, and 6564 1.1 christos get its index. */ 6565 1.1 christos idx2 = 0; 6566 1.1 christos 6567 1.1 christos for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++) 6568 1.1 christos { 6569 1.1 christos cp = (wint_t*)str_buf; 6570 1.1 christos if (d == dend) 6571 1.1 christos { 6572 1.1 christos if (dend == end_match_2) 6573 1.1 christos break; 6574 1.1 christos d = string2; 6575 1.1 christos dend = end_match_2; 6576 1.1 christos } 6577 1.1 christos str_buf[i] = TRANSLATE(*(d+i)); 6578 1.1 christos str_buf[i+1] = '\0'; /* sentinel */ 6579 1.1 christos idx2 = findidx ((const wint_t**)&cp); 6580 1.1 christos } 6581 1.1 christos 6582 1.1 christos /* Update d, however d will be incremented at 6583 1.1 christos char_set_matched:, we decrement d here. 
*/ 6584 1.1 christos d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1); 6585 1.1 christos if (d >= dend) 6586 1.1 christos { 6587 1.1 christos if (dend == end_match_2) 6588 1.1 christos d = dend; 6589 1.1 christos else 6590 1.1 christos { 6591 1.1 christos d = string2; 6592 1.1 christos dend = end_match_2; 6593 1.1 christos } 6594 1.1 christos } 6595 1.1 christos 6596 1.1 christos len = weights[idx2]; 6597 1.1 christos 6598 1.1 christos for (workp2 = workp + equiv_class_length ; workp < workp2 ; 6599 1.1 christos workp++) 6600 1.1 christos { 6601 1.1 christos idx = (int32_t)*workp; 6602 1.1 christos /* We already checked idx != 0 in regex_compile. */ 6603 1.1 christos 6604 1.1 christos if (idx2 != 0 && len == weights[idx]) 6605 1.1 christos { 6606 1.1 christos int cnt = 0; 6607 1.1 christos while (cnt < len && (weights[idx + 1 + cnt] 6608 1.1 christos == weights[idx2 + 1 + cnt])) 6609 1.1 christos ++cnt; 6610 1.1 christos 6611 1.1 christos if (cnt == len) 6612 1.1 christos goto char_set_matched; 6613 1.1 christos } 6614 1.1 christos } 6615 1.1 christos /* not matched */ 6616 1.1 christos d = backup_d; 6617 1.1 christos dend = backup_dend; 6618 1.1 christos } 6619 1.1 christos else /* (nrules == 0) */ 6620 1.1 christos # endif 6621 1.1 christos /* If we can't look up collation data, we use wcscoll 6622 1.1 christos instead. */ 6623 1.1 christos { 6624 1.1 christos for (workp2 = workp + equiv_class_length ; workp < workp2 ;) 6625 1.1 christos { 6626 1.1 christos const CHAR_T *backup_d = d, *backup_dend = dend; 6627 1.1 christos length = wcslen (workp); 6628 1.1 christos 6629 1.1 christos /* If wcscoll(the collating symbol, whole string) > 0, 6630 1.1 christos any substring of the string never match with the 6631 1.1 christos collating symbol. 
*/ 6632 1.1 christos if (wcscoll (workp, d) > 0) 6633 1.1 christos { 6634 1.1 christos workp += length + 1; 6635 1.1 christos break; 6636 1.1 christos } 6637 1.1 christos 6638 1.1 christos /* First, we compare the equivalence class with 6639 1.1 christos the first character of the string. 6640 1.1 christos If it don't match, we add the next character to 6641 1.1 christos the compare buffer in turn. */ 6642 1.1 christos for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++) 6643 1.1 christos { 6644 1.1 christos int match; 6645 1.1 christos if (d == dend) 6646 1.1 christos { 6647 1.1 christos if (dend == end_match_2) 6648 1.1 christos break; 6649 1.1 christos d = string2; 6650 1.1 christos dend = end_match_2; 6651 1.1 christos } 6652 1.1 christos 6653 1.1 christos /* add next character to the compare buffer. */ 6654 1.1 christos str_buf[i] = TRANSLATE(*d); 6655 1.1 christos str_buf[i+1] = '\0'; 6656 1.1 christos 6657 1.1 christos match = wcscoll (workp, str_buf); 6658 1.1 christos 6659 1.1 christos if (match == 0) 6660 1.1 christos goto char_set_matched; 6661 1.1 christos 6662 1.1 christos if (match < 0) 6663 1.1 christos /* (str_buf > workp) indicate (str_buf + X > workp), 6664 1.1 christos because for all X (str_buf + X > str_buf). 6665 1.1 christos So we don't need continue this loop. */ 6666 1.1 christos break; 6667 1.1 christos 6668 1.1 christos /* Otherwise(str_buf < workp), 6669 1.1 christos (str_buf+next_character) may equals (workp). 6670 1.1 christos So we continue this loop. */ 6671 1.1 christos } 6672 1.1 christos /* not matched */ 6673 1.1 christos d = backup_d; 6674 1.1 christos dend = backup_dend; 6675 1.1 christos workp += length + 1; 6676 1.1 christos } 6677 1.1 christos } 6678 1.1 christos 6679 1.1 christos /* match with char_range? 
*/ 6680 1.1 christos # ifdef _LIBC 6681 1.1 christos if (nrules != 0) 6682 1.1 christos { 6683 1.1 christos uint32_t collseqval; 6684 1.1 christos const char *collseq = (const char *) 6685 1.1 christos _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 6686 1.1 christos 6687 1.1 christos collseqval = collseq_table_lookup (collseq, c); 6688 1.1 christos 6689 1.1 christos for (; workp < p - chars_length ;) 6690 1.1 christos { 6691 1.1 christos uint32_t start_val, end_val; 6692 1.1 christos 6693 1.1 christos /* We already compute the collation sequence value 6694 1.1 christos of the characters (or collating symbols). */ 6695 1.1 christos start_val = (uint32_t) *workp++; /* range_start */ 6696 1.1 christos end_val = (uint32_t) *workp++; /* range_end */ 6697 1.1 christos 6698 1.1 christos if (start_val <= collseqval && collseqval <= end_val) 6699 1.1 christos goto char_set_matched; 6700 1.1 christos } 6701 1.1 christos } 6702 1.1 christos else 6703 1.1 christos # endif 6704 1.1 christos { 6705 1.1 christos /* We set range_start_char at str_buf[0], range_end_char 6706 1.1 christos at str_buf[4], and compared char at str_buf[2]. */ 6707 1.1 christos str_buf[1] = 0; 6708 1.1 christos str_buf[2] = c; 6709 1.1 christos str_buf[3] = 0; 6710 1.1 christos str_buf[5] = 0; 6711 1.1 christos for (; workp < p - chars_length ;) 6712 1.1 christos { 6713 1.1 christos wchar_t *range_start_char, *range_end_char; 6714 1.1 christos 6715 1.1 christos /* match if (range_start_char <= c <= range_end_char). */ 6716 1.1 christos 6717 1.1 christos /* If range_start(or end) < 0, we assume -range_start(end) 6718 1.1 christos is the offset of the collating symbol which is specified 6719 1.1 christos as the character of the range start(end). 
*/ 6720 1.1 christos 6721 1.1 christos /* range_start */ 6722 1.1 christos if (*workp < 0) 6723 1.1 christos range_start_char = charset_top - (*workp++); 6724 1.1 christos else 6725 1.1 christos { 6726 1.1 christos str_buf[0] = *workp++; 6727 1.1 christos range_start_char = str_buf; 6728 1.1 christos } 6729 1.1 christos 6730 1.1 christos /* range_end */ 6731 1.1 christos if (*workp < 0) 6732 1.1 christos range_end_char = charset_top - (*workp++); 6733 1.1 christos else 6734 1.1 christos { 6735 1.1 christos str_buf[4] = *workp++; 6736 1.1 christos range_end_char = str_buf + 4; 6737 1.1 christos } 6738 1.1 christos 6739 1.1 christos if (wcscoll (range_start_char, str_buf+2) <= 0 6740 1.1 christos && wcscoll (str_buf+2, range_end_char) <= 0) 6741 1.1 christos goto char_set_matched; 6742 1.1 christos } 6743 1.1 christos } 6744 1.1 christos 6745 1.1 christos /* match with char? */ 6746 1.1 christos for (; workp < p ; workp++) 6747 1.1 christos if (c == *workp) 6748 1.1 christos goto char_set_matched; 6749 1.1 christos 6750 1.1 christos not = !not; 6751 1.1 christos 6752 1.1 christos char_set_matched: 6753 1.1 christos if (not) goto fail; 6754 1.1 christos #else 6755 1.1 christos /* Cast to `unsigned' instead of `unsigned char' in case the 6756 1.1 christos bit list is a full 32 bytes long. */ 6757 1.1 christos if (c < (unsigned) (*p * BYTEWIDTH) 6758 1.1 christos && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 6759 1.1 christos not = !not; 6760 1.1 christos 6761 1.1 christos p += 1 + *p; 6762 1.1 christos 6763 1.1 christos if (!not) goto fail; 6764 1.1 christos #undef WORK_BUFFER_SIZE 6765 1.1 christos #endif /* WCHAR */ 6766 1.1 christos SET_REGS_MATCHED (); 6767 1.1 christos d++; 6768 1.1 christos NEXT; 6769 1.1 christos } 6770 1.1 christos 6771 1.1 christos 6772 1.1 christos /* The beginning of a group is represented by start_memory. 
6773 1.1 christos The arguments are the register number in the next byte, and the 6774 1.1 christos number of groups inner to this one in the next. The text 6775 1.1 christos matched within the group is recorded (in the internal 6776 1.1 christos registers data structure) under the register number. */ 6777 1.1 christos CASE (start_memory): 6778 1.1 christos DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n", 6779 1.1 christos (long int) *p, (long int) p[1]); 6780 1.1 christos 6781 1.1 christos /* Find out if this group can match the empty string. */ 6782 1.1 christos p1 = p; /* To send to group_match_null_string_p. */ 6783 1.1 christos 6784 1.1 christos if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 6785 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[*p]) 6786 1.1 christos = PREFIX(group_match_null_string_p) (&p1, pend, reg_info); 6787 1.1 christos 6788 1.1 christos /* Save the position in the string where we were the last time 6789 1.1 christos we were at this open-group operator in case the group is 6790 1.1 christos operated upon by a repetition operator, e.g., with `(a*)*b' 6791 1.1 christos against `ab'; then we want to ignore where we are now in 6792 1.1 christos the string in case this attempt to match fails. */ 6793 1.1 christos old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 6794 1.1 christos ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 6795 1.1 christos : regstart[*p]; 6796 1.1 christos DEBUG_PRINT2 (" old_regstart: %d\n", 6797 1.1 christos POINTER_TO_OFFSET (old_regstart[*p])); 6798 1.1 christos 6799 1.1 christos regstart[*p] = d; 6800 1.1 christos DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 6801 1.1 christos 6802 1.1 christos IS_ACTIVE (reg_info[*p]) = 1; 6803 1.1 christos MATCHED_SOMETHING (reg_info[*p]) = 0; 6804 1.1 christos 6805 1.1 christos /* Clear this whenever we change the register activity status. 
*/ 6806 1.1 christos set_regs_matched_done = 0; 6807 1.1 christos 6808 1.1 christos /* This is the new highest active register. */ 6809 1.1 christos highest_active_reg = *p; 6810 1.1 christos 6811 1.1 christos /* If nothing was active before, this is the new lowest active 6812 1.1 christos register. */ 6813 1.1 christos if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 6814 1.1 christos lowest_active_reg = *p; 6815 1.1 christos 6816 1.1 christos /* Move past the register number and inner group count. */ 6817 1.1 christos p += 2; 6818 1.1 christos just_past_start_mem = p; 6819 1.1 christos 6820 1.1 christos NEXT; 6821 1.1 christos 6822 1.1 christos 6823 1.1 christos /* The stop_memory opcode represents the end of a group. Its 6824 1.1 christos arguments are the same as start_memory's: the register 6825 1.1 christos number, and the number of inner groups. */ 6826 1.1 christos CASE (stop_memory): 6827 1.1 christos DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n", 6828 1.1 christos (long int) *p, (long int) p[1]); 6829 1.1 christos 6830 1.1 christos /* We need to save the string position the last time we were at 6831 1.1 christos this close-group operator in case the group is operated 6832 1.1 christos upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 6833 1.1 christos against `aba'; then we want to ignore where we are now in 6834 1.1 christos the string in case this attempt to match fails. */ 6835 1.1 christos old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 6836 1.1 christos ? REG_UNSET (regend[*p]) ? d : regend[*p] 6837 1.1 christos : regend[*p]; 6838 1.1 christos DEBUG_PRINT2 (" old_regend: %d\n", 6839 1.1 christos POINTER_TO_OFFSET (old_regend[*p])); 6840 1.1 christos 6841 1.1 christos regend[*p] = d; 6842 1.1 christos DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 6843 1.1 christos 6844 1.1 christos /* This register isn't active anymore. 
*/ 6845 1.1 christos IS_ACTIVE (reg_info[*p]) = 0; 6846 1.1 christos 6847 1.1 christos /* Clear this whenever we change the register activity status. */ 6848 1.1 christos set_regs_matched_done = 0; 6849 1.1 christos 6850 1.1 christos /* If this was the only register active, nothing is active 6851 1.1 christos anymore. */ 6852 1.1 christos if (lowest_active_reg == highest_active_reg) 6853 1.1 christos { 6854 1.1 christos lowest_active_reg = NO_LOWEST_ACTIVE_REG; 6855 1.1 christos highest_active_reg = NO_HIGHEST_ACTIVE_REG; 6856 1.1 christos } 6857 1.1 christos else 6858 1.1 christos { /* We must scan for the new highest active register, since 6859 1.1 christos it isn't necessarily one less than now: consider 6860 1.1 christos (a(b)c(d(e)f)g). When group 3 ends, after the f), the 6861 1.1 christos new highest active register is 1. */ 6862 1.1 christos UCHAR_T r = *p - 1; 6863 1.1 christos while (r > 0 && !IS_ACTIVE (reg_info[r])) 6864 1.1 christos r--; 6865 1.1 christos 6866 1.1 christos /* If we end up at register zero, that means that we saved 6867 1.1 christos the registers as the result of an `on_failure_jump', not 6868 1.1 christos a `start_memory', and we jumped to past the innermost 6869 1.1 christos `stop_memory'. For example, in ((.)*) we save 6870 1.1 christos registers 1 and 2 as a result of the *, but when we pop 6871 1.1 christos back to the second ), we are at the stop_memory 1. 6872 1.1 christos Thus, nothing is active. 
*/ 6873 1.1 christos if (r == 0) 6874 1.1 christos { 6875 1.1 christos lowest_active_reg = NO_LOWEST_ACTIVE_REG; 6876 1.1 christos highest_active_reg = NO_HIGHEST_ACTIVE_REG; 6877 1.1 christos } 6878 1.1 christos else 6879 1.1 christos highest_active_reg = r; 6880 1.1 christos } 6881 1.1 christos 6882 1.1 christos /* If just failed to match something this time around with a 6883 1.1 christos group that's operated on by a repetition operator, try to 6884 1.1 christos force exit from the ``loop'', and restore the register 6885 1.1 christos information for this group that we had before trying this 6886 1.1 christos last match. */ 6887 1.1 christos if ((!MATCHED_SOMETHING (reg_info[*p]) 6888 1.1 christos || just_past_start_mem == p - 1) 6889 1.1 christos && (p + 2) < pend) 6890 1.1 christos { 6891 1.1 christos boolean is_a_jump_n = false; 6892 1.1 christos 6893 1.1 christos p1 = p + 2; 6894 1.1 christos mcnt = 0; 6895 1.1 christos switch ((re_opcode_t) *p1++) 6896 1.1 christos { 6897 1.1 christos case jump_n: 6898 1.1 christos is_a_jump_n = true; 6899 1.1 christos case pop_failure_jump: 6900 1.1 christos case maybe_pop_jump: 6901 1.1 christos case jump: 6902 1.1 christos case dummy_failure_jump: 6903 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 6904 1.1 christos if (is_a_jump_n) 6905 1.1 christos p1 += OFFSET_ADDRESS_SIZE; 6906 1.1 christos break; 6907 1.1 christos 6908 1.1 christos default: 6909 1.1 christos /* do nothing */ ; 6910 1.1 christos } 6911 1.1 christos p1 += mcnt; 6912 1.1 christos 6913 1.1 christos /* If the next operation is a jump backwards in the pattern 6914 1.1 christos to an on_failure_jump right before the start_memory 6915 1.1 christos corresponding to this stop_memory, exit from the loop 6916 1.1 christos by forcing a failure after pushing on the stack the 6917 1.1 christos on_failure_jump's jump in the pattern, and d. 
*/ 6918 1.1 christos if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 6919 1.1 christos && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory 6920 1.1 christos && p1[2+OFFSET_ADDRESS_SIZE] == *p) 6921 1.1 christos { 6922 1.1 christos /* If this group ever matched anything, then restore 6923 1.1 christos what its registers were before trying this last 6924 1.1 christos failed match, e.g., with `(a*)*b' against `ab' for 6925 1.1 christos regstart[1], and, e.g., with `((a*)*(b*)*)*' 6926 1.1 christos against `aba' for regend[3]. 6927 1.1 christos 6928 1.1 christos Also restore the registers for inner groups for, 6929 1.1 christos e.g., `((a*)(b*))*' against `aba' (register 3 would 6930 1.1 christos otherwise get trashed). */ 6931 1.1 christos 6932 1.1 christos if (EVER_MATCHED_SOMETHING (reg_info[*p])) 6933 1.1 christos { 6934 1.1 christos unsigned r; 6935 1.1 christos 6936 1.1 christos EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 6937 1.1 christos 6938 1.1 christos /* Restore this and inner groups' (if any) registers. */ 6939 1.1 christos for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); 6940 1.1 christos r++) 6941 1.1 christos { 6942 1.1 christos regstart[r] = old_regstart[r]; 6943 1.1 christos 6944 1.1 christos /* xx why this test? */ 6945 1.1 christos if (old_regend[r] >= regstart[r]) 6946 1.1 christos regend[r] = old_regend[r]; 6947 1.1 christos } 6948 1.1 christos } 6949 1.1 christos p1++; 6950 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 6951 1.1 christos PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 6952 1.1 christos 6953 1.1 christos goto fail; 6954 1.1 christos } 6955 1.1 christos } 6956 1.1 christos 6957 1.1 christos /* Move past the register number and the inner group count. */ 6958 1.1 christos p += 2; 6959 1.1 christos NEXT; 6960 1.1 christos 6961 1.1 christos 6962 1.1 christos /* \<digit> has been turned into a `duplicate' command which is 6963 1.1 christos followed by the numeric value of <digit> as the register number. 
*/ 6964 1.1 christos CASE (duplicate): 6965 1.1 christos { 6966 1.1 christos register const CHAR_T *d2, *dend2; 6967 1.1 christos int regno = *p++; /* Get which register to match against. */ 6968 1.1 christos DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 6969 1.1 christos 6970 1.1 christos /* Can't back reference a group which we've never matched. */ 6971 1.1 christos if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 6972 1.1 christos goto fail; 6973 1.1 christos 6974 1.1 christos /* Where in input to try to start matching. */ 6975 1.1 christos d2 = regstart[regno]; 6976 1.1 christos 6977 1.1 christos /* Where to stop matching; if both the place to start and 6978 1.1 christos the place to stop matching are in the same string, then 6979 1.1 christos set to the place to stop, otherwise, for now have to use 6980 1.1 christos the end of the first string. */ 6981 1.1 christos 6982 1.1 christos dend2 = ((FIRST_STRING_P (regstart[regno]) 6983 1.1 christos == FIRST_STRING_P (regend[regno])) 6984 1.1 christos ? regend[regno] : end_match_1); 6985 1.1 christos for (;;) 6986 1.1 christos { 6987 1.1 christos /* If necessary, advance to next segment in register 6988 1.1 christos contents. */ 6989 1.1 christos while (d2 == dend2) 6990 1.1 christos { 6991 1.1 christos if (dend2 == end_match_2) break; 6992 1.1 christos if (dend2 == regend[regno]) break; 6993 1.1 christos 6994 1.1 christos /* End of string1 => advance to string2. */ 6995 1.1 christos d2 = string2; 6996 1.1 christos dend2 = regend[regno]; 6997 1.1 christos } 6998 1.1 christos /* At end of register contents => success */ 6999 1.1 christos if (d2 == dend2) break; 7000 1.1 christos 7001 1.1 christos /* If necessary, advance to next segment in data. */ 7002 1.1 christos PREFETCH (); 7003 1.1 christos 7004 1.1 christos /* How many characters left in this segment to match. 
*/ 7005 1.1 christos mcnt = dend - d; 7006 1.1 christos 7007 1.1 christos /* Want how many consecutive characters we can match in 7008 1.1 christos one shot, so, if necessary, adjust the count. */ 7009 1.1 christos if (mcnt > dend2 - d2) 7010 1.1 christos mcnt = dend2 - d2; 7011 1.1 christos 7012 1.1 christos /* Compare that many; failure if mismatch, else move 7013 1.1 christos past them. */ 7014 1.1 christos if (translate 7015 1.1 christos ? PREFIX(bcmp_translate) (d, d2, mcnt, translate) 7016 1.1 christos : memcmp (d, d2, mcnt*sizeof(UCHAR_T))) 7017 1.1 christos goto fail; 7018 1.1 christos d += mcnt, d2 += mcnt; 7019 1.1 christos 7020 1.1 christos /* Do this because we've match some characters. */ 7021 1.1 christos SET_REGS_MATCHED (); 7022 1.1 christos } 7023 1.1 christos } 7024 1.1 christos NEXT; 7025 1.1 christos 7026 1.1 christos 7027 1.1 christos /* begline matches the empty string at the beginning of the string 7028 1.1 christos (unless `not_bol' is set in `bufp'), and, if 7029 1.1 christos `newline_anchor' is set, after newlines. */ 7030 1.1 christos CASE (begline): 7031 1.1 christos DEBUG_PRINT1 ("EXECUTING begline.\n"); 7032 1.1 christos 7033 1.1 christos if (AT_STRINGS_BEG (d)) 7034 1.1 christos { 7035 1.1 christos if (!bufp->not_bol) 7036 1.1 christos { 7037 1.1 christos NEXT; 7038 1.1 christos } 7039 1.1 christos } 7040 1.1 christos else if (d[-1] == '\n' && bufp->newline_anchor) 7041 1.1 christos { 7042 1.1 christos NEXT; 7043 1.1 christos } 7044 1.1 christos /* In all other cases, we fail. */ 7045 1.1 christos goto fail; 7046 1.1 christos 7047 1.1 christos 7048 1.1 christos /* endline is the dual of begline. 
*/ 7049 1.1 christos CASE (endline): 7050 1.1 christos DEBUG_PRINT1 ("EXECUTING endline.\n"); 7051 1.1 christos 7052 1.1 christos if (AT_STRINGS_END (d)) 7053 1.1 christos { 7054 1.1 christos if (!bufp->not_eol) 7055 1.1 christos { 7056 1.1 christos NEXT; 7057 1.1 christos } 7058 1.1 christos } 7059 1.1 christos 7060 1.1 christos /* We have to ``prefetch'' the next character. */ 7061 1.1 christos else if ((d == end1 ? *string2 : *d) == '\n' 7062 1.1 christos && bufp->newline_anchor) 7063 1.1 christos { 7064 1.1 christos NEXT; 7065 1.1 christos } 7066 1.1 christos goto fail; 7067 1.1 christos 7068 1.1 christos 7069 1.1 christos /* Match at the very beginning of the data. */ 7070 1.1 christos CASE (begbuf): 7071 1.1 christos DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 7072 1.1 christos if (AT_STRINGS_BEG (d)) 7073 1.1 christos { 7074 1.1 christos NEXT; 7075 1.1 christos } 7076 1.1 christos goto fail; 7077 1.1 christos 7078 1.1 christos 7079 1.1 christos /* Match at the very end of the data. */ 7080 1.1 christos CASE (endbuf): 7081 1.1 christos DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 7082 1.1 christos if (AT_STRINGS_END (d)) 7083 1.1 christos { 7084 1.1 christos NEXT; 7085 1.1 christos } 7086 1.1 christos goto fail; 7087 1.1 christos 7088 1.1 christos 7089 1.1 christos /* on_failure_keep_string_jump is used to optimize `.*\n'. It 7090 1.1 christos pushes NULL as the value for the string on the stack. Then 7091 1.1 christos `pop_failure_point' will keep the current value for the 7092 1.1 christos string, instead of restoring it. To see why, consider 7093 1.1 christos matching `foo\nbar' against `.*\n'. The .* matches the foo; 7094 1.1 christos then the . fails against the \n. But the next thing we want 7095 1.1 christos to do is match the \n against the \n; if we restored the 7096 1.1 christos string value, we would be back at the foo. 
7097 1.1 christos 7098 1.1 christos Because this is used only in specific cases, we don't need to 7099 1.1 christos check all the things that `on_failure_jump' does, to make 7100 1.1 christos sure the right things get saved on the stack. Hence we don't 7101 1.1 christos share its code. The only reason to push anything on the 7102 1.1 christos stack at all is that otherwise we would have to change 7103 1.1 christos `anychar's code to do something besides goto fail in this 7104 1.1 christos case; that seems worse than this. */ 7105 1.1 christos CASE (on_failure_keep_string_jump): 7106 1.1 christos DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 7107 1.1 christos 7108 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7109 1.1 christos #ifdef _LIBC 7110 1.1 christos DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); 7111 1.1 christos #else 7112 1.1 christos DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 7113 1.1 christos #endif 7114 1.1 christos 7115 1.1 christos PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 7116 1.1 christos NEXT; 7117 1.1 christos 7118 1.1 christos 7119 1.1 christos /* Uses of on_failure_jump: 7120 1.1 christos 7121 1.1 christos Each alternative starts with an on_failure_jump that points 7122 1.1 christos to the beginning of the next alternative. Each alternative 7123 1.1 christos except the last ends with a jump that in effect jumps past 7124 1.1 christos the rest of the alternatives. (They really jump to the 7125 1.1 christos ending jump of the following alternative, because tensioning 7126 1.1 christos these jumps is a hassle.) 7127 1.1 christos 7128 1.1 christos Repeats start with an on_failure_jump that points past both 7129 1.1 christos the repetition text and either the following jump or 7130 1.1 christos pop_failure_jump back to this on_failure_jump. 
*/ 7131 1.1 christos CASE (on_failure_jump): 7132 1.1 christos on_failure: 7133 1.1 christos DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 7134 1.1 christos 7135 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7136 1.1 christos #ifdef _LIBC 7137 1.1 christos DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); 7138 1.1 christos #else 7139 1.1 christos DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 7140 1.1 christos #endif 7141 1.1 christos 7142 1.1 christos /* If this on_failure_jump comes right before a group (i.e., 7143 1.1 christos the original * applied to a group), save the information 7144 1.1 christos for that group and all inner ones, so that if we fail back 7145 1.1 christos to this point, the group's information will be correct. 7146 1.1 christos For example, in \(a*\)*\1, we need the preceding group, 7147 1.1 christos and in \(zz\(a*\)b*\)\2, we need the inner group. */ 7148 1.1 christos 7149 1.1 christos /* We can't use `p' to check ahead because we push 7150 1.1 christos a failure point to `p + mcnt' after we do this. */ 7151 1.1 christos p1 = p; 7152 1.1 christos 7153 1.1 christos /* We need to skip no_op's before we look for the 7154 1.1 christos start_memory in case this on_failure_jump is happening as 7155 1.1 christos the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 7156 1.1 christos against aba. */ 7157 1.1 christos while (p1 < pend && (re_opcode_t) *p1 == no_op) 7158 1.1 christos p1++; 7159 1.1 christos 7160 1.1 christos if (p1 < pend && (re_opcode_t) *p1 == start_memory) 7161 1.1 christos { 7162 1.1 christos /* We have a new highest active register now. This will 7163 1.1 christos get reset at the start_memory we are about to get to, 7164 1.1 christos but we will have saved all the registers relevant to 7165 1.1 christos this repetition op, as described above. 
*/ 7166 1.1 christos highest_active_reg = *(p1 + 1) + *(p1 + 2); 7167 1.1 christos if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 7168 1.1 christos lowest_active_reg = *(p1 + 1); 7169 1.1 christos } 7170 1.1 christos 7171 1.1 christos DEBUG_PRINT1 (":\n"); 7172 1.1 christos PUSH_FAILURE_POINT (p + mcnt, d, -2); 7173 1.1 christos NEXT; 7174 1.1 christos 7175 1.1 christos 7176 1.1 christos /* A smart repeat ends with `maybe_pop_jump'. 7177 1.1 christos We change it to either `pop_failure_jump' or `jump'. */ 7178 1.1 christos CASE (maybe_pop_jump): 7179 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7180 1.1 christos DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 7181 1.1 christos { 7182 1.1 christos register UCHAR_T *p2 = p; 7183 1.1 christos 7184 1.1 christos /* Compare the beginning of the repeat with what in the 7185 1.1 christos pattern follows its end. If we can establish that there 7186 1.1 christos is nothing that they would both match, i.e., that we 7187 1.1 christos would have to backtrack because of (as in, e.g., `a*a') 7188 1.1 christos then we can change to pop_failure_jump, because we'll 7189 1.1 christos never have to backtrack. 7190 1.1 christos 7191 1.1 christos This is not true in the case of alternatives: in 7192 1.1 christos `(a|ab)*' we do need to backtrack to the `ab' alternative 7193 1.1 christos (e.g., if the string was `ab'). But instead of trying to 7194 1.1 christos detect that here, the alternative has put on a dummy 7195 1.1 christos failure point which is what we will end up popping. */ 7196 1.1 christos 7197 1.1 christos /* Skip over open/close-group commands. 7198 1.1 christos If what follows this loop is a ...+ construct, 7199 1.1 christos look at what begins its body, since we will have to 7200 1.1 christos match at least one of that. 
*/ 7201 1.1 christos while (1) 7202 1.1 christos { 7203 1.1 christos if (p2 + 2 < pend 7204 1.1 christos && ((re_opcode_t) *p2 == stop_memory 7205 1.1 christos || (re_opcode_t) *p2 == start_memory)) 7206 1.1 christos p2 += 3; 7207 1.1 christos else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend 7208 1.1 christos && (re_opcode_t) *p2 == dummy_failure_jump) 7209 1.1 christos p2 += 2 + 2 * OFFSET_ADDRESS_SIZE; 7210 1.1 christos else 7211 1.1 christos break; 7212 1.1 christos } 7213 1.1 christos 7214 1.1 christos p1 = p + mcnt; 7215 1.1 christos /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 7216 1.1 christos to the `maybe_finalize_jump' of this case. Examine what 7217 1.1 christos follows. */ 7218 1.1 christos 7219 1.1 christos /* If we're at the end of the pattern, we can change. */ 7220 1.1 christos if (p2 == pend) 7221 1.1 christos { 7222 1.1 christos /* Consider what happens when matching ":\(.*\)" 7223 1.1 christos against ":/". I don't really understand this code 7224 1.1 christos yet. */ 7225 1.1 christos p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 7226 1.1 christos pop_failure_jump; 7227 1.1 christos DEBUG_PRINT1 7228 1.1 christos (" End of pattern: change to `pop_failure_jump'.\n"); 7229 1.1 christos } 7230 1.1 christos 7231 1.1 christos else if ((re_opcode_t) *p2 == exactn 7232 1.1 christos #ifdef MBS_SUPPORT 7233 1.1 christos || (re_opcode_t) *p2 == exactn_bin 7234 1.1 christos #endif 7235 1.1 christos || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 7236 1.1 christos { 7237 1.1 christos register UCHAR_T c 7238 1.1 christos = *p2 == (UCHAR_T) endline ? 
'\n' : p2[2]; 7239 1.1 christos 7240 1.1 christos if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn 7241 1.1 christos #ifdef MBS_SUPPORT 7242 1.1 christos || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin 7243 1.1 christos #endif 7244 1.1 christos ) && p1[3+OFFSET_ADDRESS_SIZE] != c) 7245 1.1 christos { 7246 1.1 christos p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 7247 1.1 christos pop_failure_jump; 7248 1.1 christos #ifdef WCHAR 7249 1.1 christos DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n", 7250 1.1 christos (wint_t) c, 7251 1.1 christos (wint_t) p1[3+OFFSET_ADDRESS_SIZE]); 7252 1.1 christos #else 7253 1.1 christos DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 7254 1.1 christos (char) c, 7255 1.1 christos (char) p1[3+OFFSET_ADDRESS_SIZE]); 7256 1.1 christos #endif 7257 1.1 christos } 7258 1.1 christos 7259 1.1 christos #ifndef WCHAR 7260 1.1 christos else if ((re_opcode_t) p1[3] == charset 7261 1.1 christos || (re_opcode_t) p1[3] == charset_not) 7262 1.1 christos { 7263 1.1 christos int not = (re_opcode_t) p1[3] == charset_not; 7264 1.1 christos 7265 1.1 christos if (c < (unsigned) (p1[4] * BYTEWIDTH) 7266 1.1 christos && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 7267 1.1 christos not = !not; 7268 1.1 christos 7269 1.1 christos /* `not' is equal to 1 if c would match, which means 7270 1.1 christos that we can't change to pop_failure_jump. */ 7271 1.1 christos if (!not) 7272 1.1 christos { 7273 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7274 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7275 1.1 christos } 7276 1.1 christos } 7277 1.1 christos #endif /* not WCHAR */ 7278 1.1 christos } 7279 1.1 christos #ifndef WCHAR 7280 1.1 christos else if ((re_opcode_t) *p2 == charset) 7281 1.1 christos { 7282 1.1 christos /* We win if the first character of the loop is not part 7283 1.1 christos of the charset. */ 7284 1.1 christos if ((re_opcode_t) p1[3] == exactn 7285 1.1 christos && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[5] 7286 1.1 christos && (p2[2 + p1[5] / BYTEWIDTH] 7287 1.1 christos & (1 << (p1[5] % BYTEWIDTH))))) 7288 1.1 christos { 7289 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7290 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7291 1.1 christos } 7292 1.1 christos 7293 1.1 christos else if ((re_opcode_t) p1[3] == charset_not) 7294 1.1 christos { 7295 1.1 christos int idx; 7296 1.1 christos /* We win if the charset_not inside the loop 7297 1.1 christos lists every character listed in the charset after. */ 7298 1.1 christos for (idx = 0; idx < (int) p2[1]; idx++) 7299 1.1 christos if (! (p2[2 + idx] == 0 7300 1.1 christos || (idx < (int) p1[4] 7301 1.1 christos && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 7302 1.1 christos break; 7303 1.1 christos 7304 1.1 christos if (idx == p2[1]) 7305 1.1 christos { 7306 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7307 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7308 1.1 christos } 7309 1.1 christos } 7310 1.1 christos else if ((re_opcode_t) p1[3] == charset) 7311 1.1 christos { 7312 1.1 christos int idx; 7313 1.1 christos /* We win if the charset inside the loop 7314 1.1 christos has no overlap with the one after the loop. */ 7315 1.1 christos for (idx = 0; 7316 1.1 christos idx < (int) p2[1] && idx < (int) p1[4]; 7317 1.1 christos idx++) 7318 1.1 christos if ((p2[2 + idx] & p1[5 + idx]) != 0) 7319 1.1 christos break; 7320 1.1 christos 7321 1.1 christos if (idx == p2[1] || idx == p1[4]) 7322 1.1 christos { 7323 1.1 christos p[-3] = (unsigned char) pop_failure_jump; 7324 1.1 christos DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7325 1.1 christos } 7326 1.1 christos } 7327 1.1 christos } 7328 1.1 christos #endif /* not WCHAR */ 7329 1.1 christos } 7330 1.1 christos p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. 
*/ 7331 1.1 christos if ((re_opcode_t) p[-1] != pop_failure_jump) 7332 1.1 christos { 7333 1.1 christos p[-1] = (UCHAR_T) jump; 7334 1.1 christos DEBUG_PRINT1 (" Match => jump.\n"); 7335 1.1 christos goto unconditional_jump; 7336 1.1 christos } 7337 1.1 christos /* Note fall through. */ 7338 1.1 christos 7339 1.1 christos 7340 1.1 christos /* The end of a simple repeat has a pop_failure_jump back to 7341 1.1 christos its matching on_failure_jump, where the latter will push a 7342 1.1 christos failure point. The pop_failure_jump takes off failure 7343 1.1 christos points put on by this pop_failure_jump's matching 7344 1.1 christos on_failure_jump; we got through the pattern to here from the 7345 1.1 christos matching on_failure_jump, so didn't fail. */ 7346 1.1 christos CASE (pop_failure_jump): 7347 1.1 christos { 7348 1.1 christos /* We need to pass separate storage for the lowest and 7349 1.1 christos highest registers, even though we don't care about the 7350 1.1 christos actual values. Otherwise, we will restore only one 7351 1.1 christos register from the stack, since lowest will == highest in 7352 1.1 christos `pop_failure_point'. */ 7353 1.1 christos active_reg_t dummy_low_reg, dummy_high_reg; 7354 1.1 christos UCHAR_T *pdummy = NULL; 7355 1.1 christos const CHAR_T *sdummy = NULL; 7356 1.1 christos 7357 1.1 christos DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 7358 1.1 christos POP_FAILURE_POINT (sdummy, pdummy, 7359 1.1 christos dummy_low_reg, dummy_high_reg, 7360 1.1 christos reg_dummy, reg_dummy, reg_info_dummy); 7361 1.1 christos } 7362 1.1 christos /* Note fall through. */ 7363 1.1 christos 7364 1.1 christos unconditional_jump: 7365 1.1 christos #ifdef _LIBC 7366 1.1 christos DEBUG_PRINT2 ("\n%p: ", p); 7367 1.1 christos #else 7368 1.1 christos DEBUG_PRINT2 ("\n0x%x: ", p); 7369 1.1 christos #endif 7370 1.1 christos /* Note fall through. */ 7371 1.1 christos 7372 1.1 christos /* Unconditionally jump (without popping any failure points). 
*/ 7373 1.1 christos CASE (jump): 7374 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 7375 1.1 christos DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 7376 1.1 christos p += mcnt; /* Do the jump. */ 7377 1.1 christos #ifdef _LIBC 7378 1.1 christos DEBUG_PRINT2 ("(to %p).\n", p); 7379 1.1 christos #else 7380 1.1 christos DEBUG_PRINT2 ("(to 0x%x).\n", p); 7381 1.1 christos #endif 7382 1.1 christos NEXT; 7383 1.1 christos 7384 1.1 christos 7385 1.1 christos /* We need this opcode so we can detect where alternatives end 7386 1.1 christos in `group_match_null_string_p' et al. */ 7387 1.1 christos CASE (jump_past_alt): 7388 1.1 christos DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 7389 1.1 christos goto unconditional_jump; 7390 1.1 christos 7391 1.1 christos 7392 1.1 christos /* Normally, the on_failure_jump pushes a failure point, which 7393 1.1 christos then gets popped at pop_failure_jump. We will end up at 7394 1.1 christos pop_failure_jump, also, and with a pattern of, say, `a+', we 7395 1.1 christos are skipping over the on_failure_jump, so we have to push 7396 1.1 christos something meaningless for pop_failure_jump to pop. */ 7397 1.1 christos CASE (dummy_failure_jump): 7398 1.1 christos DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 7399 1.1 christos /* It doesn't matter what we push for the string here. What 7400 1.1 christos the code at `fail' tests is the value for the pattern. */ 7401 1.1 christos PUSH_FAILURE_POINT (NULL, NULL, -2); 7402 1.1 christos goto unconditional_jump; 7403 1.1 christos 7404 1.1 christos 7405 1.1 christos /* At the end of an alternative, we need to push a dummy failure 7406 1.1 christos point in case we are followed by a `pop_failure_jump', because 7407 1.1 christos we don't want the failure point for the alternative to be 7408 1.1 christos popped. For example, matching `(a|ab)*' against `aab' 7409 1.1 christos requires that we match the `ab' alternative. 
*/ 7410 1.1 christos CASE (push_dummy_failure): 7411 1.1 christos DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 7412 1.1 christos /* See comments just above at `dummy_failure_jump' about the 7413 1.1 christos two zeroes. */ 7414 1.1 christos PUSH_FAILURE_POINT (NULL, NULL, -2); 7415 1.1 christos NEXT; 7416 1.1 christos 7417 1.1 christos /* Have to succeed matching what follows at least n times. 7418 1.1 christos After that, handle like `on_failure_jump'. */ 7419 1.1 christos CASE (succeed_n): 7420 1.1 christos EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 7421 1.1 christos DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 7422 1.1 christos 7423 1.1 christos assert (mcnt >= 0); 7424 1.1 christos /* Originally, this is how many times we HAVE to succeed. */ 7425 1.1 christos if (mcnt > 0) 7426 1.1 christos { 7427 1.1 christos mcnt--; 7428 1.1 christos p += OFFSET_ADDRESS_SIZE; 7429 1.1 christos STORE_NUMBER_AND_INCR (p, mcnt); 7430 1.1 christos #ifdef _LIBC 7431 1.1 christos DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE 7432 1.1 christos , mcnt); 7433 1.1 christos #else 7434 1.1 christos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE 7435 1.1 christos , mcnt); 7436 1.1 christos #endif 7437 1.1 christos } 7438 1.1 christos else if (mcnt == 0) 7439 1.1 christos { 7440 1.1 christos #ifdef _LIBC 7441 1.1 christos DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", 7442 1.1 christos p + OFFSET_ADDRESS_SIZE); 7443 1.1 christos #else 7444 1.1 christos DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", 7445 1.1 christos p + OFFSET_ADDRESS_SIZE); 7446 1.1 christos #endif /* _LIBC */ 7447 1.1 christos 7448 1.1 christos #ifdef WCHAR 7449 1.1 christos p[1] = (UCHAR_T) no_op; 7450 1.1 christos #else 7451 1.1 christos p[2] = (UCHAR_T) no_op; 7452 1.1 christos p[3] = (UCHAR_T) no_op; 7453 1.1 christos #endif /* WCHAR */ 7454 1.1 christos goto on_failure; 7455 1.1 christos } 7456 1.1 christos NEXT; 7457 1.1 christos 7458 1.1 christos 
CASE (jump_n): 7459 1.1 christos EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 7460 1.1 christos DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 7461 1.1 christos 7462 1.1 christos /* Originally, this is how many times we CAN jump. */ 7463 1.1 christos if (mcnt) 7464 1.1 christos { 7465 1.1 christos mcnt--; 7466 1.1 christos STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt); 7467 1.1 christos 7468 1.1 christos #ifdef _LIBC 7469 1.1 christos DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE, 7470 1.1 christos mcnt); 7471 1.1 christos #else 7472 1.1 christos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE, 7473 1.1 christos mcnt); 7474 1.1 christos #endif /* _LIBC */ 7475 1.1 christos goto unconditional_jump; 7476 1.1 christos } 7477 1.1 christos /* If don't have to jump any more, skip over the rest of command. */ 7478 1.1 christos else 7479 1.1 christos p += 2 * OFFSET_ADDRESS_SIZE; 7480 1.1 christos NEXT; 7481 1.1 christos 7482 1.1 christos CASE (set_number_at): 7483 1.1 christos { 7484 1.1 christos DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 7485 1.1 christos 7486 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7487 1.1 christos p1 = p + mcnt; 7488 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p); 7489 1.1 christos #ifdef _LIBC 7490 1.1 christos DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 7491 1.1 christos #else 7492 1.1 christos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 7493 1.1 christos #endif 7494 1.1 christos STORE_NUMBER (p1, mcnt); 7495 1.1 christos NEXT; 7496 1.1 christos } 7497 1.1 christos 7498 1.1 christos #if 0 7499 1.1 christos /* The DEC Alpha C compiler 3.x generates incorrect code for the 7500 1.1 christos test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 7501 1.1 christos AT_WORD_BOUNDARY, so this code is disabled. Expanding the 7502 1.1 christos macro and introducing temporary variables works around the bug. 
*/ 7503 1.1 christos 7504 1.1 christos CASE (wordbound): 7505 1.1 christos DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 7506 1.1 christos if (AT_WORD_BOUNDARY (d)) 7507 1.1 christos { 7508 1.1 christos NEXT; 7509 1.1 christos } 7510 1.1 christos goto fail; 7511 1.1 christos 7512 1.1 christos CASE (notwordbound): 7513 1.1 christos DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 7514 1.1 christos if (AT_WORD_BOUNDARY (d)) 7515 1.1 christos goto fail; 7516 1.1 christos NEXT; 7517 1.1 christos #else 7518 1.1 christos CASE (wordbound): 7519 1.1 christos { 7520 1.1 christos boolean prevchar, thischar; 7521 1.1 christos 7522 1.1 christos DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 7523 1.1 christos if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 7524 1.1 christos { 7525 1.1 christos NEXT; 7526 1.1 christos } 7527 1.1 christos 7528 1.1 christos prevchar = WORDCHAR_P (d - 1); 7529 1.1 christos thischar = WORDCHAR_P (d); 7530 1.1 christos if (prevchar != thischar) 7531 1.1 christos { 7532 1.1 christos NEXT; 7533 1.1 christos } 7534 1.1 christos goto fail; 7535 1.1 christos } 7536 1.1 christos 7537 1.1 christos CASE (notwordbound): 7538 1.1 christos { 7539 1.1 christos boolean prevchar, thischar; 7540 1.1 christos 7541 1.1 christos DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 7542 1.1 christos if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 7543 1.1 christos goto fail; 7544 1.1 christos 7545 1.1 christos prevchar = WORDCHAR_P (d - 1); 7546 1.1 christos thischar = WORDCHAR_P (d); 7547 1.1 christos if (prevchar != thischar) 7548 1.1 christos goto fail; 7549 1.1 christos NEXT; 7550 1.1 christos } 7551 1.1 christos #endif 7552 1.1 christos 7553 1.1 christos CASE (wordbeg): 7554 1.1 christos DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 7555 1.1 christos if (!AT_STRINGS_END (d) && WORDCHAR_P (d) 7556 1.1 christos && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 7557 1.1 christos { 7558 1.1 christos NEXT; 7559 1.1 christos } 7560 1.1 christos goto fail; 7561 1.1 christos 7562 1.1 christos CASE (wordend): 
7563 1.1 christos DEBUG_PRINT1 ("EXECUTING wordend.\n"); 7564 1.1 christos if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 7565 1.1 christos && (AT_STRINGS_END (d) || !WORDCHAR_P (d))) 7566 1.1 christos { 7567 1.1 christos NEXT; 7568 1.1 christos } 7569 1.1 christos goto fail; 7570 1.1 christos 7571 1.1 christos #ifdef emacs 7572 1.1 christos CASE (before_dot): 7573 1.1 christos DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 7574 1.1 christos if (PTR_CHAR_POS ((unsigned char *) d) >= point) 7575 1.1 christos goto fail; 7576 1.1 christos NEXT; 7577 1.1 christos 7578 1.1 christos CASE (at_dot): 7579 1.1 christos DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 7580 1.1 christos if (PTR_CHAR_POS ((unsigned char *) d) != point) 7581 1.1 christos goto fail; 7582 1.1 christos NEXT; 7583 1.1 christos 7584 1.1 christos CASE (after_dot): 7585 1.1 christos DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 7586 1.1 christos if (PTR_CHAR_POS ((unsigned char *) d) <= point) 7587 1.1 christos goto fail; 7588 1.1 christos NEXT; 7589 1.1 christos 7590 1.1 christos CASE (syntaxspec): 7591 1.1 christos DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 7592 1.1 christos mcnt = *p++; 7593 1.1 christos goto matchsyntax; 7594 1.1 christos 7595 1.1 christos CASE (wordchar): 7596 1.1 christos DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 7597 1.1 christos mcnt = (int) Sword; 7598 1.1 christos matchsyntax: 7599 1.1 christos PREFETCH (); 7600 1.1 christos /* Can't use *d++ here; SYNTAX may be an unsafe macro. 
*/ 7601 1.1 christos d++; 7602 1.1 christos if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 7603 1.1 christos goto fail; 7604 1.1 christos SET_REGS_MATCHED (); 7605 1.1 christos NEXT; 7606 1.1 christos 7607 1.1 christos CASE (notsyntaxspec): 7608 1.1 christos DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 7609 1.1 christos mcnt = *p++; 7610 1.1 christos goto matchnotsyntax; 7611 1.1 christos 7612 1.1 christos CASE (notwordchar): 7613 1.1 christos DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 7614 1.1 christos mcnt = (int) Sword; 7615 1.1 christos matchnotsyntax: 7616 1.1 christos PREFETCH (); 7617 1.1 christos /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 7618 1.1 christos d++; 7619 1.1 christos if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 7620 1.1 christos goto fail; 7621 1.1 christos SET_REGS_MATCHED (); 7622 1.1 christos NEXT; 7623 1.1 christos 7624 1.1 christos #else /* not emacs */ 7625 1.1 christos CASE (wordchar): 7626 1.1 christos DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 7627 1.1 christos PREFETCH (); 7628 1.1 christos if (!WORDCHAR_P (d)) 7629 1.1 christos goto fail; 7630 1.1 christos SET_REGS_MATCHED (); 7631 1.1 christos d++; 7632 1.1 christos NEXT; 7633 1.1 christos 7634 1.1 christos CASE (notwordchar): 7635 1.1 christos DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 7636 1.1 christos PREFETCH (); 7637 1.1 christos if (WORDCHAR_P (d)) 7638 1.1 christos goto fail; 7639 1.1 christos SET_REGS_MATCHED (); 7640 1.1 christos d++; 7641 1.1 christos NEXT; 7642 1.1 christos #endif /* not emacs */ 7643 1.1 christos 7644 1.1 christos #ifndef __GNUC__ 7645 1.1 christos default: 7646 1.1 christos abort (); 7647 1.1 christos } 7648 1.1 christos continue; /* Successfully executed one pattern command; keep going. */ 7649 1.1 christos #endif 7650 1.1 christos 7651 1.1 christos 7652 1.1 christos /* We goto here if a matching operation fails. 
*/ 7653 1.1 christos fail: 7654 1.1 christos if (!FAIL_STACK_EMPTY ()) 7655 1.1 christos { /* A restart point is known. Restore to that state. */ 7656 1.1 christos DEBUG_PRINT1 ("\nFAIL:\n"); 7657 1.1 christos POP_FAILURE_POINT (d, p, 7658 1.1 christos lowest_active_reg, highest_active_reg, 7659 1.1 christos regstart, regend, reg_info); 7660 1.1 christos 7661 1.1 christos /* If this failure point is a dummy, try the next one. */ 7662 1.1 christos if (!p) 7663 1.1 christos goto fail; 7664 1.1 christos 7665 1.1 christos /* If we failed to the end of the pattern, don't examine *p. */ 7666 1.1 christos assert (p <= pend); 7667 1.1 christos if (p < pend) 7668 1.1 christos { 7669 1.1 christos boolean is_a_jump_n = false; 7670 1.1 christos 7671 1.1 christos /* If failed to a backwards jump that's part of a repetition 7672 1.1 christos loop, need to pop this failure point and use the next one. */ 7673 1.1 christos switch ((re_opcode_t) *p) 7674 1.1 christos { 7675 1.1 christos case jump_n: 7676 1.1 christos is_a_jump_n = true; 7677 1.1 christos case maybe_pop_jump: 7678 1.1 christos case pop_failure_jump: 7679 1.1 christos case jump: 7680 1.1 christos p1 = p + 1; 7681 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7682 1.1 christos p1 += mcnt; 7683 1.1 christos 7684 1.1 christos if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 7685 1.1 christos || (!is_a_jump_n 7686 1.1 christos && (re_opcode_t) *p1 == on_failure_jump)) 7687 1.1 christos goto fail; 7688 1.1 christos break; 7689 1.1 christos default: 7690 1.1 christos /* do nothing */ ; 7691 1.1 christos } 7692 1.1 christos } 7693 1.1 christos 7694 1.1 christos if (d >= string1 && d <= end1) 7695 1.1 christos dend = end_match_1; 7696 1.1 christos } 7697 1.1 christos else 7698 1.1 christos break; /* Matching at this starting point really fails. 
*/ 7699 1.1 christos } /* for (;;) */ 7700 1.1 christos 7701 1.1 christos if (best_regs_set) 7702 1.1 christos goto restore_best_regs; 7703 1.1 christos 7704 1.1 christos FREE_VARIABLES (); 7705 1.1 christos 7706 1.1 christos return -1; /* Failure to match. */ 7707 1.1 christos } /* re_match_2 */ 7708 1.1 christos 7709 1.1 christos /* Subroutine definitions for re_match_2. */ 7711 1.1 christos 7712 1.1 christos 7713 1.1 christos /* We are passed P pointing to a register number after a start_memory. 7714 1.1 christos 7715 1.1 christos Return true if the pattern up to the corresponding stop_memory can 7716 1.1 christos match the empty string, and false otherwise. 7717 1.1 christos 7718 1.1 christos If we find the matching stop_memory, sets P to point to one past its number. 7719 1.1 christos Otherwise, sets P to an undefined byte less than or equal to END. 7720 1.1 christos 7721 1.1 christos We don't handle duplicates properly (yet). */ 7722 1.1 christos 7723 1.1 christos static boolean 7724 1.1 christos PREFIX(group_match_null_string_p) (p, end, reg_info) 7725 1.1 christos UCHAR_T **p, *end; 7726 1.1 christos PREFIX(register_info_type) *reg_info; 7727 1.1 christos { 7728 1.1 christos int mcnt; 7729 1.1 christos /* Point to after the args to the start_memory. */ 7730 1.1 christos UCHAR_T *p1 = *p + 2; 7731 1.1 christos 7732 1.1 christos while (p1 < end) 7733 1.1 christos { 7734 1.1 christos /* Skip over opcodes that can match nothing, and return true or 7735 1.1 christos false, as appropriate, when we get to one that can't, or to the 7736 1.1 christos matching stop_memory. */ 7737 1.1 christos 7738 1.1 christos switch ((re_opcode_t) *p1) 7739 1.1 christos { 7740 1.1 christos /* Could be either a loop or a series of alternatives. */ 7741 1.1 christos case on_failure_jump: 7742 1.1 christos p1++; 7743 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7744 1.1 christos 7745 1.1 christos /* If the next operation is not a jump backwards in the 7746 1.1 christos pattern. 
*/ 7747 1.1 christos 7748 1.1 christos if (mcnt >= 0) 7749 1.1 christos { 7750 1.1 christos /* Go through the on_failure_jumps of the alternatives, 7751 1.1 christos seeing if any of the alternatives cannot match nothing. 7752 1.1 christos The last alternative starts with only a jump, 7753 1.1 christos whereas the rest start with on_failure_jump and end 7754 1.1 christos with a jump, e.g., here is the pattern for `a|b|c': 7755 1.1 christos 7756 1.1 christos /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 7757 1.1 christos /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 7758 1.1 christos /exactn/1/c 7759 1.1 christos 7760 1.1 christos So, we have to first go through the first (n-1) 7761 1.1 christos alternatives and then deal with the last one separately. */ 7762 1.1 christos 7763 1.1 christos 7764 1.1 christos /* Deal with the first (n-1) alternatives, which start 7765 1.1 christos with an on_failure_jump (see above) that jumps to right 7766 1.1 christos past a jump_past_alt. */ 7767 1.1 christos 7768 1.1 christos while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] == 7769 1.1 christos jump_past_alt) 7770 1.1 christos { 7771 1.1 christos /* `mcnt' holds how many bytes long the alternative 7772 1.1 christos is, including the ending `jump_past_alt' and 7773 1.1 christos its number. */ 7774 1.1 christos 7775 1.1 christos if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt - 7776 1.1 christos (1 + OFFSET_ADDRESS_SIZE), 7777 1.1 christos reg_info)) 7778 1.1 christos return false; 7779 1.1 christos 7780 1.1 christos /* Move to right after this alternative, including the 7781 1.1 christos jump_past_alt. */ 7782 1.1 christos p1 += mcnt; 7783 1.1 christos 7784 1.1 christos /* Break if it's the beginning of an n-th alternative 7785 1.1 christos that doesn't begin with an on_failure_jump. 
*/ 7786 1.1 christos if ((re_opcode_t) *p1 != on_failure_jump) 7787 1.1 christos break; 7788 1.1 christos 7789 1.1 christos /* Still have to check that it's not an n-th 7790 1.1 christos alternative that starts with an on_failure_jump. */ 7791 1.1 christos p1++; 7792 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7793 1.1 christos if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] != 7794 1.1 christos jump_past_alt) 7795 1.1 christos { 7796 1.1 christos /* Get to the beginning of the n-th alternative. */ 7797 1.1 christos p1 -= 1 + OFFSET_ADDRESS_SIZE; 7798 1.1 christos break; 7799 1.1 christos } 7800 1.1 christos } 7801 1.1 christos 7802 1.1 christos /* Deal with the last alternative: go back and get number 7803 1.1 christos of the `jump_past_alt' just before it. `mcnt' contains 7804 1.1 christos the length of the alternative. */ 7805 1.1 christos EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE); 7806 1.1 christos 7807 1.1 christos if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info)) 7808 1.1 christos return false; 7809 1.1 christos 7810 1.1 christos p1 += mcnt; /* Get past the n-th alternative. */ 7811 1.1 christos } /* if mcnt > 0 */ 7812 1.1 christos break; 7813 1.1 christos 7814 1.1 christos 7815 1.1 christos case stop_memory: 7816 1.1 christos assert (p1[1] == **p); 7817 1.1 christos *p = p1 + 2; 7818 1.1 christos return true; 7819 1.1 christos 7820 1.1 christos 7821 1.1 christos default: 7822 1.1 christos if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info)) 7823 1.1 christos return false; 7824 1.1 christos } 7825 1.1 christos } /* while p1 < end */ 7826 1.1 christos 7827 1.1 christos return false; 7828 1.1 christos } /* group_match_null_string_p */ 7829 1.1 christos 7830 1.1 christos 7831 1.1 christos /* Similar to group_match_null_string_p, but doesn't deal with alternatives: 7832 1.1 christos It expects P to be the first byte of a single alternative and END one 7833 1.1 christos byte past the last. 
The alternative can contain groups. */ 7834 1.1 christos 7835 1.1 christos static boolean 7836 1.1 christos PREFIX(alt_match_null_string_p) (p, end, reg_info) 7837 1.1 christos UCHAR_T *p, *end; 7838 1.1 christos PREFIX(register_info_type) *reg_info; 7839 1.1 christos { 7840 1.1 christos int mcnt; 7841 1.1 christos UCHAR_T *p1 = p; 7842 1.1 christos 7843 1.1 christos while (p1 < end) 7844 1.1 christos { 7845 1.1 christos /* Skip over opcodes that can match nothing, and break when we get 7846 1.1 christos to one that can't. */ 7847 1.1 christos 7848 1.1 christos switch ((re_opcode_t) *p1) 7849 1.1 christos { 7850 1.1 christos /* It's a loop. */ 7851 1.1 christos case on_failure_jump: 7852 1.1 christos p1++; 7853 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7854 1.1 christos p1 += mcnt; 7855 1.1 christos break; 7856 1.1 christos 7857 1.1 christos default: 7858 1.1 christos if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info)) 7859 1.1 christos return false; 7860 1.1 christos } 7861 1.1 christos } /* while p1 < end */ 7862 1.1 christos 7863 1.1 christos return true; 7864 1.1 christos } /* alt_match_null_string_p */ 7865 1.1 christos 7866 1.1 christos 7867 1.1 christos /* Deals with the ops common to group_match_null_string_p and 7868 1.1 christos alt_match_null_string_p. 7869 1.1 christos 7870 1.1 christos Sets P to one after the op and its arguments, if any. 
*/ 7871 1.1 christos 7872 1.1 christos static boolean 7873 1.1 christos PREFIX(common_op_match_null_string_p) (p, end, reg_info) 7874 1.1 christos UCHAR_T **p, *end; 7875 1.1 christos PREFIX(register_info_type) *reg_info; 7876 1.1 christos { 7877 1.1 christos int mcnt; 7878 1.1 christos boolean ret; 7879 1.1 christos int reg_no; 7880 1.1 christos UCHAR_T *p1 = *p; 7881 1.1 christos 7882 1.1 christos switch ((re_opcode_t) *p1++) 7883 1.1 christos { 7884 1.1 christos case no_op: 7885 1.1 christos case begline: 7886 1.1 christos case endline: 7887 1.1 christos case begbuf: 7888 1.1 christos case endbuf: 7889 1.1 christos case wordbeg: 7890 1.1 christos case wordend: 7891 1.1 christos case wordbound: 7892 1.1 christos case notwordbound: 7893 1.1 christos #ifdef emacs 7894 1.1 christos case before_dot: 7895 1.1 christos case at_dot: 7896 1.1 christos case after_dot: 7897 1.1 christos #endif 7898 1.1 christos break; 7899 1.1 christos 7900 1.1 christos case start_memory: 7901 1.1 christos reg_no = *p1; 7902 1.1 christos assert (reg_no > 0 && reg_no <= MAX_REGNUM); 7903 1.1 christos ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info); 7904 1.1 christos 7905 1.1 christos /* Have to set this here in case we're checking a group which 7906 1.1 christos contains a group and a back reference to it. */ 7907 1.1 christos 7908 1.1 christos if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 7909 1.1 christos REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 7910 1.1 christos 7911 1.1 christos if (!ret) 7912 1.1 christos return false; 7913 1.1 christos break; 7914 1.1 christos 7915 1.1 christos /* If this is an optimized succeed_n for zero times, make the jump. 
*/ 7916 1.1 christos case jump: 7917 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7918 1.1 christos if (mcnt >= 0) 7919 1.1 christos p1 += mcnt; 7920 1.1 christos else 7921 1.1 christos return false; 7922 1.1 christos break; 7923 1.1 christos 7924 1.1 christos case succeed_n: 7925 1.1 christos /* Get to the number of times to succeed. */ 7926 1.1 christos p1 += OFFSET_ADDRESS_SIZE; 7927 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7928 1.1 christos 7929 1.1 christos if (mcnt == 0) 7930 1.1 christos { 7931 1.1 christos p1 -= 2 * OFFSET_ADDRESS_SIZE; 7932 1.1 christos EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7933 1.1 christos p1 += mcnt; 7934 1.1 christos } 7935 1.1 christos else 7936 1.1 christos return false; 7937 1.1 christos break; 7938 1.1 christos 7939 1.1 christos case duplicate: 7940 1.1 christos if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 7941 1.1 christos return false; 7942 1.1 christos break; 7943 1.1 christos 7944 1.1 christos case set_number_at: 7945 1.1 christos p1 += 2 * OFFSET_ADDRESS_SIZE; 7946 1.1 christos 7947 1.1 christos default: 7948 1.1 christos /* All other opcodes mean we cannot match the empty string. */ 7949 1.1 christos return false; 7950 1.1 christos } 7951 1.1 christos 7952 1.1 christos *p = p1; 7953 1.1 christos return true; 7954 1.1 christos } /* common_op_match_null_string_p */ 7955 1.1 christos 7956 1.1 christos 7957 1.1 christos /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 7958 1.1 christos bytes; nonzero otherwise. 
*/ 7959 1.1 christos 7960 1.1 christos static int 7961 1.1 christos PREFIX(bcmp_translate) (s1, s2, len, translate) 7962 1.1 christos const CHAR_T *s1, *s2; 7963 1.1 christos register int len; 7964 1.1 christos RE_TRANSLATE_TYPE translate; 7965 1.1 christos { 7966 1.1 christos register const UCHAR_T *p1 = (const UCHAR_T *) s1; 7967 1.1 christos register const UCHAR_T *p2 = (const UCHAR_T *) s2; 7968 1.1 christos while (len) 7969 1.1 christos { 7970 1.1 christos #ifdef WCHAR 7971 1.1 christos if (((*p1<=0xff)?translate[*p1++]:*p1++) 7972 1.1 christos != ((*p2<=0xff)?translate[*p2++]:*p2++)) 7973 1.1 christos return 1; 7974 1.1 christos #else /* BYTE */ 7975 1.1 christos if (translate[*p1++] != translate[*p2++]) return 1; 7976 1.1 christos #endif /* WCHAR */ 7977 1.1 christos len--; 7978 1.1 christos } 7979 1.1 christos return 0; 7980 1.1 christos } 7981 1.1 christos 7982 1.1 christos 7984 1.1 christos #else /* not INSIDE_RECURSION */ 7985 1.1 christos 7986 1.1 christos /* Entry points for GNU code. */ 7987 1.1 christos 7988 1.1 christos /* re_compile_pattern is the GNU regular expression compiler: it 7989 1.1 christos compiles PATTERN (of length SIZE) and puts the result in BUFP. 7990 1.1 christos Returns 0 if the pattern was valid, otherwise an error string. 7991 1.1 christos 7992 1.1 christos Assumes the `allocated' (and perhaps `buffer') and `translate' fields 7993 1.1 christos are set in BUFP on entry. 7994 1.1 christos 7995 1.1 christos We call regex_compile to do the actual compilation. */ 7996 1.1 christos 7997 1.1 christos const char * 7998 1.1 christos re_compile_pattern (pattern, length, bufp) 7999 1.1 christos const char *pattern; 8000 1.1 christos size_t length; 8001 1.1 christos struct re_pattern_buffer *bufp; 8002 1.1 christos { 8003 1.1 christos reg_errcode_t ret; 8004 1.1 christos 8005 1.1 christos /* GNU code is written to assume at least RE_NREGS registers will be set 8006 1.1 christos (and at least one extra will be -1). 
*/ 8007 1.1 christos bufp->regs_allocated = REGS_UNALLOCATED; 8008 1.1 christos 8009 1.1 christos /* And GNU code determines whether or not to get register information 8010 1.1 christos by passing null for the REGS argument to re_match, etc., not by 8011 1.1 christos setting no_sub. */ 8012 1.1 christos bufp->no_sub = 0; 8013 1.1 christos 8014 1.1 christos /* Match anchors at newline. */ 8015 1.1 christos bufp->newline_anchor = 1; 8016 1.1 christos 8017 1.1 christos # ifdef MBS_SUPPORT 8018 1.1 christos if (MB_CUR_MAX != 1) 8019 1.1 christos ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp); 8020 1.1 christos else 8021 1.1 christos # endif 8022 1.1 christos ret = byte_regex_compile (pattern, length, re_syntax_options, bufp); 8023 1.1 christos 8024 1.1 christos if (!ret) 8025 1.1 christos return NULL; 8026 1.1 christos return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]); 8027 1.1 christos } 8028 1.1 christos #ifdef _LIBC 8029 1.1 christos weak_alias (__re_compile_pattern, re_compile_pattern) 8030 1.1 christos #endif 8031 1.1 christos 8032 1.1 christos /* Entry points compatible with 4.2 BSD regex library. We don't define 8034 1.1 christos them unless specifically requested. */ 8035 1.1 christos 8036 1.1 christos #if defined _REGEX_RE_COMP || defined _LIBC 8037 1.1 christos 8038 1.1 christos /* BSD has one and only one pattern buffer. */ 8039 1.1 christos static struct re_pattern_buffer re_comp_buf; 8040 1.1 christos 8041 1.1 christos char * 8042 1.1 christos #ifdef _LIBC 8043 1.1 christos /* Make these definitions weak in libc, so POSIX programs can redefine 8044 1.1 christos these names if they don't use our functions, and still use 8045 1.1 christos regcomp/regexec below without link errors. 
*/ 8046 1.1 christos weak_function 8047 1.1 christos #endif 8048 1.1 christos re_comp (s) 8049 1.1 christos const char *s; 8050 1.1 christos { 8051 1.1 christos reg_errcode_t ret; 8052 1.1 christos 8053 1.1 christos if (!s) 8054 1.1 christos { 8055 1.1 christos if (!re_comp_buf.buffer) 8056 1.1 christos return gettext ("No previous regular expression"); 8057 1.1 christos return 0; 8058 1.1 christos } 8059 1.1 christos 8060 1.1 christos if (!re_comp_buf.buffer) 8061 1.1 christos { 8062 1.1 christos re_comp_buf.buffer = (unsigned char *) malloc (200); 8063 1.1 christos if (re_comp_buf.buffer == NULL) 8064 1.1 christos return (char *) gettext (re_error_msgid 8065 1.1 christos + re_error_msgid_idx[(int) REG_ESPACE]); 8066 1.1 christos re_comp_buf.allocated = 200; 8067 1.1 christos 8068 1.1 christos re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 8069 1.1 christos if (re_comp_buf.fastmap == NULL) 8070 1.1 christos return (char *) gettext (re_error_msgid 8071 1.1 christos + re_error_msgid_idx[(int) REG_ESPACE]); 8072 1.1 christos } 8073 1.1 christos 8074 1.1 christos /* Since `re_exec' always passes NULL for the `regs' argument, we 8075 1.1 christos don't need to initialize the pattern buffer fields which affect it. */ 8076 1.1 christos 8077 1.1 christos /* Match anchors at newlines. */ 8078 1.1 christos re_comp_buf.newline_anchor = 1; 8079 1.1 christos 8080 1.1 christos # ifdef MBS_SUPPORT 8081 1.1 christos if (MB_CUR_MAX != 1) 8082 1.1 christos ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 8083 1.1 christos else 8084 1.1 christos # endif 8085 1.1 christos ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 8086 1.1 christos 8087 1.1 christos if (!ret) 8088 1.1 christos return NULL; 8089 1.1 christos 8090 1.1 christos /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ 8091 1.1 christos return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]); 8092 1.1 christos } 8093 1.1 christos 8094 1.1 christos 8095 1.1 christos int 8096 1.1 christos #ifdef _LIBC 8097 1.1 christos weak_function 8098 1.1 christos #endif 8099 1.1 christos re_exec (s) 8100 1.1 christos const char *s; 8101 1.1 christos { 8102 1.1 christos const int len = strlen (s); 8103 1.1 christos return 8104 1.1 christos 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); 8105 1.1 christos } 8106 1.1 christos 8107 1.1 christos #endif /* _REGEX_RE_COMP */ 8108 1.1 christos 8109 1.1 christos /* POSIX.2 functions. Don't define these for Emacs. */ 8111 1.1 christos 8112 1.1 christos #ifndef emacs 8113 1.1 christos 8114 1.1 christos /* regcomp takes a regular expression as a string and compiles it. 8115 1.1 christos 8116 1.1 christos PREG is a regex_t *. We do not expect any fields to be initialized, 8117 1.1 christos since POSIX says we shouldn't. Thus, we set 8118 1.1 christos 8119 1.1 christos `buffer' to the compiled pattern; 8120 1.1 christos `used' to the length of the compiled pattern; 8121 1.1 christos `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 8122 1.1 christos REG_EXTENDED bit in CFLAGS is set; otherwise, to 8123 1.1 christos RE_SYNTAX_POSIX_BASIC; 8124 1.1 christos `newline_anchor' to REG_NEWLINE being set in CFLAGS; 8125 1.1 christos `fastmap' to an allocated space for the fastmap; 8126 1.1 christos `fastmap_accurate' to zero; 8127 1.1 christos `re_nsub' to the number of subexpressions in PATTERN. 8128 1.1 christos 8129 1.1 christos PATTERN is the address of the pattern string. 8130 1.1 christos 8131 1.1 christos CFLAGS is a series of bits which affect compilation. 8132 1.1 christos 8133 1.1 christos If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 8134 1.1 christos use POSIX basic syntax. 8135 1.1 christos 8136 1.1 christos If REG_NEWLINE is set, then . and [^...] don't match newline. 
8137 1.1 christos Also, regexec will try a match beginning after every newline. 8138 1.1 christos 8139 1.1 christos If REG_ICASE is set, then we considers upper- and lowercase 8140 1.1 christos versions of letters to be equivalent when matching. 8141 1.1 christos 8142 1.1 christos If REG_NOSUB is set, then when PREG is passed to regexec, that 8143 1.1 christos routine will report only success or failure, and nothing about the 8144 1.1 christos registers. 8145 1.1 christos 8146 1.1 christos It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 8147 1.1 christos the return codes and their meanings.) */ 8148 1.1 christos 8149 1.1 christos int 8150 1.1 christos regcomp (preg, pattern, cflags) 8151 1.1 christos regex_t *preg; 8152 1.1 christos const char *pattern; 8153 1.1 christos int cflags; 8154 1.1 christos { 8155 1.1 christos reg_errcode_t ret; 8156 1.1 christos reg_syntax_t syntax 8157 1.1 christos = (cflags & REG_EXTENDED) ? 8158 1.1 christos RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 8159 1.1 christos 8160 1.1 christos /* regex_compile will allocate the space for the compiled pattern. */ 8161 1.1 christos preg->buffer = 0; 8162 1.1 christos preg->allocated = 0; 8163 1.1 christos preg->used = 0; 8164 1.1 christos 8165 1.1 christos /* Try to allocate space for the fastmap. */ 8166 1.1 christos preg->fastmap = (char *) malloc (1 << BYTEWIDTH); 8167 1.1 christos 8168 1.1 christos if (cflags & REG_ICASE) 8169 1.1 christos { 8170 1.1 christos unsigned i; 8171 1.1 christos 8172 1.1 christos preg->translate 8173 1.1 christos = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 8174 1.1 christos * sizeof (*(RE_TRANSLATE_TYPE)0)); 8175 1.1 christos if (preg->translate == NULL) 8176 1.1 christos return (int) REG_ESPACE; 8177 1.1 christos 8178 1.1 christos /* Map uppercase characters to corresponding lowercase ones. */ 8179 1.1 christos for (i = 0; i < CHAR_SET_SIZE; i++) 8180 1.1 christos preg->translate[i] = ISUPPER (i) ? 
TOLOWER (i) : i; 8181 1.1 christos } 8182 1.1 christos else 8183 1.1 christos preg->translate = NULL; 8184 1.1 christos 8185 1.1 christos /* If REG_NEWLINE is set, newlines are treated differently. */ 8186 1.1 christos if (cflags & REG_NEWLINE) 8187 1.1 christos { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 8188 1.1 christos syntax &= ~RE_DOT_NEWLINE; 8189 1.1 christos syntax |= RE_HAT_LISTS_NOT_NEWLINE; 8190 1.1 christos /* It also changes the matching behavior. */ 8191 1.1 christos preg->newline_anchor = 1; 8192 1.1 christos } 8193 1.1 christos else 8194 1.1 christos preg->newline_anchor = 0; 8195 1.1 christos 8196 1.1 christos preg->no_sub = !!(cflags & REG_NOSUB); 8197 1.1 christos 8198 1.1 christos /* POSIX says a null character in the pattern terminates it, so we 8199 1.1 christos can use strlen here in compiling the pattern. */ 8200 1.1 christos # ifdef MBS_SUPPORT 8201 1.1 christos if (MB_CUR_MAX != 1) 8202 1.1 christos ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg); 8203 1.1 christos else 8204 1.1 christos # endif 8205 1.1 christos ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg); 8206 1.1 christos 8207 1.1 christos /* POSIX doesn't distinguish between an unmatched open-group and an 8208 1.1 christos unmatched close-group: both are REG_EPAREN. */ 8209 1.1 christos if (ret == REG_ERPAREN) ret = REG_EPAREN; 8210 1.1 christos 8211 1.1 christos if (ret == REG_NOERROR && preg->fastmap) 8212 1.1 christos { 8213 1.1 christos /* Compute the fastmap now, since regexec cannot modify the pattern 8214 1.1 christos buffer. */ 8215 1.1 christos if (re_compile_fastmap (preg) == -2) 8216 1.1 christos { 8217 1.1 christos /* Some error occurred while computing the fastmap, just forget 8218 1.1 christos about it. 
*/ 8219 1.1 christos free (preg->fastmap); 8220 1.1 christos preg->fastmap = NULL; 8221 1.1 christos } 8222 1.1 christos } 8223 1.1 christos 8224 1.1 christos return (int) ret; 8225 1.1 christos } 8226 1.1 christos #ifdef _LIBC 8227 1.1 christos weak_alias (__regcomp, regcomp) 8228 1.1 christos #endif 8229 1.1 christos 8230 1.1 christos 8231 1.1 christos /* regexec searches for a given pattern, specified by PREG, in the 8232 1.1 christos string STRING. 8233 1.1 christos 8234 1.1 christos If NMATCH is zero or REG_NOSUB was set in the cflags argument to 8235 1.1 christos `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 8236 1.1 christos least NMATCH elements, and we set them to the offsets of the 8237 1.1 christos corresponding matched substrings. 8238 1.1 christos 8239 1.1 christos EFLAGS specifies `execution flags' which affect matching: if 8240 1.1 christos REG_NOTBOL is set, then ^ does not match at the beginning of the 8241 1.1 christos string; if REG_NOTEOL is set, then $ does not match at the end. 8242 1.1 christos 8243 1.1 christos We return 0 if we find a match and REG_NOMATCH if not. 
*/ 8244 1.1 christos 8245 1.1 christos int 8246 1.1 christos regexec (preg, string, nmatch, pmatch, eflags) 8247 1.1 christos const regex_t *preg; 8248 1.1 christos const char *string; 8249 1.1 christos size_t nmatch; 8250 1.1 christos regmatch_t pmatch[]; 8251 1.1 christos int eflags; 8252 1.1 christos { 8253 1.1 christos int ret; 8254 1.1 christos struct re_registers regs; 8255 1.1 christos regex_t private_preg; 8256 1.1 christos int len = strlen (string); 8257 1.1 christos boolean want_reg_info = !preg->no_sub && nmatch > 0; 8258 1.1 christos 8259 1.1 christos private_preg = *preg; 8260 1.1 christos 8261 1.1 christos private_preg.not_bol = !!(eflags & REG_NOTBOL); 8262 1.1 christos private_preg.not_eol = !!(eflags & REG_NOTEOL); 8263 1.1 christos 8264 1.1 christos /* The user has told us exactly how many registers to return 8265 1.1 christos information about, via `nmatch'. We have to pass that on to the 8266 1.1 christos matching routines. */ 8267 1.1 christos private_preg.regs_allocated = REGS_FIXED; 8268 1.1 christos 8269 1.1 christos if (want_reg_info) 8270 1.1 christos { 8271 1.1 christos regs.num_regs = nmatch; 8272 1.1 christos regs.start = TALLOC (nmatch * 2, regoff_t); 8273 1.1 christos if (regs.start == NULL) 8274 1.1 christos return (int) REG_NOMATCH; 8275 1.1 christos regs.end = regs.start + nmatch; 8276 1.1 christos } 8277 1.1 christos 8278 1.1 christos /* Perform the searching operation. */ 8279 1.1 christos ret = re_search (&private_preg, string, len, 8280 1.1 christos /* start: */ 0, /* range: */ len, 8281 1.1 christos want_reg_info ? ®s : (struct re_registers *) 0); 8282 1.1 christos 8283 1.1 christos /* Copy the register information to the POSIX structure. 
*/ 8284 1.1 christos if (want_reg_info) 8285 1.1 christos { 8286 1.1 christos if (ret >= 0) 8287 1.1 christos { 8288 1.1 christos unsigned r; 8289 1.1 christos 8290 1.1 christos for (r = 0; r < nmatch; r++) 8291 1.1 christos { 8292 1.1 christos pmatch[r].rm_so = regs.start[r]; 8293 1.1 christos pmatch[r].rm_eo = regs.end[r]; 8294 1.1 christos } 8295 1.1 christos } 8296 1.1 christos 8297 1.1 christos /* If we needed the temporary register info, free the space now. */ 8298 1.1 christos free (regs.start); 8299 1.1 christos } 8300 1.1 christos 8301 1.1 christos /* We want zero return to mean success, unlike `re_search'. */ 8302 1.1 christos return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 8303 1.1 christos } 8304 1.1 christos #ifdef _LIBC 8305 1.1 christos weak_alias (__regexec, regexec) 8306 1.1 christos #endif 8307 1.1 christos 8308 1.1 christos 8309 1.1 christos /* Returns a message corresponding to an error code, ERRCODE, returned 8310 1.1 christos from either regcomp or regexec. We don't use PREG here. */ 8311 1.1 christos 8312 1.1 christos size_t 8313 1.1 christos regerror (errcode, preg, errbuf, errbuf_size) 8314 1.1 christos int errcode; 8315 1.1 christos const regex_t *preg; 8316 1.1 christos char *errbuf; 8317 1.1 christos size_t errbuf_size; 8318 1.1 christos { 8319 1.1 christos const char *msg; 8320 1.1 christos size_t msg_size; 8321 1.1 christos 8322 1.1 christos if (errcode < 0 8323 1.1 christos || errcode >= (int) (sizeof (re_error_msgid_idx) 8324 1.1 christos / sizeof (re_error_msgid_idx[0]))) 8325 1.1 christos /* Only error codes returned by the rest of the code should be passed 8326 1.1 christos to this routine. If we are given anything else, or if other regex 8327 1.1 christos code generates an invalid error code, then the program has a bug. 8328 1.1 christos Dump core so we can fix it. 
*/ 8329 1.1 christos abort (); 8330 1.1 christos 8331 1.1 christos msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]); 8332 1.1 christos 8333 1.1 christos msg_size = strlen (msg) + 1; /* Includes the null. */ 8334 1.1 christos 8335 1.1 christos if (errbuf_size != 0) 8336 1.1 christos { 8337 1.1 christos if (msg_size > errbuf_size) 8338 1.1 christos { 8339 1.1 christos #if defined HAVE_MEMPCPY || defined _LIBC 8340 1.1 christos *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; 8341 1.1 christos #else 8342 1.1 christos memcpy (errbuf, msg, errbuf_size - 1); 8343 1.1 christos errbuf[errbuf_size - 1] = 0; 8344 1.1 christos #endif 8345 1.1 christos } 8346 1.1 christos else 8347 1.1 christos memcpy (errbuf, msg, msg_size); 8348 1.1 christos } 8349 1.1 christos 8350 1.1 christos return msg_size; 8351 1.1 christos } 8352 1.1 christos #ifdef _LIBC 8353 1.1 christos weak_alias (__regerror, regerror) 8354 1.1 christos #endif 8355 1.1 christos 8356 1.1 christos 8357 1.1 christos /* Free dynamically allocated space used by PREG. 
*/ 8358 1.1 christos 8359 1.1 christos void 8360 1.1 christos regfree (preg) 8361 1.1 christos regex_t *preg; 8362 1.1 christos { 8363 1.1 christos if (preg->buffer != NULL) 8364 1.1 christos free (preg->buffer); 8365 1.1 christos preg->buffer = NULL; 8366 1.1 christos 8367 1.1 christos preg->allocated = 0; 8368 1.1 christos preg->used = 0; 8369 1.1 christos 8370 1.1 christos if (preg->fastmap != NULL) 8371 1.1 christos free (preg->fastmap); 8372 1.1 christos preg->fastmap = NULL; 8373 1.1 christos preg->fastmap_accurate = 0; 8374 1.1 christos 8375 1.1 christos if (preg->translate != NULL) 8376 1.1 christos free (preg->translate); 8377 1.1 christos preg->translate = NULL; 8378 1.1 christos } 8379 1.1 christos #ifdef _LIBC 8380 1.1 christos weak_alias (__regfree, regfree) 8381 1.1 christos #endif 8382 1.1 christos 8383 1.1 christos #endif /* not emacs */ 8384 1.1 christos 8385 1.1 christos #endif /* not INSIDE_RECURSION */ 8386 1.1 christos 8387 1.1 christos 8388 1.1 christos #undef STORE_NUMBER 8390 1.1 christos #undef STORE_NUMBER_AND_INCR 8391 1.1 christos #undef EXTRACT_NUMBER 8392 1.1 christos #undef EXTRACT_NUMBER_AND_INCR 8393 1.1 christos 8394 1.1 christos #undef DEBUG_PRINT_COMPILED_PATTERN 8395 1.1 christos #undef DEBUG_PRINT_DOUBLE_STRING 8396 1.1 christos 8397 1.1 christos #undef INIT_FAIL_STACK 8398 1.1 christos #undef RESET_FAIL_STACK 8399 1.1 christos #undef DOUBLE_FAIL_STACK 8400 1.1 christos #undef PUSH_PATTERN_OP 8401 1.1 christos #undef PUSH_FAILURE_POINTER 8402 1.1 christos #undef PUSH_FAILURE_INT 8403 1.1 christos #undef PUSH_FAILURE_ELT 8404 1.1 christos #undef POP_FAILURE_POINTER 8405 1.1 christos #undef POP_FAILURE_INT 8406 1.1 christos #undef POP_FAILURE_ELT 8407 1.1 christos #undef DEBUG_PUSH 8408 1.1 christos #undef DEBUG_POP 8409 1.1 christos #undef PUSH_FAILURE_POINT 8410 1.1 christos #undef POP_FAILURE_POINT 8411 1.1 christos 8412 1.1 christos #undef REG_UNSET_VALUE 8413 1.1 christos #undef REG_UNSET 8414 1.1 christos 8415 1.1 christos 
#undef PATFETCH 8416 1.1 christos #undef PATFETCH_RAW 8417 1.1 christos #undef PATUNFETCH 8418 1.1 christos #undef TRANSLATE 8419 1.1 christos 8420 1.1 christos #undef INIT_BUF_SIZE 8421 1.1 christos #undef GET_BUFFER_SPACE 8422 1.1 christos #undef BUF_PUSH 8423 1.1 christos #undef BUF_PUSH_2 8424 1.1 christos #undef BUF_PUSH_3 8425 1.1 christos #undef STORE_JUMP 8426 1.1 christos #undef STORE_JUMP2 8427 1.1 christos #undef INSERT_JUMP 8428 1.1 christos #undef INSERT_JUMP2 8429 1.1 christos #undef EXTEND_BUFFER 8430 1.1 christos #undef GET_UNSIGNED_NUMBER 8431 1.1 christos #undef FREE_STACK_RETURN 8432 1.1 christos 8433 1.1 christos # undef POINTER_TO_OFFSET 8434 # undef MATCHING_IN_FRST_STRING 8435 # undef PREFETCH 8436 # undef AT_STRINGS_BEG 8437 # undef AT_STRINGS_END 8438 # undef WORDCHAR_P 8439 # undef FREE_VAR 8440 # undef FREE_VARIABLES 8441 # undef NO_HIGHEST_ACTIVE_REG 8442 # undef NO_LOWEST_ACTIVE_REG 8443 8444 # undef CHAR_T 8445 # undef UCHAR_T 8446 # undef COMPILED_BUFFER_VAR 8447 # undef OFFSET_ADDRESS_SIZE 8448 # undef CHAR_CLASS_SIZE 8449 # undef PREFIX 8450 # undef ARG_PREFIX 8451 # undef PUT_CHAR 8452 # undef BYTE 8453 # undef WCHAR 8454 8455 # define DEFINED_ONCE 8456