Home | History | Annotate | Line # | Download | only in lib
regex.c revision 1.1
      1 /*	$NetBSD: regex.c,v 1.1 2016/01/13 03:15:30 christos Exp $	*/
      2 
      3 /* Extended regular expression matching and search library,
      4    version 0.12.
      5    (Implements POSIX draft P1003.2/D11.2, except for some of the
      6    internationalization features.)
      7    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 2, or (at your option)
     12    any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program; if not, write to the Free Software Foundation,
     21    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
     22 
     23 /* AIX requires this to be the first thing in the file. */
     24 #if defined _AIX && !defined REGEX_MALLOC
     25   #pragma alloca
     26 #endif
     27 
     28 #undef	_GNU_SOURCE
     29 #define _GNU_SOURCE
     30 
     31 #ifdef HAVE_CONFIG_H
     32 # include <config.h>
     33 #endif
     34 
     35 #ifndef PARAMS
     36 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
     37 #  define PARAMS(args) args
     38 # else
     39 #  define PARAMS(args) ()
     40 # endif  /* GCC.  */
     41 #endif  /* Not PARAMS.  */
     42 
     43 #ifndef INSIDE_RECURSION
     44 
     45 # if defined STDC_HEADERS && !defined emacs
     46 #  include <stddef.h>
     47 # else
     48 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
     49 #  include <sys/types.h>
     50 # endif
     51 
     52 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
     53 
     54 /* For platforms which support the ISO C Amendment 1 functionality we
     55    support user defined character classes.  */
     56 # if defined _LIBC || WIDE_CHAR_SUPPORT
     57 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
     58 #  include <wchar.h>
     59 #  include <wctype.h>
     60 # endif
     61 
     62 # ifdef _LIBC
     63 /* We have to keep the namespace clean.  */
     64 #  define regfree(preg) __regfree (preg)
     65 #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
     66 #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
     67 #  define regerror(errcode, preg, errbuf, errbuf_size) \
     68 	__regerror(errcode, preg, errbuf, errbuf_size)
     69 #  define re_set_registers(bu, re, nu, st, en) \
     70 	__re_set_registers (bu, re, nu, st, en)
     71 #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
     72 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     73 #  define re_match(bufp, string, size, pos, regs) \
     74 	__re_match (bufp, string, size, pos, regs)
     75 #  define re_search(bufp, string, size, startpos, range, regs) \
     76 	__re_search (bufp, string, size, startpos, range, regs)
     77 #  define re_compile_pattern(pattern, length, bufp) \
     78 	__re_compile_pattern (pattern, length, bufp)
     79 #  define re_set_syntax(syntax) __re_set_syntax (syntax)
     80 #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
     81 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
     82 #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
     83 
     84 #  define btowc __btowc
     85 #  define iswctype __iswctype
     86 #  define mbrtowc __mbrtowc
     87 #  define wcslen __wcslen
     88 #  define wcscoll __wcscoll
     89 #  define wcrtomb __wcrtomb
     90 
     91 /* We are also using some library internals.  */
     92 #  include <locale/localeinfo.h>
     93 #  include <locale/elem-hash.h>
     94 #  include <langinfo.h>
     95 #  include <locale/coll-lookup.h>
     96 # endif
     97 
     98 /* This is for other GNU distributions with internationalized messages.  */
     99 # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
    100 #  include <libintl.h>
    101 #  ifdef _LIBC
    102 #   undef gettext
    103 #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
    104 #  endif
    105 # else
    106 #  define gettext(msgid) (msgid)
    107 # endif
    108 
    109 # ifndef gettext_noop
    110 /* This define is so xgettext can find the internationalizable
    111    strings.  */
    112 #  define gettext_noop(String) String
    113 # endif
    114 
    115 /* Support for bounded pointers.  */
    116 # if !defined _LIBC && !defined __BOUNDED_POINTERS__
    117 #  define __bounded	/* nothing */
    118 #  define __unbounded	/* nothing */
    119 #  define __ptrvalue	/* nothing */
    120 # endif
    121 
    122 /* The `emacs' switch turns on certain matching commands
    123    that make sense only in Emacs. */
    124 # ifdef emacs
    125 
    126 #  include "lisp.h"
    127 #  include "buffer.h"
    128 #  include "syntax.h"
    129 
    130 # else  /* not emacs */
    131 
    132 /* If we are not linking with Emacs proper,
    133    we can't use the relocating allocator
    134    even if config.h says that we can.  */
    135 #  undef REL_ALLOC
    136 
    137 #  if defined STDC_HEADERS || defined _LIBC
    138 #   include <stdlib.h>
    139 #  else
    140 char *malloc ();
    141 char *realloc ();
    142 #  endif
    143 
    144 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    145    If nothing else has been done, use the method below.  */
    146 #  ifdef INHIBIT_STRING_HEADER
    147 #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
    148 #    if !defined bzero && !defined bcopy
    149 #     undef INHIBIT_STRING_HEADER
    150 #    endif
    151 #   endif
    152 #  endif
    153 
    154 /* This is the normal way of making sure we have a bcopy and a bzero.
    155    This is used in most programs--a few other programs avoid this
    156    by defining INHIBIT_STRING_HEADER.  */
    157 #  ifndef INHIBIT_STRING_HEADER
    158 #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
    159 #    include <string.h>
    160 #    ifndef bzero
    161 #     ifndef _LIBC
    162 #      define bzero(s, n)	(memset (s, '\0', n), (s))
    163 #     else
    164 #      define bzero(s, n)	__bzero (s, n)
    165 #     endif
    166 #    endif
    167 #   else
    168 #    include <strings.h>
    169 #    ifndef memcmp
    170 #     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
    171 #    endif
    172 #    ifndef memcpy
    173 #     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
    174 #    endif
    175 #   endif
    176 #  endif
    177 
    178 /* Define the syntax stuff for \<, \>, etc.  */
    179 
    180 /* This must be nonzero for the wordchar and notwordchar pattern
    181    commands in re_match_2.  */
    182 #  ifndef Sword
    183 #   define Sword 1
    184 #  endif
    185 
    186 #  ifdef SWITCH_ENUM_BUG
    187 #   define SWITCH_ENUM_CAST(x) ((int)(x))
    188 #  else
    189 #   define SWITCH_ENUM_CAST(x) (x)
    190 #  endif
    191 
    192 # endif /* not emacs */
    193 
    194 # if defined _LIBC || HAVE_LIMITS_H
    195 #  include <limits.h>
    196 # endif
    197 
    198 # ifndef MB_LEN_MAX
    199 #  define MB_LEN_MAX 1
    200 # endif
    201 
    202 /* Get the interface, including the syntax bits.  */
    204 # include <regex.h>
    205 
    206 /* isalpha etc. are used for the character classes.  */
    207 # include <ctype.h>
    208 
    209 /* Jim Meyering writes:
    210 
    211    "... Some ctype macros are valid only for character codes that
    212    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
    213    using /bin/cc or gcc but without giving an ansi option).  So, all
    214    ctype uses should be through macros like ISPRINT...  If
    215    STDC_HEADERS is defined, then autoconf has verified that the ctype
    216    macros don't need to be guarded with references to isascii. ...
    217    Defining isascii to 1 should let any compiler worth its salt
    218    eliminate the && through constant folding."
    219    Solaris defines some of these symbols so we must undefine them first.  */
    220 
    221 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
    222 #  define IN_CTYPE_DOMAIN(c) 1
    223 # else
    224 #  define IN_CTYPE_DOMAIN(c) isascii(c)
    225 # endif
    226 
    227 # ifdef isblank
    228 #  define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
    229 # else
    230 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
    231 # endif
    232 # ifdef isgraph
    233 #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
    234 # else
    235 #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
    236 # endif
    237 
    238 # undef ISPRINT
    239 # define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
    240 # define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
    241 # define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
    242 # define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
    243 # define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
    244 # define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
    245 # define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
    246 # define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
    247 # define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
    248 # define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
    249 
    250 # ifdef _tolower
    251 #  define TOLOWER(c) _tolower(c)
    252 # else
    253 #  define TOLOWER(c) tolower(c)
    254 # endif
    255 
    256 # ifndef NULL
    257 #  define NULL (void *)0
    258 # endif
    259 
    260 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
    261    since ours (we hope) works properly with all combinations of
    262    machines, compilers, `char' and `unsigned char' argument types.
    263    (Per Bothner suggested the basic approach.)  */
    264 # undef SIGN_EXTEND_CHAR
    265 # if __STDC__
    266 #  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
    267 # else  /* not __STDC__ */
    268 /* As in Harbison and Steele.  */
    269 #  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
    270 # endif
    271 
    272 # ifndef emacs
    274 /* How many characters in the character set.  */
    275 #  define CHAR_SET_SIZE 256
    276 
    277 #  ifdef SYNTAX_TABLE
    278 
    279 extern char *re_syntax_table;
    280 
    281 #  else /* not SYNTAX_TABLE */
    282 
    283 static char re_syntax_table[CHAR_SET_SIZE];
    284 
    285 static void init_syntax_once PARAMS ((void));
    286 
    287 static void
    288 init_syntax_once ()
    289 {
        /* Build re_syntax_table the first time this is called.  Afterwards
           every character code for which ISALNUM is true, plus '_', maps to
           Sword and every other entry is zero.  Later calls return at once
           because of the static `done' flag.
           NOTE(review): `done' is a plain static int; no locking is visible
           in this function -- confirm callers do not race on first use.  */
    290    register int c;
    291    static int done = 0;
    292 
    293    if (done)
    294      return;
        /* Start from an all-zero table so non-word characters stay 0.  */
    295    bzero (re_syntax_table, sizeof re_syntax_table);
    296 
    297    for (c = 0; c < CHAR_SET_SIZE; ++c)
    298      if (ISALNUM (c))
    299 	re_syntax_table[c] = Sword;
    300 
        /* Underscore is a word constituent even though it is not
           alphanumeric.  */
    301    re_syntax_table['_'] = Sword;
    302 
    303    done = 1;
    304 }
    305 
    306 #  endif /* not SYNTAX_TABLE */
    307 
    308 #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
    309 
    310 # endif /* emacs */
    311 
    312 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    314    use `alloca' instead of `malloc'.  This is because using malloc in
    315    re_search* or re_match* could cause memory leaks when C-g is used in
    316    Emacs; also, malloc is slower and causes storage fragmentation.  On
    317    the other hand, malloc is more portable, and easier to debug.
    318 
    319    Because we sometimes use alloca, some routines have to be macros,
    320    not functions -- `alloca'-allocated space disappears at the end of the
    321    function it is called in.  */
    322 
    323 # ifdef REGEX_MALLOC
    324 
    325 #  define REGEX_ALLOCATE malloc
    326 #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
    327 #  define REGEX_FREE free
    328 
    329 # else /* not REGEX_MALLOC  */
    330 
    331 /* Emacs already defines alloca, sometimes.  */
    332 #  ifndef alloca
    333 
    334 /* Make alloca work the best possible way.  */
    335 #   ifdef __GNUC__
    336 #    define alloca __builtin_alloca
    337 #   else /* not __GNUC__ */
    338 #    if HAVE_ALLOCA_H
    339 #     include <alloca.h>
    340 #    endif /* HAVE_ALLOCA_H */
    341 #   endif /* not __GNUC__ */
    342 
    343 #  endif /* not alloca */
    344 
    345 #  define REGEX_ALLOCATE alloca
    346 
    347 /* Assumes a `char *destination' variable.  */
    348 #  define REGEX_REALLOCATE(source, osize, nsize)			\
    349   (destination = (char *) alloca (nsize),				\
    350    memcpy (destination, source, osize))
    351 
    352 /* No need to do anything to free, after alloca.  */
    353 #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
    354 
    355 # endif /* not REGEX_MALLOC */
    356 
    357 /* Define how to allocate the failure stack.  */
    358 
    359 # if defined REL_ALLOC && defined REGEX_MALLOC
    360 
    361 #  define REGEX_ALLOCATE_STACK(size)				\
    362   r_alloc (&failure_stack_ptr, (size))
    363 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
    364   r_re_alloc (&failure_stack_ptr, (nsize))
    365 #  define REGEX_FREE_STACK(ptr)					\
    366   r_alloc_free (&failure_stack_ptr)
    367 
    368 # else /* not using relocating allocator */
    369 
    370 #  ifdef REGEX_MALLOC
    371 
    372 #   define REGEX_ALLOCATE_STACK malloc
    373 #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
    374 #   define REGEX_FREE_STACK free
    375 
    376 #  else /* not REGEX_MALLOC */
    377 
    378 #   define REGEX_ALLOCATE_STACK alloca
    379 
    380 #   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
    381    REGEX_REALLOCATE (source, osize, nsize)
    382 /* No need to explicitly free anything.  */
    383 #   define REGEX_FREE_STACK(arg)
    384 
    385 #  endif /* not REGEX_MALLOC */
    386 # endif /* not using relocating allocator */
    387 
    388 
    389 /* True if `size1' is nonzero and PTR is pointing anywhere inside
    390    `string1' or just past its end.  This works if PTR is NULL, which is
    391    a good thing.  */
    392 # define FIRST_STRING_P(ptr) 					\
    393   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
    394 
    395 /* (Re)Allocate N items of type T using malloc, or fail.  */
    396 # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
    397 # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
    398 # define RETALLOC_IF(addr, n, t) \
    399   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
    400 # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
    401 
    402 # define BYTEWIDTH 8 /* In bits.  */
    403 
    404 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
    405 
    406 # undef MAX
    407 # undef MIN
    408 # define MAX(a, b) ((a) > (b) ? (a) : (b))
    409 # define MIN(a, b) ((a) < (b) ? (a) : (b))
    410 
    411 typedef char boolean;
    412 # define false 0
    413 # define true 1
    414 
    415 static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
    416                                                    reg_syntax_t syntax,
    417                                                    struct re_pattern_buffer *bufp));
    418 
    419 static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
    420 					     const char *string1, int size1,
    421 					     const char *string2, int size2,
    422 					     int pos,
    423 					     struct re_registers *regs,
    424 					     int stop));
    425 static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
    426 				     const char *string1, int size1,
    427 				     const char *string2, int size2,
    428 				     int startpos, int range,
    429 				     struct re_registers *regs, int stop));
    430 static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
    431 
    432 #ifdef MBS_SUPPORT
    433 static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
    434                                                    reg_syntax_t syntax,
    435                                                    struct re_pattern_buffer *bufp));
    436 
    437 
    438 static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
    439 					    const char *cstring1, int csize1,
    440 					    const char *cstring2, int csize2,
    441 					    int pos,
    442 					    struct re_registers *regs,
    443 					    int stop,
    444 					    wchar_t *string1, int size1,
    445 					    wchar_t *string2, int size2,
    446 					    int *mbs_offset1, int *mbs_offset2));
    447 static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
    448 				    const char *string1, int size1,
    449 				    const char *string2, int size2,
    450 				    int startpos, int range,
    451 				    struct re_registers *regs, int stop));
    452 static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
    453 #endif
    454 
    455 /* These are the command codes that appear in compiled regular
    457    expressions.  Some opcodes are followed by argument bytes.  A
    458    command code can specify any interpretation whatsoever for its
    459    arguments.  Zero bytes may appear in the compiled regular expression.  */
    460 
    461 typedef enum
    462 {
        /* Opcodes of the compiled pattern.  The enumerators are numbered
           consecutively starting from no_op = 0.  exactn_bin exists only
           when MBS_SUPPORT is defined, and the group from before_dot to
           notsyntaxspec only when `emacs' is defined, so the numeric value
           of every later opcode depends on those switches.  */
    463   no_op = 0,
    464 
    465   /* Succeed right away--no more backtracking.  */
    466   succeed,
    467 
    468         /* Followed by one byte giving n, then by n literal bytes.  */
    469   exactn,
    470 
    471 # ifdef MBS_SUPPORT
    472 	/* Same as exactn, but contains binary data.  */
    473   exactn_bin,
    474 # endif
    475 
    476         /* Matches any (more or less) character.  */
    477   anychar,
    478 
    479         /* Matches any one char belonging to specified set.  First
    480            following byte is number of bitmap bytes.  Then come bytes
    481            for a bitmap saying which chars are in.  Bits in each byte
    482            are ordered low-bit-first.  A character is in the set if its
    483            bit is 1.  A character too large to have a bit in the map is
    484            automatically not in the set.  */
    485         /* ifdef MBS_SUPPORT, following element is length of character
    486 	   classes, length of collating symbols, length of equivalence
    487 	   classes, length of character ranges, and length of characters.
    488 	   Next, character class element, collating symbols elements,
    489 	   equivalence class elements, range elements, and character
    490 	   elements follow.
    491 	   See regex_compile function.  */
    492   charset,
    493 
    494         /* Same parameters as charset, but match any character that is
    495            not one of those specified.  */
    496   charset_not,
    497 
    498         /* Start remembering the text that is matched, for storing in a
    499            register.  Followed by one byte with the register number, in
    500            the range 0 to one less than the pattern buffer's re_nsub
    501            field.  Then followed by one byte with the number of groups
    502            inner to this one.  (This last has to be part of the
    503            start_memory only because we need it in the on_failure_jump
    504            of re_match_2.)  */
    505   start_memory,
    506 
    507         /* Stop remembering the text that is matched and store it in a
    508            memory register.  Followed by one byte with the register
    509            number, in the range 0 to one less than `re_nsub' in the
    510            pattern buffer, and one byte with the number of inner groups,
    511            just like `start_memory'.  (We need the number of inner
    512            groups here because we don't have any easy way of finding the
    513            corresponding start_memory when we're at a stop_memory.)  */
    514   stop_memory,
    515 
    516         /* Match a duplicate of something remembered. Followed by one
    517            byte containing the register number.  */
    518   duplicate,
    519 
    520         /* Fail unless at beginning of line.  */
    521   begline,
    522 
    523         /* Fail unless at end of line.  */
    524   endline,
    525 
    526         /* Succeeds if at beginning of buffer (if emacs) or at beginning
    527            of string to be matched (if not).  */
    528   begbuf,
    529 
    530         /* Analogously, for end of buffer/string.  */
    531   endbuf,
    532 
    533         /* Followed by two-byte relative address to which to jump.  */
    534   jump,
    535 
    536 	/* Same as jump, but marks the end of an alternative.  */
    537   jump_past_alt,
    538 
    539         /* Followed by two-byte relative address of place to resume at
    540            in case of failure.  */
    541         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    542   on_failure_jump,
    543 
    544         /* Like on_failure_jump, but pushes a placeholder instead of the
    545            current string position when executed.  */
    546   on_failure_keep_string_jump,
    547 
    548         /* Throw away latest failure point and then jump to following
    549            two-byte relative address.  */
    550         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    551   pop_failure_jump,
    552 
    553         /* Change to pop_failure_jump if we know we won't have to
    554            backtrack to match; otherwise change to jump.  This is used
    555            to jump back to the beginning of a repeat.  If what follows
    556            this jump clearly won't match what the repeat does, such that
    557            we can be sure that there is no use backtracking out of
    558            repetitions already matched, then we change it to a
    559            pop_failure_jump.  Followed by two-byte address.  */
    560         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    561   maybe_pop_jump,
    562 
    563         /* Jump to following two-byte address, and push a dummy failure
    564            point. This failure point will be thrown away if an attempt
    565            is made to use it for a failure.  A `+' construct makes this
    566            before the first repeat.  Also used as an intermediary kind
    567            of jump when compiling an alternative.  */
    568         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    569   dummy_failure_jump,
    570 
    571 	/* Push a dummy failure point and continue.  Used at the end of
    572 	   alternatives.  */
    573   push_dummy_failure,
    574 
    575         /* Followed by two-byte relative address and two-byte number n.
    576            After matching N times, jump to the address upon failure.  */
    577         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    578   succeed_n,
    579 
    580         /* Followed by two-byte relative address, and two-byte number n.
    581            Jump to the address N times, then fail.  */
    582         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    583   jump_n,
    584 
    585         /* Set the following two-byte relative address to the
    586            subsequent two-byte number.  The address *includes* the two
    587            bytes of number.  */
    588         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    589   set_number_at,
    590 
    591   wordchar,	/* Matches any word-constituent character.  */
    592   notwordchar,	/* Matches any char that is not a word-constituent.  */
    593 
    594   wordbeg,	/* Succeeds if at word beginning.  */
    595   wordend,	/* Succeeds if at word end.  */
    596 
    597   wordbound,	/* Succeeds if at a word boundary.  */
    598   notwordbound	/* Succeeds if not at a word boundary.  */
    599 
    600 # ifdef emacs
    601   ,before_dot,	/* Succeeds if before point.  */
    602   at_dot,	/* Succeeds if at point.  */
    603   after_dot,	/* Succeeds if after point.  */
    604 
    605 	/* Matches any character whose syntax is specified.  Followed by
    606            a byte which contains a syntax code, e.g., Sword.  */
    607   syntaxspec,
    608 
    609 	/* Matches any character whose syntax is not that specified.  */
    610   notsyntaxspec
    611 # endif /* emacs */
    612 } re_opcode_t;
    613 #endif /* not INSIDE_RECURSION */
    614 
    615 
    617 #ifdef BYTE
    618 # define CHAR_T char
    619 # define UCHAR_T unsigned char
    620 # define COMPILED_BUFFER_VAR bufp->buffer
    621 # define OFFSET_ADDRESS_SIZE 2
    622 # define PREFIX(name) byte_##name
    623 # define ARG_PREFIX(name) name
    624 # define PUT_CHAR(c) putchar (c)
    625 #else
    626 # ifdef WCHAR
    627 #  define CHAR_T wchar_t
    628 #  define UCHAR_T wchar_t
    629 #  define COMPILED_BUFFER_VAR wc_buffer
    630 #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
    631 #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
    632 #  define PREFIX(name) wcs_##name
    633 #  define ARG_PREFIX(name) c##name
    634 /* Should we use wide stream??  */
    635 #  define PUT_CHAR(c) printf ("%C", c);
    636 #  define TRUE 1
    637 #  define FALSE 0
    638 # else
    639 #  ifdef MBS_SUPPORT
    640 #   define WCHAR
    641 #   define INSIDE_RECURSION
    642 #   include "regex.c"
    643 #   undef INSIDE_RECURSION
    644 #  endif
    645 #  define BYTE
    646 #  define INSIDE_RECURSION
    647 #  include "regex.c"
    648 #  undef INSIDE_RECURSION
    649 # endif
    650 #endif
    651 #include "unlocked-io.h"
    652 
    653 #ifdef INSIDE_RECURSION
    654 /* Common operations on the compiled pattern.  */
    655 
    656 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
    657 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    658 
    659 # ifdef WCHAR
    660 #  define STORE_NUMBER(destination, number)				\
    661   do {									\
    662     *(destination) = (UCHAR_T)(number);				\
    663   } while (0)
    664 # else /* BYTE */
    665 #  define STORE_NUMBER(destination, number)				\
    666   do {									\
    667     (destination)[0] = (number) & 0377;					\
    668     (destination)[1] = (number) >> 8;					\
    669   } while (0)
    670 # endif /* WCHAR */
    671 
    672 /* Same as STORE_NUMBER, except increment DESTINATION to
    673    the byte after where the number is stored.  Therefore, DESTINATION
    674    must be an lvalue.  */
    675 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    676 
    677 # define STORE_NUMBER_AND_INCR(destination, number)			\
    678   do {									\
    679     STORE_NUMBER (destination, number);					\
    680     (destination) += OFFSET_ADDRESS_SIZE;				\
    681   } while (0)
    682 
    683 /* Put into DESTINATION a number stored in two contiguous bytes starting
    684    at SOURCE.  */
    685 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    686 
    687 # ifdef WCHAR
    688 #  define EXTRACT_NUMBER(destination, source)				\
    689   do {									\
    690     (destination) = *(source);						\
    691   } while (0)
    692 # else /* BYTE */
    693 #  define EXTRACT_NUMBER(destination, source)				\
    694   do {									\
    695     (destination) = *(source) & 0377;					\
    696     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
    697   } while (0)
    698 # endif
    699 
    700 # ifdef DEBUG
    701 static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
    702 static void
    703 PREFIX(extract_number) (dest, source)
    704     int *dest;
    705     UCHAR_T *source;
    706 {
    707 #  ifdef WCHAR
    708   *dest = *source;
    709 #  else /* BYTE */
    710   int temp = SIGN_EXTEND_CHAR (*(source + 1));
    711   *dest = *source & 0377;
    712   *dest += temp << 8;
    713 #  endif
    714 }
    715 
    716 #  ifndef EXTRACT_MACROS /* To debug the macros.  */
    717 #   undef EXTRACT_NUMBER
    718 #   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
    719 #  endif /* not EXTRACT_MACROS */
    720 
    721 # endif /* DEBUG */
    722 
    723 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
    724    SOURCE must be an lvalue.  */
    725 
    726 # define EXTRACT_NUMBER_AND_INCR(destination, source)			\
    727   do {									\
    728     EXTRACT_NUMBER (destination, source);				\
    729     (source) += OFFSET_ADDRESS_SIZE; 					\
    730   } while (0)
    731 
    732 # ifdef DEBUG
    733 static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
    734 						       UCHAR_T **source));
    735 static void
    736 PREFIX(extract_number_and_incr) (destination, source)
    737     int *destination;
    738     UCHAR_T **source;
    739 {
        /* Function form of EXTRACT_NUMBER_AND_INCR, compiled only under
           DEBUG: decode the number at *SOURCE into *DESTINATION, then
           advance *SOURCE by OFFSET_ADDRESS_SIZE elements so it points just
           past the number.  */
    740   PREFIX(extract_number) (destination, *source);
    741   *source += OFFSET_ADDRESS_SIZE;
    742 }
    743 
    744 #  ifndef EXTRACT_MACROS
    745 #   undef EXTRACT_NUMBER_AND_INCR
    746 #   define EXTRACT_NUMBER_AND_INCR(dest, src) \
    747   PREFIX(extract_number_and_incr) (&dest, &src)
    748 #  endif /* not EXTRACT_MACROS */
    749 
    750 # endif /* DEBUG */
    751 
    752 
    753 
    755 /* If DEBUG is defined, Regex prints many voluminous messages about what
    756    it is doing (if the variable `debug' is nonzero).  If linked with the
    757    main program in `iregex.c', you can enter patterns and strings
    758    interactively.  And if linked with the main program in `main.c' and
    759    the other test files, you can run the already-written tests.  */
    760 
    761 # ifdef DEBUG
    762 
    763 #  ifndef DEFINED_ONCE
    764 
    765 /* We use standard I/O for debugging.  */
    766 #   include <stdio.h>
    767 
    768 /* It is useful to test things that ``must'' be true when debugging.  */
    769 #   include <assert.h>
    770 
    771 static int debug;
    772 
    773 #   define DEBUG_STATEMENT(e) e
    774 #   define DEBUG_PRINT1(x) if (debug) printf (x)
    775 #   define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
    776 #   define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
    777 #   define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
    778 #  endif /* not DEFINED_ONCE */
    779 
    780 #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
    781   if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
    782 #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
    783   if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
    784 
    785 
    786 /* Print the fastmap in human-readable form.  */
    787 
    788 #  ifndef DEFINED_ONCE
    789 void
    790 print_fastmap (fastmap)
    791     char *fastmap;
    792 {
        /* Write to standard output the characters whose FASTMAP entry is
           nonzero, scanning indices 0 through (1 << BYTEWIDTH) - 1.  A run
           of consecutive nonzero entries is printed as its first character,
           a '-', and its last character; a run of length one prints only the
           character.  A newline ends the output.  */
    793   unsigned was_a_range = 0;
    794   unsigned i = 0;
    795 
    796   while (i < (1 << BYTEWIDTH))
    797     {
    798       if (fastmap[i++])
    799 	{
    800 	  was_a_range = 0;
        /* I was already advanced by the test above, so I - 1 is the
           index that started this run.  */
    801           putchar (i - 1);
        /* Swallow the rest of the run of set entries.  */
    802           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
    803             {
    804               was_a_range = 1;
    805               i++;
    806             }
    807 	  if (was_a_range)
    808             {
        /* I now indexes the first entry after the run, so I - 1 is the
           last character of the run.  */
    809               printf ("-");
    810               putchar (i - 1);
    811             }
    812         }
    813     }
    814   putchar ('\n');
    815 }
    816 #  endif /* not DEFINED_ONCE */
    817 
    818 
    819 /* Print a compiled pattern string in human-readable form, starting at
    820    the START pointer into it and ending just before the pointer END.  */
    821 
    822 void
    823 PREFIX(print_partial_compiled_pattern) (start, end)
    824     UCHAR_T *start;
    825     UCHAR_T *end;
    826 {
    827   int mcnt, mcnt2;
    828   UCHAR_T *p1;
    829   UCHAR_T *p = start;
    830   UCHAR_T *pend = end;
    831 
    832   if (start == NULL)
    833     {
    834       printf ("(null)\n");
    835       return;
    836     }
    837 
    838   /* Loop over pattern commands.  */
    839   while (p < pend)
    840     {
    841 #  ifdef _LIBC
    842       printf ("%td:\t", p - start);
    843 #  else
    844       printf ("%ld:\t", (long int) (p - start));
    845 #  endif
    846 
    847       switch ((re_opcode_t) *p++)
    848 	{
    849         case no_op:
    850           printf ("/no_op");
    851           break;
    852 
    853 	case exactn:
    854 	  mcnt = *p++;
    855           printf ("/exactn/%d", mcnt);
    856           do
    857 	    {
    858               putchar ('/');
    859 	      PUT_CHAR (*p++);
    860             }
    861           while (--mcnt);
    862           break;
    863 
    864 #  ifdef MBS_SUPPORT
    865 	case exactn_bin:
    866 	  mcnt = *p++;
    867 	  printf ("/exactn_bin/%d", mcnt);
    868           do
    869 	    {
    870 	      printf("/%lx", (long int) *p++);
    871             }
    872           while (--mcnt);
    873           break;
    874 #  endif /* MBS_SUPPORT */
    875 
    876 	case start_memory:
    877           mcnt = *p++;
    878           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
    879           break;
    880 
    881 	case stop_memory:
    882           mcnt = *p++;
    883 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
    884           break;
    885 
    886 	case duplicate:
    887 	  printf ("/duplicate/%ld", (long int) *p++);
    888 	  break;
    889 
    890 	case anychar:
    891 	  printf ("/anychar");
    892 	  break;
    893 
    894 	case charset:
    895         case charset_not:
    896           {
    897 #  ifdef WCHAR
    898 	    int i, length;
    899 	    wchar_t *workp = p;
    900 	    printf ("/charset [%s",
    901 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
    902 	    p += 5;
    903 	    length = *workp++; /* the length of char_classes */
    904 	    for (i=0 ; i<length ; i++)
    905 	      printf("[:%lx:]", (long int) *p++);
    906 	    length = *workp++; /* the length of collating_symbol */
    907 	    for (i=0 ; i<length ;)
    908 	      {
    909 		printf("[.");
    910 		while(*p != 0)
    911 		  PUT_CHAR((i++,*p++));
    912 		i++,p++;
    913 		printf(".]");
    914 	      }
    915 	    length = *workp++; /* the length of equivalence_class */
    916 	    for (i=0 ; i<length ;)
    917 	      {
    918 		printf("[=");
    919 		while(*p != 0)
    920 		  PUT_CHAR((i++,*p++));
    921 		i++,p++;
    922 		printf("=]");
    923 	      }
    924 	    length = *workp++; /* the length of char_range */
    925 	    for (i=0 ; i<length ; i++)
    926 	      {
    927 		wchar_t range_start = *p++;
    928 		wchar_t range_end = *p++;
    929 		printf("%C-%C", range_start, range_end);
    930 	      }
    931 	    length = *workp++; /* the length of char */
    932 	    for (i=0 ; i<length ; i++)
    933 	      printf("%C", *p++);
    934 	    putchar (']');
    935 #  else
    936             register int c, last = -100;
    937 	    register int in_range = 0;
    938 
    939 	    printf ("/charset [%s",
    940 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
    941 
    942             assert (p + *p < pend);
    943 
    944             for (c = 0; c < 256; c++)
    945 	      if (c / 8 < *p
    946 		  && (p[1 + (c/8)] & (1 << (c % 8))))
    947 		{
    948 		  /* Are we starting a range?  */
    949 		  if (last + 1 == c && ! in_range)
    950 		    {
    951 		      putchar ('-');
    952 		      in_range = 1;
    953 		    }
    954 		  /* Have we broken a range?  */
    955 		  else if (last + 1 != c && in_range)
    956               {
    957 		      putchar (last);
    958 		      in_range = 0;
    959 		    }
    960 
    961 		  if (! in_range)
    962 		    putchar (c);
    963 
    964 		  last = c;
    965               }
    966 
    967 	    if (in_range)
    968 	      putchar (last);
    969 
    970 	    putchar (']');
    971 
    972 	    p += 1 + *p;
    973 #  endif /* WCHAR */
    974 	  }
    975 	  break;
    976 
    977 	case begline:
    978 	  printf ("/begline");
    979           break;
    980 
    981 	case endline:
    982           printf ("/endline");
    983           break;
    984 
    985 	case on_failure_jump:
    986           PREFIX(extract_number_and_incr) (&mcnt, &p);
    987 #  ifdef _LIBC
    988   	  printf ("/on_failure_jump to %td", p + mcnt - start);
    989 #  else
    990   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
    991 #  endif
    992           break;
    993 
    994 	case on_failure_keep_string_jump:
    995           PREFIX(extract_number_and_incr) (&mcnt, &p);
    996 #  ifdef _LIBC
    997   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
    998 #  else
    999   	  printf ("/on_failure_keep_string_jump to %ld",
   1000 		  (long int) (p + mcnt - start));
   1001 #  endif
   1002           break;
   1003 
   1004 	case dummy_failure_jump:
   1005           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1006 #  ifdef _LIBC
   1007   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
   1008 #  else
   1009   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
   1010 #  endif
   1011           break;
   1012 
   1013 	case push_dummy_failure:
   1014           printf ("/push_dummy_failure");
   1015           break;
   1016 
   1017         case maybe_pop_jump:
   1018           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1019 #  ifdef _LIBC
   1020   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
   1021 #  else
   1022   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
   1023 #  endif
   1024 	  break;
   1025 
   1026         case pop_failure_jump:
   1027 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1028 #  ifdef _LIBC
   1029   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
   1030 #  else
   1031   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
   1032 #  endif
   1033 	  break;
   1034 
   1035         case jump_past_alt:
   1036 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1037 #  ifdef _LIBC
   1038   	  printf ("/jump_past_alt to %td", p + mcnt - start);
   1039 #  else
   1040   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
   1041 #  endif
   1042 	  break;
   1043 
   1044         case jump:
   1045 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1046 #  ifdef _LIBC
   1047   	  printf ("/jump to %td", p + mcnt - start);
   1048 #  else
   1049   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
   1050 #  endif
   1051 	  break;
   1052 
   1053         case succeed_n:
   1054           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1055 	  p1 = p + mcnt;
   1056           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1057 #  ifdef _LIBC
   1058 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
   1059 #  else
   1060 	  printf ("/succeed_n to %ld, %d times",
   1061 		  (long int) (p1 - start), mcnt2);
   1062 #  endif
   1063           break;
   1064 
   1065         case jump_n:
   1066           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1067 	  p1 = p + mcnt;
   1068           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1069 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
   1070           break;
   1071 
   1072         case set_number_at:
   1073           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1074 	  p1 = p + mcnt;
   1075           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1076 #  ifdef _LIBC
   1077 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
   1078 #  else
   1079 	  printf ("/set_number_at location %ld to %d",
   1080 		  (long int) (p1 - start), mcnt2);
   1081 #  endif
   1082           break;
   1083 
   1084         case wordbound:
   1085 	  printf ("/wordbound");
   1086 	  break;
   1087 
   1088 	case notwordbound:
   1089 	  printf ("/notwordbound");
   1090           break;
   1091 
   1092 	case wordbeg:
   1093 	  printf ("/wordbeg");
   1094 	  break;
   1095 
   1096 	case wordend:
   1097 	  printf ("/wordend");
   1098 	  break;
   1099 
   1100 #  ifdef emacs
   1101 	case before_dot:
   1102 	  printf ("/before_dot");
   1103           break;
   1104 
   1105 	case at_dot:
   1106 	  printf ("/at_dot");
   1107           break;
   1108 
   1109 	case after_dot:
   1110 	  printf ("/after_dot");
   1111           break;
   1112 
   1113 	case syntaxspec:
   1114           printf ("/syntaxspec");
   1115 	  mcnt = *p++;
   1116 	  printf ("/%d", mcnt);
   1117           break;
   1118 
   1119 	case notsyntaxspec:
   1120           printf ("/notsyntaxspec");
   1121 	  mcnt = *p++;
   1122 	  printf ("/%d", mcnt);
   1123 	  break;
   1124 #  endif /* emacs */
   1125 
   1126 	case wordchar:
   1127 	  printf ("/wordchar");
   1128           break;
   1129 
   1130 	case notwordchar:
   1131 	  printf ("/notwordchar");
   1132           break;
   1133 
   1134 	case begbuf:
   1135 	  printf ("/begbuf");
   1136           break;
   1137 
   1138 	case endbuf:
   1139 	  printf ("/endbuf");
   1140           break;
   1141 
   1142         default:
   1143           printf ("?%ld", (long int) *(p-1));
   1144 	}
   1145 
   1146       putchar ('\n');
   1147     }
   1148 
   1149 #  ifdef _LIBC
   1150   printf ("%td:\tend of pattern.\n", p - start);
   1151 #  else
   1152   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
   1153 #  endif
   1154 }
   1155 
   1156 
   1157 void
   1158 PREFIX(print_compiled_pattern) (bufp)
   1159     struct re_pattern_buffer *bufp;
   1160 {
   1161   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
   1162 
   1163   PREFIX(print_partial_compiled_pattern) (buffer, buffer
   1164 				  + bufp->used / sizeof(UCHAR_T));
   1165   printf ("%ld bytes used/%ld bytes allocated.\n",
   1166 	  bufp->used, bufp->allocated);
   1167 
   1168   if (bufp->fastmap_accurate && bufp->fastmap)
   1169     {
   1170       printf ("fastmap: ");
   1171       print_fastmap (bufp->fastmap);
   1172     }
   1173 
   1174 #  ifdef _LIBC
   1175   printf ("re_nsub: %Zd\t", bufp->re_nsub);
   1176 #  else
   1177   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
   1178 #  endif
   1179   printf ("regs_alloc: %d\t", bufp->regs_allocated);
   1180   printf ("can_be_null: %d\t", bufp->can_be_null);
   1181   printf ("newline_anchor: %d\n", bufp->newline_anchor);
   1182   printf ("no_sub: %d\t", bufp->no_sub);
   1183   printf ("not_bol: %d\t", bufp->not_bol);
   1184   printf ("not_eol: %d\t", bufp->not_eol);
   1185   printf ("syntax: %lx\n", bufp->syntax);
   1186   /* Perhaps we should print the translate table?  */
   1187 }
   1188 
   1189 
   1190 void
   1191 PREFIX(print_double_string) (where, string1, size1, string2, size2)
   1192     const CHAR_T *where;
   1193     const CHAR_T *string1;
   1194     const CHAR_T *string2;
   1195     int size1;
   1196     int size2;
   1197 {
   1198   int this_char;
   1199 
   1200   if (where == NULL)
   1201     printf ("(null)");
   1202   else
   1203     {
   1204       int cnt;
   1205 
   1206       if (FIRST_STRING_P (where))
   1207         {
   1208           for (this_char = where - string1; this_char < size1; this_char++)
   1209 	    PUT_CHAR (string1[this_char]);
   1210 
   1211           where = string2;
   1212         }
   1213 
   1214       cnt = 0;
   1215       for (this_char = where - string2; this_char < size2; this_char++)
   1216 	{
   1217 	  PUT_CHAR (string2[this_char]);
   1218 	  if (++cnt > 100)
   1219 	    {
   1220 	      fputs ("...", stdout);
   1221 	      break;
   1222 	    }
   1223 	}
   1224     }
   1225 }
   1226 
#  ifndef DEFINED_ONCE
/* Write the single character C to the standard error stream.  */
void
printchar (c)
     int c;
{
  fputc (c, stderr);
}
#  endif
   1235 
   1236 # else /* not DEBUG */
   1237 
#  ifndef DEFINED_ONCE
/* Non-DEBUG build: `assert' and every tracing hook below expand to
   nothing, so their arguments are dropped and never evaluated.  */
#   undef assert
#   define assert(e)

#   define DEBUG_STATEMENT(e)
#   define DEBUG_PRINT1(x)
#   define DEBUG_PRINT2(x1, x2)
#   define DEBUG_PRINT3(x1, x2, x3)
#   define DEBUG_PRINT4(x1, x2, x3, x4)
#  endif /* not DEFINED_ONCE */
/* These two are (re)defined on every pass, outside the DEFINED_ONCE
   guard, and are likewise empty here.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   1250 
   1251 # endif /* not DEBUG */
   1252 
   1253 
   1254 
   1256 # ifdef WCHAR
   1257 /* This  convert a multibyte string to a wide character string.
   1258    And write their correspondances to offset_buffer(see below)
   1259    and write whether each wchar_t is binary data to is_binary.
   1260    This assume invalid multibyte sequences as binary data.
   1261    We assume offset_buffer and is_binary is already allocated
   1262    enough space.  */
   1263 
   1264 static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
   1265 				  size_t len, int *offset_buffer,
   1266 				  char *is_binary);
   1267 static size_t
   1268 convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
   1269      CHAR_T *dest;
   1270      const unsigned char* src;
   1271      size_t len; /* the length of multibyte string.  */
   1272 
   1273      /* It hold correspondances between src(char string) and
   1274 	dest(wchar_t string) for optimization.
   1275 	e.g. src  = "xxxyzz"
   1276              dest = {'X', 'Y', 'Z'}
   1277 	      (each "xxx", "y" and "zz" represent one multibyte character
   1278 	       corresponding to 'X', 'Y' and 'Z'.)
   1279 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
   1280 	  	        = {0, 3, 4, 6}
   1281      */
   1282      int *offset_buffer;
   1283      char *is_binary;
   1284 {
   1285   wchar_t *pdest = dest;
   1286   const unsigned char *psrc = src;
   1287   size_t wc_count = 0;
   1288 
   1289   mbstate_t mbs;
   1290   int i, consumed;
   1291   size_t mb_remain = len;
   1292   size_t mb_count = 0;
   1293 
   1294   /* Initialize the conversion state.  */
   1295   memset (&mbs, 0, sizeof (mbstate_t));
   1296 
   1297   offset_buffer[0] = 0;
   1298   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
   1299 	 psrc += consumed)
   1300     {
   1301       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
   1302 
   1303       if (consumed <= 0)
   1304 	/* failed to convert. maybe src contains binary data.
   1305 	   So we consume 1 byte manualy.  */
   1306 	{
   1307 	  *pdest = *psrc;
   1308 	  consumed = 1;
   1309 	  is_binary[wc_count] = TRUE;
   1310 	}
   1311       else
   1312 	is_binary[wc_count] = FALSE;
   1313       /* In sjis encoding, we use yen sign as escape character in
   1314 	 place of reverse solidus. So we convert 0x5c(yen sign in
   1315 	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
   1316 	 solidus in UCS2).  */
   1317       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
   1318 	*pdest = (wchar_t) *psrc;
   1319 
   1320       offset_buffer[wc_count + 1] = mb_count += consumed;
   1321     }
   1322 
   1323   /* Fill remain of the buffer with sentinel.  */
   1324   for (i = wc_count + 1 ; i <= len ; i++)
   1325     offset_buffer[i] = mb_count + 1;
   1326 
   1327   return wc_count;
   1328 }
   1329 
   1330 # endif /* WCHAR */
   1331 
   1332 #else /* not INSIDE_RECURSION */
   1333 
/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
   also be assigned to arbitrarily: each pattern buffer stores its own
   syntax, so it can be changed between regex compilations.
   `re_set_syntax' stores its argument here and returns the value that
   was stored before the call.  */
/* This has no initializer because initialized variables in Emacs
   become read-only after dumping.  */
reg_syntax_t re_syntax_options;
   1340 
   1341 
   1342 /* Specify the precise syntax of regexps for compilation.  This provides
   1343    for compatibility for various utilities which historically have
   1344    different, incompatible syntaxes.
   1345 
   1346    The argument SYNTAX is a bit mask comprised of the various bits
   1347    defined in regex.h.  We return the old syntax.  */
   1348 
   1349 reg_syntax_t
   1350 re_set_syntax (syntax)
   1351     reg_syntax_t syntax;
   1352 {
   1353   reg_syntax_t ret = re_syntax_options;
   1354 
   1355   re_syntax_options = syntax;
   1356 # ifdef DEBUG
   1357   if (syntax & RE_DEBUG)
   1358     debug = 1;
   1359   else if (debug) /* was on but now is not */
   1360     debug = 0;
   1361 # endif /* DEBUG */
   1362   return ret;
   1363 }
   1364 # ifdef _LIBC
   1365 weak_alias (__re_set_syntax, re_set_syntax)
   1366 # endif
   1367 
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?

   All messages live in one char array: adjacent string literals are
   concatenated, and an explicit "\0" after each message (except the
   last) terminates it.  Each *_IDX macro is the offset of its message
   in the array, computed as the previous offset plus `sizeof' of the
   previous message literal; that sizeof counts the literal's own
   terminating NUL, which is what accounts for the explicit "\0".
   The literal in each `sizeof' must therefore stay identical to the
   message it follows.  */

static const char re_error_msgid[] =
  {
# define REG_NOERROR_IDX	0
    gettext_noop ("Success")	/* REG_NOERROR */
    "\0"
# define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
    gettext_noop ("No match")	/* REG_NOMATCH */
    "\0"
# define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
    "\0"
# define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
    "\0"
# define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
    "\0"
# define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
    "\0"
# define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
    "\0"
# define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
    "\0"
# define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
    "\0"
# define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
    "\0"
# define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
    "\0"
# define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
    gettext_noop ("Invalid range end")	/* REG_ERANGE */
    "\0"
# define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
    "\0"
# define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
    "\0"
# define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
    "\0"
# define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
    "\0"
# define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
  };
   1427 
/* Offsets of the individual messages inside `re_error_msgid', listed in
   the same order as the messages appear there.  Presumably indexed by
   the error code value from regex.h -- confirm against the callers.  */
static const size_t re_error_msgid_idx[] =
  {
    REG_NOERROR_IDX,
    REG_NOMATCH_IDX,
    REG_BADPAT_IDX,
    REG_ECOLLATE_IDX,
    REG_ECTYPE_IDX,
    REG_EESCAPE_IDX,
    REG_ESUBREG_IDX,
    REG_EBRACK_IDX,
    REG_EPAREN_IDX,
    REG_EBRACE_IDX,
    REG_BADBR_IDX,
    REG_ERANGE_IDX,
    REG_ESPACE_IDX,
    REG_BADRPT_IDX,
    REG_EEND_IDX,
    REG_ESIZE_IDX,
    REG_ERPAREN_IDX
  };
   1448 
   1449 #endif /* INSIDE_RECURSION */
   1451 
   1452 #ifndef DEFINED_ONCE
   1453 /* Avoiding alloca during matching, to placate r_alloc.  */
   1454 
   1455 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   1456    searching and matching functions should not call alloca.  On some
   1457    systems, alloca is implemented in terms of malloc, and if we're
   1458    using the relocating allocator routines, then malloc could cause a
   1459    relocation, which might (if the strings being searched are in the
   1460    ralloc heap) shift the data out from underneath the regexp
   1461    routines.
   1462 
   1463    Here's another reason to avoid allocation: Emacs
   1464    processes input from X in a signal handler; processing X input may
   1465    call malloc; if input arrives while a matching routine is calling
   1466    malloc, then we're scrod.  But Emacs can't just block input while
   1467    calling matching routines; then we don't notice interrupts when
   1468    they come in.  So, Emacs blocks input around all regexp calls
   1469    except the matching calls, which it leaves unprotected, in the
   1470    faith that they will not malloc.  */
   1471 
/* Normally, this is fine: allocation during matching is allowed by
   default and only withdrawn by the test at the end of this group.  */
# define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
# ifdef __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.
   Concretely: MATCH_MAY_ALLOCATE is undefined only when building for
   emacs with either C_ALLOCA or REGEX_MALLOC defined.  */
# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
#  undef MATCH_MAY_ALLOCATE
# endif
   1489 #endif /* not DEFINED_ONCE */
   1490 
   1491 #ifdef INSIDE_RECURSION
   1493 /* Failure stack declarations and macros; both re_compile_fastmap and
   1494    re_match_2 use a failure stack.  These have to be macros because of
   1495    REGEX_ALLOCATE_STACK.  */
   1496 
   1497 
/* Number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  May be overridden by defining
   INIT_FAILURE_ALLOC before this point.  */
# ifndef INIT_FAILURE_ALLOC
#  define INIT_FAILURE_ALLOC 5
# endif

/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.

   The two branches below differ only in integer width: with
   INT_IS_16BIT the limit, the integer member of a stack element and the
   stack's size/avail counters are `long'/`unsigned long'; otherwise
   they are `int'/`unsigned'.  */

# ifdef INT_IS_16BIT

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
long int re_max_failures = 4000;
#   else
long int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: either a pointer or an integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  long int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself: SIZE slots at STACK, of which the first
   AVAIL are in use.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned long int size;
  unsigned long int avail;		/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# else /* not INT_IS_16BIT */

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
int re_max_failures = 4000;
#   else
int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: either a pointer or an integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself: SIZE slots at STACK, of which the first
   AVAIL are in use.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# endif /* INT_IS_16BIT */
   1565 
/* Predicates on the failure stack.  FAIL_STACK_EMPTY and FAIL_STACK_FULL
   read a variable named `fail_stack' in the expanding scope;
   FAIL_STACK_PTR_EMPTY reads through a pointer named `fail_stack_ptr'.  */
# ifndef DEFINED_ONCE
#  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
# endif


/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   When matching may allocate, INIT_FAIL_STACK obtains room for
   INIT_FAILURE_ALLOC elements through REGEX_ALLOCATE_STACK and
   RESET_FAIL_STACK hands the storage to REGEX_FREE_STACK.  Otherwise
   INIT_FAIL_STACK only resets `avail' and leaves `stack' and `size'
   untouched, and RESET_FAIL_STACK expands to nothing.  Because of the
   embedded `return -2', INIT_FAIL_STACK can only be used inside a
   function whose return type accepts that value.  */

# ifdef MATCH_MAY_ALLOCATE
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
									\
    if (fail_stack.stack == NULL)				\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;			\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
# else
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()
# endif
   1598 
   1599 
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   "Too large" means the current size already exceeds
   re_max_failures * MAX_FAILURE_ITEMS; in that case nothing is touched.
   Otherwise the result of REGEX_REALLOCATE_STACK is stored in
   (fail_stack).stack before it is checked, so after an allocation
   failure the stack pointer is NULL and the size is left unchanged.
   The argument is evaluated several times.

   REGEX_REALLOCATE_STACK requires `destination' be declared.   */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
   1619 
   1620 
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The stack is grown with DOUBLE_FAIL_STACK only when it is full.  The
   fullness test is made on the FAIL_STACK argument itself, so the macro
   checks the same stack it pushes onto even when that stack is not the
   variable named `fail_stack'.  FAIL_STACK is evaluated several times;
   POINTER is evaluated at most once.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
   1630 
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No room check is made here: the slot at `avail' is written and
   `avail' is incremented unconditionally.  */
# define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_ELT(item)					\
  fail_stack.stack[fail_stack.avail++] =  (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each decrements `avail'
   first and then yields the slot (or the named member of it) that was
   on top.  */
# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores the
   popped integer through ITEM_ADDR; without DEBUG both expand to
   nothing, so the stack layout differs between the two builds.  */
# ifdef DEBUG
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# endif
   1663 
   1664 
   1665 /* Push a failure point: the information about the state we will
   1666    need if we ever fail back to it.
   1667 
   1668    Requires variables fail_stack, regstart, regend, reg_info,
   1669    lowest_active_reg, highest_active_reg and num_regs_pushed be
   1670    declared.  `destination' is declared locally for DOUBLE_FAIL_STACK.
   1671 
   1672    Does `return FAILURE_CODE' if DOUBLE_FAIL_STACK reports failure.

           The items are pushed in this order: for each register from
           lowest_active_reg up to highest_active_reg its regstart, regend
           and reg_info word; then lowest_active_reg, highest_active_reg,
           PATTERN_PLACE and STRING_PLACE; last, the failure id is handed
           to DEBUG_PUSH.

           The debugging statements additionally refer to failure_id,
           nfailure_points_pushed, bufp, pend, string1, size1, string2 and
           size2.  */
   1673 
   1674 # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
   1675   do {									\
   1676     char *destination;							\
   1677     /* An older note here demanded int, so that when no registers are	\
   1678        saved the arithmetic of 0 + -1 isn't done as unsigned.  */	\
   1679     /* The declaration uses active_reg_t instead, since there is no	\
   1680        guarantee that int is wide enough to hold a value of something	\
   1681        to which a pointer can be assigned.  */				\
   1682     active_reg_t this_reg;						\
   1683     									\
   1684     DEBUG_STATEMENT (failure_id++);					\
   1685     DEBUG_STATEMENT (nfailure_points_pushed++);				\
   1686     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
   1687     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
   1688     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
   1689 									\
   1690     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
   1691     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
   1692 									\
   1693     /* Ensure we have enough space allocated for what we will push.  */	\
   1694     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
   1695       {									\
   1696         if (!DOUBLE_FAIL_STACK (fail_stack))				\
   1697           return failure_code;						\
   1698 									\
   1699         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
   1700 		       (fail_stack).size);				\
   1701         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
   1702       }									\
   1703 									\
   1704     /* Push the info, starting with the registers.  */			\
   1705     DEBUG_PRINT1 ("\n");						\
   1706 									\
   1707     if (1)								\
   1708       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
   1709 	   this_reg++)							\
   1710 	{								\
   1711 	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
   1712 	  DEBUG_STATEMENT (num_regs_pushed++);				\
   1713 									\
   1714 	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
   1715 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
   1716 									\
   1717 	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
   1718 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
   1719 									\
   1720 	  DEBUG_PRINT2 ("    info: %p\n      ",				\
   1721 			reg_info[this_reg].word.pointer);		\
   1722 	  DEBUG_PRINT2 (" match_null=%d",				\
   1723 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
   1724 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
   1725 	  DEBUG_PRINT2 (" matched_something=%d",			\
   1726 			MATCHED_SOMETHING (reg_info[this_reg]));	\
   1727 	  DEBUG_PRINT2 (" ever_matched=%d",				\
   1728 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
   1729 	  DEBUG_PRINT1 ("\n");						\
   1730 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
   1731 	}								\
   1732 									\
   1733     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
   1734     PUSH_FAILURE_INT (lowest_active_reg);				\
   1735 									\
   1736     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
   1737     PUSH_FAILURE_INT (highest_active_reg);				\
   1738 									\
   1739     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
   1740     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
   1741     PUSH_FAILURE_POINTER (pattern_place);				\
   1742 									\
   1743     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
   1744     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
   1745 				 size2);				\
   1746     DEBUG_PRINT1 ("'\n");						\
   1747     PUSH_FAILURE_POINTER (string_place);				\
   1748 									\
   1749     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
   1750     DEBUG_PUSH (failure_id);						\
   1751   } while (0)
   1752 
   1753 # ifndef DEFINED_ONCE
   1754 /* This is the number of items that are pushed and popped on the stack
   1755    for each register: its regstart, its regend and its reg_info word
           (see PUSH_FAILURE_POINT).  */
   1756 #  define NUM_REG_ITEMS  3
   1757 
   1758 /* Individual items aside from the registers: the lowest and highest
           active register numbers, the pattern position and the string
           position, plus the failure point id when DEBUG is defined.  */
   1759 #  ifdef DEBUG
   1760 #   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
   1761 #  else
   1762 #   define NUM_NONREG_ITEMS 4
   1763 #  endif
   1764 
   1765 /* We push at most this many items on the stack.  */
   1766 /* We used to use (num_regs - 1), which is the number of registers
   1767    this regexp will save; but that was changed to 5
   1768    to avoid stack overflow for a regexp with lots of parens.
           NOTE(review): NUM_FAILURE_ITEMS below is computed from the full
           active register range and is not clamped to this value.  */
   1769 #  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
   1770 
   1771 /* We actually push this many items: NUM_REG_ITEMS for each register in
           lowest_active_reg .. highest_active_reg, plus NUM_NONREG_ITEMS.
           The `0 ? 0 :' condition is constant, so the register count is
           always the second operand.  */
   1772 #  define NUM_FAILURE_ITEMS				\
   1773   (((0							\
   1774      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
   1775     * NUM_REG_ITEMS)					\
   1776    + NUM_NONREG_ITEMS)
   1777 
   1778 /* How many items can still be added to the stack without overflowing it.  */
   1779 #  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
   1780 # endif /* not DEFINED_ONCE */
   1781 
   1782 
   1783 /* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.
   1784 
   1785    We restore into the parameters, all of which should be lvalues:
   1786      STR -- the saved data position.
   1787      PAT -- the saved pattern position.
   1788      LOW_REG, HIGH_REG -- the lowest and highest active registers.
   1789      REGSTART, REGEND -- arrays of string positions.
   1790      REG_INFO -- array of information about each subexpression.
   1791 
   1792    Also assumes the variables `fail_stack' and (if debugging), `bufp',
   1793    `pend', `string1', `size1', `string2', and `size2'.

           STR is left untouched when the saved string position is NULL.
           set_regs_matched_done is reset to 0 at the end.

           The expansion is a bare brace block, not a `do ... while (0)'
           statement.  Because the register loop is guarded by `if (1)', the
           `else' branch that clears registers above HIGH_REG never runs.

           NOTE(review): the register loop counts down with
           `this_reg >= low_reg'; if active_reg_t is unsigned this only
           terminates when LOW_REG is at least 1 -- confirm.  */
   1794 # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
   1795 {									\
   1796   DEBUG_STATEMENT (unsigned failure_id;)				\
   1797   active_reg_t this_reg;						\
   1798   const UCHAR_T *string_temp;						\
   1799 									\
   1800   assert (!FAIL_STACK_EMPTY ());					\
   1801 									\
   1802   /* Remove failure points and point to how many regs pushed.  */	\
   1803   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
   1804   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
   1805   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
   1806 									\
   1807   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
   1808 									\
   1809   DEBUG_POP (&failure_id);						\
   1810   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
   1811 									\
   1812   /* If the saved string location is NULL, it came from an		\
   1813      on_failure_keep_string_jump opcode, and we want to throw away the	\
   1814      saved NULL, thus retaining our current position in the string.  */	\
   1815   string_temp = POP_FAILURE_POINTER ();					\
   1816   if (string_temp != NULL)						\
   1817     str = (const CHAR_T *) string_temp;					\
   1818 									\
   1819   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   1820   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   1821   DEBUG_PRINT1 ("'\n");							\
   1822 									\
   1823   pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
   1824   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
   1825   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
   1826 									\
   1827   /* Restore register info.  */						\
   1828   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1829   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
   1830 									\
   1831   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1832   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
   1833 									\
   1834   if (1)								\
   1835     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
   1836       {									\
   1837 	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
   1838 									\
   1839 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
   1840 	DEBUG_PRINT2 ("      info: %p\n",				\
   1841 		      reg_info[this_reg].word.pointer);			\
   1842 									\
   1843 	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1844 	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
   1845 									\
   1846 	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1847 	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
   1848       }									\
   1849   else									\
   1850     {									\
   1851       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
   1852 	{								\
   1853 	  reg_info[this_reg].word.integer = 0;				\
   1854 	  regend[this_reg] = 0;						\
   1855 	  regstart[this_reg] = 0;					\
   1856 	}								\
   1857       highest_active_reg = high_reg;					\
   1858     }									\
   1859 									\
   1860   set_regs_matched_done = 0;						\
   1861   DEBUG_STATEMENT (nfailure_points_popped++);				\
   1862 } /* POP_FAILURE_POINT */
   1863 
   1864 /* Structure for per-register (a.k.a. per-group) information.
   1866    Other register information, such as the
   1867    starting and ending positions (which are addresses), and the list of
   1868    inner groups (which is a bits list) are maintained in separate
   1869    variables.
   1870 
   1871    We are making a (strictly speaking) nonportable assumption here: that
   1872    the compiler will pack our bit fields into something that fits into
   1873    the type of `word', i.e., is something that fits into one item on the
   1874    failure stack.  PUSH_FAILURE_POINT and POP_FAILURE_POINT save and
           restore a register's information by copying `word' alone.  */
   1875 
   1876 
   1877 /* Declarations and macros for re_match_2.  */
   1878 
   1879 typedef union
   1880 {
   1881   PREFIX(fail_stack_elt_t) word;	/* The form kept on the failure stack.  */
   1882   struct
   1883   {
   1884       /* This field is one if this group can match the empty string,
   1885          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
   1886 # define MATCH_NULL_UNSET_VALUE 3
   1887     unsigned match_null_string_p : 2;	/* Read via REG_MATCH_NULL_STRING_P.  */
   1888     unsigned is_active : 1;		/* Read via IS_ACTIVE.  */
   1889     unsigned matched_something : 1;	/* Set by SET_REGS_MATCHED.  */
   1890     unsigned ever_matched_something : 1; /* Set by SET_REGS_MATCHED.  */
   1891   } bits;
   1892 } PREFIX(register_info_type);
   1893 
   1894 # ifndef DEFINED_ONCE
        /* Accessors for the bit fields of a register_info_type R.  Each one
           expands to the field itself, so it can be read or assigned.  */
   1895 #  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
   1896 #  define IS_ACTIVE(R)  ((R).bits.is_active)
   1897 #  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
   1898 #  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
   1899 
   1900 
   1901 /* Call this when have matched a real character; it sets `matched' flags
   1902    for the subexpressions which we are currently inside.  Also records
   1903    that those subexprs have matched.

           Concretely: unless set_regs_matched_done is already nonzero, set it
           to 1 and set both MATCHED_SOMETHING and EVER_MATCHED_SOMETHING for
           every register from lowest_active_reg to highest_active_reg.
           Requires set_regs_matched_done, lowest_active_reg,
           highest_active_reg and reg_info be declared.  */
   1904 #  define SET_REGS_MATCHED()						\
   1905   do									\
   1906     {									\
   1907       if (!set_regs_matched_done)					\
   1908 	{								\
   1909 	  active_reg_t r;						\
   1910 	  set_regs_matched_done = 1;					\
   1911 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
   1912 	    {								\
   1913 	      MATCHED_SOMETHING (reg_info[r])				\
   1914 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
   1915 		= 1;							\
   1916 	    }								\
   1917 	}								\
   1918     }									\
   1919   while (0)
   1920 # endif /* not DEFINED_ONCE */
   1921 
   1922 /* Registers are set to a sentinel when they haven't yet matched.  The
           sentinel is the address of a dummy static object, so REG_UNSET (e)
           is a plain pointer comparison against that address.  */
   1923 static CHAR_T PREFIX(reg_unset_dummy);
   1924 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
   1925 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
   1926 
   1927 /* Subroutine declarations and macros for regex_compile.  Only the
           prototypes appear here; the definitions are elsewhere in the file.
           The STORE_JUMP, STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2 macros
           are the wrappers that call store_op1, store_op2, insert_op1 and
           insert_op2 respectively.  */
   1928 static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
   1929 static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
   1930 				 int arg1, int arg2));
   1931 static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
   1932 				  int arg, UCHAR_T *end));
   1933 static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
   1934 				  int arg1, int arg2, UCHAR_T *end));
   1935 static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
   1936 					   const CHAR_T *p,
   1937 					   reg_syntax_t syntax));
   1938 static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
   1939 					   const CHAR_T *pend,
   1940 					   reg_syntax_t syntax));
        /* The range compiler has a different name and signature in the wide
           character build (wcs_compile_range) and in the byte build
           (byte_compile_range); insert_space exists only in the former.  */
   1941 # ifdef WCHAR
   1942 static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
   1943 						  const CHAR_T **p_ptr,
   1944 						  const CHAR_T *pend,
   1945 						  char *translate,
   1946 						  reg_syntax_t syntax,
   1947 						  UCHAR_T *b,
   1948 						  CHAR_T *char_set));
   1949 static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end));
   1950 # else /* BYTE */
   1951 static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
   1952 						   const char **p_ptr,
   1953 						   const char *pend,
   1954 						   char *translate,
   1955 						   reg_syntax_t syntax,
   1956 						   unsigned char *b));
   1957 # endif /* WCHAR */
   1958 
   1959 /* Fetch the next character in the uncompiled pattern---translating it
   1960    if necessary.  Also cast from a signed character in the constant
   1961    string passed to us by the user to an unsigned char that we can use
   1962    as an array index (in, e.g., `translate').

           Requires p, pend and translate be declared; advances p by one.
           Does `return REG_EEND' from the enclosing function when p has
           already reached pend.  Only defined here if PATFETCH is not
           already defined.  */
   1963 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1964    because it is impossible to allocate 4GB array for some encodings
   1965    which have 4 byte character_set like UCS4.  */
   1966 # ifndef PATFETCH
   1967 #  ifdef WCHAR
   1968 #   define PATFETCH(c)							\
   1969   do {if (p == pend) return REG_EEND;					\
   1970     c = (UCHAR_T) *p++;							\
   1971     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
   1972   } while (0)
   1973 #  else /* BYTE */
   1974 #   define PATFETCH(c)							\
   1975   do {if (p == pend) return REG_EEND;					\
   1976     c = (unsigned char) *p++;						\
   1977     if (translate) c = (unsigned char) translate[c];			\
   1978   } while (0)
   1979 #  endif /* WCHAR */
   1980 # endif
   1981 
   1982 /* Fetch the next character in the uncompiled pattern, with no
   1983    translation.  Like PATFETCH, this advances p and does
           `return REG_EEND' from the enclosing function when p == pend.  */
   1984 # define PATFETCH_RAW(c)						\
   1985   do {if (p == pend) return REG_EEND;					\
   1986     c = (UCHAR_T) *p++; 	       					\
   1987   } while (0)
   1988 
   1989 /* Go backwards one character in the pattern.  This only decrements p;
           the character variable that was fetched into is left as it is.  */
   1990 # define PATUNFETCH p--
   1991 
   1992 
   1993 /* If `translate' is non-null, return translate[D], else just D.  We
   1994    cast the subscript to translate because some data is declared as
   1995    `char *', to avoid warnings when a string constant is passed.  But
   1996    when we use a character as a subscript we must make it unsigned.
           The looked-up value is cast to `char'; D itself is returned
           unchanged when no translation applies.  D may be evaluated more
           than once.  Only defined here if TRANSLATE is not already
           defined.  */
   1997 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1998    because it is impossible to allocate 4GB array for some encodings
   1999    which have 4 byte character_set like UCS4.  */
   2000 
   2001 # ifndef TRANSLATE
   2002 #  ifdef WCHAR
   2003 #   define TRANSLATE(d) \
   2004   ((translate && ((UCHAR_T) (d)) <= 0xff) \
   2005    ? (char) translate[(unsigned char) (d)] : (d))
   2006 # else /* BYTE */
   2007 #   define TRANSLATE(d) \
   2008   (translate ? (char) translate[(unsigned char) (d)] : (d))
   2009 #  endif /* WCHAR */
   2010 # endif
   2011 
   2012 
   2013 /* Macros for outputting the compiled pattern into `buffer'.  */
   2014 
   2015 /* If the buffer isn't allocated when it comes in, use this.  */
   2016 # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
   2017 
   2018 /* Make sure we have room for at least N more elements in the buffer:
           N * sizeof (CHAR_T) bytes in the WCHAR build, N bytes in the BYTE
           build.  Calls EXTEND_BUFFER repeatedly until the space used so far
           plus the request no longer exceeds bufp->allocated, so it inherits
           EXTEND_BUFFER's early returns.  Requires b and bufp be declared.  */
   2019 # ifdef WCHAR
   2020 #  define GET_BUFFER_SPACE(n)						\
   2021     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
   2022             + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
   2023       EXTEND_BUFFER ()
   2024 # else /* BYTE */
   2025 #  define GET_BUFFER_SPACE(n)						\
   2026     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
   2027       EXTEND_BUFFER ()
   2028 # endif /* WCHAR */
   2029 
   2030 /* Make sure we have room for one more element (a UCHAR_T) in the
           buffer and then store C, cast to UCHAR_T, at b and advance b.  */
   2031 # define BUF_PUSH(c)							\
   2032   do {									\
   2033     GET_BUFFER_SPACE (1);						\
   2034     *b++ = (UCHAR_T) (c);						\
   2035   } while (0)
   2036 
   2037 
   2038 /* Ensure we have room for two more elements and then append C1 and C2.
           The space is reserved with a single GET_BUFFER_SPACE (2).  */
   2039 # define BUF_PUSH_2(c1, c2)						\
   2040   do {									\
   2041     GET_BUFFER_SPACE (2);						\
   2042     *b++ = (UCHAR_T) (c1);						\
   2043     *b++ = (UCHAR_T) (c2);						\
   2044   } while (0)
   2045 
   2046 
   2047 /* As with BUF_PUSH_2, except for three elements.  */
   2048 # define BUF_PUSH_3(c1, c2, c3)						\
   2049   do {									\
   2050     GET_BUFFER_SPACE (3);						\
   2051     *b++ = (UCHAR_T) (c1);						\
   2052     *b++ = (UCHAR_T) (c2);						\
   2053     *b++ = (UCHAR_T) (c3);						\
   2054   } while (0)
   2055 
   2056 /* Store a jump with opcode OP at LOC to location TO.  We store a
   2057    relative address offset by the 1 + OFFSET_ADDRESS_SIZE elements the
           jump itself occupies, i.e. (TO - LOC - (1 + OFFSET_ADDRESS_SIZE))
           converted to int.  */
   2058 # define STORE_JUMP(op, loc, to) \
   2059  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
   2060 
   2061 /* Likewise, for a two-argument jump; ARG is passed through unchanged
           as the second argument of store_op2.  */
   2062 # define STORE_JUMP2(op, loc, to, arg) \
   2063   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
   2064 
   2065 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
   2066 # define INSERT_JUMP(op, loc, to) \
   2067   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
   2068 
   2069 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
   2070 # define INSERT_JUMP2(op, loc, to, arg) \
   2071   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
   2072 	      arg, b)
   2073 
   2074 /* This is not an arbitrary limit: the arguments which represent offsets
   2075    into the pattern are two bytes long.  So if 2^16 bytes turns out to
   2076    be too small, many things would have to change.  */
   2077 /* Any other compiler which, like MSC, has allocation limit below 2^16
   2078    bytes will have to use approach similar to what was done below for
   2079    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   2080    reallocating to 0 bytes.  Such thing is not going to work too well.
   2081    You have been warned!!  */
   2082 # ifndef DEFINED_ONCE
   2083 #  if defined _MSC_VER  && !defined WIN32
   2084 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   2085    The REALLOC define eliminates a flurry of conversion warnings,
   2086    but is not required. */
   2087 #   define MAX_BUF_SIZE  65500L
   2088 #   define REALLOC(p,s) realloc ((p), (size_t) (s))
   2089 #  else
   2090 #   define MAX_BUF_SIZE (1L << 16)
   2091 #   define REALLOC(p,s) realloc ((p), (s))
   2092 #  endif
   2093 
   2094 /* EXTEND_BUFFER extends the buffer to twice its current size via
   2095    realloc and resets the pointers that pointed into the old block to
   2096    point to the correct places in the new one.  If the buffer is already
   2097    at MAX_BUF_SIZE, it flags the pattern as too large instead.

           The helpers defined here are used by EXTEND_BUFFER.
           MOVE_BUFFER_POINTER (P) shifts P by `incr', which the caller must
           have declared.  With __BOUNDED_POINTERS__ the pointer's low and
           high bounds are adjusted as well, and
           ELSE_EXTEND_BUFFER_HIGH_BOUND supplies an `else' branch that
           refreshes the high bounds when the buffer did not move; otherwise
           it expands to nothing.  */
   2098 #  if __BOUNDED_POINTERS__
   2099 #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
   2100 #   define MOVE_BUFFER_POINTER(P) \
   2101   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
   2102 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
   2103   else						\
   2104     {						\
   2105       SET_HIGH_BOUND (b);			\
   2106       SET_HIGH_BOUND (begalt);			\
   2107       if (fixup_alt_jump)			\
   2108 	SET_HIGH_BOUND (fixup_alt_jump);	\
   2109       if (laststart)				\
   2110 	SET_HIGH_BOUND (laststart);		\
   2111       if (pending_exact)			\
   2112 	SET_HIGH_BOUND (pending_exact);		\
   2113     }
   2114 #  else
   2115 #   define MOVE_BUFFER_POINTER(P) (P) += incr
   2116 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
   2117 #  endif
   2118 # endif /* not DEFINED_ONCE */
   2119 
   2120 # ifdef WCHAR
        /* EXTEND_BUFFER doubles bufp->allocated (clamped to MAX_BUF_SIZE),
           reallocates the compiled-pattern buffer and, if the block moved,
           shifts b, begalt, fixup_alt_jump, laststart and pending_exact by
           the same distance (the last three only when non-null).

           Does `return REG_ESIZE' from the enclosing function when the
           buffer cannot grow any further, and `return REG_ESPACE' when the
           reallocation yields NULL.

           The WCHAR variant additionally rounds the new size down to a whole
           number of UCHAR_T elements (at least one) and stores the result in
           both COMPILED_BUFFER_VAR and bufp->buffer.  */
   2121 #  define EXTEND_BUFFER()						\
   2122   do {									\
   2123     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
   2124     int wchar_count;							\
   2125     if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
   2126       return REG_ESIZE;							\
   2127     bufp->allocated <<= 1;						\
   2128     if (bufp->allocated > MAX_BUF_SIZE)					\
   2129       bufp->allocated = MAX_BUF_SIZE;					\
   2130     /* How many characters the new buffer can have?  */			\
   2131     wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
   2132     if (wchar_count == 0) wchar_count = 1;				\
   2133     /* Truncate the buffer to CHAR_T align.  */			\
   2134     bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
   2135     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
   2136     bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
   2137     if (COMPILED_BUFFER_VAR == NULL)					\
   2138       return REG_ESPACE;						\
   2139     /* If the buffer moved, move all the pointers into it.  */		\
   2140     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2141       {									\
   2142 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2143 	MOVE_BUFFER_POINTER (b);					\
   2144 	MOVE_BUFFER_POINTER (begalt);					\
   2145 	if (fixup_alt_jump)						\
   2146 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2147 	if (laststart)							\
   2148 	  MOVE_BUFFER_POINTER (laststart);				\
   2149 	if (pending_exact)						\
   2150 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2151       }									\
   2152     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2153   } while (0)
   2154 # else /* BYTE */
        /* BYTE variant: same contract as above, but the size limit test is
           an exact comparison with MAX_BUF_SIZE and the new block comes
           straight from REALLOC.
           NOTE(review): the REALLOC result is assigned directly to
           bufp->buffer; if COMPILED_BUFFER_VAR names that same field, the
           old block can no longer be reached when REALLOC fails -- confirm
           whether callers rely on the buffer after REG_ESPACE.  */
   2155 #  define EXTEND_BUFFER()						\
   2156   do {									\
   2157     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
   2158     if (bufp->allocated == MAX_BUF_SIZE)				\
   2159       return REG_ESIZE;							\
   2160     bufp->allocated <<= 1;						\
   2161     if (bufp->allocated > MAX_BUF_SIZE)					\
   2162       bufp->allocated = MAX_BUF_SIZE;					\
   2163     bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
   2164 						bufp->allocated);	\
   2165     if (COMPILED_BUFFER_VAR == NULL)					\
   2166       return REG_ESPACE;						\
   2167     /* If the buffer moved, move all the pointers into it.  */		\
   2168     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2169       {									\
   2170 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2171 	MOVE_BUFFER_POINTER (b);					\
   2172 	MOVE_BUFFER_POINTER (begalt);					\
   2173 	if (fixup_alt_jump)						\
   2174 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2175 	if (laststart)							\
   2176 	  MOVE_BUFFER_POINTER (laststart);				\
   2177 	if (pending_exact)						\
   2178 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2179       }									\
   2180     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2181   } while (0)
   2182 # endif /* WCHAR */
   2183 
   2184 # ifndef DEFINED_ONCE
   2185 /* Since we have one byte reserved for the register number argument to
   2186    {start,stop}_memory, the maximum number of groups we can report
   2187    things about is what fits in that byte.  */
   2188 #  define MAX_REGNUM 255
   2189 
   2190 /* But patterns can have more than `MAX_REGNUM' registers.  We just
   2191    ignore the excess.  */
   2192 typedef unsigned regnum_t;
   2193 
   2194 
   2195 /* Macros for the compile stack.  */
   2196 
   2197 /* Since offsets can go either forwards or backwards, this type needs to
   2198    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
   2199 /* int may be not enough when sizeof(int) == 2.  */
   2200 typedef long pattern_offset_t;
   2201 
        /* One compile stack entry.  The positions are kept as
           pattern_offset_t offsets rather than pointers.
           NOTE(review): the fields are named after regex_compile's begalt,
           fixup_alt_jump and laststart locals and its regnum counter, so
           they presumably hold the state saved when a group is opened --
           the code that fills them is not in this part of the file.  */
   2202 typedef struct
   2203 {
   2204   pattern_offset_t begalt_offset;
   2205   pattern_offset_t fixup_alt_jump;
   2206   pattern_offset_t inner_group_offset;
   2207   pattern_offset_t laststart_offset;
   2208   regnum_t regnum;
   2209 } compile_stack_elt_t;
   2210 
   2211 
        /* The compile stack itself: an array of `size' entries of which the
           first `avail' are in use.  */
   2212 typedef struct
   2213 {
   2214   compile_stack_elt_t *stack;
   2215   unsigned size;
   2216   unsigned avail;			/* Offset of next open position.  */
   2217 } compile_stack_type;
   2218 
   2219 
        /* Number of entries regex_compile allocates for the stack at first.  */
   2220 #  define INIT_COMPILE_STACK_SIZE 32
   2221 
        /* These three refer to a variable named `compile_stack' in the
           scope where they are used.  */
   2222 #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
   2223 #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
   2224 
   2225 /* The next available element.  */
   2226 #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
   2227 
   2228 # endif /* not DEFINED_ONCE */
   2229 
/* Set the bit for character C in the bit list that `b' points to.
   C is converted to unsigned char first; the byte index is
   C / BYTEWIDTH and the bit inside that byte is C % BYTEWIDTH.
   Both uses of C are parenthesized so that an expression argument is
   converted as a whole rather than only its first operand.
   Requires `b' and BYTEWIDTH be in scope.  C is evaluated twice, so it
   must not have side effects.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* not DEFINED_ONCE */
   2236 
   2237 /* Get the next unsigned number in the uncompiled pattern.

           Fetches characters with PATFETCH into `c' until the pattern ends
           or a character outside '0'..'9' is fetched; that terminating
           character has been consumed and is left in `c'.  Each digit is
           accumulated into NUM, which must be an lvalue.  A negative NUM is
           reset to 0 before the first digit is added.  Once NUM exceeds
           RE_DUP_MAX, further digits are still consumed but no longer
           accumulated.  If no digit is seen, NUM is left unchanged.

           Requires p, pend and c be declared.  The expansion is a bare brace
           block, not a `do ... while (0)' statement.  */
   2238 # define GET_UNSIGNED_NUMBER(num) \
   2239   {									\
   2240     while (p != pend)							\
   2241       {									\
   2242 	PATFETCH (c);							\
   2243 	if (c < '0' || c > '9')						\
   2244 	  break;							\
   2245 	if (num <= RE_DUP_MAX)						\
   2246 	  {								\
   2247 	    if (num < 0)						\
   2248 	      num = 0;							\
   2249 	    num = num * 10 + c - '0';					\
   2250 	  }								\
   2251       }									\
   2252   }
   2253 
   2254 # ifndef DEFINED_ONCE
   2255 #  if defined _LIBC || WIDE_CHAR_SUPPORT
   2256 /* The GNU C library provides support for user-defined character classes
   2257    and the functions from ISO C amendment 1.  */
   2258 #   ifdef CHARCLASS_NAME_MAX
   2259 #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
   2260 #   else
   2261 /* This shouldn't happen but some implementation might still have this
   2262    problem.  Use a reasonable default value.  */
   2263 #    define CHAR_CLASS_MAX_LENGTH 256
   2264 #   endif
   2265 
        /* With wide character support a class name is valid exactly when
           wctype (or __wctype inside libc) returns nonzero for it.  */
   2266 #   ifdef _LIBC
   2267 #    define IS_CHAR_CLASS(string) __wctype (string)
   2268 #   else
   2269 #    define IS_CHAR_CLASS(string) wctype (string)
   2270 #   endif
   2271 #  else
   2272 #   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
   2273 
        /* Without wide character support only these twelve fixed class
           names are recognized, compared with STREQ.  */
   2274 #   define IS_CHAR_CLASS(string)					\
   2275    (STREQ (string, "alpha") || STREQ (string, "upper")			\
   2276     || STREQ (string, "lower") || STREQ (string, "digit")		\
   2277     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   2278     || STREQ (string, "space") || STREQ (string, "print")		\
   2279     || STREQ (string, "punct") || STREQ (string, "graph")		\
   2280     || STREQ (string, "cntrl") || STREQ (string, "blank"))
   2281 #  endif
   2282 # endif /* not DEFINED_ONCE */
   2283 
   2284 # ifndef MATCH_MAY_ALLOCATE
   2286 
   2287 /* If we cannot allocate large objects within re_match_2_internal,
   2288    we make the fail stack and register vectors global.
   2289    The fail stack, we grow to the maximum size when a regexp
   2290    is compiled.
   2291    The register vectors, we adjust in size each time we
   2292    compile a regexp, according to the number of registers it needs.  */
   2293 
   2294 static PREFIX(fail_stack_type) fail_stack;
   2295 
   2296 /* Size with which the following vectors are currently allocated.
   2297    That is so we can make them bigger as needed,
   2298    but never make them smaller.  The size and the `const char **'
           vectors are declared only when DEFINED_ONCE is defined, whereas
           the two register_info vectors below are declared per PREFIX.  */
   2299 #  ifdef DEFINED_ONCE
   2300 static int regs_allocated_size;
   2301 
   2302 static const char **     regstart, **     regend;
   2303 static const char ** old_regstart, ** old_regend;
   2304 static const char **best_regstart, **best_regend;
   2305 static const char **reg_dummy;
   2306 #  endif /* DEFINED_ONCE */
   2307 
   2308 static PREFIX(register_info_type) *PREFIX(reg_info);
   2309 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
   2310 
   2311 /* Make the register vectors big enough for NUM_REGS registers,
   2312    but don't make them smaller.  */
   2313 
   2314 static void
   2315 PREFIX(regex_grow_registers) (num_regs)
   2316      int num_regs;
   2317 {
   2318   if (num_regs > regs_allocated_size)
   2319     {
   2320       RETALLOC_IF (regstart,	 num_regs, const char *);
   2321       RETALLOC_IF (regend,	 num_regs, const char *);
   2322       RETALLOC_IF (old_regstart, num_regs, const char *);
   2323       RETALLOC_IF (old_regend,	 num_regs, const char *);
   2324       RETALLOC_IF (best_regstart, num_regs, const char *);
   2325       RETALLOC_IF (best_regend,	 num_regs, const char *);
   2326       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
   2327       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
   2328       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
   2329 
   2330       regs_allocated_size = num_regs;
   2331     }
   2332 }
   2333 
   2334 # endif /* not MATCH_MAY_ALLOCATE */
   2335 
   2336 # ifndef DEFINED_ONCE
        /* Prototype only; the definition is elsewhere in the file.  The
           compile stack is passed by value.
           NOTE(review): judging by its name and parameters this presumably
           reports whether REGNUM belongs to a group currently on
           COMPILE_STACK -- confirm against the definition.  */
   2338 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
   2339 						 compile_stack,
   2340 						 regnum_t regnum));
   2341 # endif /* not DEFINED_ONCE */
   2342 
   2343 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   2344    Returns one of error codes defined in `regex.h', or zero for success.
   2345 
   2346    Assumes the `allocated' (and perhaps `buffer') and `translate'
   2347    fields are set in BUFP on entry.
   2348 
   2349    If it succeeds, results are put in BUFP (if it returns an error, the
   2350    contents of BUFP are undefined):
   2351      `buffer' is the compiled pattern;
   2352      `syntax' is set to SYNTAX;
   2353      `used' is set to the length of the compiled pattern;
   2354      `fastmap_accurate' is zero;
   2355      `re_nsub' is the number of subexpressions in PATTERN;
   2356      `not_bol' and `not_eol' are zero;
   2357 
   2358    The `fastmap' and `newline_anchor' fields are neither
   2359    examined nor set.  */
   2360 
   2361 /* Return, freeing storage we allocated.  Expands to a single `return'
           statement whose comma expression first frees compile_stack.stack
           (and, in the WCHAR build, also pattern, mbs_offset and is_binary)
           and then yields VALUE.  */
   2362 # ifdef WCHAR
   2363 #  define FREE_STACK_RETURN(value)		\
   2364   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
   2365 # else
   2366 #  define FREE_STACK_RETURN(value)		\
   2367   return (free (compile_stack.stack), value)
   2368 # endif /* WCHAR */
   2369 
   2370 static reg_errcode_t
   2371 PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
   2372      const char *ARG_PREFIX(pattern);
   2373      size_t ARG_PREFIX(size);
   2374      reg_syntax_t syntax;
   2375      struct re_pattern_buffer *bufp;
   2376 {
   2377   /* We fetch characters from PATTERN here.  Even though PATTERN is
   2378      `char *' (i.e., signed), we declare these variables as unsigned, so
   2379      they can be reliably used as array indices.  */
   2380   register UCHAR_T c, c1;
   2381 
   2382 #ifdef WCHAR
   2383   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
   2384   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
   2385   size_t size;
   2386   /* offset buffer for optimization. See convert_mbs_to_wc.  */
   2387   int *mbs_offset = NULL;
   2388   /* It hold whether each wchar_t is binary data or not.  */
   2389   char *is_binary = NULL;
   2390   /* A flag whether exactn is handling binary data or not.  */
   2391   char is_exactn_bin = FALSE;
   2392 #endif /* WCHAR */
   2393 
   2394   /* A random temporary spot in PATTERN.  */
   2395   const CHAR_T *p1;
   2396 
   2397   /* Points to the end of the buffer, where we should append.  */
   2398   register UCHAR_T *b;
   2399 
   2400   /* Keeps track of unclosed groups.  */
   2401   compile_stack_type compile_stack;
   2402 
   2403   /* Points to the current (ending) position in the pattern.  */
   2404 #ifdef WCHAR
   2405   const CHAR_T *p;
   2406   const CHAR_T *pend;
   2407 #else /* BYTE */
   2408   const CHAR_T *p = pattern;
   2409   const CHAR_T *pend = pattern + size;
   2410 #endif /* WCHAR */
   2411 
   2412   /* How to translate the characters in the pattern.  */
   2413   RE_TRANSLATE_TYPE translate = bufp->translate;
   2414 
   2415   /* Address of the count-byte of the most recently inserted `exactn'
   2416      command.  This makes it possible to tell if a new exact-match
   2417      character can be added to that command or if the character requires
   2418      a new `exactn' command.  */
   2419   UCHAR_T *pending_exact = 0;
   2420 
   2421   /* Address of start of the most recently finished expression.
   2422      This tells, e.g., postfix * where to find the start of its
   2423      operand.  Reset at the beginning of groups and alternatives.  */
   2424   UCHAR_T *laststart = 0;
   2425 
   2426   /* Address of beginning of regexp, or inside of last group.  */
   2427   UCHAR_T *begalt;
   2428 
   2429   /* Address of the place where a forward jump should go to the end of
   2430      the containing expression.  Each alternative of an `or' -- except the
   2431      last -- ends with a forward jump of this sort.  */
   2432   UCHAR_T *fixup_alt_jump = 0;
   2433 
   2434   /* Counts open-groups as they are encountered.  Remembered for the
   2435      matching close-group on the compile stack, so the same register
   2436      number is put in the stop_memory as the start_memory.  */
   2437   regnum_t regnum = 0;
   2438 
   2439 #ifdef WCHAR
   2440   /* Initialize the wchar_t PATTERN and offset_buffer.  */
   2441   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
   2442   mbs_offset = TALLOC(csize + 1, int);
   2443   is_binary = TALLOC(csize + 1, char);
   2444   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
   2445     {
   2446       free(pattern);
   2447       free(mbs_offset);
   2448       free(is_binary);
   2449       return REG_ESPACE;
   2450     }
   2451   pattern[csize] = L'\0';	/* sentinel */
   2452   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
   2453   pend = p + size;
   2454   if (size < 0)
   2455     {
   2456       free(pattern);
   2457       free(mbs_offset);
   2458       free(is_binary);
   2459       return REG_BADPAT;
   2460     }
   2461 #endif
   2462 
   2463 #ifdef DEBUG
   2464   DEBUG_PRINT1 ("\nCompiling pattern: ");
   2465   if (debug)
   2466     {
   2467       unsigned debug_count;
   2468 
   2469       for (debug_count = 0; debug_count < size; debug_count++)
   2470         PUT_CHAR (pattern[debug_count]);
   2471       putchar ('\n');
   2472     }
   2473 #endif /* DEBUG */
   2474 
   2475   /* Initialize the compile stack.  */
   2476   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
   2477   if (compile_stack.stack == NULL)
   2478     {
   2479 #ifdef WCHAR
   2480       free(pattern);
   2481       free(mbs_offset);
   2482       free(is_binary);
   2483 #endif
   2484       return REG_ESPACE;
   2485     }
   2486 
   2487   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   2488   compile_stack.avail = 0;
   2489 
   2490   /* Initialize the pattern buffer.  */
   2491   bufp->syntax = syntax;
   2492   bufp->fastmap_accurate = 0;
   2493   bufp->not_bol = bufp->not_eol = 0;
   2494 
   2495   /* Set `used' to zero, so that if we return an error, the pattern
   2496      printer (for debugging) will think there's no pattern.  We reset it
   2497      at the end.  */
   2498   bufp->used = 0;
   2499 
   2500   /* Always count groups, whether or not bufp->no_sub is set.  */
   2501   bufp->re_nsub = 0;
   2502 
   2503 #if !defined emacs && !defined SYNTAX_TABLE
   2504   /* Initialize the syntax table.  */
   2505    init_syntax_once ();
   2506 #endif
   2507 
   2508   if (bufp->allocated == 0)
   2509     {
   2510       if (bufp->buffer)
   2511 	{ /* If zero allocated, but buffer is non-null, try to realloc
   2512              enough space.  This loses if buffer's address is bogus, but
   2513              that is the user's responsibility.  */
   2514 #ifdef WCHAR
   2515 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
   2516 	     buffer.  */
   2517           free(bufp->buffer);
   2518           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
   2519 					UCHAR_T);
   2520 #else
   2521           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
   2522 #endif /* WCHAR */
   2523         }
   2524       else
   2525         { /* Caller did not allocate a buffer.  Do it for them.  */
   2526           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
   2527 					UCHAR_T);
   2528         }
   2529 
   2530       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
   2531 #ifdef WCHAR
   2532       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
   2533 #endif /* WCHAR */
   2534       bufp->allocated = INIT_BUF_SIZE;
   2535     }
   2536 #ifdef WCHAR
   2537   else
   2538     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
   2539 #endif
   2540 
   2541   begalt = b = COMPILED_BUFFER_VAR;
   2542 
   2543   /* Loop through the uncompiled pattern until we're at the end.  */
   2544   while (p != pend)
   2545     {
   2546       PATFETCH (c);
   2547 
   2548       switch (c)
   2549         {
   2550         case '^':
   2551           {
   2552             if (   /* If at start of pattern, it's an operator.  */
   2553                    p == pattern + 1
   2554                    /* If context independent, it's an operator.  */
   2555                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2556                    /* Otherwise, depends on what's come before.  */
   2557                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
   2558               BUF_PUSH (begline);
   2559             else
   2560               goto normal_char;
   2561           }
   2562           break;
   2563 
   2564 
   2565         case '$':
   2566           {
   2567             if (   /* If at end of pattern, it's an operator.  */
   2568                    p == pend
   2569                    /* If context independent, it's an operator.  */
   2570                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2571                    /* Otherwise, depends on what's next.  */
   2572                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
   2573                BUF_PUSH (endline);
   2574              else
   2575                goto normal_char;
   2576            }
   2577            break;
   2578 
   2579 
   2580 	case '+':
   2581         case '?':
   2582           if ((syntax & RE_BK_PLUS_QM)
   2583               || (syntax & RE_LIMITED_OPS))
   2584             goto normal_char;
   2585         handle_plus:
   2586         case '*':
   2587           /* If there is no previous pattern... */
   2588           if (!laststart)
   2589             {
   2590               if (syntax & RE_CONTEXT_INVALID_OPS)
   2591                 FREE_STACK_RETURN (REG_BADRPT);
   2592               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
   2593                 goto normal_char;
   2594             }
   2595 
   2596           {
   2597             /* Are we optimizing this jump?  */
   2598             boolean keep_string_p = false;
   2599 
   2600             /* 1 means zero (many) matches is allowed.  */
   2601             char zero_times_ok = 0, many_times_ok = 0;
   2602 
   2603             /* If there is a sequence of repetition chars, collapse it
   2604                down to just one (the right one).  We can't combine
   2605                interval operators with these because of, e.g., `a{2}*',
   2606                which should only match an even number of `a's.  */
   2607 
   2608             for (;;)
   2609               {
   2610                 zero_times_ok |= c != '+';
   2611                 many_times_ok |= c != '?';
   2612 
   2613                 if (p == pend)
   2614                   break;
   2615 
   2616                 PATFETCH (c);
   2617 
   2618                 if (c == '*'
   2619                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
   2620                   ;
   2621 
   2622                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
   2623                   {
   2624                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2625 
   2626                     PATFETCH (c1);
   2627                     if (!(c1 == '+' || c1 == '?'))
   2628                       {
   2629                         PATUNFETCH;
   2630                         PATUNFETCH;
   2631                         break;
   2632                       }
   2633 
   2634                     c = c1;
   2635                   }
   2636                 else
   2637                   {
   2638                     PATUNFETCH;
   2639                     break;
   2640                   }
   2641 
   2642                 /* If we get here, we found another repeat character.  */
   2643                }
   2644 
   2645             /* Star, etc. applied to an empty pattern is equivalent
   2646                to an empty pattern.  */
   2647             if (!laststart)
   2648               break;
   2649 
   2650             /* Now we know whether or not zero matches is allowed
   2651                and also whether or not two or more matches is allowed.  */
   2652             if (many_times_ok)
   2653               { /* More than one repetition is allowed, so put in at the
   2654                    end a backward relative jump from `b' to before the next
   2655                    jump we're going to put in below (which jumps from
   2656                    laststart to after this jump).
   2657 
   2658                    But if we are at the `*' in the exact sequence `.*\n',
   2659                    insert an unconditional jump backwards to the .,
   2660                    instead of the beginning of the loop.  This way we only
   2661                    push a failure point once, instead of every time
   2662                    through the loop.  */
   2663                 assert (p - 1 > pattern);
   2664 
   2665                 /* Allocate the space for the jump.  */
   2666                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2667 
   2668                 /* We know we are not at the first character of the pattern,
   2669                    because laststart was nonzero.  And we've already
   2670                    incremented `p', by the way, to be the character after
   2671                    the `*'.  Do we have to do something analogous here
   2672                    for null bytes, because of RE_DOT_NOT_NULL?  */
   2673                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
   2674 		    && zero_times_ok
   2675                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
   2676                     && !(syntax & RE_DOT_NEWLINE))
   2677                   { /* We have .*\n.  */
   2678                     STORE_JUMP (jump, b, laststart);
   2679                     keep_string_p = true;
   2680                   }
   2681                 else
   2682                   /* Anything else.  */
   2683                   STORE_JUMP (maybe_pop_jump, b, laststart -
   2684 			      (1 + OFFSET_ADDRESS_SIZE));
   2685 
   2686                 /* We've added more stuff to the buffer.  */
   2687                 b += 1 + OFFSET_ADDRESS_SIZE;
   2688               }
   2689 
   2690             /* On failure, jump from laststart to b + 3, which will be the
   2691                end of the buffer after this jump is inserted.  */
   2692 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
   2693 	       'b + 3'.  */
   2694             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2695             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
   2696                                        : on_failure_jump,
   2697                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
   2698             pending_exact = 0;
   2699             b += 1 + OFFSET_ADDRESS_SIZE;
   2700 
   2701             if (!zero_times_ok)
   2702               {
   2703                 /* At least one repetition is required, so insert a
   2704                    `dummy_failure_jump' before the initial
   2705                    `on_failure_jump' instruction of the loop. This
   2706                    effects a skip over that instruction the first time
   2707                    we hit that loop.  */
   2708                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2709                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
   2710 			     2 + 2 * OFFSET_ADDRESS_SIZE);
   2711                 b += 1 + OFFSET_ADDRESS_SIZE;
   2712               }
   2713             }
   2714 	  break;
   2715 
   2716 
   2717 	case '.':
   2718           laststart = b;
   2719           BUF_PUSH (anychar);
   2720           break;
   2721 
   2722 
   2723         case '[':
   2724           {
   2725             boolean had_char_class = false;
   2726 #ifdef WCHAR
   2727 	    CHAR_T range_start = 0xffffffff;
   2728 #else
   2729 	    unsigned int range_start = 0xffffffff;
   2730 #endif
   2731             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2732 
   2733 #ifdef WCHAR
   2734 	    /* We assume a charset(_not) structure as a wchar_t array.
   2735 	       charset[0] = (re_opcode_t) charset(_not)
   2736                charset[1] = l (= length of char_classes)
   2737                charset[2] = m (= length of collating_symbols)
   2738                charset[3] = n (= length of equivalence_classes)
   2739 	       charset[4] = o (= length of char_ranges)
   2740 	       charset[5] = p (= length of chars)
   2741 
   2742                charset[6] = char_class (wctype_t)
   2743                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
   2744                          ...
   2745                charset[l+5]  = char_class (wctype_t)
   2746 
   2747                charset[l+6]  = collating_symbol (wchar_t)
   2748                             ...
   2749                charset[l+m+5]  = collating_symbol (wchar_t)
   2750 					ifdef _LIBC we use the index in
   2751 					_NL_COLLATE_SYMB_EXTRAMB instead of
   2752 					wchar_t string.
   2753 
   2754                charset[l+m+6]  = equivalence_classes (wchar_t)
   2755                               ...
   2756                charset[l+m+n+5]  = equivalence_classes (wchar_t)
   2757 					ifdef _LIBC we use the index in
   2758 					_NL_COLLATE_WEIGHT instead of
   2759 					wchar_t string.
   2760 
   2761 	       charset[l+m+n+6] = range_start
   2762 	       charset[l+m+n+7] = range_end
   2763 	                       ...
   2764 	       charset[l+m+n+2o+4] = range_start
   2765 	       charset[l+m+n+2o+5] = range_end
   2766 					ifdef _LIBC we use the value looked up
   2767 					in _NL_COLLATE_COLLSEQ instead of
   2768 					wchar_t character.
   2769 
   2770 	       charset[l+m+n+2o+6] = char
   2771 	                          ...
   2772 	       charset[l+m+n+2o+p+5] = char
   2773 
   2774 	     */
   2775 
   2776 	    /* We need at least 6 spaces: the opcode, the length of
   2777                char_classes, the length of collating_symbols, the length of
   2778                equivalence_classes, the length of char_ranges, the length of
   2779                chars.  */
   2780 	    GET_BUFFER_SPACE (6);
   2781 
   2782 	    /* Save b as laststart.  From here on laststart serves as the
   2783 	       pointer to the first element of the charset; in other words,
   2784 	       laststart[i] denotes charset[i].  */
   2785             laststart = b;
   2786 
   2787             /* We test `*p == '^' twice, instead of using an if
   2788                statement, so we only need one BUF_PUSH.  */
   2789             BUF_PUSH (*p == '^' ? charset_not : charset);
   2790             if (*p == '^')
   2791               p++;
   2792 
   2793             /* Push the length of char_classes, the length of
   2794                collating_symbols, the length of equivalence_classes, the
   2795                length of char_ranges and the length of chars.  */
   2796             BUF_PUSH_3 (0, 0, 0);
   2797             BUF_PUSH_2 (0, 0);
   2798 
   2799             /* Remember the first position in the bracket expression.  */
   2800             p1 = p;
   2801 
   2802             /* charset_not matches newline according to a syntax bit.  */
   2803             if ((re_opcode_t) b[-6] == charset_not
   2804                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   2805 	      {
   2806 		BUF_PUSH('\n');
   2807 		laststart[5]++; /* Update the length of characters  */
   2808 	      }
   2809 
   2810             /* Read in characters and ranges, setting map bits.  */
   2811             for (;;)
   2812               {
   2813                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2814 
   2815                 PATFETCH (c);
   2816 
   2817                 /* \ might escape characters inside [...] and [^...].  */
   2818                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   2819                   {
   2820                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2821 
   2822                     PATFETCH (c1);
   2823 		    BUF_PUSH(c1);
   2824 		    laststart[5]++; /* Update the length of chars  */
   2825 		    range_start = c1;
   2826                     continue;
   2827                   }
   2828 
   2829                 /* Could be the end of the bracket expression.  If it's
   2830                    not (i.e., when the bracket expression is `[]' so
   2831                    far), the ']' character bit gets set way below.  */
   2832                 if (c == ']' && p != p1 + 1)
   2833                   break;
   2834 
   2835                 /* Look ahead to see if it's a range when the last thing
   2836                    was a character class.  */
   2837                 if (had_char_class && c == '-' && *p != ']')
   2838                   FREE_STACK_RETURN (REG_ERANGE);
   2839 
   2840                 /* Look ahead to see if it's a range when the last thing
   2841                    was a character: if this is a hyphen not at the
   2842                    beginning or the end of a list, then it's the range
   2843                    operator.  */
   2844                 if (c == '-'
   2845                     && !(p - 2 >= pattern && p[-2] == '[')
   2846                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   2847                     && *p != ']')
   2848                   {
   2849                     reg_errcode_t ret;
   2850 		    /* Allocate the space for range_start and range_end.  */
   2851 		    GET_BUFFER_SPACE (2);
   2852 		    /* Update the pointer to indicate end of buffer.  */
   2853                     b += 2;
   2854                     ret = wcs_compile_range (range_start, &p, pend, translate,
   2855                                          syntax, b, laststart);
   2856                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2857                     range_start = 0xffffffff;
   2858                   }
   2859                 else if (p[0] == '-' && p[1] != ']')
   2860                   { /* This handles ranges made up of characters only.  */
   2861                     reg_errcode_t ret;
   2862 
   2863 		    /* Move past the `-'.  */
   2864                     PATFETCH (c1);
   2865 		    /* Allocate the space for range_start and range_end.  */
   2866 		    GET_BUFFER_SPACE (2);
   2867 		    /* Update the pointer to indicate end of buffer.  */
   2868                     b += 2;
   2869                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
   2870                                          laststart);
   2871                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2872 		    range_start = 0xffffffff;
   2873                   }
   2874 
   2875                 /* See if we're at the beginning of a possible character
   2876                    class.  */
   2877                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   2878                   { /* Leave room for the null.  */
   2879                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   2880 
   2881                     PATFETCH (c);
   2882                     c1 = 0;
   2883 
   2884                     /* If pattern is `[[:'.  */
   2885                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2886 
   2887                     for (;;)
   2888                       {
   2889                         PATFETCH (c);
   2890                         if ((c == ':' && *p == ']') || p == pend)
   2891                           break;
   2892 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   2893 			  str[c1++] = c;
   2894 			else
   2895 			  /* This is in any case an invalid class name.  */
   2896 			  str[0] = '\0';
   2897                       }
   2898                     str[c1] = '\0';
   2899 
   2900                     /* If it is a word bracketed by `[:' and `:]', store the
   2901                        class; otherwise undo the ending character and the
   2902                        letters, and store the leading `[' and `:' as characters.  */
   2903                     if (c == ':' && *p == ']')
   2904                       {
   2905 			wctype_t wt;
   2906 			uintptr_t alignedp;
   2907 
   2908 			/* Query the character class as wctype_t.  */
   2909 			wt = IS_CHAR_CLASS (str);
   2910 			if (wt == 0)
   2911 			  FREE_STACK_RETURN (REG_ECTYPE);
   2912 
   2913                         /* Throw away the ] at the end of the character
   2914                            class.  */
   2915                         PATFETCH (c);
   2916 
   2917                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2918 
   2919 			/* Allocate the space for character class.  */
   2920                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
   2921 			/* Update the pointer to indicate end of buffer.  */
   2922                         b += CHAR_CLASS_SIZE;
   2923 			/* Move data which follow character classes
   2924 			    not to violate the data.  */
   2925                         insert_space(CHAR_CLASS_SIZE,
   2926 				     laststart + 6 + laststart[1],
   2927 				     b - 1);
   2928 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
   2929 				    + __alignof__(wctype_t) - 1)
   2930 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   2931 			/* Store the character class.  */
   2932                         *((wctype_t*)alignedp) = wt;
   2933                         /* Update length of char_classes */
   2934                         laststart[1] += CHAR_CLASS_SIZE;
   2935 
   2936                         had_char_class = true;
   2937                       }
   2938                     else
   2939                       {
   2940                         c1++;
   2941                         while (c1--)
   2942                           PATUNFETCH;
   2943                         BUF_PUSH ('[');
   2944                         BUF_PUSH (':');
   2945                         laststart[5] += 2; /* Update the length of characters  */
   2946 			range_start = ':';
   2947                         had_char_class = false;
   2948                       }
   2949                   }
   2950                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
   2951 							  || *p == '.'))
   2952 		  {
   2953 		    CHAR_T str[128];	/* Should be large enough.  */
   2954 		    CHAR_T delim = *p; /* '=' or '.'  */
   2955 # ifdef _LIBC
   2956 		    uint32_t nrules =
   2957 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   2958 # endif
   2959 		    PATFETCH (c);
   2960 		    c1 = 0;
   2961 
   2962 		    /* If pattern is `[[=' or '[[.'.  */
   2963 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2964 
   2965 		    for (;;)
   2966 		      {
   2967 			PATFETCH (c);
   2968 			if ((c == delim && *p == ']') || p == pend)
   2969 			  break;
   2970 			if (c1 < sizeof (str) - 1)
   2971 			  str[c1++] = c;
   2972 			else
   2973 			  /* This is in any case an invalid class name.  */
   2974 			  str[0] = '\0';
   2975                       }
   2976 		    str[c1] = '\0';
   2977 
   2978 		    if (c == delim && *p == ']' && str[0] != '\0')
   2979 		      {
   2980                         unsigned int i, offset;
   2981 			/* If we have no collation data we use the default
   2982 			   collation in which each character is in a class
   2983 			   by itself.  It also means that ASCII is the
   2984 			   character set and therefore we cannot have character
   2985 			   with more than one byte in the multibyte
   2986 			   representation.  */
   2987 
   2988                         /* If not defined _LIBC, we push the name and
   2989 			   `\0' for the sake of matching performance.  */
   2990 			int datasize = c1 + 1;
   2991 
   2992 # ifdef _LIBC
   2993 			int32_t idx = 0;
   2994 			if (nrules == 0)
   2995 # endif
   2996 			  {
   2997 			    if (c1 != 1)
   2998 			      FREE_STACK_RETURN (REG_ECOLLATE);
   2999 			  }
   3000 # ifdef _LIBC
   3001 			else
   3002 			  {
   3003 			    const int32_t *table;
   3004 			    const int32_t *weights;
   3005 			    const int32_t *extra;
   3006 			    const int32_t *indirect;
   3007 			    wint_t *cp;
   3008 
   3009 			    /* This #include defines a local function!  */
   3010 #  include <locale/weightwc.h>
   3011 
   3012 			    if(delim == '=')
   3013 			      {
   3014 				/* We push the index for equivalence class.  */
   3015 				cp = (wint_t*)str;
   3016 
   3017 				table = (const int32_t *)
   3018 				  _NL_CURRENT (LC_COLLATE,
   3019 					       _NL_COLLATE_TABLEWC);
   3020 				weights = (const int32_t *)
   3021 				  _NL_CURRENT (LC_COLLATE,
   3022 					       _NL_COLLATE_WEIGHTWC);
   3023 				extra = (const int32_t *)
   3024 				  _NL_CURRENT (LC_COLLATE,
   3025 					       _NL_COLLATE_EXTRAWC);
   3026 				indirect = (const int32_t *)
   3027 				  _NL_CURRENT (LC_COLLATE,
   3028 					       _NL_COLLATE_INDIRECTWC);
   3029 
   3030 				idx = findidx ((const wint_t**)&cp);
   3031 				if (idx == 0 || cp < (wint_t*) str + c1)
   3032 				  /* This is no valid character.  */
   3033 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3034 
   3035 				str[0] = (wchar_t)idx;
   3036 			      }
   3037 			    else /* delim == '.' */
   3038 			      {
   3039 				/* We push collation sequence value
   3040 				   for collating symbol.  */
   3041 				int32_t table_size;
   3042 				const int32_t *symb_table;
   3043 				const unsigned char *extra;
   3044 				int32_t idx;
   3045 				int32_t elem;
   3046 				int32_t second;
   3047 				int32_t hash;
   3048 				char char_str[c1];
   3049 
   3050 				/* We have to convert the name to a single-byte
   3051 				   string.  This is possible since the names
   3052 				   consist of ASCII characters and the internal
   3053 				   representation is UCS4.  */
   3054 				for (i = 0; i < c1; ++i)
   3055 				  char_str[i] = str[i];
   3056 
   3057 				table_size =
   3058 				  _NL_CURRENT_WORD (LC_COLLATE,
   3059 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
   3060 				symb_table = (const int32_t *)
   3061 				  _NL_CURRENT (LC_COLLATE,
   3062 					       _NL_COLLATE_SYMB_TABLEMB);
   3063 				extra = (const unsigned char *)
   3064 				  _NL_CURRENT (LC_COLLATE,
   3065 					       _NL_COLLATE_SYMB_EXTRAMB);
   3066 
   3067 				/* Locate the character in the hashing table.  */
   3068 				hash = elem_hash (char_str, c1);
   3069 
   3070 				idx = 0;
   3071 				elem = hash % table_size;
   3072 				second = hash % (table_size - 2);
   3073 				while (symb_table[2 * elem] != 0)
   3074 				  {
   3075 				    /* First compare the hashing value.  */
   3076 				    if (symb_table[2 * elem] == hash
   3077 					&& c1 == extra[symb_table[2 * elem + 1]]
   3078 					&& memcmp (char_str,
   3079 						   &extra[symb_table[2 * elem + 1]
   3080 							 + 1], c1) == 0)
   3081 				      {
   3082 					/* Yep, this is the entry.  */
   3083 					idx = symb_table[2 * elem + 1];
   3084 					idx += 1 + extra[idx];
   3085 					break;
   3086 				      }
   3087 
   3088 				    /* Next entry.  */
   3089 				    elem += second;
   3090 				  }
   3091 
   3092 				if (symb_table[2 * elem] != 0)
   3093 				  {
   3094 				    /* Compute the index of the byte sequence
   3095 				       in the table.  */
   3096 				    idx += 1 + extra[idx];
   3097 				    /* Adjust for the alignment.  */
   3098 				    idx = (idx + 3) & ~3;
   3099 
   3100 				    str[0] = (wchar_t) idx + 4;
   3101 				  }
   3102 				else if (symb_table[2 * elem] == 0 && c1 == 1)
   3103 				  {
   3104 				    /* No valid character.  Match it as a
   3105 				       single byte character.  */
   3106 				    had_char_class = false;
   3107 				    BUF_PUSH(str[0]);
   3108 				    /* Update the length of characters  */
   3109 				    laststart[5]++;
   3110 				    range_start = str[0];
   3111 
   3112 				    /* Throw away the ] at the end of the
   3113 				       collating symbol.  */
   3114 				    PATFETCH (c);
   3115 				    /* Go on to the next element of the bracket list.  */
   3116 				    continue;
   3117 				  }
   3118 				else
   3119 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3120 			      }
   3121 			    datasize = 1;
   3122 			  }
   3123 # endif
   3124                         /* Throw away the ] at the end of the equivalence
   3125                            class (or collating symbol).  */
   3126                         PATFETCH (c);
   3127 
   3128 			/* Allocate the space for the equivalence class
   3129 			   (or collating symbol) (and '\0' if needed).  */
   3130                         GET_BUFFER_SPACE(datasize);
   3131 			/* Update the pointer to indicate end of buffer.  */
   3132                         b += datasize;
   3133 
   3134 			if (delim == '=')
   3135 			  { /* equivalence class  */
   3136 			    /* Calculate the offset of char_ranges,
   3137 			       which is next to equivalence_classes.  */
   3138 			    offset = laststart[1] + laststart[2]
   3139 			      + laststart[3] +6;
   3140 			    /* Insert space.  */
   3141 			    insert_space(datasize, laststart + offset, b - 1);
   3142 
   3143 			    /* Write the equivalence_class and \0.  */
   3144 			    for (i = 0 ; i < datasize ; i++)
   3145 			      laststart[offset + i] = str[i];
   3146 
   3147 			    /* Update the length of equivalence_classes.  */
   3148 			    laststart[3] += datasize;
   3149 			    had_char_class = true;
   3150 			  }
   3151 			else /* delim == '.' */
   3152 			  { /* collating symbol  */
   3153 			    /* Calculate the offset of the equivalence_classes,
   3154 			       which is next to collating_symbols.  */
   3155 			    offset = laststart[1] + laststart[2] + 6;
   3156 			    /* Insert space and write the collating_symbol
   3157 			       and \0.  */
   3158 			    insert_space(datasize, laststart + offset, b-1);
   3159 			    for (i = 0 ; i < datasize ; i++)
   3160 			      laststart[offset + i] = str[i];
   3161 
   3162 			    /* In re_match_2_internal if range_start < -1, we
   3163 			       assume -range_start is the offset of the
   3164 			       collating symbol which is specified as
   3165 			       the character of the range start.  So we assign
   3166 			       -(laststart[1] + laststart[2] + 6) to
   3167 			       range_start.  */
   3168 			    range_start = -(laststart[1] + laststart[2] + 6);
   3169 			    /* Update the length of collating_symbol.  */
   3170 			    laststart[2] += datasize;
   3171 			    had_char_class = false;
   3172 			  }
   3173 		      }
   3174                     else
   3175                       {
   3176                         c1++;
   3177                         while (c1--)
   3178                           PATUNFETCH;
   3179                         BUF_PUSH ('[');
   3180                         BUF_PUSH (delim);
   3181                         laststart[5] += 2; /* Update the length of characters  */
   3182 			range_start = delim;
   3183                         had_char_class = false;
   3184                       }
   3185 		  }
   3186                 else
   3187                   {
   3188                     had_char_class = false;
   3189 		    BUF_PUSH(c);
   3190 		    laststart[5]++;  /* Update the length of characters  */
   3191 		    range_start = c;
   3192                   }
   3193 	      }
   3194 
   3195 #else /* BYTE */
   3196             /* Ensure that we have enough space to push a charset: the
   3197                opcode, the length count, and the bitset; 34 bytes in all.  */
   3198 	    GET_BUFFER_SPACE (34);
   3199 
   3200             laststart = b;
   3201 
   3202             /* We test `*p == '^' twice, instead of using an if
   3203                statement, so we only need one BUF_PUSH.  */
   3204             BUF_PUSH (*p == '^' ? charset_not : charset);
   3205             if (*p == '^')
   3206               p++;
   3207 
   3208             /* Remember the first position in the bracket expression.  */
   3209             p1 = p;
   3210 
   3211             /* Push the number of bytes in the bitmap.  */
   3212             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
   3213 
   3214             /* Clear the whole map.  */
   3215             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
   3216 
   3217             /* charset_not matches newline according to a syntax bit.  */
   3218             if ((re_opcode_t) b[-2] == charset_not
   3219                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   3220               SET_LIST_BIT ('\n');
   3221 
   3222             /* Read in characters and ranges, setting map bits.  */
   3223             for (;;)
   3224               {
   3225                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3226 
   3227                 PATFETCH (c);
   3228 
   3229                 /* \ might escape characters inside [...] and [^...].  */
   3230                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   3231                   {
   3232                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3233 
   3234                     PATFETCH (c1);
   3235                     SET_LIST_BIT (c1);
   3236 		    range_start = c1;
   3237                     continue;
   3238                   }
   3239 
   3240                 /* Could be the end of the bracket expression.  If it's
   3241                    not (i.e., when the bracket expression is `[]' so
   3242                    far), the ']' character bit gets set way below.  */
   3243                 if (c == ']' && p != p1 + 1)
   3244                   break;
   3245 
   3246                 /* Look ahead to see if it's a range when the last thing
   3247                    was a character class.  */
   3248                 if (had_char_class && c == '-' && *p != ']')
   3249                   FREE_STACK_RETURN (REG_ERANGE);
   3250 
   3251                 /* Look ahead to see if it's a range when the last thing
   3252                    was a character: if this is a hyphen not at the
   3253                    beginning or the end of a list, then it's the range
   3254                    operator.  */
   3255                 if (c == '-'
   3256                     && !(p - 2 >= pattern && p[-2] == '[')
   3257                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   3258                     && *p != ']')
   3259                   {
   3260                     reg_errcode_t ret
   3261                       = byte_compile_range (range_start, &p, pend, translate,
   3262 					    syntax, b);
   3263                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3264 		    range_start = 0xffffffff;
   3265                   }
   3266 
   3267                 else if (p[0] == '-' && p[1] != ']')
   3268                   { /* This handles ranges made up of characters only.  */
   3269                     reg_errcode_t ret;
   3270 
   3271 		    /* Move past the `-'.  */
   3272                     PATFETCH (c1);
   3273 
   3274                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
   3275                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3276 		    range_start = 0xffffffff;
   3277                   }
   3278 
   3279                 /* See if we're at the beginning of a possible character
   3280                    class.  */
   3281 
   3282                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   3283                   { /* Leave room for the null.  */
   3284                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   3285 
   3286                     PATFETCH (c);
   3287                     c1 = 0;
   3288 
   3289                     /* If pattern is `[[:'.  */
   3290                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3291 
   3292                     for (;;)
   3293                       {
   3294                         PATFETCH (c);
   3295                         if ((c == ':' && *p == ']') || p == pend)
   3296                           break;
   3297 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   3298 			  str[c1++] = c;
   3299 			else
   3300 			  /* This is in any case an invalid class name.  */
   3301 			  str[0] = '\0';
   3302                       }
   3303                     str[c1] = '\0';
   3304 
   3305                     /* If isn't a word bracketed by `[:' and `:]':
   3306                        undo the ending character, the letters, and leave
   3307                        the leading `:' and `[' (but set bits for them).  */
   3308                     if (c == ':' && *p == ']')
   3309                       {
   3310 # if defined _LIBC || WIDE_CHAR_SUPPORT
   3311                         boolean is_lower = STREQ (str, "lower");
   3312                         boolean is_upper = STREQ (str, "upper");
   3313 			wctype_t wt;
   3314                         int ch;
   3315 
   3316 			wt = IS_CHAR_CLASS (str);
   3317 			if (wt == 0)
   3318 			  FREE_STACK_RETURN (REG_ECTYPE);
   3319 
   3320                         /* Throw away the ] at the end of the character
   3321                            class.  */
   3322                         PATFETCH (c);
   3323 
   3324                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3325 
   3326                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
   3327 			  {
   3328 			    if (iswctype (btowc (ch), wt))
   3329 			      SET_LIST_BIT (ch);
   3330 
   3331 			    if (translate && (is_upper || is_lower)
   3332 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3333 			      SET_LIST_BIT (ch);
   3334 			  }
   3335 
   3336                         had_char_class = true;
   3337 # else
   3338                         int ch;
   3339                         boolean is_alnum = STREQ (str, "alnum");
   3340                         boolean is_alpha = STREQ (str, "alpha");
   3341                         boolean is_blank = STREQ (str, "blank");
   3342                         boolean is_cntrl = STREQ (str, "cntrl");
   3343                         boolean is_digit = STREQ (str, "digit");
   3344                         boolean is_graph = STREQ (str, "graph");
   3345                         boolean is_lower = STREQ (str, "lower");
   3346                         boolean is_print = STREQ (str, "print");
   3347                         boolean is_punct = STREQ (str, "punct");
   3348                         boolean is_space = STREQ (str, "space");
   3349                         boolean is_upper = STREQ (str, "upper");
   3350                         boolean is_xdigit = STREQ (str, "xdigit");
   3351 
   3352                         if (!IS_CHAR_CLASS (str))
   3353 			  FREE_STACK_RETURN (REG_ECTYPE);
   3354 
   3355                         /* Throw away the ] at the end of the character
   3356                            class.  */
   3357                         PATFETCH (c);
   3358 
   3359                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3360 
   3361                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
   3362                           {
   3363 			    /* This was split into 3 if's to
   3364 			       avoid an arbitrary limit in some compiler.  */
   3365                             if (   (is_alnum  && ISALNUM (ch))
   3366                                 || (is_alpha  && ISALPHA (ch))
   3367                                 || (is_blank  && ISBLANK (ch))
   3368                                 || (is_cntrl  && ISCNTRL (ch)))
   3369 			      SET_LIST_BIT (ch);
   3370 			    if (   (is_digit  && ISDIGIT (ch))
   3371                                 || (is_graph  && ISGRAPH (ch))
   3372                                 || (is_lower  && ISLOWER (ch))
   3373                                 || (is_print  && ISPRINT (ch)))
   3374 			      SET_LIST_BIT (ch);
   3375 			    if (   (is_punct  && ISPUNCT (ch))
   3376                                 || (is_space  && ISSPACE (ch))
   3377                                 || (is_upper  && ISUPPER (ch))
   3378                                 || (is_xdigit && ISXDIGIT (ch)))
   3379 			      SET_LIST_BIT (ch);
   3380 			    if (   translate && (is_upper || is_lower)
   3381 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3382 			      SET_LIST_BIT (ch);
   3383                           }
   3384                         had_char_class = true;
   3385 # endif	/* libc || wctype.h */
   3386                       }
   3387                     else
   3388                       {
   3389                         c1++;
   3390                         while (c1--)
   3391                           PATUNFETCH;
   3392                         SET_LIST_BIT ('[');
   3393                         SET_LIST_BIT (':');
   3394 			range_start = ':';
   3395                         had_char_class = false;
   3396                       }
   3397                   }
   3398                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
   3399 		  {
   3400 		    unsigned char str[MB_LEN_MAX + 1];
   3401 # ifdef _LIBC
   3402 		    uint32_t nrules =
   3403 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3404 # endif
   3405 
   3406 		    PATFETCH (c);
   3407 		    c1 = 0;
   3408 
   3409 		    /* If pattern is `[[='.  */
   3410 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3411 
   3412 		    for (;;)
   3413 		      {
   3414 			PATFETCH (c);
   3415 			if ((c == '=' && *p == ']') || p == pend)
   3416 			  break;
   3417 			if (c1 < MB_LEN_MAX)
   3418 			  str[c1++] = c;
   3419 			else
   3420 			  /* This is in any case an invalid class name.  */
   3421 			  str[0] = '\0';
   3422                       }
   3423 		    str[c1] = '\0';
   3424 
   3425 		    if (c == '=' && *p == ']' && str[0] != '\0')
   3426 		      {
   3427 			/* If we have no collation data we use the default
   3428 			   collation in which each character is in a class
   3429 			   by itself.  It also means that ASCII is the
   3430 			   character set and therefore we cannot have character
   3431 			   with more than one byte in the multibyte
   3432 			   representation.  */
   3433 # ifdef _LIBC
   3434 			if (nrules == 0)
   3435 # endif
   3436 			  {
   3437 			    if (c1 != 1)
   3438 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3439 
   3440 			    /* Throw away the ] at the end of the equivalence
   3441 			       class.  */
   3442 			    PATFETCH (c);
   3443 
   3444 			    /* Set the bit for the character.  */
   3445 			    SET_LIST_BIT (str[0]);
   3446 			  }
   3447 # ifdef _LIBC
   3448 			else
   3449 			  {
   3450 			    /* Try to match the byte sequence in `str' against
   3451 			       those known to the collate implementation.
   3452 			       First find out whether the bytes in `str' are
   3453 			       actually from exactly one character.  */
   3454 			    const int32_t *table;
   3455 			    const unsigned char *weights;
   3456 			    const unsigned char *extra;
   3457 			    const int32_t *indirect;
   3458 			    int32_t idx;
   3459 			    const unsigned char *cp = str;
   3460 			    int ch;
   3461 
   3462 			    /* This #include defines a local function!  */
   3463 #  include <locale/weight.h>
   3464 
   3465 			    table = (const int32_t *)
   3466 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3467 			    weights = (const unsigned char *)
   3468 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3469 			    extra = (const unsigned char *)
   3470 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3471 			    indirect = (const int32_t *)
   3472 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3473 
   3474 			    idx = findidx (&cp);
   3475 			    if (idx == 0 || cp < str + c1)
   3476 			      /* This is no valid character.  */
   3477 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3478 
   3479 			    /* Throw away the ] at the end of the equivalence
   3480 			       class.  */
   3481 			    PATFETCH (c);
   3482 
   3483 			    /* Now we have to go through the whole table
   3484 			       and find all characters which have the same
   3485 			       first level weight.
   3486 
   3487 			       XXX Note that this is not entirely correct.
   3488 			       we would have to match multibyte sequences
   3489 			       but this is not possible with the current
   3490 			       implementation.  */
   3491 			    for (ch = 1; ch < 256; ++ch)
   3492 			      /* XXX This test would have to be changed if we
   3493 				 would allow matching multibyte sequences.  */
   3494 			      if (table[ch] > 0)
   3495 				{
   3496 				  int32_t idx2 = table[ch];
   3497 				  size_t len = weights[idx2];
   3498 
   3499 				  /* Test whether the lengths match.  */
   3500 				  if (weights[idx] == len)
   3501 				    {
   3502 				      /* They do.  Now compare the bytes of
   3503 					 the weight.  */
   3504 				      size_t cnt = 0;
   3505 
   3506 				      while (cnt < len
   3507 					     && (weights[idx + 1 + cnt]
   3508 						 == weights[idx2 + 1 + cnt]))
   3509 					++cnt;
   3510 
   3511 				      if (cnt == len)
   3512 					/* They match.  Mark the character as
   3513 					   acceptable.  */
   3514 					SET_LIST_BIT (ch);
   3515 				    }
   3516 				}
   3517 			  }
   3518 # endif
   3519 			had_char_class = true;
   3520 		      }
   3521                     else
   3522                       {
   3523                         c1++;
   3524                         while (c1--)
   3525                           PATUNFETCH;
   3526                         SET_LIST_BIT ('[');
   3527                         SET_LIST_BIT ('=');
   3528 			range_start = '=';
   3529                         had_char_class = false;
   3530                       }
   3531 		  }
   3532                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
   3533 		  {
   3534 		    unsigned char str[128];	/* Should be large enough.  */
   3535 # ifdef _LIBC
   3536 		    uint32_t nrules =
   3537 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3538 # endif
   3539 
   3540 		    PATFETCH (c);
   3541 		    c1 = 0;
   3542 
   3543 		    /* If pattern is `[[.'.  */
   3544 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3545 
   3546 		    for (;;)
   3547 		      {
   3548 			PATFETCH (c);
   3549 			if ((c == '.' && *p == ']') || p == pend)
   3550 			  break;
   3551 			if (c1 < sizeof (str))
   3552 			  str[c1++] = c;
   3553 			else
   3554 			  /* This is in any case an invalid class name.  */
   3555 			  str[0] = '\0';
   3556                       }
   3557 		    str[c1] = '\0';
   3558 
   3559 		    if (c == '.' && *p == ']' && str[0] != '\0')
   3560 		      {
   3561 			/* If we have no collation data we use the default
   3562 			   collation in which each character is the name
   3563 			   for its own class which contains only the one
   3564 			   character.  It also means that ASCII is the
   3565 			   character set and therefore we cannot have character
   3566 			   with more than one byte in the multibyte
   3567 			   representation.  */
   3568 # ifdef _LIBC
   3569 			if (nrules == 0)
   3570 # endif
   3571 			  {
   3572 			    if (c1 != 1)
   3573 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3574 
   3575 			    /* Throw away the ] at the end of the collating
   3576 			       symbol.  */
   3577 			    PATFETCH (c);
   3578 
   3579 			    /* Set the bit for the character.  */
   3580 			    SET_LIST_BIT (str[0]);
   3581 			    range_start = ((const unsigned char *) str)[0];
   3582 			  }
   3583 # ifdef _LIBC
   3584 			else
   3585 			  {
   3586 			    /* Try to match the byte sequence in `str' against
   3587 			       those known to the collate implementation.
   3588 			       First find out whether the bytes in `str' are
   3589 			       actually from exactly one character.  */
   3590 			    int32_t table_size;
   3591 			    const int32_t *symb_table;
   3592 			    const unsigned char *extra;
   3593 			    int32_t idx;
   3594 			    int32_t elem;
   3595 			    int32_t second;
   3596 			    int32_t hash;
   3597 
   3598 			    table_size =
   3599 			      _NL_CURRENT_WORD (LC_COLLATE,
   3600 						_NL_COLLATE_SYMB_HASH_SIZEMB);
   3601 			    symb_table = (const int32_t *)
   3602 			      _NL_CURRENT (LC_COLLATE,
   3603 					   _NL_COLLATE_SYMB_TABLEMB);
   3604 			    extra = (const unsigned char *)
   3605 			      _NL_CURRENT (LC_COLLATE,
   3606 					   _NL_COLLATE_SYMB_EXTRAMB);
   3607 
   3608 			    /* Locate the character in the hashing table.  */
   3609 			    hash = elem_hash (str, c1);
   3610 
   3611 			    idx = 0;
   3612 			    elem = hash % table_size;
   3613 			    second = hash % (table_size - 2);
   3614 			    while (symb_table[2 * elem] != 0)
   3615 			      {
   3616 				/* First compare the hashing value.  */
   3617 				if (symb_table[2 * elem] == hash
   3618 				    && c1 == extra[symb_table[2 * elem + 1]]
   3619 				    && memcmp (str,
   3620 					       &extra[symb_table[2 * elem + 1]
   3621 						     + 1],
   3622 					       c1) == 0)
   3623 				  {
   3624 				    /* Yep, this is the entry.  */
   3625 				    idx = symb_table[2 * elem + 1];
   3626 				    idx += 1 + extra[idx];
   3627 				    break;
   3628 				  }
   3629 
   3630 				/* Next entry.  */
   3631 				elem += second;
   3632 			      }
   3633 
   3634 			    if (symb_table[2 * elem] == 0)
   3635 			      /* This is no valid character.  */
   3636 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3637 
   3638 			    /* Throw away the ] at the end of the collating
   3639 			       symbol.  */
   3640 			    PATFETCH (c);
   3641 
   3642 			    /* Now add the multibyte character(s) we found
   3643 			       to the accept list.
   3644 
   3645 			       XXX Note that this is not entirely correct.
   3646 			       we would have to match multibyte sequences
   3647 			       but this is not possible with the current
   3648 			       implementation.  Also, we have to match
   3649 			       collating symbols, which expand to more than
   3650 			       one file, as a whole and not allow the
   3651 			       individual bytes.  */
   3652 			    c1 = extra[idx++];
   3653 			    if (c1 == 1)
   3654 			      range_start = extra[idx];
   3655 			    while (c1-- > 0)
   3656 			      {
   3657 				SET_LIST_BIT (extra[idx]);
   3658 				++idx;
   3659 			      }
   3660 			  }
   3661 # endif
   3662 			had_char_class = false;
   3663 		      }
   3664                     else
   3665                       {
   3666                         c1++;
   3667                         while (c1--)
   3668                           PATUNFETCH;
   3669                         SET_LIST_BIT ('[');
   3670                         SET_LIST_BIT ('.');
   3671 			range_start = '.';
   3672                         had_char_class = false;
   3673                       }
   3674 		  }
   3675                 else
   3676                   {
   3677                     had_char_class = false;
   3678                     SET_LIST_BIT (c);
   3679 		    range_start = c;
   3680                   }
   3681               }
   3682 
   3683             /* Discard any (non)matching list bytes that are all 0 at the
   3684                end of the map.  Decrease the map-length byte too.  */
   3685             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
   3686               b[-1]--;
   3687             b += b[-1];
   3688 #endif /* WCHAR */
   3689           }
   3690           break;
   3691 
   3692 
   3693 	case '(':
   3694           if (syntax & RE_NO_BK_PARENS)
   3695             goto handle_open;
   3696           else
   3697             goto normal_char;
   3698 
   3699 
   3700         case ')':
   3701           if (syntax & RE_NO_BK_PARENS)
   3702             goto handle_close;
   3703           else
   3704             goto normal_char;
   3705 
   3706 
   3707         case '\n':
   3708           if (syntax & RE_NEWLINE_ALT)
   3709             goto handle_alt;
   3710           else
   3711             goto normal_char;
   3712 
   3713 
   3714 	case '|':
   3715           if (syntax & RE_NO_BK_VBAR)
   3716             goto handle_alt;
   3717           else
   3718             goto normal_char;
   3719 
   3720 
   3721         case '{':
   3722            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
   3723              goto handle_interval;
   3724            else
   3725              goto normal_char;
   3726 
   3727 
   3728         case '\\':
   3729           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3730 
   3731           /* Do not translate the character after the \, so that we can
   3732              distinguish, e.g., \B from \b, even if we normally would
   3733              translate, e.g., B to b.  */
   3734           PATFETCH_RAW (c);
   3735 
   3736           switch (c)
   3737             {
   3738             case '(':
   3739               if (syntax & RE_NO_BK_PARENS)
   3740                 goto normal_backslash;
   3741 
   3742             handle_open:
   3743               bufp->re_nsub++;
   3744               regnum++;
   3745 
   3746               if (COMPILE_STACK_FULL)
   3747                 {
   3748                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
   3749                             compile_stack_elt_t);
   3750                   if (compile_stack.stack == NULL) return REG_ESPACE;
   3751 
   3752                   compile_stack.size <<= 1;
   3753                 }
   3754 
   3755               /* These are the values to restore when we hit end of this
   3756                  group.  They are all relative offsets, so that if the
   3757                  whole pattern moves because of realloc, they will still
   3758                  be valid.  */
   3759               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
   3760               COMPILE_STACK_TOP.fixup_alt_jump
   3761                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
   3762               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
   3763               COMPILE_STACK_TOP.regnum = regnum;
   3764 
   3765               /* We will eventually replace the 0 with the number of
   3766                  groups inner to this one.  But do not push a
   3767                  start_memory for groups beyond the last one we can
   3768                  represent in the compiled pattern.  */
   3769               if (regnum <= MAX_REGNUM)
   3770                 {
   3771                   COMPILE_STACK_TOP.inner_group_offset = b
   3772 		    - COMPILED_BUFFER_VAR + 2;
   3773                   BUF_PUSH_3 (start_memory, regnum, 0);
   3774                 }
   3775 
   3776               compile_stack.avail++;
   3777 
   3778               fixup_alt_jump = 0;
   3779               laststart = 0;
   3780               begalt = b;
   3781 	      /* If we've reached MAX_REGNUM groups, then this open
   3782 		 won't actually generate any code, so we'll have to
   3783 		 clear pending_exact explicitly.  */
   3784 	      pending_exact = 0;
   3785               break;
   3786 
   3787 
   3788             case ')':
   3789               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
   3790 
   3791               if (COMPILE_STACK_EMPTY)
   3792 		{
   3793 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3794 		    goto normal_backslash;
   3795 		  else
   3796 		    FREE_STACK_RETURN (REG_ERPAREN);
   3797 		}
   3798 
   3799             handle_close:
   3800               if (fixup_alt_jump)
   3801                 { /* Push a dummy failure point at the end of the
   3802                      alternative for a possible future
   3803                      `pop_failure_jump' to pop.  See comments at
   3804                      `push_dummy_failure' in `re_match_2'.  */
   3805                   BUF_PUSH (push_dummy_failure);
   3806 
   3807                   /* We allocated space for this jump when we assigned
   3808                      to `fixup_alt_jump', in the `handle_alt' case below.  */
   3809                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
   3810                 }
   3811 
   3812               /* See similar code for backslashed left paren above.  */
   3813               if (COMPILE_STACK_EMPTY)
   3814 		{
   3815 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3816 		    goto normal_char;
   3817 		  else
   3818 		    FREE_STACK_RETURN (REG_ERPAREN);
   3819 		}
   3820 
   3821               /* Since we just checked for an empty stack above, this
   3822                  ``can't happen''.  */
   3823               assert (compile_stack.avail != 0);
   3824               {
   3825                 /* We don't just want to restore into `regnum', because
   3826                    later groups should continue to be numbered higher,
   3827                    as in `(ab)c(de)' -- the second group is #2.  */
   3828                 regnum_t this_group_regnum;
   3829 
   3830                 compile_stack.avail--;
   3831                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
   3832                 fixup_alt_jump
   3833                   = COMPILE_STACK_TOP.fixup_alt_jump
   3834                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
   3835                     : 0;
   3836                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
   3837                 this_group_regnum = COMPILE_STACK_TOP.regnum;
   3838 		/* If we've reached MAX_REGNUM groups, then this close
   3839 		   won't actually generate any code, so we'll have to
   3840 		   clear pending_exact explicitly.  */
   3841 		pending_exact = 0;
   3842 
   3843                 /* We're at the end of the group, so now we know how many
   3844                    groups were inside this one.  */
   3845                 if (this_group_regnum <= MAX_REGNUM)
   3846                   {
   3847 		    UCHAR_T *inner_group_loc
   3848                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
   3849 
   3850                     *inner_group_loc = regnum - this_group_regnum;
   3851                     BUF_PUSH_3 (stop_memory, this_group_regnum,
   3852                                 regnum - this_group_regnum);
   3853                   }
   3854               }
   3855               break;
   3856 
   3857 
   3858             case '|':					/* `\|'.  */
   3859               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
   3860                 goto normal_backslash;
   3861             handle_alt:
   3862               if (syntax & RE_LIMITED_OPS)
   3863                 goto normal_char;
   3864 
   3865               /* Insert before the previous alternative a jump which
   3866                  jumps to this alternative if the former fails.  */
   3867               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3868               INSERT_JUMP (on_failure_jump, begalt,
   3869 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
   3870               pending_exact = 0;
   3871               b += 1 + OFFSET_ADDRESS_SIZE;
   3872 
   3873               /* The alternative before this one has a jump after it
   3874                  which gets executed if it gets matched.  Adjust that
   3875                  jump so it will jump to this alternative's analogous
   3876                  jump (put in below, which in turn will jump to the next
   3877                  (if any) alternative's such jump, etc.).  The last such
   3878                  jump jumps to the correct final destination.  A picture:
   3879                           _____ _____
   3880                           |   | |   |
   3881                           |   v |   v
   3882                          a | b   | c
   3883 
   3884                  If we are at `b', then fixup_alt_jump right now points to a
   3885                  three-byte space after `a'.  We'll put in the jump, set
   3886                  fixup_alt_jump to right after `b', and leave behind three
   3887                  bytes which we'll fill in when we get to after `c'.  */
   3888 
   3889               if (fixup_alt_jump)
   3890                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   3891 
   3892               /* Mark and leave space for a jump after this alternative,
   3893                  to be filled in later either by next alternative or
   3894                  when we know we're at the end of a series of alternatives.  */
   3895               fixup_alt_jump = b;
   3896               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3897               b += 1 + OFFSET_ADDRESS_SIZE;
   3898 
   3899               laststart = 0;
   3900               begalt = b;
   3901               break;
   3902 
   3903 
   3904             case '{':
   3905               /* If \{ is a literal.  */
   3906               if (!(syntax & RE_INTERVALS)
   3907                      /* If we're at `\{' and it's not the open-interval
   3908                         operator.  */
   3909 		  || (syntax & RE_NO_BK_BRACES))
   3910                 goto normal_backslash;
   3911 
   3912             handle_interval:
   3913               {
   3914                 /* If got here, then the syntax allows intervals.  */
   3915 
   3916                 /* At least (most) this many matches must be made.  */
   3917                 int lower_bound = -1, upper_bound = -1;
   3918 
   3919 		/* Place in the uncompiled pattern (i.e., just after
   3920 		   the '{') to go back to if the interval is invalid.  */
   3921 		const CHAR_T *beg_interval = p;
   3922 
   3923                 if (p == pend)
   3924 		  goto invalid_interval;
   3925 
   3926                 GET_UNSIGNED_NUMBER (lower_bound);
   3927 
   3928                 if (c == ',')
   3929                   {
   3930                     GET_UNSIGNED_NUMBER (upper_bound);
   3931 		    if (upper_bound < 0)
   3932 		      upper_bound = RE_DUP_MAX;
   3933                   }
   3934                 else
   3935                   /* Interval such as `{1}' => match exactly once. */
   3936                   upper_bound = lower_bound;
   3937 
   3938                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
   3939 		  goto invalid_interval;
   3940 
   3941                 if (!(syntax & RE_NO_BK_BRACES))
   3942                   {
   3943 		    if (c != '\\' || p == pend)
   3944 		      goto invalid_interval;
   3945                     PATFETCH (c);
   3946                   }
   3947 
   3948                 if (c != '}')
   3949 		  goto invalid_interval;
   3950 
   3951                 /* If it's invalid to have no preceding re.  */
   3952                 if (!laststart)
   3953                   {
   3954 		    if (syntax & RE_CONTEXT_INVALID_OPS
   3955 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
   3956                       FREE_STACK_RETURN (REG_BADRPT);
   3957                     else if (syntax & RE_CONTEXT_INDEP_OPS)
   3958                       laststart = b;
   3959                     else
   3960                       goto unfetch_interval;
   3961                   }
   3962 
   3963                 /* We just parsed a valid interval.  */
   3964 
   3965                 if (RE_DUP_MAX < upper_bound)
   3966 		  FREE_STACK_RETURN (REG_BADBR);
   3967 
   3968                 /* If the upper bound is zero, don't want to succeed at
   3969                    all; jump from `laststart' to `b + 3', which will be
   3970 		   the end of the buffer after we insert the jump.  */
   3971 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
   3972 		   instead of 'b + 3'.  */
   3973                  if (upper_bound == 0)
   3974                    {
   3975                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3976                      INSERT_JUMP (jump, laststart, b + 1
   3977 				  + OFFSET_ADDRESS_SIZE);
   3978                      b += 1 + OFFSET_ADDRESS_SIZE;
   3979                    }
   3980 
   3981                  /* Otherwise, we have a nontrivial interval.  When
   3982                     we're all done, the pattern will look like:
   3983                       set_number_at <jump count> <upper bound>
   3984                       set_number_at <succeed_n count> <lower bound>
   3985                       succeed_n <after jump addr> <succeed_n count>
   3986                       <body of loop>
   3987                       jump_n <succeed_n addr> <jump count>
   3988                     (The upper bound and `jump_n' are omitted if
   3989                     `upper_bound' is 1, though.)  */
   3990                  else
   3991                    { /* If the upper bound is > 1, we need to insert
   3992                         more at the end of the loop.  */
   3993                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
   3994 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
   3995 
   3996                      GET_BUFFER_SPACE (nbytes);
   3997 
   3998                      /* Initialize lower bound of the `succeed_n', even
   3999                         though it will be set during matching by its
   4000                         attendant `set_number_at' (inserted next),
   4001                         because `re_compile_fastmap' needs to know.
   4002                         Jump to the `jump_n' we might insert below.  */
   4003                      INSERT_JUMP2 (succeed_n, laststart,
   4004                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
   4005 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
   4006 				   , lower_bound);
   4007                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4008 
   4009                      /* Code to initialize the lower bound.  Insert
   4010                         before the `succeed_n'.  The `5' is the last two
   4011                         bytes of this `set_number_at', plus 3 bytes of
   4012                         the following `succeed_n'.  */
   4013 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
   4014 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
   4015 			of the following `succeed_n'.  */
   4016                      PREFIX(insert_op2) (set_number_at, laststart, 1
   4017 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
   4018                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4019 
   4020                      if (upper_bound > 1)
   4021                        { /* More than one repetition is allowed, so
   4022                             append a backward jump to the `succeed_n'
   4023                             that starts this interval.
   4024 
   4025                             When we've reached this during matching,
   4026                             we'll have matched the interval once, so
   4027                             jump back only `upper_bound - 1' times.  */
   4028                          STORE_JUMP2 (jump_n, b, laststart
   4029 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
   4030                                       upper_bound - 1);
   4031                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4032 
   4033                          /* The location we want to set is the second
   4034                             parameter of the `jump_n'; that is `b-2' as
   4035                             an absolute address.  `laststart' will be
   4036                             the `set_number_at' we're about to insert;
   4037                             `laststart+3' the number to set, the source
   4038                             for the relative address.  But we are
   4039                             inserting into the middle of the pattern --
   4040                             so everything is getting moved up by 5.
   4041                             Conclusion: (b - 2) - (laststart + 3) + 5,
   4042                             i.e., b - laststart.
   4043 
   4044                             We insert this at the beginning of the loop
   4045                             so that if we fail during matching, we'll
   4046                             reinitialize the bounds.  */
   4047                          PREFIX(insert_op2) (set_number_at, laststart,
   4048 					     b - laststart,
   4049 					     upper_bound - 1, b);
   4050                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4051                        }
   4052                    }
   4053                 pending_exact = 0;
   4054 		break;
   4055 
   4056 	      invalid_interval:
   4057 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
   4058 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
   4059 	      unfetch_interval:
   4060 		/* Match the characters as literals.  */
   4061 		p = beg_interval;
   4062 		c = '{';
   4063 		if (syntax & RE_NO_BK_BRACES)
   4064 		  goto normal_char;
   4065 		else
   4066 		  goto normal_backslash;
   4067 	      }
   4068 
   4069 #ifdef emacs
   4070             /* There is no way to specify the before_dot and after_dot
   4071                operators.  rms says this is ok.  --karl  */
   4072             case '=':
   4073               BUF_PUSH (at_dot);
   4074               break;
   4075 
   4076             case 's':
   4077               laststart = b;
   4078               PATFETCH (c);
   4079               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
   4080               break;
   4081 
   4082             case 'S':
   4083               laststart = b;
   4084               PATFETCH (c);
   4085               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
   4086               break;
   4087 #endif /* emacs */
   4088 
   4089 
   4090             case 'w':
   4091 	      if (syntax & RE_NO_GNU_OPS)
   4092 		goto normal_char;
   4093               laststart = b;
   4094               BUF_PUSH (wordchar);
   4095               break;
   4096 
   4097 
   4098             case 'W':
   4099 	      if (syntax & RE_NO_GNU_OPS)
   4100 		goto normal_char;
   4101               laststart = b;
   4102               BUF_PUSH (notwordchar);
   4103               break;
   4104 
   4105 
   4106             case '<':
   4107 	      if (syntax & RE_NO_GNU_OPS)
   4108 		goto normal_char;
   4109               BUF_PUSH (wordbeg);
   4110               break;
   4111 
   4112             case '>':
   4113 	      if (syntax & RE_NO_GNU_OPS)
   4114 		goto normal_char;
   4115               BUF_PUSH (wordend);
   4116               break;
   4117 
   4118             case 'b':
   4119 	      if (syntax & RE_NO_GNU_OPS)
   4120 		goto normal_char;
   4121               BUF_PUSH (wordbound);
   4122               break;
   4123 
   4124             case 'B':
   4125 	      if (syntax & RE_NO_GNU_OPS)
   4126 		goto normal_char;
   4127               BUF_PUSH (notwordbound);
   4128               break;
   4129 
   4130             case '`':
   4131 	      if (syntax & RE_NO_GNU_OPS)
   4132 		goto normal_char;
   4133               BUF_PUSH (begbuf);
   4134               break;
   4135 
   4136             case '\'':
   4137 	      if (syntax & RE_NO_GNU_OPS)
   4138 		goto normal_char;
   4139               BUF_PUSH (endbuf);
   4140               break;
   4141 
   4142             case '1': case '2': case '3': case '4': case '5':
   4143             case '6': case '7': case '8': case '9':
   4144               if (syntax & RE_NO_BK_REFS)
   4145                 goto normal_char;
   4146 
   4147               c1 = c - '0';
   4148 
   4149               if (c1 > regnum)
   4150                 FREE_STACK_RETURN (REG_ESUBREG);
   4151 
   4152               /* Can't back reference to a subexpression if inside of it.  */
   4153               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
   4154                 goto normal_char;
   4155 
   4156               laststart = b;
   4157               BUF_PUSH_2 (duplicate, c1);
   4158               break;
   4159 
   4160 
   4161             case '+':
   4162             case '?':
   4163               if (syntax & RE_BK_PLUS_QM)
   4164                 goto handle_plus;
   4165               else
   4166                 goto normal_backslash;
   4167 
   4168             default:
   4169             normal_backslash:
   4170               /* You might think it would be useful for \ to mean
   4171                  not to translate; but if we don't translate it
   4172                  it will never match anything.  */
   4173               c = TRANSLATE (c);
   4174               goto normal_char;
   4175             }
   4176           break;
   4177 
   4178 
   4179 	default:
   4180         /* Expects the character in `c'.  */
   4181 	normal_char:
   4182 	      /* If no exactn currently being built.  */
   4183           if (!pending_exact
   4184 #ifdef WCHAR
   4185 	      /* If last exactn handle binary(or character) and
   4186 		 new exactn handle character(or binary).  */
   4187 	      || is_exactn_bin != is_binary[p - 1 - pattern]
   4188 #endif /* WCHAR */
   4189 
   4190               /* If last exactn not at current position.  */
   4191               || pending_exact + *pending_exact + 1 != b
   4192 
   4193               /* We have only one byte following the exactn for the count.  */
   4194 	      || *pending_exact == (1 << BYTEWIDTH) - 1
   4195 
   4196               /* If followed by a repetition operator.  */
   4197               || *p == '*' || *p == '^'
   4198 	      || ((syntax & RE_BK_PLUS_QM)
   4199 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
   4200 		  : (*p == '+' || *p == '?'))
   4201 	      || ((syntax & RE_INTERVALS)
   4202                   && ((syntax & RE_NO_BK_BRACES)
   4203 		      ? *p == '{'
   4204                       : (p[0] == '\\' && p[1] == '{'))))
   4205 	    {
   4206 	      /* Start building a new exactn.  */
   4207 
   4208               laststart = b;
   4209 
   4210 #ifdef WCHAR
   4211 	      /* Is this exactn binary data or character? */
   4212 	      is_exactn_bin = is_binary[p - 1 - pattern];
   4213 	      if (is_exactn_bin)
   4214 		  BUF_PUSH_2 (exactn_bin, 0);
   4215 	      else
   4216 		  BUF_PUSH_2 (exactn, 0);
   4217 #else
   4218 	      BUF_PUSH_2 (exactn, 0);
   4219 #endif /* WCHAR */
   4220 	      pending_exact = b - 1;
   4221             }
   4222 
   4223 	  BUF_PUSH (c);
   4224           (*pending_exact)++;
   4225 	  break;
   4226         } /* switch (c) */
   4227     } /* while p != pend */
   4228 
   4229 
   4230   /* Through the pattern now.  */
   4231 
   4232   if (fixup_alt_jump)
   4233     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   4234 
   4235   if (!COMPILE_STACK_EMPTY)
   4236     FREE_STACK_RETURN (REG_EPAREN);
   4237 
   4238   /* If we don't want backtracking, force success
   4239      the first time we reach the end of the compiled pattern.  */
   4240   if (syntax & RE_NO_POSIX_BACKTRACKING)
   4241     BUF_PUSH (succeed);
   4242 
   4243 #ifdef WCHAR
   4244   free (pattern);
   4245   free (mbs_offset);
   4246   free (is_binary);
   4247 #endif
   4248   free (compile_stack.stack);
   4249 
   4250   /* We have succeeded; set the length of the buffer.  */
   4251 #ifdef WCHAR
   4252   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
   4253 #else
   4254   bufp->used = b - bufp->buffer;
   4255 #endif
   4256 
   4257 #ifdef DEBUG
   4258   if (debug)
   4259     {
   4260       DEBUG_PRINT1 ("\nCompiled pattern: \n");
   4261       PREFIX(print_compiled_pattern) (bufp);
   4262     }
   4263 #endif /* DEBUG */
   4264 
   4265 #ifndef MATCH_MAY_ALLOCATE
   4266   /* Initialize the failure stack to the largest possible stack.  This
   4267      isn't necessary unless we're trying to avoid calling alloca in
   4268      the search and match routines.  */
   4269   {
   4270     int num_regs = bufp->re_nsub + 1;
   4271 
   4272     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
   4273        is strictly greater than re_max_failures, the largest possible stack
   4274        is 2 * re_max_failures failure points.  */
   4275     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
   4276       {
   4277 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
   4278 
   4279 # ifdef emacs
   4280 	if (! fail_stack.stack)
   4281 	  fail_stack.stack
   4282 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
   4283 				    * sizeof (PREFIX(fail_stack_elt_t)));
   4284 	else
   4285 	  fail_stack.stack
   4286 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
   4287 				     (fail_stack.size
   4288 				      * sizeof (PREFIX(fail_stack_elt_t))));
   4289 # else /* not emacs */
   4290 	if (! fail_stack.stack)
   4291 	  fail_stack.stack
   4292 	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
   4293 				   * sizeof (PREFIX(fail_stack_elt_t)));
   4294 	else
   4295 	  fail_stack.stack
   4296 	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
   4297 					    (fail_stack.size
   4298 				     * sizeof (PREFIX(fail_stack_elt_t))));
   4299 # endif /* not emacs */
   4300       }
   4301 
   4302    PREFIX(regex_grow_registers) (num_regs);
   4303   }
   4304 #endif /* not MATCH_MAY_ALLOCATE */
   4305 
   4306   return REG_NOERROR;
   4307 } /* regex_compile */
   4308 
   4309 /* Subroutines for `regex_compile'.  */
   4310 
   4311 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
   4312 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4313 
   4314 static void
   4315 PREFIX(store_op1) (op, loc, arg)
   4316     re_opcode_t op;
   4317     UCHAR_T *loc;
   4318     int arg;
   4319 {
   4320   *loc = (UCHAR_T) op;
   4321   STORE_NUMBER (loc + 1, arg);
   4322 }
   4323 
   4324 
   4325 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4326 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4327 
   4328 static void
   4329 PREFIX(store_op2) (op, loc, arg1, arg2)
   4330     re_opcode_t op;
   4331     UCHAR_T *loc;
   4332     int arg1, arg2;
   4333 {
   4334   *loc = (UCHAR_T) op;
   4335   STORE_NUMBER (loc + 1, arg1);
   4336   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
   4337 }
   4338 
   4339 
   4340 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
   4341    for OP followed by two-byte integer parameter ARG.  */
   4342 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4343 
   4344 static void
   4345 PREFIX(insert_op1) (op, loc, arg, end)
   4346     re_opcode_t op;
   4347     UCHAR_T *loc;
   4348     int arg;
   4349     UCHAR_T *end;
   4350 {
   4351   register UCHAR_T *pfrom = end;
   4352   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
   4353 
   4354   while (pfrom != loc)
   4355     *--pto = *--pfrom;
   4356 
   4357   PREFIX(store_op1) (op, loc, arg);
   4358 }
   4359 
   4360 
   4361 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4362 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4363 
   4364 static void
   4365 PREFIX(insert_op2) (op, loc, arg1, arg2, end)
   4366     re_opcode_t op;
   4367     UCHAR_T *loc;
   4368     int arg1, arg2;
   4369     UCHAR_T *end;
   4370 {
   4371   register UCHAR_T *pfrom = end;
   4372   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
   4373 
   4374   while (pfrom != loc)
   4375     *--pto = *--pfrom;
   4376 
   4377   PREFIX(store_op2) (op, loc, arg1, arg2);
   4378 }
   4379 
   4380 
   4381 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
   4382    after an alternative or a begin-subexpression.  We assume there is at
   4383    least one character before the ^.  */
   4384 
   4385 static boolean
   4386 PREFIX(at_begline_loc_p) (pattern, p, syntax)
   4387     const CHAR_T *pattern, *p;
   4388     reg_syntax_t syntax;
   4389 {
   4390   const CHAR_T *prev = p - 2;
   4391   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
   4392 
   4393   return
   4394        /* After a subexpression?  */
   4395        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
   4396        /* After an alternative?  */
   4397     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
   4398 }
   4399 
   4400 
   4401 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
   4402    at least one character after the $, i.e., `P < PEND'.  */
   4403 
   4404 static boolean
   4405 PREFIX(at_endline_loc_p) (p, pend, syntax)
   4406     const CHAR_T *p, *pend;
   4407     reg_syntax_t syntax;
   4408 {
   4409   const CHAR_T *next = p;
   4410   boolean next_backslash = *next == '\\';
   4411   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
   4412 
   4413   return
   4414        /* Before a subexpression?  */
   4415        (syntax & RE_NO_BK_PARENS ? *next == ')'
   4416         : next_backslash && next_next && *next_next == ')')
   4417        /* Before an alternative?  */
   4418     || (syntax & RE_NO_BK_VBAR ? *next == '|'
   4419         : next_backslash && next_next && *next_next == '|');
   4420 }
   4421 
   4422 #else /* not INSIDE_RECURSION */
   4423 
   4424 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
   4425    false if it's not.  */
   4426 
   4427 static boolean
   4428 group_in_compile_stack (compile_stack, regnum)
   4429     compile_stack_type compile_stack;
   4430     regnum_t regnum;
   4431 {
   4432   int this_element;
   4433 
   4434   for (this_element = compile_stack.avail - 1;
   4435        this_element >= 0;
   4436        this_element--)
   4437     if (compile_stack.stack[this_element].regnum == regnum)
   4438       return true;
   4439 
   4440   return false;
   4441 }
   4442 #endif /* not INSIDE_RECURSION */
   4443 
   4444 #ifdef INSIDE_RECURSION
   4445 
   4446 #ifdef WCHAR
   4447 /* This insert space, which size is "num", into the pattern at "loc".
   4448    "end" must point the end of the allocated buffer.  */
   4449 static void
   4450 insert_space (num, loc, end)
   4451      int num;
   4452      CHAR_T *loc;
   4453      CHAR_T *end;
   4454 {
   4455   register CHAR_T *pto = end;
   4456   register CHAR_T *pfrom = end - num;
   4457 
   4458   while (pfrom >= loc)
   4459     *pto-- = *pfrom--;
   4460 }
   4461 #endif /* WCHAR */
   4462 
   4463 #ifdef WCHAR
   4464 static reg_errcode_t
   4465 wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
   4466 		   char_set)
   4467      CHAR_T range_start_char;
   4468      const CHAR_T **p_ptr, *pend;
   4469      CHAR_T *char_set, *b;
   4470      RE_TRANSLATE_TYPE translate;
   4471      reg_syntax_t syntax;
   4472 {
   4473   const CHAR_T *p = *p_ptr;
   4474   CHAR_T range_start, range_end;
   4475   reg_errcode_t ret;
   4476 # ifdef _LIBC
   4477   uint32_t nrules;
   4478   uint32_t start_val, end_val;
   4479 # endif
   4480   if (p == pend)
   4481     return REG_ERANGE;
   4482 
   4483 # ifdef _LIBC
   4484   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   4485   if (nrules != 0)
   4486     {
   4487       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
   4488 						       _NL_COLLATE_COLLSEQWC);
   4489       const unsigned char *extra = (const unsigned char *)
   4490 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   4491 
   4492       if (range_start_char < -1)
   4493 	{
   4494 	  /* range_start is a collating symbol.  */
   4495 	  int32_t *wextra;
   4496 	  /* Retreive the index and get collation sequence value.  */
   4497 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
   4498 	  start_val = wextra[1 + *wextra];
   4499 	}
   4500       else
   4501 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
   4502 
   4503       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
   4504 
   4505       /* Report an error if the range is empty and the syntax prohibits
   4506 	 this.  */
   4507       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4508 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
   4509 
   4510       /* Insert space to the end of the char_ranges.  */
   4511       insert_space(2, b - char_set[5] - 2, b - 1);
   4512       *(b - char_set[5] - 2) = (wchar_t)start_val;
   4513       *(b - char_set[5] - 1) = (wchar_t)end_val;
   4514       char_set[4]++; /* ranges_index */
   4515     }
   4516   else
   4517 # endif
   4518     {
   4519       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
   4520 	range_start_char;
   4521       range_end = TRANSLATE (p[0]);
   4522       /* Report an error if the range is empty and the syntax prohibits
   4523 	 this.  */
   4524       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4525 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
   4526 
   4527       /* Insert space to the end of the char_ranges.  */
   4528       insert_space(2, b - char_set[5] - 2, b - 1);
   4529       *(b - char_set[5] - 2) = range_start;
   4530       *(b - char_set[5] - 1) = range_end;
   4531       char_set[4]++; /* ranges_index */
   4532     }
   4533   /* Have to increment the pointer into the pattern string, so the
   4534      caller isn't still at the ending character.  */
   4535   (*p_ptr)++;
   4536 
   4537   return ret;
   4538 }
   4539 #else /* BYTE */
   4540 /* Read the ending character of a range (in a bracket expression) from the
   4541    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
   4542    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
   4543    Then we set the translation of all bits between the starting and
   4544    ending characters (inclusive) in the compiled pattern B.
   4545 
   4546    Return an error code.
   4547 
   4548    We use these short variable names so we can use the same macros as
   4549    `regex_compile' itself.  */
   4550 
   4551 static reg_errcode_t
   4552 byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
   4553      unsigned int range_start_char;
   4554      const char **p_ptr, *pend;
   4555      RE_TRANSLATE_TYPE translate;
   4556      reg_syntax_t syntax;
   4557      unsigned char *b;
   4558 {
   4559   unsigned this_char;
   4560   const char *p = *p_ptr;
   4561   reg_errcode_t ret;
   4562 # if _LIBC
   4563   const unsigned char *collseq;
   4564   unsigned int start_colseq;
   4565   unsigned int end_colseq;
   4566 # else
   4567   unsigned end_char;
   4568 # endif
   4569 
   4570   if (p == pend)
   4571     return REG_ERANGE;
   4572 
   4573   /* Have to increment the pointer into the pattern string, so the
   4574      caller isn't still at the ending character.  */
   4575   (*p_ptr)++;
   4576 
   4577   /* Report an error if the range is empty and the syntax prohibits this.  */
   4578   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
   4579 
   4580 # if _LIBC
   4581   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
   4582 						 _NL_COLLATE_COLLSEQMB);
   4583 
   4584   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
   4585   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
   4586   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
   4587     {
   4588       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
   4589 
   4590       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
   4591 	{
   4592 	  SET_LIST_BIT (TRANSLATE (this_char));
   4593 	  ret = REG_NOERROR;
   4594 	}
   4595     }
   4596 # else
   4597   /* Here we see why `this_char' has to be larger than an `unsigned
   4598      char' -- we would otherwise go into an infinite loop, since all
   4599      characters <= 0xff.  */
   4600   range_start_char = TRANSLATE (range_start_char);
   4601   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
   4602      and some compilers cast it to int implicitly, so following for_loop
   4603      may fall to (almost) infinite loop.
   4604      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
   4605      To avoid this, we cast p[0] to unsigned int and truncate it.  */
   4606   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
   4607 
   4608   for (this_char = range_start_char; this_char <= end_char; ++this_char)
   4609     {
   4610       SET_LIST_BIT (TRANSLATE (this_char));
   4611       ret = REG_NOERROR;
   4612     }
   4613 # endif
   4614 
   4615   return ret;
   4616 }
   4617 #endif /* WCHAR */
   4618 
   4619 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   4621    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   4622    characters can start a string that matches the pattern.  This fastmap
   4623    is used by re_search to skip quickly over impossible starting points.
   4624 
   4625    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   4626    area as BUFP->fastmap.
   4627 
   4628    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   4629    the pattern buffer.
   4630 
   4631    Returns 0 if we succeed, -2 if an internal error.   */
   4632 
   4633 #ifdef WCHAR
   4634 /* local function for re_compile_fastmap.
   4635    truncate wchar_t character to char.  */
   4636 static unsigned char truncate_wchar (CHAR_T c);
   4637 
   4638 static unsigned char
   4639 truncate_wchar (c)
   4640      CHAR_T c;
   4641 {
   4642   unsigned char buf[MB_CUR_MAX];
   4643   mbstate_t state;
   4644   int retval;
   4645   memset (&state, '\0', sizeof (state));
   4646   retval = wcrtomb (buf, c, &state);
   4647   return retval > 0 ? buf[0] : (unsigned char) c;
   4648 }
   4649 #endif /* WCHAR */
   4650 
   4651 static int
   4652 PREFIX(re_compile_fastmap) (bufp)
   4653      struct re_pattern_buffer *bufp;
   4654 {
   4655   int j, k;
   4656 #ifdef MATCH_MAY_ALLOCATE
   4657   PREFIX(fail_stack_type) fail_stack;
   4658 #endif
   4659 #ifndef REGEX_MALLOC
   4660   char *destination;
   4661 #endif
   4662 
   4663   register char *fastmap = bufp->fastmap;
   4664 
   4665 #ifdef WCHAR
   4666   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   4667      pattern to (char*) in regex_compile.  */
   4668   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
   4669   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
   4670 #else /* BYTE */
   4671   UCHAR_T *pattern = bufp->buffer;
   4672   register UCHAR_T *pend = pattern + bufp->used;
   4673 #endif /* WCHAR */
   4674   UCHAR_T *p = pattern;
   4675 
   4676 #ifdef REL_ALLOC
   4677   /* This holds the pointer to the failure stack, when
   4678      it is allocated relocatably.  */
   4679   fail_stack_elt_t *failure_stack_ptr;
   4680 #endif
   4681 
   4682   /* Assume that each path through the pattern can be null until
   4683      proven otherwise.  We set this false at the bottom of switch
   4684      statement, to which we get only if a particular path doesn't
   4685      match the empty string.  */
   4686   boolean path_can_be_null = true;
   4687 
   4688   /* We aren't doing a `succeed_n' to begin with.  */
   4689   boolean succeed_n_p = false;
   4690 
   4691   assert (fastmap != NULL && p != NULL);
   4692 
   4693   INIT_FAIL_STACK ();
   4694   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
   4695   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
   4696   bufp->can_be_null = 0;
   4697 
   4698   while (1)
   4699     {
   4700       if (p == pend || *p == succeed)
   4701 	{
   4702 	  /* We have reached the (effective) end of pattern.  */
   4703 	  if (!FAIL_STACK_EMPTY ())
   4704 	    {
   4705 	      bufp->can_be_null |= path_can_be_null;
   4706 
   4707 	      /* Reset for next path.  */
   4708 	      path_can_be_null = true;
   4709 
   4710 	      p = fail_stack.stack[--fail_stack.avail].pointer;
   4711 
   4712 	      continue;
   4713 	    }
   4714 	  else
   4715 	    break;
   4716 	}
   4717 
   4718       /* We should never be about to go beyond the end of the pattern.  */
   4719       assert (p < pend);
   4720 
   4721       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   4722 	{
   4723 
   4724         /* I guess the idea here is to simply not bother with a fastmap
   4725            if a backreference is used, since it's too hard to figure out
   4726            the fastmap for the corresponding group.  Setting
   4727            `can_be_null' stops `re_search_2' from using the fastmap, so
   4728            that is all we do.  */
   4729 	case duplicate:
   4730 	  bufp->can_be_null = 1;
   4731           goto done;
   4732 
   4733 
   4734       /* Following are the cases which match a character.  These end
   4735          with `break'.  */
   4736 
   4737 #ifdef WCHAR
   4738 	case exactn:
   4739           fastmap[truncate_wchar(p[1])] = 1;
   4740 	  break;
   4741 #else /* BYTE */
   4742 	case exactn:
   4743           fastmap[p[1]] = 1;
   4744 	  break;
   4745 #endif /* WCHAR */
   4746 #ifdef MBS_SUPPORT
   4747 	case exactn_bin:
   4748 	  fastmap[p[1]] = 1;
   4749 	  break;
   4750 #endif
   4751 
   4752 #ifdef WCHAR
   4753         /* It is hard to distinguish fastmap from (multi byte) characters
   4754            which depends on current locale.  */
   4755         case charset:
   4756 	case charset_not:
   4757 	case wordchar:
   4758 	case notwordchar:
   4759           bufp->can_be_null = 1;
   4760           goto done;
   4761 #else /* BYTE */
   4762         case charset:
   4763           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4764 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
   4765               fastmap[j] = 1;
   4766 	  break;
   4767 
   4768 
   4769 	case charset_not:
   4770 	  /* Chars beyond end of map must be allowed.  */
   4771 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
   4772             fastmap[j] = 1;
   4773 
   4774 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4775 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
   4776               fastmap[j] = 1;
   4777           break;
   4778 
   4779 
   4780 	case wordchar:
   4781 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4782 	    if (SYNTAX (j) == Sword)
   4783 	      fastmap[j] = 1;
   4784 	  break;
   4785 
   4786 
   4787 	case notwordchar:
   4788 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4789 	    if (SYNTAX (j) != Sword)
   4790 	      fastmap[j] = 1;
   4791 	  break;
   4792 #endif /* WCHAR */
   4793 
   4794         case anychar:
   4795 	  {
   4796 	    int fastmap_newline = fastmap['\n'];
   4797 
   4798 	    /* `.' matches anything ...  */
   4799 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
   4800 	      fastmap[j] = 1;
   4801 
   4802 	    /* ... except perhaps newline.  */
   4803 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
   4804 	      fastmap['\n'] = fastmap_newline;
   4805 
   4806 	    /* Return if we have already set `can_be_null'; if we have,
   4807 	       then the fastmap is irrelevant.  Something's wrong here.  */
   4808 	    else if (bufp->can_be_null)
   4809 	      goto done;
   4810 
   4811 	    /* Otherwise, have to check alternative paths.  */
   4812 	    break;
   4813 	  }
   4814 
   4815 #ifdef emacs
   4816         case syntaxspec:
   4817 	  k = *p++;
   4818 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4819 	    if (SYNTAX (j) == (enum syntaxcode) k)
   4820 	      fastmap[j] = 1;
   4821 	  break;
   4822 
   4823 
   4824 	case notsyntaxspec:
   4825 	  k = *p++;
   4826 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4827 	    if (SYNTAX (j) != (enum syntaxcode) k)
   4828 	      fastmap[j] = 1;
   4829 	  break;
   4830 
   4831 
   4832       /* All cases after this match the empty string.  These end with
   4833          `continue'.  */
   4834 
   4835 
   4836 	case before_dot:
   4837 	case at_dot:
   4838 	case after_dot:
   4839           continue;
   4840 #endif /* emacs */
   4841 
   4842 
   4843         case no_op:
   4844         case begline:
   4845         case endline:
   4846 	case begbuf:
   4847 	case endbuf:
   4848 	case wordbound:
   4849 	case notwordbound:
   4850 	case wordbeg:
   4851 	case wordend:
   4852         case push_dummy_failure:
   4853           continue;
   4854 
   4855 
   4856 	case jump_n:
   4857         case pop_failure_jump:
   4858 	case maybe_pop_jump:
   4859 	case jump:
   4860         case jump_past_alt:
   4861 	case dummy_failure_jump:
   4862           EXTRACT_NUMBER_AND_INCR (j, p);
   4863 	  p += j;
   4864 	  if (j > 0)
   4865 	    continue;
   4866 
   4867           /* Jump backward implies we just went through the body of a
   4868              loop and matched nothing.  Opcode jumped to should be
   4869              `on_failure_jump' or `succeed_n'.  Just treat it like an
   4870              ordinary jump.  For a * loop, it has pushed its failure
   4871              point already; if so, discard that as redundant.  */
   4872           if ((re_opcode_t) *p != on_failure_jump
   4873 	      && (re_opcode_t) *p != succeed_n)
   4874 	    continue;
   4875 
   4876           p++;
   4877           EXTRACT_NUMBER_AND_INCR (j, p);
   4878           p += j;
   4879 
   4880           /* If what's on the stack is where we are now, pop it.  */
   4881           if (!FAIL_STACK_EMPTY ()
   4882 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
   4883             fail_stack.avail--;
   4884 
   4885           continue;
   4886 
   4887 
   4888         case on_failure_jump:
   4889         case on_failure_keep_string_jump:
   4890 	handle_on_failure_jump:
   4891           EXTRACT_NUMBER_AND_INCR (j, p);
   4892 
   4893           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
   4894              end of the pattern.  We don't want to push such a point,
   4895              since when we restore it above, entering the switch will
   4896              increment `p' past the end of the pattern.  We don't need
   4897              to push such a point since we obviously won't find any more
   4898              fastmap entries beyond `pend'.  Such a pattern can match
   4899              the null string, though.  */
   4900           if (p + j < pend)
   4901             {
   4902               if (!PUSH_PATTERN_OP (p + j, fail_stack))
   4903 		{
   4904 		  RESET_FAIL_STACK ();
   4905 		  return -2;
   4906 		}
   4907             }
   4908           else
   4909             bufp->can_be_null = 1;
   4910 
   4911           if (succeed_n_p)
   4912             {
   4913               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
   4914               succeed_n_p = false;
   4915 	    }
   4916 
   4917           continue;
   4918 
   4919 
   4920 	case succeed_n:
   4921           /* Get to the number of times to succeed.  */
   4922           p += OFFSET_ADDRESS_SIZE;
   4923 
   4924           /* Increment p past the n for when k != 0.  */
   4925           EXTRACT_NUMBER_AND_INCR (k, p);
   4926           if (k == 0)
   4927 	    {
   4928               p -= 2 * OFFSET_ADDRESS_SIZE;
   4929   	      succeed_n_p = true;  /* Spaghetti code alert.  */
   4930               goto handle_on_failure_jump;
   4931             }
   4932           continue;
   4933 
   4934 
   4935 	case set_number_at:
   4936           p += 2 * OFFSET_ADDRESS_SIZE;
   4937           continue;
   4938 
   4939 
   4940 	case start_memory:
   4941         case stop_memory:
   4942 	  p += 2;
   4943 	  continue;
   4944 
   4945 
   4946 	default:
   4947           abort (); /* We have listed all the cases.  */
   4948         } /* switch *p++ */
   4949 
   4950       /* Getting here means we have found the possible starting
   4951          characters for one path of the pattern -- and that the empty
   4952          string does not match.  We need not follow this path further.
   4953          Instead, look at the next alternative (remembered on the
   4954          stack), or quit if no more.  The test at the top of the loop
   4955          does these things.  */
   4956       path_can_be_null = false;
   4957       p = pend;
   4958     } /* while p */
   4959 
   4960   /* Set `can_be_null' for the last path (also the first path, if the
   4961      pattern is empty).  */
   4962   bufp->can_be_null |= path_can_be_null;
   4963 
   4964  done:
   4965   RESET_FAIL_STACK ();
   4966   return 0;
   4967 }
   4968 
   4969 #else /* not INSIDE_RECURSION */
   4970 
/* Public entry point for fastmap construction.

   Hands BUFP to the byte-oriented implementation, or -- when the library
   is built with MBS_SUPPORT and the current locale is multibyte
   (MB_CUR_MAX != 1) -- to the wide-character implementation.  The value
   returned by the selected implementation is passed back unchanged.  */
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
  int status;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
    status = byte_re_compile_fastmap (bufp);
  else
    status = wcs_re_compile_fastmap (bufp);
# else
  status = byte_re_compile_fastmap (bufp);
# endif

  return status;
} /* re_compile_fastmap */
   4982 #ifdef _LIBC
   4983 weak_alias (__re_compile_fastmap, re_compile_fastmap)
   4984 #endif
   4985 
   4986 
   4988 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   4989    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   4990    this memory for recording register information.  STARTS and ENDS
   4991    must be allocated using the malloc library routine, and must each
   4992    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   4993 
   4994    If NUM_REGS == 0, then subsequent matches should allocate their own
   4995    register data.
   4996 
   4997    Unless this function is called, the first search or match using
   4998    PATTERN_BUFFER will allocate its own register data, without
   4999    freeing the old data.  */
   5000 
   5001 void
   5002 re_set_registers (bufp, regs, num_regs, starts, ends)
   5003     struct re_pattern_buffer *bufp;
   5004     struct re_registers *regs;
   5005     unsigned num_regs;
   5006     regoff_t *starts, *ends;
   5007 {
   5008   if (num_regs)
   5009     {
   5010       bufp->regs_allocated = REGS_REALLOCATE;
   5011       regs->num_regs = num_regs;
   5012       regs->start = starts;
   5013       regs->end = ends;
   5014     }
   5015   else
   5016     {
   5017       bufp->regs_allocated = REGS_UNALLOCATED;
   5018       regs->num_regs = 0;
   5019       regs->start = regs->end = (regoff_t *) 0;
   5020     }
   5021 }
   5022 #ifdef _LIBC
   5023 weak_alias (__re_set_registers, re_set_registers)
   5024 #endif
   5025 
   5026 /* Searching routines.  */
   5028 
   5029 /* Like re_search_2, below, but only one string is specified, and
   5030    doesn't let you say where to stop matching.  */
   5031 
/* Single-string front end to re_search_2.

   STRING is passed as the second string with an empty (NULL, 0) first
   string, and the stop position is SIZE, i.e. the end of STRING.
   Returns whatever re_search_2 returns.  */
int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  int found;

  found = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                       regs, size);
  return found;
}
   5042 #ifdef _LIBC
   5043 weak_alias (__re_search, re_search)
   5044 #endif
   5045 
   5046 
   5047 /* Using the compiled pattern in BUFP->buffer, first tries to match the
   5048    virtual concatenation of STRING1 and STRING2, starting first at index
   5049    STARTPOS, then at STARTPOS + 1, and so on.
   5050 
   5051    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
   5052 
   5053    RANGE is how far to scan while trying to match.  RANGE = 0 means try
   5054    only at STARTPOS; in general, the last start tried is STARTPOS +
   5055    RANGE.
   5056 
   5057    In REGS, return the indices of the virtual concatenation of STRING1
   5058    and STRING2 that matched the entire BUFP->buffer and its contained
   5059    subexpressions.
   5060 
   5061    Do not consider matching one past the index STOP in the virtual
   5062    concatenation of STRING1 and STRING2.
   5063 
   5064    We return either the position in the strings at which the match was
   5065    found, -1 if no match, or -2 if error (such as failure
   5066    stack overflow).  */
   5067 
/* Public two-string search entry point.

   Forwards every argument unchanged to the byte-oriented search, or --
   when built with MBS_SUPPORT and the current locale is multibyte
   (MB_CUR_MAX != 1) -- to the wide-character search, and returns that
   routine's result.  */
int
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
  int found;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
    found = byte_re_search_2 (bufp, string1, size1, string2, size2,
                              startpos, range, regs, stop);
  else
    found = wcs_re_search_2 (bufp, string1, size1, string2, size2,
                             startpos, range, regs, stop);
# else
  found = byte_re_search_2 (bufp, string1, size1, string2, size2,
                            startpos, range, regs, stop);
# endif

  return found;
} /* re_search_2 */
   5087 #ifdef _LIBC
   5088 weak_alias (__re_search_2, re_search_2)
   5089 #endif
   5090 
   5091 #endif /* not INSIDE_RECURSION */
   5092 
   5093 #ifdef INSIDE_RECURSION
   5094 
/* FREE_VAR (VAR): release the storage VAR points to, if any, and reset
   VAR to NULL.  VAR is evaluated more than once, so it must be a plain
   lvalue without side effects.

   The expansion is wrapped in `do { ... } while (0)' so that it is a
   single statement: it stays entirely under the control of an unbraced
   `if' or `else', and `if (c) FREE_VAR (p); else ...' parses as written.
   Resetting VAR only when it was non-null is equivalent to resetting it
   unconditionally, since a null VAR is already NULL.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)                                                        \
  do {                                                                        \
    if (var)                                                                  \
      {                                                                       \
        REGEX_FREE (var);                                                     \
        var = NULL;                                                           \
      }                                                                       \
  } while (0)
#else
# define FREE_VAR(var)                                                        \
  do {                                                                        \
    if (var)                                                                  \
      {                                                                       \
        free (var);                                                           \
        var = NULL;                                                           \
      }                                                                       \
  } while (0)
#endif
   5100 
#ifdef WCHAR
/* Input strings longer than this many bytes get their wide-character
   work buffers from TALLOC; shorter ones use REGEX_TALLOC.  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-character copies and offset tables of both search
   strings.  For each string the release path mirrors how the buffers
   were obtained: FREE_VAR for sizes up to MAX_ALLOCA_SIZE, plain free
   for anything larger.  Expects size1, size2, wcs_string1/2 and
   mbs_offset1/2 to be in scope at the point of use.  */
# define FREE_WCS_BUFFERS() \
  do {                                                                        \
    if (size1 <= MAX_ALLOCA_SIZE)                                             \
      {                                                                       \
        FREE_VAR (wcs_string1);                                               \
        FREE_VAR (mbs_offset1);                                               \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        free (wcs_string1);                                                   \
        free (mbs_offset1);                                                   \
      }                                                                       \
    if (size2 <= MAX_ALLOCA_SIZE)                                             \
      {                                                                       \
        FREE_VAR (wcs_string2);                                               \
        FREE_VAR (mbs_offset2);                                               \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        free (wcs_string2);                                                   \
        free (mbs_offset2);                                                   \
      }                                                                       \
  } while (0)

#endif
   5129 
   5130 
   5131 static int
   5132 PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
   5133 		     regs, stop)
   5134      struct re_pattern_buffer *bufp;
   5135      const char *string1, *string2;
   5136      int size1, size2;
   5137      int startpos;
   5138      int range;
   5139      struct re_registers *regs;
   5140      int stop;
   5141 {
   5142   int val;
   5143   register char *fastmap = bufp->fastmap;
   5144   register RE_TRANSLATE_TYPE translate = bufp->translate;
   5145   int total_size = size1 + size2;
   5146   int endpos = startpos + range;
   5147 #ifdef WCHAR
   5148   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
   5149   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
   5150   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
   5151   int wcs_size1 = 0, wcs_size2 = 0;
   5152   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
   5153   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
   5154   /* They hold whether each wchar_t is binary data or not.  */
   5155   char *is_binary = NULL;
   5156 #endif /* WCHAR */
   5157 
   5158   /* Check for out-of-range STARTPOS.  */
   5159   if (startpos < 0 || startpos > total_size)
   5160     return -1;
   5161 
   5162   /* Fix up RANGE if it might eventually take us outside
   5163      the virtual concatenation of STRING1 and STRING2.
   5164      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
   5165   if (endpos < 0)
   5166     range = 0 - startpos;
   5167   else if (endpos > total_size)
   5168     range = total_size - startpos;
   5169 
   5170   /* If the search isn't to be a backwards one, don't waste time in a
   5171      search for a pattern that must be anchored.  */
   5172   if (bufp->used > 0 && range > 0
   5173       && ((re_opcode_t) bufp->buffer[0] == begbuf
   5174 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
   5175 	  || ((re_opcode_t) bufp->buffer[0] == begline
   5176 	      && !bufp->newline_anchor)))
   5177     {
   5178       if (startpos > 0)
   5179 	return -1;
   5180       else
   5181 	range = 1;
   5182     }
   5183 
   5184 #ifdef emacs
   5185   /* In a forward search for something that starts with \=.
   5186      don't keep searching past point.  */
   5187   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
   5188     {
   5189       range = PT - startpos;
   5190       if (range <= 0)
   5191 	return -1;
   5192     }
   5193 #endif /* emacs */
   5194 
   5195   /* Update the fastmap now if not correct already.  */
   5196   if (fastmap && !bufp->fastmap_accurate)
   5197     if (re_compile_fastmap (bufp) == -2)
   5198       return -2;
   5199 
   5200 #ifdef WCHAR
   5201   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
   5202      fill them with converted string.  */
   5203   if (size1 != 0)
   5204     {
   5205       if (size1 > MAX_ALLOCA_SIZE)
   5206 	{
   5207 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
   5208 	  mbs_offset1 = TALLOC (size1 + 1, int);
   5209 	  is_binary = TALLOC (size1 + 1, char);
   5210 	}
   5211       else
   5212 	{
   5213 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
   5214 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
   5215 	  is_binary = REGEX_TALLOC (size1 + 1, char);
   5216 	}
   5217       if (!wcs_string1 || !mbs_offset1 || !is_binary)
   5218 	{
   5219 	  if (size1 > MAX_ALLOCA_SIZE)
   5220 	    {
   5221 	      free (wcs_string1);
   5222 	      free (mbs_offset1);
   5223 	      free (is_binary);
   5224 	    }
   5225 	  else
   5226 	    {
   5227 	      FREE_VAR (wcs_string1);
   5228 	      FREE_VAR (mbs_offset1);
   5229 	      FREE_VAR (is_binary);
   5230 	    }
   5231 	  return -2;
   5232 	}
   5233       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
   5234 				     mbs_offset1, is_binary);
   5235       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
   5236       if (size1 > MAX_ALLOCA_SIZE)
   5237 	free (is_binary);
   5238       else
   5239 	FREE_VAR (is_binary);
   5240     }
   5241   if (size2 != 0)
   5242     {
   5243       if (size2 > MAX_ALLOCA_SIZE)
   5244 	{
   5245 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
   5246 	  mbs_offset2 = TALLOC (size2 + 1, int);
   5247 	  is_binary = TALLOC (size2 + 1, char);
   5248 	}
   5249       else
   5250 	{
   5251 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
   5252 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
   5253 	  is_binary = REGEX_TALLOC (size2 + 1, char);
   5254 	}
   5255       if (!wcs_string2 || !mbs_offset2 || !is_binary)
   5256 	{
   5257 	  FREE_WCS_BUFFERS ();
   5258 	  if (size2 > MAX_ALLOCA_SIZE)
   5259 	    free (is_binary);
   5260 	  else
   5261 	    FREE_VAR (is_binary);
   5262 	  return -2;
   5263 	}
   5264       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
   5265 				     mbs_offset2, is_binary);
   5266       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
   5267       if (size2 > MAX_ALLOCA_SIZE)
   5268 	free (is_binary);
   5269       else
   5270 	FREE_VAR (is_binary);
   5271     }
   5272 #endif /* WCHAR */
   5273 
   5274 
   5275   /* Loop through the string, looking for a place to start matching.  */
   5276   for (;;)
   5277     {
   5278       /* If a fastmap is supplied, skip quickly over characters that
   5279          cannot be the start of a match.  If the pattern can match the
   5280          null string, however, we don't need to skip characters; we want
   5281          the first null string.  */
   5282       if (fastmap && startpos < total_size && !bufp->can_be_null)
   5283 	{
   5284 	  if (range > 0)	/* Searching forwards.  */
   5285 	    {
   5286 	      register const char *d;
   5287 	      register int lim = 0;
   5288 	      int irange = range;
   5289 
   5290               if (startpos < size1 && startpos + range >= size1)
   5291                 lim = range - (size1 - startpos);
   5292 
   5293 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
   5294 
   5295               /* Written out as an if-else to avoid testing `translate'
   5296                  inside the loop.  */
   5297 	      if (translate)
   5298                 while (range > lim
   5299                        && !fastmap[(unsigned char)
   5300 				   translate[(unsigned char) *d++]])
   5301                   range--;
   5302 	      else
   5303                 while (range > lim && !fastmap[(unsigned char) *d++])
   5304                   range--;
   5305 
   5306 	      startpos += irange - range;
   5307 	    }
   5308 	  else				/* Searching backwards.  */
   5309 	    {
   5310 	      register CHAR_T c = (size1 == 0 || startpos >= size1
   5311 				      ? string2[startpos - size1]
   5312 				      : string1[startpos]);
   5313 
   5314 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
   5315 		goto advance;
   5316 	    }
   5317 	}
   5318 
   5319       /* If can't match the null string, and that's all we have left, fail.  */
   5320       if (range >= 0 && startpos == total_size && fastmap
   5321           && !bufp->can_be_null)
   5322        {
   5323 #ifdef WCHAR
   5324          FREE_WCS_BUFFERS ();
   5325 #endif
   5326          return -1;
   5327        }
   5328 
   5329 #ifdef WCHAR
   5330       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
   5331 				     size2, startpos, regs, stop,
   5332 				     wcs_string1, wcs_size1,
   5333 				     wcs_string2, wcs_size2,
   5334 				     mbs_offset1, mbs_offset2);
   5335 #else /* BYTE */
   5336       val = byte_re_match_2_internal (bufp, string1, size1, string2,
   5337 				      size2, startpos, regs, stop);
   5338 #endif /* BYTE */
   5339 
   5340 #ifndef REGEX_MALLOC
   5341 # ifdef C_ALLOCA
   5342       alloca (0);
   5343 # endif
   5344 #endif
   5345 
   5346       if (val >= 0)
   5347 	{
   5348 #ifdef WCHAR
   5349 	  FREE_WCS_BUFFERS ();
   5350 #endif
   5351 	  return startpos;
   5352 	}
   5353 
   5354       if (val == -2)
   5355 	{
   5356 #ifdef WCHAR
   5357 	  FREE_WCS_BUFFERS ();
   5358 #endif
   5359 	  return -2;
   5360 	}
   5361 
   5362     advance:
   5363       if (!range)
   5364         break;
   5365       else if (range > 0)
   5366         {
   5367           range--;
   5368           startpos++;
   5369         }
   5370       else
   5371         {
   5372           range++;
   5373           startpos--;
   5374         }
   5375     }
   5376 #ifdef WCHAR
   5377   FREE_WCS_BUFFERS ();
   5378 #endif
   5379   return -1;
   5380 }
   5381 
   5382 #ifdef WCHAR
   5383 /* This converts PTR, a pointer into one of the search wchar_t strings
   5384    `string1' and `string2', into a multibyte string offset from the
   5385    beginning of that string.  We use mbs_offset to optimize.
   5386    See convert_mbs_to_wcs.  */
   5387 # define POINTER_TO_OFFSET(ptr)						\
   5388   (FIRST_STRING_P (ptr)							\
   5389    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
   5390    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
   5391 		 + csize1)))
   5392 #else /* BYTE */
   5393 /* This converts PTR, a pointer into one of the search strings `string1'
   5394    and `string2' into an offset from the beginning of that string.  */
   5395 # define POINTER_TO_OFFSET(ptr)			\
   5396   (FIRST_STRING_P (ptr)				\
   5397    ? ((regoff_t) ((ptr) - string1))		\
   5398    : ((regoff_t) ((ptr) - string2 + size1)))
   5399 #endif /* WCHAR */
   5400 
   5401 /* Macros for dealing with the split strings in re_match_2.  */
   5402 
   5403 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
   5404 
   5405 /* Call before fetching a character with *d.  This switches over to
   5406    string2 if necessary.  */
   5407 #define PREFETCH()							\
   5408   while (d == dend)						    	\
   5409     {									\
   5410       /* End of string2 => fail.  */					\
   5411       if (dend == end_match_2) 						\
   5412         goto fail;							\
   5413       /* End of string1 => advance to string2.  */ 			\
   5414       d = string2;						        \
   5415       dend = end_match_2;						\
   5416     }
   5417 
   5418 /* Test if at very beginning or at very end of the virtual concatenation
   5419    of `string1' and `string2'.  If only one string, it's `string2'.  */
   5420 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
   5421 #define AT_STRINGS_END(d) ((d) == end2)
   5422 
   5423 
   5424 /* Test if D points to a character which is word-constituent.  We have
   5425    two special cases to check for: if past the end of string1, look at
   5426    the first character in string2; and if before the beginning of
   5427    string2, look at the last character in string1.  */
   5428 #ifdef WCHAR
   5429 /* Use internationalized API instead of SYNTAX.  */
   5430 # define WORDCHAR_P(d)							\
   5431   (iswalnum ((wint_t)((d) == end1 ? *string2				\
   5432            : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
   5433    || ((d) == end1 ? *string2						\
   5434        : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
   5435 #else /* BYTE */
   5436 # define WORDCHAR_P(d)							\
   5437   (SYNTAX ((d) == end1 ? *string2					\
   5438            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   5439    == Sword)
   5440 #endif /* WCHAR */
   5441 
   5442 /* Disabled due to a compiler bug -- see comment at case wordbound */
   5443 #if 0
   5444 /* Test if the character before D and the one at D differ with respect
   5445    to being word-constituent.  */
   5446 #define AT_WORD_BOUNDARY(d)						\
   5447   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   5448    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
   5449 #endif
   5450 
   5451 /* Free everything we malloc.  */
   5452 #ifdef MATCH_MAY_ALLOCATE
   5453 # ifdef WCHAR
   5454 #  define FREE_VARIABLES()						\
   5455   do {									\
   5456     REGEX_FREE_STACK (fail_stack.stack);				\
   5457     FREE_VAR (regstart);						\
   5458     FREE_VAR (regend);							\
   5459     FREE_VAR (old_regstart);						\
   5460     FREE_VAR (old_regend);						\
   5461     FREE_VAR (best_regstart);						\
   5462     FREE_VAR (best_regend);						\
   5463     FREE_VAR (reg_info);						\
   5464     FREE_VAR (reg_dummy);						\
   5465     FREE_VAR (reg_info_dummy);						\
   5466     if (!cant_free_wcs_buf)						\
   5467       {									\
   5468         FREE_VAR (string1);						\
   5469         FREE_VAR (string2);						\
   5470         FREE_VAR (mbs_offset1);						\
   5471         FREE_VAR (mbs_offset2);						\
   5472       }									\
   5473   } while (0)
   5474 # else /* BYTE */
   5475 #  define FREE_VARIABLES()						\
   5476   do {									\
   5477     REGEX_FREE_STACK (fail_stack.stack);				\
   5478     FREE_VAR (regstart);						\
   5479     FREE_VAR (regend);							\
   5480     FREE_VAR (old_regstart);						\
   5481     FREE_VAR (old_regend);						\
   5482     FREE_VAR (best_regstart);						\
   5483     FREE_VAR (best_regend);						\
   5484     FREE_VAR (reg_info);						\
   5485     FREE_VAR (reg_dummy);						\
   5486     FREE_VAR (reg_info_dummy);						\
   5487   } while (0)
   5488 # endif /* WCHAR */
   5489 #else
   5490 # ifdef WCHAR
   5491 #  define FREE_VARIABLES()						\
   5492   do {									\
   5493     if (!cant_free_wcs_buf)						\
   5494       {									\
   5495         FREE_VAR (string1);						\
   5496         FREE_VAR (string2);						\
   5497         FREE_VAR (mbs_offset1);						\
   5498         FREE_VAR (mbs_offset2);						\
   5499       }									\
   5500   } while (0)
   5501 # else /* BYTE */
   5502 #  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
   5503 # endif /* WCHAR */
   5504 #endif /* not MATCH_MAY_ALLOCATE */
   5505 
   5506 /* These values must meet several constraints.  They must not be valid
   5507    register values; since we have a limit of 255 registers (because
   5508    we use only one byte in the pattern for the register number), we can
   5509    use numbers larger than 255.  They must differ by 1, because of
   5510    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   5511    be larger than the value for the highest register, so we do not try
   5512    to actually save any registers when none are active.  */
   5513 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
   5514 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
   5515 
   5516 #else /* not INSIDE_RECURSION */
   5518 /* Matching routines.  */
   5519 
   5520 #ifndef emacs   /* Emacs never uses this.  */
   5521 /* re_match is like re_match_2 except it takes only a single string.  */
   5522 
/* Single-string front end to the internal matcher.

   STRING is passed as the second string with an empty (NULL, 0) first
   string, matching starts at POS and may run to SIZE.  In a multibyte
   locale (MBS_SUPPORT builds, MB_CUR_MAX != 1) the wide-character
   matcher is used, with no pre-converted buffers supplied.  The
   matcher's result is returned unchanged.  */
int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int match_result;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
    match_result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                             pos, regs, size);
  else
    match_result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                            pos, regs, size,
                                            NULL, 0, NULL, 0, NULL, NULL);
# else
  match_result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                           pos, regs, size);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Prompt the emulated alloca to reclaim the matcher's storage.  */
  alloca (0);
# endif

  return match_result;
}
   5547 # ifdef _LIBC
   5548 weak_alias (__re_match, re_match)
   5549 # endif
   5550 #endif /* not emacs */
   5551 
   5552 #endif /* not INSIDE_RECURSION */
   5553 
   5554 #ifdef INSIDE_RECURSION
   5555 static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
   5556 						    UCHAR_T *end,
   5557 					PREFIX(register_info_type) *reg_info));
   5558 static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
   5559 						  UCHAR_T *end,
   5560 					PREFIX(register_info_type) *reg_info));
   5561 static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
   5562 							UCHAR_T *end,
   5563 					PREFIX(register_info_type) *reg_info));
   5564 static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2,
   5565 				     int len, char *translate));
   5566 #else /* not INSIDE_RECURSION */
   5567 
   5568 /* re_match_2 matches the compiled pattern in BUFP against the
   5569    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   5570    and SIZE2, respectively).  We start matching at POS, and stop
   5571    matching at STOP.
   5572 
   5573    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
   5574    store offsets for the substring each group matched in REGS.  See the
   5575    documentation for exactly how many groups we fill.
   5576 
   5577    We return -1 if no match, -2 if an internal error (such as the
   5578    failure stack overflowing).  Otherwise, we return the length of the
   5579    matched substring.  */
   5580 
/* Public two-string match entry point.

   Forwards the arguments to the byte-oriented internal matcher, or --
   in MBS_SUPPORT builds when the locale is multibyte (MB_CUR_MAX != 1)
   -- to the wide-character matcher with no pre-converted buffers
   supplied.  The matcher's result is returned unchanged.  */
int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int match_result;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
    match_result = byte_re_match_2_internal (bufp, string1, size1,
                                             string2, size2,
                                             pos, regs, stop);
  else
    match_result = wcs_re_match_2_internal (bufp, string1, size1,
                                            string2, size2,
                                            pos, regs, stop,
                                            NULL, 0, NULL, 0, NULL, NULL);
# else
  match_result = byte_re_match_2_internal (bufp, string1, size1,
                                           string2, size2,
                                           pos, regs, stop);
# endif

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Prompt the emulated alloca to reclaim the matcher's storage.  */
  alloca (0);
#endif

  return match_result;
}
   5608 #ifdef _LIBC
   5609 weak_alias (__re_match_2, re_match_2)
   5610 #endif
   5611 
   5612 #endif /* not INSIDE_RECURSION */
   5613 
   5614 #ifdef INSIDE_RECURSION
   5615 
   5616 #ifdef WCHAR
   5617 static int count_mbs_length PARAMS ((int *, int));
   5618 
   5619 /* This checks the substring (from 0 to LENGTH) of the multibyte string
   5620    to which OFFSET_BUFFER corresponds, and counts how many wchar_t
   5621    characters that substring occupies.  We use OFFSET_BUFFER as an
   5622    optimization.  See convert_mbs_to_wcs.  */
   5623 
/* Find the index I for which OFFSET_BUFFER[I] == LENGTH.

   OFFSET_BUFFER: table searched by bisection; the search relies on its
     entries being in increasing order with OFFSET_BUFFER[I] >= I.
     Entry LENGTH is read unconditionally, so the table must have at
     least LENGTH + 1 readable entries.
   LENGTH: the value to look up.

   Returns the matching index; 0 if OFFSET_BUFFER is NULL; -1 if LENGTH
   is negative or no entry equals LENGTH.  */
static int
count_mbs_length(offset_buffer, length)
     int *offset_buffer;
     int length;
{
  int lower, upper;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* The answer, if any, lies strictly between LOWER and UPPER
     (because for all i, offset_buffer[i] >= i, LENGTH bounds it).  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Midpoint computed without forming LOWER + UPPER, which could
         overflow `int' for very large LENGTH.  */
      int middle = lower + (upper - lower) / 2;
      int offset = offset_buffer[middle];

      if (offset == length)
        return middle;

      if (offset > length)
        upper = middle;
      else
        lower = middle;
    }

  /* No entry equals LENGTH.  */
  return -1;
}
   5662 #endif /* WCHAR */
   5663 
   5664 /* This is a separate function so that we can force an alloca cleanup
   5665    afterwards.  */
   5666 #ifdef WCHAR
   5667 static int
   5668 wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
   5669 			 regs, stop, string1, size1, string2, size2,
   5670 			 mbs_offset1, mbs_offset2)
   5671      struct re_pattern_buffer *bufp;
   5672      const char *cstring1, *cstring2;
   5673      int csize1, csize2;
   5674      int pos;
   5675      struct re_registers *regs;
   5676      int stop;
   5677      /* string1 == string2 == NULL means string1/2, size1/2 and
   5678 	mbs_offset1/2 need seting up in this function.  */
   5679      /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
   5680      wchar_t *string1, *string2;
   5681      /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
   5682      int size1, size2;
   5683      /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
   5684      int *mbs_offset1, *mbs_offset2;
   5685 #else /* BYTE */
   5686 static int
   5687 byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
   5688 			  regs, stop)
   5689      struct re_pattern_buffer *bufp;
   5690      const char *string1, *string2;
   5691      int size1, size2;
   5692      int pos;
   5693      struct re_registers *regs;
   5694      int stop;
   5695 #endif /* BYTE */
   5696 {
   5697   /* General temporaries.  */
   5698   int mcnt;
   5699   UCHAR_T *p1;
   5700 #ifdef WCHAR
   5701   /* They hold whether each wchar_t is binary data or not.  */
   5702   char *is_binary = NULL;
   5703   /* If true, we can't free string1/2, mbs_offset1/2.  */
   5704   int cant_free_wcs_buf = 1;
   5705 #endif /* WCHAR */
   5706 
   5707   /* Just past the end of the corresponding string.  */
   5708   const CHAR_T *end1, *end2;
   5709 
   5710   /* Pointers into string1 and string2, just past the last characters in
   5711      each to consider matching.  */
   5712   const CHAR_T *end_match_1, *end_match_2;
   5713 
   5714   /* Where we are in the data, and the end of the current string.  */
   5715   const CHAR_T *d, *dend;
   5716 
   5717   /* Where we are in the pattern, and the end of the pattern.  */
   5718 #ifdef WCHAR
   5719   UCHAR_T *pattern, *p;
   5720   register UCHAR_T *pend;
   5721 #else /* BYTE */
   5722   UCHAR_T *p = bufp->buffer;
   5723   register UCHAR_T *pend = p + bufp->used;
   5724 #endif /* WCHAR */
   5725 
   5726   /* Mark the opcode just after a start_memory, so we can test for an
   5727      empty subpattern when we get to the stop_memory.  */
   5728   UCHAR_T *just_past_start_mem = 0;
   5729 
   5730   /* We use this to map every character in the string.  */
   5731   RE_TRANSLATE_TYPE translate = bufp->translate;
   5732 
   5733   /* Failure point stack.  Each place that can handle a failure further
   5734      down the line pushes a failure point on this stack.  It consists of
   5735      restart, regend, and reg_info for all registers corresponding to
   5736      the subexpressions we're currently inside, plus the number of such
   5737      registers, and, finally, two char *'s.  The first char * is where
   5738      to resume scanning the pattern; the second one is where to resume
   5739      scanning the strings.  If the latter is zero, the failure point is
   5740      a ``dummy''; if a failure happens and the failure point is a dummy,
   5741      it gets discarded and the next one is tried.  */
   5742 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5743   PREFIX(fail_stack_type) fail_stack;
   5744 #endif
   5745 #ifdef DEBUG
   5746   static unsigned failure_id;
   5747   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
   5748 #endif
   5749 
   5750 #ifdef REL_ALLOC
   5751   /* This holds the pointer to the failure stack, when
   5752      it is allocated relocatably.  */
   5753   fail_stack_elt_t *failure_stack_ptr;
   5754 #endif
   5755 
   5756   /* We fill all the registers internally, independent of what we
   5757      return, for use in backreferences.  The number here includes
   5758      an element for register zero.  */
   5759   size_t num_regs = bufp->re_nsub + 1;
   5760 
   5761   /* The currently active registers.  */
   5762   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   5763   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   5764 
   5765   /* Information on the contents of registers. These are pointers into
   5766      the input strings; they record just what was matched (on this
   5767      attempt) by a subexpression part of the pattern, that is, the
   5768      regnum-th regstart pointer points to where in the string we began
   5769      matching and the regnum-th regend points to right after where we
   5770      stopped matching the regnum-th subexpression.  (The zeroth register
   5771      keeps track of what the whole pattern matches.)  */
   5772 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5773   const CHAR_T **regstart, **regend;
   5774 #endif
   5775 
   5776   /* If a group that's operated upon by a repetition operator fails to
   5777      match anything, then the register for its start will need to be
   5778      restored because it will have been set to wherever in the string we
   5779      are when we last see its open-group operator.  Similarly for a
   5780      register's end.  */
   5781 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5782   const CHAR_T **old_regstart, **old_regend;
   5783 #endif
   5784 
   5785   /* The is_active field of reg_info helps us keep track of which (possibly
   5786      nested) subexpressions we are currently in. The matched_something
   5787      field of reg_info[reg_num] helps us tell whether or not we have
   5788      matched any of the pattern so far this time through the reg_num-th
   5789      subexpression.  These two fields get reset each time through any
   5790      loop their register is in.  */
   5791 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5792   PREFIX(register_info_type) *reg_info;
   5793 #endif
   5794 
   5795   /* The following record the register info as found in the above
   5796      variables when we find a match better than any we've seen before.
   5797      This happens as we backtrack through the failure points, which in
   5798      turn happens only if we have not yet matched the entire string. */
   5799   unsigned best_regs_set = false;
   5800 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5801   const CHAR_T **best_regstart, **best_regend;
   5802 #endif
   5803 
   5804   /* Logically, this is `best_regend[0]'.  But we don't want to have to
   5805      allocate space for that if we're not allocating space for anything
   5806      else (see below).  Also, we never need info about register 0 for
   5807      any of the other register vectors, and it seems rather a kludge to
   5808      treat `best_regend' differently than the rest.  So we keep track of
   5809      the end of the best match so far in a separate variable.  We
   5810      initialize this to NULL so that when we backtrack the first time
   5811      and need to test it, it's not garbage.  */
   5812   const CHAR_T *match_end = NULL;
   5813 
   5814   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   5815   int set_regs_matched_done = 0;
   5816 
   5817   /* Used when we pop values we don't care about.  */
   5818 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5819   const CHAR_T **reg_dummy;
   5820   PREFIX(register_info_type) *reg_info_dummy;
   5821 #endif
   5822 
   5823 #ifdef DEBUG
   5824   /* Counts the total number of registers pushed.  */
   5825   unsigned num_regs_pushed = 0;
   5826 #endif
   5827 
   5828   /* Definitions for state transitions.  More efficiently for gcc.  */
   5829 #ifdef __GNUC__
   5830 # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED
   5831 #  define NEXT \
   5832       do								      \
   5833 	{								      \
   5834 	  int offset;							      \
   5835 	  const void *__unbounded ptr;					      \
   5836 	  offset = (p == pend						      \
   5837 		    ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);   \
   5838 	  ptr = &&end_of_pattern + offset;				      \
   5839 	  goto *ptr;							      \
   5840 	}								      \
   5841       while (0)
   5842 #  define REF(x) \
   5843   &&label_##x - &&end_of_pattern
   5844 #  define JUMP_TABLE_TYPE const int
   5845 # else
   5846 #  define NEXT \
   5847       do								      \
   5848 	{								      \
   5849 	  const void *__unbounded ptr;					      \
   5850 	  ptr = (p == pend ? &&end_of_pattern				      \
   5851 		 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);	      \
   5852 	  goto *ptr;							      \
   5853 	}								      \
   5854       while (0)
   5855 #  define REF(x) \
   5856   &&label_##x
   5857 #  define JUMP_TABLE_TYPE const void *const
   5858 # endif
   5859 # define CASE(x) label_##x
   5860   static JUMP_TABLE_TYPE jmptable[] =
   5861     {
   5862     REF (no_op),
   5863     REF (succeed),
   5864     REF (exactn),
   5865 # ifdef MBS_SUPPORT
   5866     REF (exactn_bin),
   5867 # endif
   5868     REF (anychar),
   5869     REF (charset),
   5870     REF (charset_not),
   5871     REF (start_memory),
   5872     REF (stop_memory),
   5873     REF (duplicate),
   5874     REF (begline),
   5875     REF (endline),
   5876     REF (begbuf),
   5877     REF (endbuf),
   5878     REF (jump),
   5879     REF (jump_past_alt),
   5880     REF (on_failure_jump),
   5881     REF (on_failure_keep_string_jump),
   5882     REF (pop_failure_jump),
   5883     REF (maybe_pop_jump),
   5884     REF (dummy_failure_jump),
   5885     REF (push_dummy_failure),
   5886     REF (succeed_n),
   5887     REF (jump_n),
   5888     REF (set_number_at),
   5889     REF (wordchar),
   5890     REF (notwordchar),
   5891     REF (wordbeg),
   5892     REF (wordend),
   5893     REF (wordbound),
   5894     REF (notwordbound)
   5895 # ifdef emacs
   5896     ,REF (before_dot),
   5897     REF (at_dot),
   5898     REF (after_dot),
   5899     REF (syntaxspec),
   5900     REF (notsyntaxspec)
   5901 # endif
   5902     };
   5903 #else
   5904 # define NEXT \
   5905   break
   5906 # define CASE(x) \
   5907   case x
   5908 #endif
   5909 
   5910   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
   5911 
   5912   INIT_FAIL_STACK ();
   5913 
   5914 #ifdef MATCH_MAY_ALLOCATE
   5915   /* Do not bother to initialize all the register variables if there are
   5916      no groups in the pattern, as it takes a fair amount of time.  If
   5917      there are groups, we include space for register 0 (the whole
   5918      pattern), even though we never use it, since it simplifies the
   5919      array indexing.  We should fix this.  */
   5920   if (bufp->re_nsub)
   5921     {
   5922       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5923       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5924       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5925       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5926       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5927       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5928       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5929       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
   5930       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5931 
   5932       if (!(regstart && regend && old_regstart && old_regend && reg_info
   5933             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
   5934         {
   5935           FREE_VARIABLES ();
   5936           return -2;
   5937         }
   5938     }
   5939   else
   5940     {
   5941       /* We must initialize all our variables to NULL, so that
   5942          `FREE_VARIABLES' doesn't try to free them.  */
   5943       regstart = regend = old_regstart = old_regend = best_regstart
   5944         = best_regend = reg_dummy = NULL;
   5945       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
   5946     }
   5947 #endif /* MATCH_MAY_ALLOCATE */
   5948 
   5949   /* The starting position is bogus.  */
   5950 #ifdef WCHAR
   5951   if (pos < 0 || pos > csize1 + csize2)
   5952 #else /* BYTE */
   5953   if (pos < 0 || pos > size1 + size2)
   5954 #endif
   5955     {
   5956       FREE_VARIABLES ();
   5957       return -1;
   5958     }
   5959 
   5960 #ifdef WCHAR
   5961   /* Allocate wchar_t array for string1 and string2 and
   5962      fill them with converted string.  */
   5963   if (string1 == NULL && string2 == NULL)
   5964     {
   5965       /* We need to set up the buffers here.  */
   5966 
   5967       /* We must free wcs buffers in this function.  */
   5968       cant_free_wcs_buf = 0;
   5969 
   5970       if (csize1 != 0)
   5971 	{
   5972 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
   5973 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
   5974 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
   5975 	  if (!string1 || !mbs_offset1 || !is_binary)
   5976 	    {
   5977 	      FREE_VAR (string1);
   5978 	      FREE_VAR (mbs_offset1);
   5979 	      FREE_VAR (is_binary);
   5980 	      return -2;
   5981 	    }
   5982 	}
   5983       if (csize2 != 0)
   5984 	{
   5985 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
   5986 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
   5987 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
   5988 	  if (!string2 || !mbs_offset2 || !is_binary)
   5989 	    {
   5990 	      FREE_VAR (string1);
   5991 	      FREE_VAR (mbs_offset1);
   5992 	      FREE_VAR (string2);
   5993 	      FREE_VAR (mbs_offset2);
   5994 	      FREE_VAR (is_binary);
   5995 	      return -2;
   5996 	    }
   5997 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
   5998 				     mbs_offset2, is_binary);
   5999 	  string2[size2] = L'\0'; /* for a sentinel  */
   6000 	  FREE_VAR (is_binary);
   6001 	}
   6002     }
   6003 
   6004   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   6005      pattern to (char*) in regex_compile.  */
   6006   p = pattern = (CHAR_T*)bufp->buffer;
   6007   pend = (CHAR_T*)(bufp->buffer + bufp->used);
   6008 
   6009 #endif /* WCHAR */
   6010 
   6011   /* Initialize subexpression text positions to -1 to mark ones that no
   6012      start_memory/stop_memory has been seen for. Also initialize the
   6013      register information struct.  */
   6014   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   6015     {
   6016       regstart[mcnt] = regend[mcnt]
   6017         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
   6018 
   6019       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
   6020       IS_ACTIVE (reg_info[mcnt]) = 0;
   6021       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   6022       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   6023     }
   6024 
   6025   /* We move `string1' into `string2' if the latter's empty -- but not if
   6026      `string1' is null.  */
   6027   if (size2 == 0 && string1 != NULL)
   6028     {
   6029       string2 = string1;
   6030       size2 = size1;
   6031       string1 = 0;
   6032       size1 = 0;
   6033 #ifdef WCHAR
   6034       mbs_offset2 = mbs_offset1;
   6035       csize2 = csize1;
   6036       mbs_offset1 = NULL;
   6037       csize1 = 0;
   6038 #endif
   6039     }
   6040   end1 = string1 + size1;
   6041   end2 = string2 + size2;
   6042 
   6043   /* Compute where to stop matching, within the two strings.  */
   6044 #ifdef WCHAR
   6045   if (stop <= csize1)
   6046     {
   6047       mcnt = count_mbs_length(mbs_offset1, stop);
   6048       end_match_1 = string1 + mcnt;
   6049       end_match_2 = string2;
   6050     }
   6051   else
   6052     {
   6053       if (stop > csize1 + csize2)
   6054 	stop = csize1 + csize2;
   6055       end_match_1 = end1;
   6056       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
   6057       end_match_2 = string2 + mcnt;
   6058     }
   6059   if (mcnt < 0)
   6060     { /* count_mbs_length return error.  */
   6061       FREE_VARIABLES ();
   6062       return -1;
   6063     }
   6064 #else
   6065   if (stop <= size1)
   6066     {
   6067       end_match_1 = string1 + stop;
   6068       end_match_2 = string2;
   6069     }
   6070   else
   6071     {
   6072       end_match_1 = end1;
   6073       end_match_2 = string2 + stop - size1;
   6074     }
   6075 #endif /* WCHAR */
   6076 
   6077   /* `p' scans through the pattern as `d' scans through the data.
   6078      `dend' is the end of the input string that `d' points within.  `d'
   6079      is advanced into the following input string whenever necessary, but
   6080      this happens before fetching; therefore, at the beginning of the
   6081      loop, `d' can be pointing at the end of a string, but it cannot
   6082      equal `string2'.  */
   6083 #ifdef WCHAR
   6084   if (size1 > 0 && pos <= csize1)
   6085     {
   6086       mcnt = count_mbs_length(mbs_offset1, pos);
   6087       d = string1 + mcnt;
   6088       dend = end_match_1;
   6089     }
   6090   else
   6091     {
   6092       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
   6093       d = string2 + mcnt;
   6094       dend = end_match_2;
   6095     }
   6096 
   6097   if (mcnt < 0)
   6098     { /* count_mbs_length return error.  */
   6099       FREE_VARIABLES ();
   6100       return -1;
   6101     }
   6102 #else
   6103   if (size1 > 0 && pos <= size1)
   6104     {
   6105       d = string1 + pos;
   6106       dend = end_match_1;
   6107     }
   6108   else
   6109     {
   6110       d = string2 + pos - size1;
   6111       dend = end_match_2;
   6112     }
   6113 #endif /* WCHAR */
   6114 
   6115   DEBUG_PRINT1 ("The compiled pattern is:\n");
   6116   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   6117   DEBUG_PRINT1 ("The string to match is: `");
   6118   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
   6119   DEBUG_PRINT1 ("'\n");
   6120 
   6121   /* This loops over pattern commands.  It exits by returning from the
   6122      function if the match is complete, or it drops through if the match
   6123      fails at this starting point in the input data.  */
   6124   for (;;)
   6125     {
   6126 #ifdef _LIBC
   6127       DEBUG_PRINT2 ("\n%p: ", p);
   6128 #else
   6129       DEBUG_PRINT2 ("\n0x%x: ", p);
   6130 #endif
   6131 
   6132 #ifdef __GNUC__
   6133       NEXT;
   6134 #else
   6135       if (p == pend)
   6136 #endif
   6137 	{
   6138 #ifdef __GNUC__
   6139 	end_of_pattern:
   6140 #endif
   6141 	  /* End of pattern means we might have succeeded.  */
   6142 	  DEBUG_PRINT1 ("end of pattern ... ");
   6143 
   6144 	  /* If we haven't matched the entire string, and we want the
   6145 	     longest match, try backtracking.  */
   6146 	  if (d != end_match_2)
   6147 	    {
   6148 	      /* 1 if this match ends in the same string (string1 or string2)
   6149 		 as the best previous match.  */
   6150 	      boolean same_str_p = (FIRST_STRING_P (match_end)
   6151 				    == MATCHING_IN_FIRST_STRING);
   6152 	      /* 1 if this match is the best seen so far.  */
   6153 	      boolean best_match_p;
   6154 
   6155 	      /* AIX compiler got confused when this was combined
   6156 		 with the previous declaration.  */
   6157 	      if (same_str_p)
   6158 		best_match_p = d > match_end;
   6159 	      else
   6160 		best_match_p = !MATCHING_IN_FIRST_STRING;
   6161 
   6162 	      DEBUG_PRINT1 ("backtracking.\n");
   6163 
   6164 	      if (!FAIL_STACK_EMPTY ())
   6165 		{ /* More failure points to try.  */
   6166 
   6167 		  /* If exceeds best match so far, save it.  */
   6168 		  if (!best_regs_set || best_match_p)
   6169 		    {
   6170 		      best_regs_set = true;
   6171 		      match_end = d;
   6172 
   6173 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
   6174 
   6175 		      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   6176 			{
   6177 			  best_regstart[mcnt] = regstart[mcnt];
   6178 			  best_regend[mcnt] = regend[mcnt];
   6179 			}
   6180 		    }
   6181 		  goto fail;
   6182 		}
   6183 
   6184 	      /* If no failure points, don't restore garbage.  And if
   6185 		 last match is real best match, don't restore second
   6186 		 best one. */
   6187 	      else if (best_regs_set && !best_match_p)
   6188 		{
   6189 		restore_best_regs:
   6190 		  /* Restore best match.  It may happen that `dend ==
   6191 		     end_match_1' while the restored d is in string2.
   6192 		     For example, the pattern `x.*y.*z' against the
   6193 		     strings `x-' and `y-z-', if the two strings are
   6194 		     not consecutive in memory.  */
   6195 		  DEBUG_PRINT1 ("Restoring best registers.\n");
   6196 
   6197 		  d = match_end;
   6198 		  dend = ((d >= string1 && d <= end1)
   6199 			  ? end_match_1 : end_match_2);
   6200 
   6201 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   6202 		    {
   6203 		      regstart[mcnt] = best_regstart[mcnt];
   6204 		      regend[mcnt] = best_regend[mcnt];
   6205 		    }
   6206 		}
   6207 	    } /* d != end_match_2 */
   6208 
   6209 	succeed_label:
   6210 	  DEBUG_PRINT1 ("Accepting match.\n");
   6211 	  /* If caller wants register contents data back, do it.  */
   6212 	  if (regs && !bufp->no_sub)
   6213 	    {
   6214 	      /* Have the register data arrays been allocated?  */
   6215 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
   6216 		{ /* No.  So allocate them with malloc.  We need one
   6217 		     extra element beyond `num_regs' for the `-1' marker
   6218 		     GNU code uses.  */
   6219 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
   6220 		  regs->start = TALLOC (regs->num_regs, regoff_t);
   6221 		  regs->end = TALLOC (regs->num_regs, regoff_t);
   6222 		  if (regs->start == NULL || regs->end == NULL)
   6223 		    {
   6224 		      FREE_VARIABLES ();
   6225 		      return -2;
   6226 		    }
   6227 		  bufp->regs_allocated = REGS_REALLOCATE;
   6228 		}
   6229 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
   6230 		{ /* Yes.  If we need more elements than were already
   6231 		     allocated, reallocate them.  If we need fewer, just
   6232 		     leave it alone.  */
   6233 		  if (regs->num_regs < num_regs + 1)
   6234 		    {
   6235 		      regs->num_regs = num_regs + 1;
   6236 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
   6237 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
   6238 		      if (regs->start == NULL || regs->end == NULL)
   6239 			{
   6240 			  FREE_VARIABLES ();
   6241 			  return -2;
   6242 			}
   6243 		    }
   6244 		}
   6245 	      else
   6246 		{
   6247 		  /* These braces fend off a "empty body in an else-statement"
   6248 		     warning under GCC when assert expands to nothing.  */
   6249 		  assert (bufp->regs_allocated == REGS_FIXED);
   6250 		}
   6251 
   6252 	      /* Convert the pointer data in `regstart' and `regend' to
   6253 		 indices.  Register zero has to be set differently,
   6254 		 since we haven't kept track of any info for it.  */
   6255 	      if (regs->num_regs > 0)
   6256 		{
   6257 		  regs->start[0] = pos;
   6258 #ifdef WCHAR
   6259 		  if (MATCHING_IN_FIRST_STRING)
   6260 		    regs->end[0] = (mbs_offset1 != NULL ?
   6261 				    mbs_offset1[d-string1] : 0);
   6262 		  else
   6263 		    regs->end[0] = csize1 + (mbs_offset2 != NULL
   6264 					     ? mbs_offset2[d-string2] : 0);
   6265 #else
   6266 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
   6267 				  ? ((regoff_t) (d - string1))
   6268 				  : ((regoff_t) (d - string2 + size1)));
   6269 #endif /* WCHAR */
   6270 		}
   6271 
   6272 	      /* Go through the first `min (num_regs, regs->num_regs)'
   6273 		 registers, since that is all we initialized.  */
   6274 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
   6275 		   mcnt++)
   6276 		{
   6277 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
   6278 		    regs->start[mcnt] = regs->end[mcnt] = -1;
   6279 		  else
   6280 		    {
   6281 		      regs->start[mcnt]
   6282 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
   6283 		      regs->end[mcnt]
   6284 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
   6285 		    }
   6286 		}
   6287 
   6288 	      /* If the regs structure we return has more elements than
   6289 		 were in the pattern, set the extra elements to -1.  If
   6290 		 we (re)allocated the registers, this is the case,
   6291 		 because we always allocate enough to have at least one
   6292 		 -1 at the end.  */
   6293 	      for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
   6294 		regs->start[mcnt] = regs->end[mcnt] = -1;
   6295 	    } /* regs && !bufp->no_sub */
   6296 
   6297 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
   6298 			nfailure_points_pushed, nfailure_points_popped,
   6299 			nfailure_points_pushed - nfailure_points_popped);
   6300 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
   6301 
   6302 #ifdef WCHAR
   6303 	  if (MATCHING_IN_FIRST_STRING)
   6304 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
   6305 	  else
   6306 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
   6307 	      csize1;
   6308 	  mcnt -= pos;
   6309 #else
   6310 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
   6311 			    ? string1 : string2 - size1);
   6312 #endif /* WCHAR */
   6313 
   6314 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
   6315 
   6316 	  FREE_VARIABLES ();
   6317 	  return mcnt;
   6318 	}
   6319 
   6320 #ifndef __GNUC__
   6321       /* Otherwise match next pattern command.  */
   6322       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   6323 	{
   6324 #endif
   6325         /* Ignore these.  Used to ignore the n of succeed_n's which
   6326            currently have n == 0.  */
   6327         CASE (no_op):
   6328           DEBUG_PRINT1 ("EXECUTING no_op.\n");
   6329           NEXT;
   6330 
   6331 	CASE (succeed):
   6332           DEBUG_PRINT1 ("EXECUTING succeed.\n");
   6333 	  goto succeed_label;
   6334 
   6335         /* Match the next n pattern characters exactly.  The following
   6336            byte in the pattern defines n, and the n bytes after that
   6337            are the characters to match.  */
   6338 	CASE (exactn):
   6339 #ifdef MBS_SUPPORT
   6340 	CASE (exactn_bin):
   6341 #endif
   6342 	  mcnt = *p++;
   6343           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
   6344 
   6345           /* This is written out as an if-else so we don't waste time
   6346              testing `translate' inside the loop.  */
   6347           if (translate)
   6348 	    {
   6349 	      do
   6350 		{
   6351 		  PREFETCH ();
   6352 #ifdef WCHAR
   6353 		  if (*d <= 0xff)
   6354 		    {
   6355 		      if ((UCHAR_T) translate[(unsigned char) *d++]
   6356 			  != (UCHAR_T) *p++)
   6357 			goto fail;
   6358 		    }
   6359 		  else
   6360 		    {
   6361 		      if (*d++ != (CHAR_T) *p++)
   6362 			goto fail;
   6363 		    }
   6364 #else
   6365 		  if ((UCHAR_T) translate[(unsigned char) *d++]
   6366 		      != (UCHAR_T) *p++)
   6367                     goto fail;
   6368 #endif /* WCHAR */
   6369 		}
   6370 	      while (--mcnt);
   6371 	    }
   6372 	  else
   6373 	    {
   6374 	      do
   6375 		{
   6376 		  PREFETCH ();
   6377 		  if (*d++ != (CHAR_T) *p++) goto fail;
   6378 		}
   6379 	      while (--mcnt);
   6380 	    }
   6381 	  SET_REGS_MATCHED ();
   6382           NEXT;
   6383 
   6384 
   6385         /* Match any character except possibly a newline or a null.  */
   6386 	CASE (anychar):
   6387           DEBUG_PRINT1 ("EXECUTING anychar.\n");
   6388 
   6389           PREFETCH ();
   6390 
   6391           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
   6392               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
   6393 	    goto fail;
   6394 
   6395           SET_REGS_MATCHED ();
   6396           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
   6397           d++;
   6398 	  NEXT;
   6399 
   6400 
   6401 	CASE (charset):
   6402 	CASE (charset_not):
   6403 	  {
   6404 	    register UCHAR_T c;
   6405 #ifdef WCHAR
   6406 	    unsigned int i, char_class_length, coll_symbol_length,
   6407               equiv_class_length, ranges_length, chars_length, length;
   6408 	    CHAR_T *workp, *workp2, *charset_top;
   6409 #define WORK_BUFFER_SIZE 128
   6410             CHAR_T str_buf[WORK_BUFFER_SIZE];
   6411 # ifdef _LIBC
   6412 	    uint32_t nrules;
   6413 # endif /* _LIBC */
   6414 #endif /* WCHAR */
   6415 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
   6416 
   6417             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
   6418 	    PREFETCH ();
   6419 	    c = TRANSLATE (*d); /* The character to match.  */
   6420 #ifdef WCHAR
   6421 # ifdef _LIBC
   6422 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   6423 # endif /* _LIBC */
   6424 	    charset_top = p - 1;
   6425 	    char_class_length = *p++;
   6426 	    coll_symbol_length = *p++;
   6427 	    equiv_class_length = *p++;
   6428 	    ranges_length = *p++;
   6429 	    chars_length = *p++;
   6430 	    /* p points charset[6], so the address of the next instruction
   6431 	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
   6432 	       where l=length of char_classes, m=length of collating_symbol,
   6433 	       n=equivalence_class, o=length of char_range,
   6434 	       p'=length of character.  */
   6435 	    workp = p;
   6436 	    /* Update p to indicate the next instruction.  */
   6437 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
   6438               2*ranges_length + chars_length;
   6439 
   6440             /* match with char_class?  */
   6441 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
   6442 	      {
   6443 		wctype_t wctype;
   6444 		uintptr_t alignedp = ((uintptr_t)workp
   6445 				      + __alignof__(wctype_t) - 1)
   6446 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   6447 		wctype = *((wctype_t*)alignedp);
   6448 		workp += CHAR_CLASS_SIZE;
   6449 		if (iswctype((wint_t)c, wctype))
   6450 		  goto char_set_matched;
   6451 	      }
   6452 
   6453             /* match with collating_symbol?  */
   6454 # ifdef _LIBC
   6455 	    if (nrules != 0)
   6456 	      {
   6457 		const unsigned char *extra = (const unsigned char *)
   6458 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   6459 
   6460 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
   6461 		     workp++)
   6462 		  {
   6463 		    int32_t *wextra;
   6464 		    wextra = (int32_t*)(extra + *workp++);
   6465 		    for (i = 0; i < *wextra; ++i)
   6466 		      if (TRANSLATE(d[i]) != wextra[1 + i])
   6467 			break;
   6468 
   6469 		    if (i == *wextra)
   6470 		      {
   6471 			/* Update d, however d will be incremented at
   6472 			   char_set_matched:, we decrement d here.  */
   6473 			d += i - 1;
   6474 			goto char_set_matched;
   6475 		      }
   6476 		  }
   6477 	      }
   6478 	    else /* (nrules == 0) */
   6479 # endif
   6480 	      /* If we can't look up collation data, we use wcscoll
   6481 		 instead.  */
   6482 	      {
   6483 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
   6484 		  {
   6485 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6486 		    length = wcslen (workp);
   6487 
   6488 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6489 		       any substring of the string never match with the
   6490 		       collating symbol.  */
   6491 		    if (wcscoll (workp, d) > 0)
   6492 		      {
   6493 			workp += length + 1;
   6494 			continue;
   6495 		      }
   6496 
   6497 		    /* First, we compare the collating symbol with
   6498 		       the first character of the string.
   6499 		       If it don't match, we add the next character to
   6500 		       the compare buffer in turn.  */
   6501 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
   6502 		      {
   6503 			int match;
   6504 			if (d == dend)
   6505 			  {
   6506 			    if (dend == end_match_2)
   6507 			      break;
   6508 			    d = string2;
   6509 			    dend = end_match_2;
   6510 			  }
   6511 
   6512 			/* add next character to the compare buffer.  */
   6513 			str_buf[i] = TRANSLATE(*d);
   6514 			str_buf[i+1] = '\0';
   6515 
   6516 			match = wcscoll (workp, str_buf);
   6517 			if (match == 0)
   6518 			  goto char_set_matched;
   6519 
   6520 			if (match < 0)
   6521 			  /* (str_buf > workp) indicate (str_buf + X > workp),
   6522 			     because for all X (str_buf + X > str_buf).
   6523 			     So we don't need continue this loop.  */
   6524 			  break;
   6525 
   6526 			/* Otherwise(str_buf < workp),
   6527 			   (str_buf+next_character) may equals (workp).
   6528 			   So we continue this loop.  */
   6529 		      }
   6530 		    /* not matched */
   6531 		    d = backup_d;
   6532 		    dend = backup_dend;
   6533 		    workp += length + 1;
   6534 		  }
   6535               }
   6536             /* match with equivalence_class?  */
   6537 # ifdef _LIBC
   6538 	    if (nrules != 0)
   6539 	      {
   6540                 const CHAR_T *backup_d = d, *backup_dend = dend;
   6541 		/* Try to match the equivalence class against
   6542 		   those known to the collate implementation.  */
   6543 		const int32_t *table;
   6544 		const int32_t *weights;
   6545 		const int32_t *extra;
   6546 		const int32_t *indirect;
   6547 		int32_t idx, idx2;
   6548 		wint_t *cp;
   6549 		size_t len;
   6550 
   6551 		/* This #include defines a local function!  */
   6552 #  include <locale/weightwc.h>
   6553 
   6554 		table = (const int32_t *)
   6555 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
   6556 		weights = (const wint_t *)
   6557 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
   6558 		extra = (const wint_t *)
   6559 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
   6560 		indirect = (const int32_t *)
   6561 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
   6562 
   6563 		/* Write 1 collating element to str_buf, and
   6564 		   get its index.  */
   6565 		idx2 = 0;
   6566 
   6567 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
   6568 		  {
   6569 		    cp = (wint_t*)str_buf;
   6570 		    if (d == dend)
   6571 		      {
   6572 			if (dend == end_match_2)
   6573 			  break;
   6574 			d = string2;
   6575 			dend = end_match_2;
   6576 		      }
   6577 		    str_buf[i] = TRANSLATE(*(d+i));
   6578 		    str_buf[i+1] = '\0'; /* sentinel */
   6579 		    idx2 = findidx ((const wint_t**)&cp);
   6580 		  }
   6581 
   6582 		/* Update d; since d will be incremented again after
   6583 		   char_set_matched:, back it off by one here.  */
   6584 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
   6585 		if (d >= dend)
   6586 		  {
   6587 		    if (dend == end_match_2)
   6588 			d = dend;
   6589 		    else
   6590 		      {
   6591 			d = string2;
   6592 			dend = end_match_2;
   6593 		      }
   6594 		  }
   6595 
   6596 		len = weights[idx2];
   6597 
   6598 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
   6599 		     workp++)
   6600 		  {
   6601 		    idx = (int32_t)*workp;
   6602 		    /* We already checked idx != 0 in regex_compile. */
   6603 
   6604 		    if (idx2 != 0 && len == weights[idx])
   6605 		      {
   6606 			int cnt = 0;
   6607 			while (cnt < len && (weights[idx + 1 + cnt]
   6608 					     == weights[idx2 + 1 + cnt]))
   6609 			  ++cnt;
   6610 
   6611 			if (cnt == len)
   6612 			  goto char_set_matched;
   6613 		      }
   6614 		  }
   6615 		/* not matched */
   6616                 d = backup_d;
   6617                 dend = backup_dend;
   6618 	      }
   6619 	    else /* (nrules == 0) */
   6620 # endif
   6621 	      /* If we can't look up collation data, we use wcscoll
   6622 		 instead.  */
   6623 	      {
   6624 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
   6625 		  {
   6626 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6627 		    length = wcslen (workp);
   6628 
   6629 		    /* If wcscoll(the equivalence class, whole string) > 0,
   6630 		       no substring of the string can ever match the
   6631 		       equivalence class.  */
   6632 		    if (wcscoll (workp, d) > 0)
   6633 		      {
   6634 			workp += length + 1;
   6635 			break;
   6636 		      }
   6637 
   6638 		    /* First, we compare the equivalence class with
   6639 		       the first character of the string.
   6640 		       If it doesn't match, we add the next character to
   6641 		       the compare buffer in turn.  */
   6642 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
   6643 		      {
   6644 			int match;
   6645 			if (d == dend)
   6646 			  {
   6647 			    if (dend == end_match_2)
   6648 			      break;
   6649 			    d = string2;
   6650 			    dend = end_match_2;
   6651 			  }
   6652 
   6653 			/* add next character to the compare buffer.  */
   6654 			str_buf[i] = TRANSLATE(*d);
   6655 			str_buf[i+1] = '\0';
   6656 
   6657 			match = wcscoll (workp, str_buf);
   6658 
   6659 			if (match == 0)
   6660 			  goto char_set_matched;
   6661 
   6662 			if (match < 0)
   6663 			/* (str_buf > workp) implies (str_buf + X > workp),
   6664 			   because for all X (str_buf + X > str_buf).
   6665 			   So we don't need to continue this loop.  */
   6666 			  break;
   6667 
   6668 			/* Otherwise(str_buf < workp),
   6669 			   (str_buf+next_character) may equal (workp).
   6670 			   So we continue this loop.  */
   6671 		      }
   6672 		    /* not matched */
   6673 		    d = backup_d;
   6674 		    dend = backup_dend;
   6675 		    workp += length + 1;
   6676 		  }
   6677 	      }
   6678 
   6679             /* match with char_range?  */
   6680 # ifdef _LIBC
   6681 	    if (nrules != 0)
   6682 	      {
   6683 		uint32_t collseqval;
   6684 		const char *collseq = (const char *)
   6685 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   6686 
   6687 		collseqval = collseq_table_lookup (collseq, c);
   6688 
   6689 		for (; workp < p - chars_length ;)
   6690 		  {
   6691 		    uint32_t start_val, end_val;
   6692 
   6693 		    /* We have already computed the collation sequence values
   6694 		       of the characters (or collating symbols).  */
   6695 		    start_val = (uint32_t) *workp++; /* range_start */
   6696 		    end_val = (uint32_t) *workp++; /* range_end */
   6697 
   6698 		    if (start_val <= collseqval && collseqval <= end_val)
   6699 		      goto char_set_matched;
   6700 		  }
   6701 	      }
   6702 	    else
   6703 # endif
   6704 	      {
   6705 		/* We set range_start_char at str_buf[0], range_end_char
   6706 		   at str_buf[4], and compared char at str_buf[2].  */
   6707 		str_buf[1] = 0;
   6708 		str_buf[2] = c;
   6709 		str_buf[3] = 0;
   6710 		str_buf[5] = 0;
   6711 		for (; workp < p - chars_length ;)
   6712 		  {
   6713 		    wchar_t *range_start_char, *range_end_char;
   6714 
   6715 		    /* match if (range_start_char <= c <= range_end_char).  */
   6716 
   6717 		    /* If range_start(or end) < 0, we assume -range_start(end)
   6718 		       is the offset of the collating symbol which is specified
   6719 		       as the character of the range start(end).  */
   6720 
   6721 		    /* range_start */
   6722 		    if (*workp < 0)
   6723 		      range_start_char = charset_top - (*workp++);
   6724 		    else
   6725 		      {
   6726 			str_buf[0] = *workp++;
   6727 			range_start_char = str_buf;
   6728 		      }
   6729 
   6730 		    /* range_end */
   6731 		    if (*workp < 0)
   6732 		      range_end_char = charset_top - (*workp++);
   6733 		    else
   6734 		      {
   6735 			str_buf[4] = *workp++;
   6736 			range_end_char = str_buf + 4;
   6737 		      }
   6738 
   6739 		    if (wcscoll (range_start_char, str_buf+2) <= 0
   6740 			&& wcscoll (str_buf+2, range_end_char) <= 0)
   6741 		      goto char_set_matched;
   6742 		  }
   6743 	      }
   6744 
   6745             /* match with char?  */
   6746 	    for (; workp < p ; workp++)
   6747 	      if (c == *workp)
   6748 		goto char_set_matched;
   6749 
   6750 	    not = !not;
   6751 
   6752 	  char_set_matched:
   6753 	    if (not) goto fail;
   6754 #else
   6755             /* Cast to `unsigned' instead of `unsigned char' in case the
   6756                bit list is a full 32 bytes long.  */
   6757 	    if (c < (unsigned) (*p * BYTEWIDTH)
   6758 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   6759 	      not = !not;
   6760 
   6761 	    p += 1 + *p;
   6762 
   6763 	    if (!not) goto fail;
   6764 #undef WORK_BUFFER_SIZE
   6765 #endif /* WCHAR */
   6766 	    SET_REGS_MATCHED ();
   6767             d++;
   6768 	    NEXT;
   6769 	  }
   6770 
   6771 
   6772         /* The beginning of a group is represented by start_memory.
   6773            The arguments are the register number in the next byte, and the
   6774            number of groups inner to this one in the next.  The text
   6775            matched within the group is recorded (in the internal
   6776            registers data structure) under the register number.  */
   6777         CASE (start_memory):
   6778 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
   6779 			(long int) *p, (long int) p[1]);
   6780 
   6781           /* Find out if this group can match the empty string.  */
   6782 	  p1 = p;		/* To send to group_match_null_string_p.  */
   6783 
   6784           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
   6785             REG_MATCH_NULL_STRING_P (reg_info[*p])
   6786               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
   6787 
   6788           /* Save the position in the string where we were the last time
   6789              we were at this open-group operator in case the group is
   6790              operated upon by a repetition operator, e.g., with `(a*)*b'
   6791              against `ab'; then we want to ignore where we are now in
   6792              the string in case this attempt to match fails.  */
   6793           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6794                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
   6795                              : regstart[*p];
   6796 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
   6797 			 POINTER_TO_OFFSET (old_regstart[*p]));
   6798 
   6799           regstart[*p] = d;
   6800 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
   6801 
   6802           IS_ACTIVE (reg_info[*p]) = 1;
   6803           MATCHED_SOMETHING (reg_info[*p]) = 0;
   6804 
   6805 	  /* Clear this whenever we change the register activity status.  */
   6806 	  set_regs_matched_done = 0;
   6807 
   6808           /* This is the new highest active register.  */
   6809           highest_active_reg = *p;
   6810 
   6811           /* If nothing was active before, this is the new lowest active
   6812              register.  */
   6813           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6814             lowest_active_reg = *p;
   6815 
   6816           /* Move past the register number and inner group count.  */
   6817           p += 2;
   6818 	  just_past_start_mem = p;
   6819 
   6820           NEXT;
   6821 
   6822 
   6823         /* The stop_memory opcode represents the end of a group.  Its
   6824            arguments are the same as start_memory's: the register
   6825            number, and the number of inner groups.  */
   6826 	CASE (stop_memory):
   6827 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
   6828 			(long int) *p, (long int) p[1]);
   6829 
   6830           /* We need to save the string position the last time we were at
   6831              this close-group operator in case the group is operated
   6832              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
   6833              against `aba'; then we want to ignore where we are now in
   6834              the string in case this attempt to match fails.  */
   6835           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6836                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
   6837 			   : regend[*p];
   6838 	  DEBUG_PRINT2 ("      old_regend: %d\n",
   6839 			 POINTER_TO_OFFSET (old_regend[*p]));
   6840 
   6841           regend[*p] = d;
   6842 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
   6843 
   6844           /* This register isn't active anymore.  */
   6845           IS_ACTIVE (reg_info[*p]) = 0;
   6846 
   6847 	  /* Clear this whenever we change the register activity status.  */
   6848 	  set_regs_matched_done = 0;
   6849 
   6850           /* If this was the only register active, nothing is active
   6851              anymore.  */
   6852           if (lowest_active_reg == highest_active_reg)
   6853             {
   6854               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6855               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6856             }
   6857           else
   6858             { /* We must scan for the new highest active register, since
   6859                  it isn't necessarily one less than now: consider
   6860                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
   6861                  new highest active register is 1.  */
   6862               UCHAR_T r = *p - 1;
   6863               while (r > 0 && !IS_ACTIVE (reg_info[r]))
   6864                 r--;
   6865 
   6866               /* If we end up at register zero, that means that we saved
   6867                  the registers as the result of an `on_failure_jump', not
   6868                  a `start_memory', and we jumped past the innermost
   6869                  `stop_memory'.  For example, in ((.)*) we save
   6870                  registers 1 and 2 as a result of the *, but when we pop
   6871                  back to the second ), we are at the stop_memory 1.
   6872                  Thus, nothing is active.  */
   6873 	      if (r == 0)
   6874                 {
   6875                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6876                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6877                 }
   6878               else
   6879                 highest_active_reg = r;
   6880             }
   6881 
   6882           /* If we just failed to match something this time around with a
   6883              group that's operated on by a repetition operator, try to
   6884              force exit from the ``loop'', and restore the register
   6885              information for this group that we had before trying this
   6886              last match.  */
   6887           if ((!MATCHED_SOMETHING (reg_info[*p])
   6888                || just_past_start_mem == p - 1)
   6889 	      && (p + 2) < pend)
   6890             {
   6891               boolean is_a_jump_n = false;
   6892 
   6893               p1 = p + 2;
   6894               mcnt = 0;
   6895               switch ((re_opcode_t) *p1++)
   6896                 {
   6897                   case jump_n:
   6898 		    is_a_jump_n = true;
   6899                   case pop_failure_jump:
   6900 		  case maybe_pop_jump:
   6901 		  case jump:
   6902 		  case dummy_failure_jump:
   6903                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6904 		    if (is_a_jump_n)
   6905 		      p1 += OFFSET_ADDRESS_SIZE;
   6906                     break;
   6907 
   6908                   default:
   6909                     /* do nothing */ ;
   6910                 }
   6911 	      p1 += mcnt;
   6912 
   6913               /* If the next operation is a jump backwards in the pattern
   6914 	         to an on_failure_jump right before the start_memory
   6915                  corresponding to this stop_memory, exit from the loop
   6916                  by forcing a failure after pushing on the stack the
   6917                  on_failure_jump's jump in the pattern, and d.  */
   6918               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
   6919                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
   6920 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
   6921 		{
   6922                   /* If this group ever matched anything, then restore
   6923                      what its registers were before trying this last
   6924                      failed match, e.g., with `(a*)*b' against `ab' for
   6925                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
   6926                      against `aba' for regend[3].
   6927 
   6928                      Also restore the registers for inner groups for,
   6929                      e.g., `((a*)(b*))*' against `aba' (register 3 would
   6930                      otherwise get trashed).  */
   6931 
   6932                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
   6933 		    {
   6934 		      unsigned r;
   6935 
   6936                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
   6937 
   6938 		      /* Restore this and inner groups' (if any) registers.  */
   6939                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
   6940 			   r++)
   6941                         {
   6942                           regstart[r] = old_regstart[r];
   6943 
   6944                           /* xx why this test?  */
   6945                           if (old_regend[r] >= regstart[r])
   6946                             regend[r] = old_regend[r];
   6947                         }
   6948                     }
   6949 		  p1++;
   6950                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6951                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
   6952 
   6953                   goto fail;
   6954                 }
   6955             }
   6956 
   6957           /* Move past the register number and the inner group count.  */
   6958           p += 2;
   6959           NEXT;
   6960 
   6961 
   6962 	/* \<digit> has been turned into a `duplicate' command which is
   6963            followed by the numeric value of <digit> as the register number.  */
   6964         CASE (duplicate):
   6965 	  {
   6966 	    register const CHAR_T *d2, *dend2;
   6967 	    int regno = *p++;   /* Get which register to match against.  */
   6968 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
   6969 
   6970 	    /* Can't back reference a group which we've never matched.  */
   6971             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
   6972               goto fail;
   6973 
   6974             /* Where in input to try to start matching.  */
   6975             d2 = regstart[regno];
   6976 
   6977             /* Where to stop matching; if both the place to start and
   6978                the place to stop matching are in the same string, then
   6979                set to the place to stop, otherwise, for now have to use
   6980                the end of the first string.  */
   6981 
   6982             dend2 = ((FIRST_STRING_P (regstart[regno])
   6983 		      == FIRST_STRING_P (regend[regno]))
   6984 		     ? regend[regno] : end_match_1);
   6985 	    for (;;)
   6986 	      {
   6987 		/* If necessary, advance to next segment in register
   6988                    contents.  */
   6989 		while (d2 == dend2)
   6990 		  {
   6991 		    if (dend2 == end_match_2) break;
   6992 		    if (dend2 == regend[regno]) break;
   6993 
   6994                     /* End of string1 => advance to string2. */
   6995                     d2 = string2;
   6996                     dend2 = regend[regno];
   6997 		  }
   6998 		/* At end of register contents => success */
   6999 		if (d2 == dend2) break;
   7000 
   7001 		/* If necessary, advance to next segment in data.  */
   7002 		PREFETCH ();
   7003 
   7004 		/* How many characters left in this segment to match.  */
   7005 		mcnt = dend - d;
   7006 
   7007 		/* Want how many consecutive characters we can match in
   7008                    one shot, so, if necessary, adjust the count.  */
   7009                 if (mcnt > dend2 - d2)
   7010 		  mcnt = dend2 - d2;
   7011 
   7012 		/* Compare that many; failure if mismatch, else move
   7013                    past them.  */
   7014 		if (translate
   7015                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
   7016                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
   7017 		  goto fail;
   7018 		d += mcnt, d2 += mcnt;
   7019 
   7020 		/* Do this because we've matched some characters.  */
   7021 		SET_REGS_MATCHED ();
   7022 	      }
   7023 	  }
   7024 	  NEXT;
   7025 
   7026 
   7027         /* begline matches the empty string at the beginning of the string
   7028            (unless `not_bol' is set in `bufp'), and, if
   7029            `newline_anchor' is set, after newlines.  */
   7030 	CASE (begline):
   7031           DEBUG_PRINT1 ("EXECUTING begline.\n");
   7032 
   7033           if (AT_STRINGS_BEG (d))
   7034             {
   7035               if (!bufp->not_bol)
   7036 		{
   7037 		  NEXT;
   7038 		}
   7039             }
   7040           else if (d[-1] == '\n' && bufp->newline_anchor)
   7041             {
   7042               NEXT;
   7043             }
   7044           /* In all other cases, we fail.  */
   7045           goto fail;
   7046 
   7047 
   7048         /* endline is the dual of begline.  */
   7049 	CASE (endline):
   7050           DEBUG_PRINT1 ("EXECUTING endline.\n");
   7051 
   7052           if (AT_STRINGS_END (d))
   7053             {
   7054               if (!bufp->not_eol)
   7055 		{
   7056 		  NEXT;
   7057 		}
   7058             }
   7059 
   7060           /* We have to ``prefetch'' the next character.  */
   7061           else if ((d == end1 ? *string2 : *d) == '\n'
   7062                    && bufp->newline_anchor)
   7063             {
   7064               NEXT;
   7065             }
   7066           goto fail;
   7067 
   7068 
   7069 	/* Match at the very beginning of the data.  */
   7070         CASE (begbuf):
   7071           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
   7072           if (AT_STRINGS_BEG (d))
   7073 	    {
   7074 	      NEXT;
   7075 	    }
   7076           goto fail;
   7077 
   7078 
   7079 	/* Match at the very end of the data.  */
   7080         CASE (endbuf):
   7081           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
   7082 	  if (AT_STRINGS_END (d))
   7083 	    {
   7084 	      NEXT;
   7085 	    }
   7086           goto fail;
   7087 
   7088 
   7089         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
   7090            pushes NULL as the value for the string on the stack.  Then
   7091            `pop_failure_point' will keep the current value for the
   7092            string, instead of restoring it.  To see why, consider
   7093            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
   7094            then the . fails against the \n.  But the next thing we want
   7095            to do is match the \n against the \n; if we restored the
   7096            string value, we would be back at the foo.
   7097 
   7098            Because this is used only in specific cases, we don't need to
   7099            check all the things that `on_failure_jump' does, to make
   7100            sure the right things get saved on the stack.  Hence we don't
   7101            share its code.  The only reason to push anything on the
   7102            stack at all is that otherwise we would have to change
   7103            `anychar's code to do something besides goto fail in this
   7104            case; that seems worse than this.  */
   7105         CASE (on_failure_keep_string_jump):
   7106           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
   7107 
   7108           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7109 #ifdef _LIBC
   7110           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
   7111 #else
   7112           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
   7113 #endif
   7114 
   7115           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
   7116           NEXT;
   7117 
   7118 
   7119 	/* Uses of on_failure_jump:
   7120 
   7121            Each alternative starts with an on_failure_jump that points
   7122            to the beginning of the next alternative.  Each alternative
   7123            except the last ends with a jump that in effect jumps past
   7124            the rest of the alternatives.  (They really jump to the
   7125            ending jump of the following alternative, because tensioning
   7126            these jumps is a hassle.)
   7127 
   7128            Repeats start with an on_failure_jump that points past both
   7129            the repetition text and either the following jump or
   7130            pop_failure_jump back to this on_failure_jump.  */
   7131 	CASE (on_failure_jump):
   7132         on_failure:
   7133           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
   7134 
   7135           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7136 #ifdef _LIBC
   7137           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
   7138 #else
   7139           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
   7140 #endif
   7141 
   7142           /* If this on_failure_jump comes right before a group (i.e.,
   7143              the original * applied to a group), save the information
   7144              for that group and all inner ones, so that if we fail back
   7145              to this point, the group's information will be correct.
   7146              For example, in \(a*\)*\1, we need the preceding group,
   7147              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
   7148 
   7149           /* We can't use `p' to check ahead because we push
   7150              a failure point to `p + mcnt' after we do this.  */
   7151           p1 = p;
   7152 
   7153           /* We need to skip no_op's before we look for the
   7154              start_memory in case this on_failure_jump is happening as
   7155              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
   7156              against aba.  */
   7157           while (p1 < pend && (re_opcode_t) *p1 == no_op)
   7158             p1++;
   7159 
   7160           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
   7161             {
   7162               /* We have a new highest active register now.  This will
   7163                  get reset at the start_memory we are about to get to,
   7164                  but we will have saved all the registers relevant to
   7165                  this repetition op, as described above.  */
   7166               highest_active_reg = *(p1 + 1) + *(p1 + 2);
   7167               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   7168                 lowest_active_reg = *(p1 + 1);
   7169             }
   7170 
   7171           DEBUG_PRINT1 (":\n");
   7172           PUSH_FAILURE_POINT (p + mcnt, d, -2);
   7173           NEXT;
   7174 
   7175 
   7176         /* A smart repeat ends with `maybe_pop_jump'.
   7177 	   We change it to either `pop_failure_jump' or `jump'.  */
   7178         CASE (maybe_pop_jump):
   7179           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7180           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
   7181           {
   7182 	    register UCHAR_T *p2 = p;
   7183 
   7184             /* Compare the beginning of the repeat with what in the
   7185                pattern follows its end. If we can establish that there
   7186                is nothing that they would both match, i.e., nothing
   7187                that would force us to backtrack (as in, e.g., `a*a'),
   7188                then we can change to pop_failure_jump, because we'll
   7189                never have to backtrack.
   7190 
   7191                This is not true in the case of alternatives: in
   7192                `(a|ab)*' we do need to backtrack to the `ab' alternative
   7193                (e.g., if the string was `ab').  But instead of trying to
   7194                detect that here, the alternative has put on a dummy
   7195                failure point which is what we will end up popping.  */
   7196 
   7197 	    /* Skip over open/close-group commands.
   7198 	       If what follows this loop is a ...+ construct,
   7199 	       look at what begins its body, since we will have to
   7200 	       match at least one of that.  */
   7201 	    while (1)
   7202 	      {
   7203 		if (p2 + 2 < pend
   7204 		    && ((re_opcode_t) *p2 == stop_memory
   7205 			|| (re_opcode_t) *p2 == start_memory))
   7206 		  p2 += 3;
   7207 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
   7208 			 && (re_opcode_t) *p2 == dummy_failure_jump)
   7209 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
   7210 		else
   7211 		  break;
   7212 	      }
   7213 
   7214 	    p1 = p + mcnt;
   7215 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
   7216 	       to the `maybe_pop_jump' of this case.  Examine what
   7217 	       follows.  */
   7218 
   7219             /* If we're at the end of the pattern, we can change.  */
   7220             if (p2 == pend)
   7221 	      {
   7222 		/* Consider what happens when matching ":\(.*\)"
   7223 		   against ":/".  I don't really understand this code
   7224 		   yet.  */
   7225   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7226 		  pop_failure_jump;
   7227                 DEBUG_PRINT1
   7228                   ("  End of pattern: change to `pop_failure_jump'.\n");
   7229               }
   7230 
   7231             else if ((re_opcode_t) *p2 == exactn
   7232 #ifdef MBS_SUPPORT
   7233 		     || (re_opcode_t) *p2 == exactn_bin
   7234 #endif
   7235 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
   7236 	      {
   7237 		register UCHAR_T c
   7238                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
   7239 
   7240                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
   7241 #ifdef MBS_SUPPORT
   7242 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
   7243 #endif
   7244 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
   7245                   {
   7246   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7247 		      pop_failure_jump;
   7248 #ifdef WCHAR
   7249 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
   7250 				    (wint_t) c,
   7251 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
   7252 #else
   7253 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   7254 				    (char) c,
   7255 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
   7256 #endif
   7257                   }
   7258 
   7259 #ifndef WCHAR
   7260 		else if ((re_opcode_t) p1[3] == charset
   7261 			 || (re_opcode_t) p1[3] == charset_not)
   7262 		  {
   7263 		    int not = (re_opcode_t) p1[3] == charset_not;
   7264 
   7265 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
   7266 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   7267 		      not = !not;
   7268 
   7269                     /* `not' is equal to 1 if c would match, which means
   7270                         that we can't change to pop_failure_jump.  */
   7271 		    if (!not)
   7272                       {
   7273   		        p[-3] = (unsigned char) pop_failure_jump;
   7274                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7275                       }
   7276 		  }
   7277 #endif /* not WCHAR */
   7278 	      }
   7279 #ifndef WCHAR
   7280             else if ((re_opcode_t) *p2 == charset)
   7281 	      {
   7282 		/* We win if the first character of the loop is not part
   7283                    of the charset.  */
   7284                 if ((re_opcode_t) p1[3] == exactn
   7285  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
   7286  			  && (p2[2 + p1[5] / BYTEWIDTH]
   7287  			      & (1 << (p1[5] % BYTEWIDTH)))))
   7288 		  {
   7289 		    p[-3] = (unsigned char) pop_failure_jump;
   7290 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7291                   }
   7292 
   7293 		else if ((re_opcode_t) p1[3] == charset_not)
   7294 		  {
   7295 		    int idx;
   7296 		    /* We win if the charset_not inside the loop
   7297 		       lists every character listed in the charset after.  */
   7298 		    for (idx = 0; idx < (int) p2[1]; idx++)
   7299 		      if (! (p2[2 + idx] == 0
   7300 			     || (idx < (int) p1[4]
   7301 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
   7302 			break;
   7303 
   7304 		    if (idx == p2[1])
   7305                       {
   7306   		        p[-3] = (unsigned char) pop_failure_jump;
   7307                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7308                       }
   7309 		  }
   7310 		else if ((re_opcode_t) p1[3] == charset)
   7311 		  {
   7312 		    int idx;
   7313 		    /* We win if the charset inside the loop
   7314 		       has no overlap with the one after the loop.  */
   7315 		    for (idx = 0;
   7316 			 idx < (int) p2[1] && idx < (int) p1[4];
   7317 			 idx++)
   7318 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
   7319 			break;
   7320 
   7321 		    if (idx == p2[1] || idx == p1[4])
   7322                       {
   7323   		        p[-3] = (unsigned char) pop_failure_jump;
   7324                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7325                       }
   7326 		  }
   7327 	      }
   7328 #endif /* not WCHAR */
   7329 	  }
   7330 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
   7331 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
   7332 	    {
   7333 	      p[-1] = (UCHAR_T) jump;
   7334               DEBUG_PRINT1 ("  Match => jump.\n");
   7335 	      goto unconditional_jump;
   7336 	    }
   7337         /* Note fall through.  */
   7338 
   7339 
   7340 	/* The end of a simple repeat has a pop_failure_jump back to
   7341            its matching on_failure_jump, where the latter will push a
   7342            failure point.  The pop_failure_jump takes off failure
   7343            points put on by this pop_failure_jump's matching
   7344            on_failure_jump; we got through the pattern to here from the
   7345            matching on_failure_jump, so didn't fail.  */
   7346         CASE (pop_failure_jump):
   7347           {
   7348             /* We need to pass separate storage for the lowest and
   7349                highest registers, even though we don't care about the
   7350                actual values.  Otherwise, we will restore only one
   7351                register from the stack, since lowest will == highest in
   7352                `pop_failure_point'.  */
   7353             active_reg_t dummy_low_reg, dummy_high_reg;
   7354             UCHAR_T *pdummy = NULL;
   7355             const CHAR_T *sdummy = NULL;
   7356 
   7357             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
   7358             POP_FAILURE_POINT (sdummy, pdummy,
   7359                                dummy_low_reg, dummy_high_reg,
   7360                                reg_dummy, reg_dummy, reg_info_dummy);
   7361           }
   7362 	  /* Note fall through.  */
   7363 
   7364 	unconditional_jump:
   7365 #ifdef _LIBC
   7366 	  DEBUG_PRINT2 ("\n%p: ", p);
   7367 #else
   7368 	  DEBUG_PRINT2 ("\n0x%x: ", p);
   7369 #endif
   7370           /* Note fall through.  */
   7371 
   7372         /* Unconditionally jump (without popping any failure points).  */
   7373         CASE (jump):
   7374 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
   7375           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
   7376 	  p += mcnt;				/* Do the jump.  */
   7377 #ifdef _LIBC
   7378           DEBUG_PRINT2 ("(to %p).\n", p);
   7379 #else
   7380           DEBUG_PRINT2 ("(to 0x%x).\n", p);
   7381 #endif
   7382 	  NEXT;
   7383 
   7384 
   7385         /* We need this opcode so we can detect where alternatives end
   7386            in `group_match_null_string_p' et al.  */
   7387         CASE (jump_past_alt):
   7388           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
   7389           goto unconditional_jump;
   7390 
   7391 
   7392         /* Normally, the on_failure_jump pushes a failure point, which
   7393            then gets popped at pop_failure_jump.  We will end up at
   7394            pop_failure_jump, also, and with a pattern of, say, `a+', we
   7395            are skipping over the on_failure_jump, so we have to push
   7396            something meaningless for pop_failure_jump to pop.  */
   7397         CASE (dummy_failure_jump):
   7398           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
   7399           /* It doesn't matter what we push for the string here.  What
   7400              the code at `fail' tests is the value for the pattern.  */
   7401           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7402           goto unconditional_jump;
   7403 
   7404 
   7405         /* At the end of an alternative, we need to push a dummy failure
   7406            point in case we are followed by a `pop_failure_jump', because
   7407            we don't want the failure point for the alternative to be
   7408            popped.  For example, matching `(a|ab)*' against `aab'
   7409            requires that we match the `ab' alternative.  */
   7410         CASE (push_dummy_failure):
   7411           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
   7412           /* See comments just above at `dummy_failure_jump' about the
   7413              two zeroes.  */
   7414           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7415           NEXT;
   7416 
   7417         /* Have to succeed matching what follows at least n times.
   7418            After that, handle like `on_failure_jump'.  */
   7419         CASE (succeed_n):
   7420           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7421           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
   7422 
   7423           assert (mcnt >= 0);
   7424           /* Originally, this is how many times we HAVE to succeed.  */
   7425           if (mcnt > 0)
   7426             {
   7427                mcnt--;
   7428 	       p += OFFSET_ADDRESS_SIZE;
   7429                STORE_NUMBER_AND_INCR (p, mcnt);
   7430 #ifdef _LIBC
   7431                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
   7432 			     , mcnt);
   7433 #else
   7434                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
   7435 			     , mcnt);
   7436 #endif
   7437             }
   7438 	  else if (mcnt == 0)
   7439             {
   7440 #ifdef _LIBC
   7441               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
   7442 			    p + OFFSET_ADDRESS_SIZE);
   7443 #else
   7444               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
   7445 			    p + OFFSET_ADDRESS_SIZE);
   7446 #endif /* _LIBC */
   7447 
   7448 #ifdef WCHAR
   7449 	      p[1] = (UCHAR_T) no_op;
   7450 #else
   7451 	      p[2] = (UCHAR_T) no_op;
   7452               p[3] = (UCHAR_T) no_op;
   7453 #endif /* WCHAR */
   7454               goto on_failure;
   7455             }
   7456           NEXT;
   7457 
   7458         CASE (jump_n):
   7459           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7460           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
   7461 
   7462           /* Originally, this is how many times we CAN jump.  */
   7463           if (mcnt)
   7464             {
   7465                mcnt--;
   7466                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
   7467 
   7468 #ifdef _LIBC
   7469                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7470 			     mcnt);
   7471 #else
   7472                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7473 			     mcnt);
   7474 #endif /* _LIBC */
   7475 	       goto unconditional_jump;
   7476             }
   7477           /* If don't have to jump any more, skip over the rest of command.  */
   7478 	  else
   7479 	    p += 2 * OFFSET_ADDRESS_SIZE;
   7480           NEXT;
   7481 
   7482 	CASE (set_number_at):
   7483 	  {
   7484             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
   7485 
   7486             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7487             p1 = p + mcnt;
   7488             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7489 #ifdef _LIBC
   7490             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
   7491 #else
   7492             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
   7493 #endif
   7494 	    STORE_NUMBER (p1, mcnt);
   7495             NEXT;
   7496           }
   7497 
   7498 #if 0
   7499 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
   7500 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   7501 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
   7502 	   macro and introducing temporary variables works around the bug.  */
   7503 
   7504 	CASE (wordbound):
   7505 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7506 	  if (AT_WORD_BOUNDARY (d))
   7507 	    {
   7508 	      NEXT;
   7509 	    }
   7510 	  goto fail;
   7511 
   7512 	CASE (notwordbound):
   7513 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7514 	  if (AT_WORD_BOUNDARY (d))
   7515 	    goto fail;
   7516 	  NEXT;
   7517 #else
   7518 	CASE (wordbound):
   7519 	{
   7520 	  boolean prevchar, thischar;
   7521 
   7522 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7523 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7524 	    {
   7525 	      NEXT;
   7526 	    }
   7527 
   7528 	  prevchar = WORDCHAR_P (d - 1);
   7529 	  thischar = WORDCHAR_P (d);
   7530 	  if (prevchar != thischar)
   7531 	    {
   7532 	      NEXT;
   7533 	    }
   7534 	  goto fail;
   7535 	}
   7536 
   7537       CASE (notwordbound):
   7538 	{
   7539 	  boolean prevchar, thischar;
   7540 
   7541 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7542 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7543 	    goto fail;
   7544 
   7545 	  prevchar = WORDCHAR_P (d - 1);
   7546 	  thischar = WORDCHAR_P (d);
   7547 	  if (prevchar != thischar)
   7548 	    goto fail;
   7549 	  NEXT;
   7550 	}
   7551 #endif
   7552 
   7553 	CASE (wordbeg):
   7554           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
   7555 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
   7556 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
   7557 	    {
   7558 	      NEXT;
   7559 	    }
   7560           goto fail;
   7561 
   7562 	CASE (wordend):
   7563           DEBUG_PRINT1 ("EXECUTING wordend.\n");
   7564 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
   7565               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
   7566 	    {
   7567 	      NEXT;
   7568 	    }
   7569           goto fail;
   7570 
   7571 #ifdef emacs
   7572   	CASE (before_dot):
   7573           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
   7574  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
   7575   	    goto fail;
   7576   	  NEXT;
   7577 
   7578   	CASE (at_dot):
   7579           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
   7580  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
   7581   	    goto fail;
   7582   	  NEXT;
   7583 
   7584   	CASE (after_dot):
   7585           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
   7586           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
   7587   	    goto fail;
   7588   	  NEXT;
   7589 
   7590 	CASE (syntaxspec):
   7591           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
   7592 	  mcnt = *p++;
   7593 	  goto matchsyntax;
   7594 
   7595         CASE (wordchar):
   7596           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
   7597 	  mcnt = (int) Sword;
   7598         matchsyntax:
   7599 	  PREFETCH ();
   7600 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7601 	  d++;
   7602 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
   7603 	    goto fail;
   7604           SET_REGS_MATCHED ();
   7605 	  NEXT;
   7606 
   7607 	CASE (notsyntaxspec):
   7608           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
   7609 	  mcnt = *p++;
   7610 	  goto matchnotsyntax;
   7611 
   7612         CASE (notwordchar):
   7613           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
   7614 	  mcnt = (int) Sword;
   7615         matchnotsyntax:
   7616 	  PREFETCH ();
   7617 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7618 	  d++;
   7619 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
   7620 	    goto fail;
   7621 	  SET_REGS_MATCHED ();
   7622           NEXT;
   7623 
   7624 #else /* not emacs */
   7625 	CASE (wordchar):
   7626           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
   7627 	  PREFETCH ();
   7628           if (!WORDCHAR_P (d))
   7629             goto fail;
   7630 	  SET_REGS_MATCHED ();
   7631           d++;
   7632 	  NEXT;
   7633 
   7634 	CASE (notwordchar):
   7635           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
   7636 	  PREFETCH ();
   7637 	  if (WORDCHAR_P (d))
   7638             goto fail;
   7639           SET_REGS_MATCHED ();
   7640           d++;
   7641 	  NEXT;
   7642 #endif /* not emacs */
   7643 
   7644 #ifndef __GNUC__
   7645         default:
   7646           abort ();
   7647 	}
   7648       continue;  /* Successfully executed one pattern command; keep going.  */
   7649 #endif
   7650 
   7651 
   7652     /* We goto here if a matching operation fails. */
   7653     fail:
   7654       if (!FAIL_STACK_EMPTY ())
   7655 	{ /* A restart point is known.  Restore to that state.  */
   7656           DEBUG_PRINT1 ("\nFAIL:\n");
   7657           POP_FAILURE_POINT (d, p,
   7658                              lowest_active_reg, highest_active_reg,
   7659                              regstart, regend, reg_info);
   7660 
   7661           /* If this failure point is a dummy, try the next one.  */
   7662           if (!p)
   7663 	    goto fail;
   7664 
   7665           /* If we failed to the end of the pattern, don't examine *p.  */
   7666 	  assert (p <= pend);
   7667           if (p < pend)
   7668             {
   7669               boolean is_a_jump_n = false;
   7670 
   7671               /* If failed to a backwards jump that's part of a repetition
   7672                  loop, need to pop this failure point and use the next one.  */
   7673               switch ((re_opcode_t) *p)
   7674                 {
   7675                 case jump_n:
   7676                   is_a_jump_n = true;
   7677                 case maybe_pop_jump:
   7678                 case pop_failure_jump:
   7679                 case jump:
   7680                   p1 = p + 1;
   7681                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7682                   p1 += mcnt;
   7683 
   7684                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
   7685                       || (!is_a_jump_n
   7686                           && (re_opcode_t) *p1 == on_failure_jump))
   7687                     goto fail;
   7688                   break;
   7689                 default:
   7690                   /* do nothing */ ;
   7691                 }
   7692             }
   7693 
   7694           if (d >= string1 && d <= end1)
   7695 	    dend = end_match_1;
   7696         }
   7697       else
   7698         break;   /* Matching at this starting point really fails.  */
   7699     } /* for (;;) */
   7700 
   7701   if (best_regs_set)
   7702     goto restore_best_regs;
   7703 
   7704   FREE_VARIABLES ();
   7705 
   7706   return -1;         			/* Failure to match.  */
   7707 } /* re_match_2 */
   7708 
   7709 /* Subroutine definitions for re_match_2.  */
   7711 
   7712 
   7713 /* We are passed P pointing to a register number after a start_memory.
   7714 
   7715    Return true if the pattern up to the corresponding stop_memory can
   7716    match the empty string, and false otherwise.
   7717 
   7718    If we find the matching stop_memory, sets P to point to one past its number.
   7719    Otherwise, sets P to an undefined byte less than or equal to END.
   7720 
   7721    We don't handle duplicates properly (yet).  */
   7722 
   7723 static boolean
   7724 PREFIX(group_match_null_string_p) (p, end, reg_info)
   7725     UCHAR_T **p, *end;
   7726     PREFIX(register_info_type) *reg_info;
   7727 {
   7728   int mcnt;
   7729   /* Point to after the args to the start_memory.  */
   7730   UCHAR_T *p1 = *p + 2;
   7731 
   7732   while (p1 < end)
   7733     {
   7734       /* Skip over opcodes that can match nothing, and return true or
   7735 	 false, as appropriate, when we get to one that can't, or to the
   7736          matching stop_memory.  */
   7737 
   7738       switch ((re_opcode_t) *p1)
   7739         {
   7740         /* Could be either a loop or a series of alternatives.  */
   7741         case on_failure_jump:
   7742           p1++;
   7743           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7744 
   7745           /* If the next operation is not a jump backwards in the
   7746 	     pattern.  */
   7747 
   7748 	  if (mcnt >= 0)
   7749 	    {
   7750               /* Go through the on_failure_jumps of the alternatives,
   7751                  seeing if any of the alternatives cannot match nothing.
   7752                  The last alternative starts with only a jump,
   7753                  whereas the rest start with on_failure_jump and end
   7754                  with a jump, e.g., here is the pattern for `a|b|c':
   7755 
   7756                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
   7757                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
   7758                  /exactn/1/c
   7759 
   7760                  So, we have to first go through the first (n-1)
   7761                  alternatives and then deal with the last one separately.  */
   7762 
   7763 
   7764               /* Deal with the first (n-1) alternatives, which start
   7765                  with an on_failure_jump (see above) that jumps to right
   7766                  past a jump_past_alt.  */
   7767 
   7768               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
   7769 		     jump_past_alt)
   7770                 {
   7771                   /* `mcnt' holds how many bytes long the alternative
   7772                      is, including the ending `jump_past_alt' and
   7773                      its number.  */
   7774 
   7775                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
   7776 						(1 + OFFSET_ADDRESS_SIZE),
   7777 						reg_info))
   7778                     return false;
   7779 
   7780                   /* Move to right after this alternative, including the
   7781 		     jump_past_alt.  */
   7782                   p1 += mcnt;
   7783 
   7784                   /* Break if it's the beginning of an n-th alternative
   7785                      that doesn't begin with an on_failure_jump.  */
   7786                   if ((re_opcode_t) *p1 != on_failure_jump)
   7787                     break;
   7788 
   7789 		  /* Still have to check that it's not an n-th
   7790 		     alternative that starts with an on_failure_jump.  */
   7791 		  p1++;
   7792                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7793                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
   7794 		      jump_past_alt)
   7795                     {
   7796 		      /* Get to the beginning of the n-th alternative.  */
   7797                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
   7798                       break;
   7799                     }
   7800                 }
   7801 
   7802               /* Deal with the last alternative: go back and get number
   7803                  of the `jump_past_alt' just before it.  `mcnt' contains
   7804                  the length of the alternative.  */
   7805               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
   7806 
   7807               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
   7808                 return false;
   7809 
   7810               p1 += mcnt;	/* Get past the n-th alternative.  */
   7811             } /* if mcnt > 0 */
   7812           break;
   7813 
   7814 
   7815         case stop_memory:
   7816 	  assert (p1[1] == **p);
   7817           *p = p1 + 2;
   7818           return true;
   7819 
   7820 
   7821         default:
   7822           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7823             return false;
   7824         }
   7825     } /* while p1 < end */
   7826 
   7827   return false;
   7828 } /* group_match_null_string_p */
   7829 
   7830 
   7831 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   7832    It expects P to be the first byte of a single alternative and END one
   7833    byte past the last. The alternative can contain groups.  */
   7834 
   7835 static boolean
   7836 PREFIX(alt_match_null_string_p) (p, end, reg_info)
   7837     UCHAR_T *p, *end;
   7838     PREFIX(register_info_type) *reg_info;
   7839 {
   7840   int mcnt;
   7841   UCHAR_T *p1 = p;
   7842 
   7843   while (p1 < end)
   7844     {
   7845       /* Skip over opcodes that can match nothing, and break when we get
   7846          to one that can't.  */
   7847 
   7848       switch ((re_opcode_t) *p1)
   7849         {
   7850 	/* It's a loop.  */
   7851         case on_failure_jump:
   7852           p1++;
   7853           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7854           p1 += mcnt;
   7855           break;
   7856 
   7857 	default:
   7858           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7859             return false;
   7860         }
   7861     }  /* while p1 < end */
   7862 
   7863   return true;
   7864 } /* alt_match_null_string_p */
   7865 
   7866 
   7867 /* Deals with the ops common to group_match_null_string_p and
   7868    alt_match_null_string_p.
   7869 
   7870    Sets P to one after the op and its arguments, if any.  */
   7871 
   7872 static boolean
   7873 PREFIX(common_op_match_null_string_p) (p, end, reg_info)
   7874     UCHAR_T **p, *end;
   7875     PREFIX(register_info_type) *reg_info;
   7876 {
   7877   int mcnt;
   7878   boolean ret;
   7879   int reg_no;
   7880   UCHAR_T *p1 = *p;
   7881 
   7882   switch ((re_opcode_t) *p1++)
   7883     {
   7884     case no_op:
   7885     case begline:
   7886     case endline:
   7887     case begbuf:
   7888     case endbuf:
   7889     case wordbeg:
   7890     case wordend:
   7891     case wordbound:
   7892     case notwordbound:
   7893 #ifdef emacs
   7894     case before_dot:
   7895     case at_dot:
   7896     case after_dot:
   7897 #endif
   7898       break;
   7899 
   7900     case start_memory:
   7901       reg_no = *p1;
   7902       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
   7903       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
   7904 
   7905       /* Have to set this here in case we're checking a group which
   7906          contains a group and a back reference to it.  */
   7907 
   7908       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
   7909         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
   7910 
   7911       if (!ret)
   7912         return false;
   7913       break;
   7914 
   7915     /* If this is an optimized succeed_n for zero times, make the jump.  */
   7916     case jump:
   7917       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7918       if (mcnt >= 0)
   7919         p1 += mcnt;
   7920       else
   7921         return false;
   7922       break;
   7923 
   7924     case succeed_n:
   7925       /* Get to the number of times to succeed.  */
   7926       p1 += OFFSET_ADDRESS_SIZE;
   7927       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7928 
   7929       if (mcnt == 0)
   7930         {
   7931           p1 -= 2 * OFFSET_ADDRESS_SIZE;
   7932           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7933           p1 += mcnt;
   7934         }
   7935       else
   7936         return false;
   7937       break;
   7938 
   7939     case duplicate:
   7940       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
   7941         return false;
   7942       break;
   7943 
   7944     case set_number_at:
   7945       p1 += 2 * OFFSET_ADDRESS_SIZE;
   7946 
   7947     default:
   7948       /* All other opcodes mean we cannot match the empty string.  */
   7949       return false;
   7950   }
   7951 
   7952   *p = p1;
   7953   return true;
   7954 } /* common_op_match_null_string_p */
   7955 
   7956 
   7957 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   7958    bytes; nonzero otherwise.  */
   7959 
   7960 static int
   7961 PREFIX(bcmp_translate) (s1, s2, len, translate)
   7962      const CHAR_T *s1, *s2;
   7963      register int len;
   7964      RE_TRANSLATE_TYPE translate;
   7965 {
   7966   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
   7967   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
   7968   while (len)
   7969     {
   7970 #ifdef WCHAR
   7971       if (((*p1<=0xff)?translate[*p1++]:*p1++)
   7972 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
   7973 	return 1;
   7974 #else /* BYTE */
   7975       if (translate[*p1++] != translate[*p2++]) return 1;
   7976 #endif /* WCHAR */
   7977       len--;
   7978     }
   7979   return 0;
   7980 }
   7981 
   7982 
   7984 #else /* not INSIDE_RECURSION */
   7985 
   7986 /* Entry points for GNU code.  */
   7987 
   7988 /* re_compile_pattern is the GNU regular expression compiler: it
   7989    compiles PATTERN (of length SIZE) and puts the result in BUFP.
   7990    Returns 0 if the pattern was valid, otherwise an error string.
   7991 
   7992    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
   7993    are set in BUFP on entry.
   7994 
   7995    We call regex_compile to do the actual compilation.  */
   7996 
   7997 const char *
   7998 re_compile_pattern (pattern, length, bufp)
   7999      const char *pattern;
   8000      size_t length;
   8001      struct re_pattern_buffer *bufp;
   8002 {
   8003   reg_errcode_t ret;
   8004 
   8005   /* GNU code is written to assume at least RE_NREGS registers will be set
   8006      (and at least one extra will be -1).  */
   8007   bufp->regs_allocated = REGS_UNALLOCATED;
   8008 
   8009   /* And GNU code determines whether or not to get register information
   8010      by passing null for the REGS argument to re_match, etc., not by
   8011      setting no_sub.  */
   8012   bufp->no_sub = 0;
   8013 
   8014   /* Match anchors at newline.  */
   8015   bufp->newline_anchor = 1;
   8016 
   8017 # ifdef MBS_SUPPORT
   8018   if (MB_CUR_MAX != 1)
   8019     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
   8020   else
   8021 # endif
   8022     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
   8023 
   8024   if (!ret)
   8025     return NULL;
   8026   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
   8027 }
   8028 #ifdef _LIBC
   8029 weak_alias (__re_compile_pattern, re_compile_pattern)
   8030 #endif
   8031 
   8032 /* Entry points compatible with 4.2 BSD regex library.  We don't define
   8034    them unless specifically requested.  */
   8035 
   8036 #if defined _REGEX_RE_COMP || defined _LIBC
   8037 
   8038 /* BSD has one and only one pattern buffer.  */
   8039 static struct re_pattern_buffer re_comp_buf;
   8040 
   8041 char *
   8042 #ifdef _LIBC
   8043 /* Make these definitions weak in libc, so POSIX programs can redefine
   8044    these names if they don't use our functions, and still use
   8045    regcomp/regexec below without link errors.  */
   8046 weak_function
   8047 #endif
   8048 re_comp (s)
   8049     const char *s;
   8050 {
   8051   reg_errcode_t ret;
   8052 
   8053   if (!s)
   8054     {
   8055       if (!re_comp_buf.buffer)
   8056 	return gettext ("No previous regular expression");
   8057       return 0;
   8058     }
   8059 
   8060   if (!re_comp_buf.buffer)
   8061     {
   8062       re_comp_buf.buffer = (unsigned char *) malloc (200);
   8063       if (re_comp_buf.buffer == NULL)
   8064         return (char *) gettext (re_error_msgid
   8065 				 + re_error_msgid_idx[(int) REG_ESPACE]);
   8066       re_comp_buf.allocated = 200;
   8067 
   8068       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
   8069       if (re_comp_buf.fastmap == NULL)
   8070 	return (char *) gettext (re_error_msgid
   8071 				 + re_error_msgid_idx[(int) REG_ESPACE]);
   8072     }
   8073 
   8074   /* Since `re_exec' always passes NULL for the `regs' argument, we
   8075      don't need to initialize the pattern buffer fields which affect it.  */
   8076 
   8077   /* Match anchors at newlines.  */
   8078   re_comp_buf.newline_anchor = 1;
   8079 
   8080 # ifdef MBS_SUPPORT
   8081   if (MB_CUR_MAX != 1)
   8082     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   8083   else
   8084 # endif
   8085     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   8086 
   8087   if (!ret)
   8088     return NULL;
   8089 
   8090   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
   8091   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
   8092 }
   8093 
   8094 
   8095 int
   8096 #ifdef _LIBC
   8097 weak_function
   8098 #endif
   8099 re_exec (s)
   8100     const char *s;
   8101 {
   8102   const int len = strlen (s);
   8103   return
   8104     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
   8105 }
   8106 
   8107 #endif /* _REGEX_RE_COMP */
   8108 
   8109 /* POSIX.2 functions.  Don't define these for Emacs.  */
   8111 
   8112 #ifndef emacs
   8113 
   8114 /* regcomp takes a regular expression as a string and compiles it.
   8115 
   8116    PREG is a regex_t *.  We do not expect any fields to be initialized,
   8117    since POSIX says we shouldn't.  Thus, we set
   8118 
   8119      `buffer' to the compiled pattern;
   8120      `used' to the length of the compiled pattern;
   8121      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
   8122        REG_EXTENDED bit in CFLAGS is set; otherwise, to
   8123        RE_SYNTAX_POSIX_BASIC;
   8124      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
   8125      `fastmap' to an allocated space for the fastmap;
   8126      `fastmap_accurate' to zero;
   8127      `re_nsub' to the number of subexpressions in PATTERN.
   8128 
   8129    PATTERN is the address of the pattern string.
   8130 
   8131    CFLAGS is a series of bits which affect compilation.
   8132 
   8133      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
   8134      use POSIX basic syntax.
   8135 
   8136      If REG_NEWLINE is set, then . and [^...] don't match newline.
   8137      Also, regexec will try a match beginning after every newline.
   8138 
   8139      If REG_ICASE is set, then we considers upper- and lowercase
   8140      versions of letters to be equivalent when matching.
   8141 
   8142      If REG_NOSUB is set, then when PREG is passed to regexec, that
   8143      routine will report only success or failure, and nothing about the
   8144      registers.
   8145 
   8146    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
   8147    the return codes and their meanings.)  */
   8148 
   8149 int
   8150 regcomp (preg, pattern, cflags)
   8151     regex_t *preg;
   8152     const char *pattern;
   8153     int cflags;
   8154 {
   8155   reg_errcode_t ret;
   8156   reg_syntax_t syntax
   8157     = (cflags & REG_EXTENDED) ?
   8158       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
   8159 
   8160   /* regex_compile will allocate the space for the compiled pattern.  */
   8161   preg->buffer = 0;
   8162   preg->allocated = 0;
   8163   preg->used = 0;
   8164 
   8165   /* Try to allocate space for the fastmap.  */
   8166   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
   8167 
   8168   if (cflags & REG_ICASE)
   8169     {
   8170       unsigned i;
   8171 
   8172       preg->translate
   8173 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
   8174 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
   8175       if (preg->translate == NULL)
   8176         return (int) REG_ESPACE;
   8177 
   8178       /* Map uppercase characters to corresponding lowercase ones.  */
   8179       for (i = 0; i < CHAR_SET_SIZE; i++)
   8180         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
   8181     }
   8182   else
   8183     preg->translate = NULL;
   8184 
   8185   /* If REG_NEWLINE is set, newlines are treated differently.  */
   8186   if (cflags & REG_NEWLINE)
   8187     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
   8188       syntax &= ~RE_DOT_NEWLINE;
   8189       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
   8190       /* It also changes the matching behavior.  */
   8191       preg->newline_anchor = 1;
   8192     }
   8193   else
   8194     preg->newline_anchor = 0;
   8195 
   8196   preg->no_sub = !!(cflags & REG_NOSUB);
   8197 
   8198   /* POSIX says a null character in the pattern terminates it, so we
   8199      can use strlen here in compiling the pattern.  */
   8200 # ifdef MBS_SUPPORT
   8201   if (MB_CUR_MAX != 1)
   8202     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
   8203   else
   8204 # endif
   8205     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
   8206 
   8207   /* POSIX doesn't distinguish between an unmatched open-group and an
   8208      unmatched close-group: both are REG_EPAREN.  */
   8209   if (ret == REG_ERPAREN) ret = REG_EPAREN;
   8210 
   8211   if (ret == REG_NOERROR && preg->fastmap)
   8212     {
   8213       /* Compute the fastmap now, since regexec cannot modify the pattern
   8214 	 buffer.  */
   8215       if (re_compile_fastmap (preg) == -2)
   8216 	{
   8217 	  /* Some error occurred while computing the fastmap, just forget
   8218 	     about it.  */
   8219 	  free (preg->fastmap);
   8220 	  preg->fastmap = NULL;
   8221 	}
   8222     }
   8223 
   8224   return (int) ret;
   8225 }
   8226 #ifdef _LIBC
   8227 weak_alias (__regcomp, regcomp)
   8228 #endif
   8229 
   8230 
   8231 /* regexec searches for a given pattern, specified by PREG, in the
   8232    string STRING.
   8233 
   8234    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   8235    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   8236    least NMATCH elements, and we set them to the offsets of the
   8237    corresponding matched substrings.
   8238 
   8239    EFLAGS specifies `execution flags' which affect matching: if
   8240    REG_NOTBOL is set, then ^ does not match at the beginning of the
   8241    string; if REG_NOTEOL is set, then $ does not match at the end.
   8242 
   8243    We return 0 if we find a match and REG_NOMATCH if not.  */
   8244 
   8245 int
   8246 regexec (preg, string, nmatch, pmatch, eflags)
   8247     const regex_t *preg;
   8248     const char *string;
   8249     size_t nmatch;
   8250     regmatch_t pmatch[];
   8251     int eflags;
   8252 {
   8253   int ret;
   8254   struct re_registers regs;
   8255   regex_t private_preg;
   8256   int len = strlen (string);
   8257   boolean want_reg_info = !preg->no_sub && nmatch > 0;
   8258 
   8259   private_preg = *preg;
   8260 
   8261   private_preg.not_bol = !!(eflags & REG_NOTBOL);
   8262   private_preg.not_eol = !!(eflags & REG_NOTEOL);
   8263 
   8264   /* The user has told us exactly how many registers to return
   8265      information about, via `nmatch'.  We have to pass that on to the
   8266      matching routines.  */
   8267   private_preg.regs_allocated = REGS_FIXED;
   8268 
   8269   if (want_reg_info)
   8270     {
   8271       regs.num_regs = nmatch;
   8272       regs.start = TALLOC (nmatch * 2, regoff_t);
   8273       if (regs.start == NULL)
   8274         return (int) REG_NOMATCH;
   8275       regs.end = regs.start + nmatch;
   8276     }
   8277 
   8278   /* Perform the searching operation.  */
   8279   ret = re_search (&private_preg, string, len,
   8280                    /* start: */ 0, /* range: */ len,
   8281                    want_reg_info ? &regs : (struct re_registers *) 0);
   8282 
   8283   /* Copy the register information to the POSIX structure.  */
   8284   if (want_reg_info)
   8285     {
   8286       if (ret >= 0)
   8287         {
   8288           unsigned r;
   8289 
   8290           for (r = 0; r < nmatch; r++)
   8291             {
   8292               pmatch[r].rm_so = regs.start[r];
   8293               pmatch[r].rm_eo = regs.end[r];
   8294             }
   8295         }
   8296 
   8297       /* If we needed the temporary register info, free the space now.  */
   8298       free (regs.start);
   8299     }
   8300 
   8301   /* We want zero return to mean success, unlike `re_search'.  */
   8302   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
   8303 }
   8304 #ifdef _LIBC
   8305 weak_alias (__regexec, regexec)
   8306 #endif
   8307 
   8308 
   8309 /* Returns a message corresponding to an error code, ERRCODE, returned
   8310    from either regcomp or regexec.   We don't use PREG here.  */
   8311 
   8312 size_t
   8313 regerror (errcode, preg, errbuf, errbuf_size)
   8314     int errcode;
   8315     const regex_t *preg;
   8316     char *errbuf;
   8317     size_t errbuf_size;
   8318 {
   8319   const char *msg;
   8320   size_t msg_size;
   8321 
   8322   if (errcode < 0
   8323       || errcode >= (int) (sizeof (re_error_msgid_idx)
   8324 			   / sizeof (re_error_msgid_idx[0])))
   8325     /* Only error codes returned by the rest of the code should be passed
   8326        to this routine.  If we are given anything else, or if other regex
   8327        code generates an invalid error code, then the program has a bug.
   8328        Dump core so we can fix it.  */
   8329     abort ();
   8330 
   8331   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
   8332 
   8333   msg_size = strlen (msg) + 1; /* Includes the null.  */
   8334 
   8335   if (errbuf_size != 0)
   8336     {
   8337       if (msg_size > errbuf_size)
   8338         {
   8339 #if defined HAVE_MEMPCPY || defined _LIBC
   8340 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
   8341 #else
   8342           memcpy (errbuf, msg, errbuf_size - 1);
   8343           errbuf[errbuf_size - 1] = 0;
   8344 #endif
   8345         }
   8346       else
   8347         memcpy (errbuf, msg, msg_size);
   8348     }
   8349 
   8350   return msg_size;
   8351 }
   8352 #ifdef _LIBC
   8353 weak_alias (__regerror, regerror)
   8354 #endif
   8355 
   8356 
   8357 /* Free dynamically allocated space used by PREG.  */
   8358 
   8359 void
   8360 regfree (preg)
   8361     regex_t *preg;
   8362 {
   8363   if (preg->buffer != NULL)
   8364     free (preg->buffer);
   8365   preg->buffer = NULL;
   8366 
   8367   preg->allocated = 0;
   8368   preg->used = 0;
   8369 
   8370   if (preg->fastmap != NULL)
   8371     free (preg->fastmap);
   8372   preg->fastmap = NULL;
   8373   preg->fastmap_accurate = 0;
   8374 
   8375   if (preg->translate != NULL)
   8376     free (preg->translate);
   8377   preg->translate = NULL;
   8378 }
   8379 #ifdef _LIBC
   8380 weak_alias (__regfree, regfree)
   8381 #endif
   8382 
   8383 #endif /* not emacs  */
   8384 
   8385 #endif /* not INSIDE_RECURSION */
   8386 
   8387 
/* Drop the macros listed here so that they do not linger past this
   point in the file.  Undefining a name that is not currently defined
   is harmless.  */

/* Number storage and extraction.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

/* Debugging output.  */
#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

/* Failure stack.  */
#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

/* Register bookkeeping.  */
#undef REG_UNSET_VALUE
#undef REG_UNSET

/* Pattern fetching.  */
#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

/* Pattern buffer construction.  */
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

/* Matching helpers.  The second name used to be misspelled
   `MATCHING_IN_FRST_STRING', which left the real macro defined.  */
# undef POINTER_TO_OFFSET
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-type parameters.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Marker recording that this point of the file has been processed.  */
# define DEFINED_ONCE
   8456