Home | History | Annotate | Line # | Download | only in libiberty
regex.c revision 1.1
      1 /* Extended regular expression matching and search library,
      2    version 0.12.
      3    (Implements POSIX draft P1003.2/D11.2, except for some of the
      4    internationalization features.)
      5 
      6    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
      7    2002, 2005, 2010 Free Software Foundation, Inc.
      8    This file is part of the GNU C Library.
      9 
     10    The GNU C Library is free software; you can redistribute it and/or
     11    modify it under the terms of the GNU Lesser General Public
     12    License as published by the Free Software Foundation; either
     13    version 2.1 of the License, or (at your option) any later version.
     14 
     15    The GNU C Library is distributed in the hope that it will be useful,
     16    but WITHOUT ANY WARRANTY; without even the implied warranty of
     17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18    Lesser General Public License for more details.
     19 
     20    You should have received a copy of the GNU Lesser General Public
     21    License along with the GNU C Library; if not, write to the Free
     22    Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     23    02110-1301 USA.  */
     24 
     25 /* This file has been modified for usage in libiberty.  It includes "xregex.h"
     26    instead of <regex.h>.  The "xregex.h" header file renames all external
     27    routines with an "x" prefix so they do not collide with the native regex
     28    routines or with other components' regex routines. */
     29 /* AIX requires this to be the first thing in the file. */
     30 #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
     31   #pragma alloca
     32 #endif
     33 
     34 #undef	_GNU_SOURCE
     35 #define _GNU_SOURCE
     36 
     37 #ifndef INSIDE_RECURSION
     38 # ifdef HAVE_CONFIG_H
     39 #  include <config.h>
     40 # endif
     41 #endif
     42 
     43 #include <ansidecl.h>
     44 
     45 #ifndef INSIDE_RECURSION
     46 
     47 # if defined STDC_HEADERS && !defined emacs
     48 #  include <stddef.h>
     49 # else
     50 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
     51 #  include <sys/types.h>
     52 # endif
     53 
     54 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
     55 
     56 /* For platforms which support the ISO C Amendment 1 functionality we
     57    support user-defined character classes.  */
     58 # if defined _LIBC || WIDE_CHAR_SUPPORT
     59 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
     60 #  include <wchar.h>
     61 #  include <wctype.h>
     62 # endif
     63 
     64 # ifdef _LIBC
     65 /* We have to keep the namespace clean.  */
     66 #  define regfree(preg) __regfree (preg)
     67 #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
     68 #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
     69 #  define regerror(errcode, preg, errbuf, errbuf_size) \
     70 	__regerror(errcode, preg, errbuf, errbuf_size)
     71 #  define re_set_registers(bu, re, nu, st, en) \
     72 	__re_set_registers (bu, re, nu, st, en)
     73 #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
     74 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     75 #  define re_match(bufp, string, size, pos, regs) \
     76 	__re_match (bufp, string, size, pos, regs)
     77 #  define re_search(bufp, string, size, startpos, range, regs) \
     78 	__re_search (bufp, string, size, startpos, range, regs)
     79 #  define re_compile_pattern(pattern, length, bufp) \
     80 	__re_compile_pattern (pattern, length, bufp)
     81 #  define re_set_syntax(syntax) __re_set_syntax (syntax)
     82 #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
     83 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
     84 #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
     85 
     86 #  define btowc __btowc
     87 
     88 /* We are also using some library internals.  */
     89 #  include <locale/localeinfo.h>
     90 #  include <locale/elem-hash.h>
     91 #  include <langinfo.h>
     92 #  include <locale/coll-lookup.h>
     93 # endif
     94 
     95 /* This is for other GNU distributions with internationalized messages.  */
     96 # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
     97 #  include <libintl.h>
     98 #  ifdef _LIBC
     99 #   undef gettext
    100 #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
    101 #  endif
    102 # else
    103 #  define gettext(msgid) (msgid)
    104 # endif
    105 
    106 # ifndef gettext_noop
    107 /* This define is so xgettext can find the internationalizable
    108    strings.  */
    109 #  define gettext_noop(String) String
    110 # endif
    111 
    112 /* The `emacs' switch turns on certain matching commands
    113    that make sense only in Emacs. */
    114 # ifdef emacs
    115 
    116 #  include "lisp.h"
    117 #  include "buffer.h"
    118 #  include "syntax.h"
    119 
    120 # else  /* not emacs */
    121 
    122 /* If we are not linking with Emacs proper,
    123    we can't use the relocating allocator
    124    even if config.h says that we can.  */
    125 #  undef REL_ALLOC
    126 
    127 #  if defined STDC_HEADERS || defined _LIBC
    128 #   include <stdlib.h>
    129 #  else
    130 char *malloc ();
    131 char *realloc ();
    132 #  endif
    133 
    134 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    135    If nothing else has been done, use the method below.  */
    136 #  ifdef INHIBIT_STRING_HEADER
    137 #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
    138 #    if !defined bzero && !defined bcopy
    139 #     undef INHIBIT_STRING_HEADER
    140 #    endif
    141 #   endif
    142 #  endif
    143 
    144 /* This is the normal way of making sure we have a bcopy and a bzero.
    145    This is used in most programs--a few other programs avoid this
    146    by defining INHIBIT_STRING_HEADER.  */
    147 #  ifndef INHIBIT_STRING_HEADER
    148 #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
    149 #    include <string.h>
    150 #    ifndef bzero
    151 #     ifndef _LIBC
    152 #      define bzero(s, n)	(memset (s, '\0', n), (s))
    153 #     else
    154 #      define bzero(s, n)	__bzero (s, n)
    155 #     endif
    156 #    endif
    157 #   else
    158 #    include <strings.h>
    159 #    ifndef memcmp
    160 #     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
    161 #    endif
    162 #    ifndef memcpy
    163 #     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
    164 #    endif
    165 #   endif
    166 #  endif
    167 
    168 /* Define the syntax stuff for \<, \>, etc.  */
    169 
    170 /* This must be nonzero for the wordchar and notwordchar pattern
    171    commands in re_match_2.  */
    172 #  ifndef Sword
    173 #   define Sword 1
    174 #  endif
    175 
    176 #  ifdef SWITCH_ENUM_BUG
    177 #   define SWITCH_ENUM_CAST(x) ((int)(x))
    178 #  else
    179 #   define SWITCH_ENUM_CAST(x) (x)
    180 #  endif
    181 
    182 # endif /* not emacs */
    183 
    184 # if defined _LIBC || HAVE_LIMITS_H
    185 #  include <limits.h>
    186 # endif
    187 
    188 # ifndef MB_LEN_MAX
    189 #  define MB_LEN_MAX 1
    190 # endif
    191 
    192 /* Get the interface, including the syntax bits.  */
    194 # include "xregex.h"  /* change for libiberty */
    195 
    196 /* isalpha etc. are used for the character classes.  */
    197 # include <ctype.h>
    198 
    199 /* Jim Meyering writes:
    200 
    201    "... Some ctype macros are valid only for character codes that
    202    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
    203    using /bin/cc or gcc but without giving an ansi option).  So, all
    204    ctype uses should be through macros like ISPRINT...  If
    205    STDC_HEADERS is defined, then autoconf has verified that the ctype
    206    macros don't need to be guarded with references to isascii. ...
    207    Defining isascii to 1 should let any compiler worth its salt
    208    eliminate the && through constant folding."
    209    Solaris defines some of these symbols so we must undefine them first.  */
    210 
    211 # undef ISASCII
    212 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
    213 #  define ISASCII(c) 1
    214 # else
    215 #  define ISASCII(c) isascii(c)
    216 # endif
    217 
    218 # ifdef isblank
    219 #  define ISBLANK(c) (ISASCII (c) && isblank (c))
    220 # else
    221 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
    222 # endif
    223 # ifdef isgraph
    224 #  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
    225 # else
    226 #  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
    227 # endif
    228 
    229 # undef ISPRINT
    230 # define ISPRINT(c) (ISASCII (c) && isprint (c))
    231 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
    232 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
    233 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
    234 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
    235 # define ISLOWER(c) (ISASCII (c) && islower (c))
    236 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
    237 # define ISSPACE(c) (ISASCII (c) && isspace (c))
    238 # define ISUPPER(c) (ISASCII (c) && isupper (c))
    239 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
    240 
    241 # ifdef _tolower
    242 #  define TOLOWER(c) _tolower(c)
    243 # else
    244 #  define TOLOWER(c) tolower(c)
    245 # endif
    246 
    247 # ifndef NULL
    248 #  define NULL (void *)0
    249 # endif
    250 
    251 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
    252    since ours (we hope) works properly with all combinations of
    253    machines, compilers, `char' and `unsigned char' argument types.
    254    (Per Bothner suggested the basic approach.)  */
    255 # undef SIGN_EXTEND_CHAR
    256 # if __STDC__
    257 #  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
    258 # else  /* not __STDC__ */
    259 /* As in Harbison and Steele.  */
    260 #  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
    261 # endif
    262 
    263 # ifndef emacs
    265 /* How many characters in the character set.  */
    266 #  define CHAR_SET_SIZE 256
    267 
    268 #  ifdef SYNTAX_TABLE
    269 
    270 extern char *re_syntax_table;
    271 
    272 #  else /* not SYNTAX_TABLE */
    273 
    274 static char re_syntax_table[CHAR_SET_SIZE];
    275 
    276 static void init_syntax_once (void);
    277 
    278 static void
    279 init_syntax_once (void)
    280 {
    281    register int c;
    282    static int done = 0;
    283 
    284    if (done)
    285      return;
    286    bzero (re_syntax_table, sizeof re_syntax_table);
    287 
    288    for (c = 0; c < CHAR_SET_SIZE; ++c)
    289      if (ISALNUM (c))
    290 	re_syntax_table[c] = Sword;
    291 
    292    re_syntax_table['_'] = Sword;
    293 
    294    done = 1;
    295 }
    296 
    297 #  endif /* not SYNTAX_TABLE */
    298 
    299 #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
    300 
    301 # endif /* emacs */
    302 
    303 /* Integer type for pointers.  */
    305 # if !defined _LIBC && !defined HAVE_UINTPTR_T
    306 typedef unsigned long int uintptr_t;
    307 # endif
    308 
    309 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    310    use `alloca' instead of `malloc'.  This is because using malloc in
    311    re_search* or re_match* could cause memory leaks when C-g is used in
    312    Emacs; also, malloc is slower and causes storage fragmentation.  On
    313    the other hand, malloc is more portable, and easier to debug.
    314 
    315    Because we sometimes use alloca, some routines have to be macros,
    316    not functions -- `alloca'-allocated space disappears at the end of the
    317    function it is called in.  */
    318 
    319 # ifdef REGEX_MALLOC
    320 
    321 #  define REGEX_ALLOCATE malloc
    322 #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
    323 #  define REGEX_FREE free
    324 
    325 # else /* not REGEX_MALLOC  */
    326 
    327 /* Emacs already defines alloca, sometimes.  */
    328 #  ifndef alloca
    329 
    330 /* Make alloca work the best possible way.  */
    331 #   ifdef __GNUC__
    332 #    define alloca __builtin_alloca
    333 #   else /* not __GNUC__ */
    334 #    if HAVE_ALLOCA_H
    335 #     include <alloca.h>
    336 #    endif /* HAVE_ALLOCA_H */
    337 #   endif /* not __GNUC__ */
    338 
    339 #  endif /* not alloca */
    340 
    341 #  define REGEX_ALLOCATE alloca
    342 
    343 /* Assumes a `char *destination' variable.  */
    344 #  define REGEX_REALLOCATE(source, osize, nsize)			\
    345   (destination = (char *) alloca (nsize),				\
    346    memcpy (destination, source, osize))
    347 
    348 /* No need to do anything to free, after alloca.  */
    349 #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
    350 
    351 # endif /* not REGEX_MALLOC */
    352 
    353 /* Define how to allocate the failure stack.  */
    354 
    355 # if defined REL_ALLOC && defined REGEX_MALLOC
    356 
    357 #  define REGEX_ALLOCATE_STACK(size)				\
    358   r_alloc (&failure_stack_ptr, (size))
    359 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
    360   r_re_alloc (&failure_stack_ptr, (nsize))
    361 #  define REGEX_FREE_STACK(ptr)					\
    362   r_alloc_free (&failure_stack_ptr)
    363 
    364 # else /* not using relocating allocator */
    365 
    366 #  ifdef REGEX_MALLOC
    367 
    368 #   define REGEX_ALLOCATE_STACK malloc
    369 #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
    370 #   define REGEX_FREE_STACK free
    371 
    372 #  else /* not REGEX_MALLOC */
    373 
    374 #   define REGEX_ALLOCATE_STACK alloca
    375 
    376 #   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
    377    REGEX_REALLOCATE (source, osize, nsize)
    378 /* No need to explicitly free anything.  */
    379 #   define REGEX_FREE_STACK(arg)
    380 
    381 #  endif /* not REGEX_MALLOC */
    382 # endif /* not using relocating allocator */
    383 
    384 
    385 /* True if `size1' is nonzero and PTR is pointing anywhere inside
    386    `string1' or just past its end.  This works if PTR is NULL, which is
    387    a good thing.  */
    388 # define FIRST_STRING_P(ptr) 					\
    389   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
    390 
    391 /* (Re)Allocate N items of type T using malloc, or fail.  */
    392 # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
    393 # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
    394 # define RETALLOC_IF(addr, n, t) \
    395   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
    396 # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
    397 
    398 # define BYTEWIDTH 8 /* In bits.  */
    399 
    400 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
    401 
    402 # undef MAX
    403 # undef MIN
    404 # define MAX(a, b) ((a) > (b) ? (a) : (b))
    405 # define MIN(a, b) ((a) < (b) ? (a) : (b))
    406 
    407 typedef char boolean;
    408 # define false 0
    409 # define true 1
    410 
    411 static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
    412                                          reg_syntax_t syntax,
    413                                          struct re_pattern_buffer *bufp);
    414 
    415 static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
    416                                      const char *string1, int size1,
    417                                      const char *string2, int size2,
    418                                      int pos,
    419                                      struct re_registers *regs,
    420                                      int stop);
    421 static int byte_re_search_2 (struct re_pattern_buffer *bufp,
    422                              const char *string1, int size1,
    423                              const char *string2, int size2,
    424                              int startpos, int range,
    425                              struct re_registers *regs, int stop);
    426 static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
    427 
    428 #ifdef MBS_SUPPORT
    429 static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
    430                                         reg_syntax_t syntax,
    431                                         struct re_pattern_buffer *bufp);
    432 
    433 
    434 static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
    435                                     const char *cstring1, int csize1,
    436                                     const char *cstring2, int csize2,
    437                                     int pos,
    438                                     struct re_registers *regs,
    439                                     int stop,
    440                                     wchar_t *string1, int size1,
    441                                     wchar_t *string2, int size2,
    442                                     int *mbs_offset1, int *mbs_offset2);
    443 static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
    444                             const char *string1, int size1,
    445                             const char *string2, int size2,
    446                             int startpos, int range,
    447                             struct re_registers *regs, int stop);
    448 static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
    449 #endif
    450 
    451 /* These are the command codes that appear in compiled regular
    453    expressions.  Some opcodes are followed by argument bytes.  A
    454    command code can specify any interpretation whatsoever for its
    455    arguments.  Zero bytes may appear in the compiled regular expression.  */
    456 
/* Opcodes of the compiled pattern.  no_op is 0 and every later enumerator
   takes the next value, so the numbering depends on whether MBS_SUPPORT
   and emacs are defined.  */
    457 typedef enum
    458 {
    459   no_op = 0,
    460 
    461   /* Succeed right away--no more backtracking.  */
    462   succeed,
    463 
    464         /* Followed by one byte giving n, then by n literal bytes.  */
    465   exactn,
    466 
    467 # ifdef MBS_SUPPORT
    468 	/* Same as exactn, but contains binary data.  */
    469   exactn_bin,
    470 # endif
    471 
    472         /* Matches any (more or less) character.  */
    473   anychar,
    474 
    475         /* Matches any one char belonging to the specified set.  The
    476            first following byte is the number of bitmap bytes.  Then
    477            come the bytes of a bitmap saying which chars are in.  Bits
    478            in each byte are ordered low-bit-first.  A character is in
    479            the set if its bit is 1.  A character too large to have a
    480            bit in the map is automatically not in the set.  */
    481         /* If MBS_SUPPORT is defined, the following elements give the
    482 	   lengths of the character classes, collating symbols,
    483 	   equivalence classes, character ranges, and characters.
    484 	   Then the character class elements, collating symbol elements,
    485 	   equivalence class elements, range elements, and character
    486 	   elements follow.
    487 	   See the regex_compile function.  */
    488   charset,
    489 
    490         /* Same parameters as charset, but match any character that is
    491            not one of those specified.  */
    492   charset_not,
    493 
    494         /* Start remembering the text that is matched, for storing in a
    495            register.  Followed by one byte with the register number, in
    496            the range 0 to one less than the pattern buffer's re_nsub
    497            field.  Then followed by one byte with the number of groups
    498            inner to this one.  (This last has to be part of the
    499            start_memory only because we need it in the on_failure_jump
    500            of re_match_2.)  */
    501   start_memory,
    502 
    503         /* Stop remembering the text that is matched and store it in a
    504            memory register.  Followed by one byte with the register
    505            number, in the range 0 to one less than `re_nsub' in the
    506            pattern buffer, and one byte with the number of inner groups,
    507            just like `start_memory'.  (We need the number of inner
    508            groups here because we don't have any easy way of finding the
    509            corresponding start_memory when we're at a stop_memory.)  */
    510   stop_memory,
    511 
    512         /* Match a duplicate of something remembered.  Followed by one
    513            byte containing the register number.  */
    514   duplicate,
    515 
    516         /* Fail unless at beginning of line.  */
    517   begline,
    518 
    519         /* Fail unless at end of line.  */
    520   endline,
    521 
    522         /* Succeeds if at beginning of buffer (if emacs) or at beginning
    523            of string to be matched (if not).  */
    524   begbuf,
    525 
    526         /* Analogously, for end of buffer/string.  */
    527   endbuf,
    528 
    529         /* Followed by two-byte relative address to which to jump.  */
    530   jump,
    531 
    532 	/* Same as jump, but marks the end of an alternative.  */
    533   jump_past_alt,
    534 
    535         /* Followed by two-byte relative address of place to resume at
    536            in case of failure.  */
    537         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    538   on_failure_jump,
    539 
    540         /* Like on_failure_jump, but pushes a placeholder instead of the
    541            current string position when executed.  */
    542   on_failure_keep_string_jump,
    543 
    544         /* Throw away latest failure point and then jump to following
    545            two-byte relative address.  */
    546         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    547   pop_failure_jump,
    548 
    549         /* Change to pop_failure_jump if we know we won't have to
    550            backtrack to match; otherwise change to jump.  This is used
    551            to jump back to the beginning of a repeat.  If what follows
    552            this jump clearly won't match what the repeat does, such that
    553            we can be sure that there is no use backtracking out of
    554            repetitions already matched, then we change it to a
    555            pop_failure_jump.  Followed by two-byte address.  */
    556         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    557   maybe_pop_jump,
    558 
    559         /* Jump to following two-byte address, and push a dummy failure
    560            point.  This failure point will be thrown away if an attempt
    561            is made to use it for a failure.  A `+' construct makes this
    562            before the first repeat.  Also used as an intermediary kind
    563            of jump when compiling an alternative.  */
    564         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    565   dummy_failure_jump,
    566 
    567 	/* Push a dummy failure point and continue.  Used at the end of
    568 	   alternatives.  */
    569   push_dummy_failure,
    570 
    571         /* Followed by two-byte relative address and two-byte number n.
    572            After matching N times, jump to the address upon failure.  */
    573         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    574   succeed_n,
    575 
    576         /* Followed by two-byte relative address, and two-byte number n.
    577            Jump to the address N times, then fail.  */
    578         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    579   jump_n,
    580 
    581         /* Set the following two-byte relative address to the
    582            subsequent two-byte number.  The address *includes* the two
    583            bytes of number.  */
    584         /* If MBS_SUPPORT is defined, the size of the address is 1.  */
    585   set_number_at,
    586 
    587   wordchar,	/* Matches any word-constituent character.  */
    588   notwordchar,	/* Matches any char that is not a word-constituent.  */
    589 
    590   wordbeg,	/* Succeeds if at word beginning.  */
    591   wordend,	/* Succeeds if at word end.  */
    592 
    593   wordbound,	/* Succeeds if at a word boundary.  */
    594   notwordbound	/* Succeeds if not at a word boundary.  */
    595 
    596 # ifdef emacs
    597   ,before_dot,	/* Succeeds if before point.  */
    598   at_dot,	/* Succeeds if at point.  */
    599   after_dot,	/* Succeeds if after point.  */
    600 
    601 	/* Matches any character whose syntax is specified.  Followed by
    602            a byte which contains a syntax code, e.g., Sword.  */
    603   syntaxspec,
    604 
    605 	/* Matches any character whose syntax is not that specified.  */
    606   notsyntaxspec
    607 # endif /* emacs */
    608 } re_opcode_t;
    609 #endif /* not INSIDE_RECURSION */
    610 
    611 
    613 #ifdef BYTE
    614 # define CHAR_T char
    615 # define UCHAR_T unsigned char
    616 # define COMPILED_BUFFER_VAR bufp->buffer
    617 # define OFFSET_ADDRESS_SIZE 2
    618 # define PREFIX(name) byte_##name
    619 # define ARG_PREFIX(name) name
    620 # define PUT_CHAR(c) putchar (c)
    621 #else
    622 # ifdef WCHAR
    623 #  define CHAR_T wchar_t
    624 #  define UCHAR_T wchar_t
    625 #  define COMPILED_BUFFER_VAR wc_buffer
    626 #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
    627 #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
    628 #  define PREFIX(name) wcs_##name
    629 #  define ARG_PREFIX(name) c##name
    630 /* Should we use wide stream??  */
    631 #  define PUT_CHAR(c) printf ("%C", c);
    632 #  define TRUE 1
    633 #  define FALSE 0
    634 # else
    635 #  ifdef MBS_SUPPORT
    636 #   define WCHAR
    637 #   define INSIDE_RECURSION
    638 #   include "regex.c"
    639 #   undef INSIDE_RECURSION
    640 #  endif
    641 #  define BYTE
    642 #  define INSIDE_RECURSION
    643 #  include "regex.c"
    644 #  undef INSIDE_RECURSION
    645 # endif
    646 #endif
    647 
    648 #ifdef INSIDE_RECURSION
    649 /* Common operations on the compiled pattern.  */
    650 
    651 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
    652 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    653 
    654 # ifdef WCHAR
    655 #  define STORE_NUMBER(destination, number)				\
    656   do {									\
    657     *(destination) = (UCHAR_T)(number);				\
    658   } while (0)
    659 # else /* BYTE */
    660 #  define STORE_NUMBER(destination, number)				\
    661   do {									\
    662     (destination)[0] = (number) & 0377;					\
    663     (destination)[1] = (number) >> 8;					\
    664   } while (0)
    665 # endif /* WCHAR */
    666 
    667 /* Same as STORE_NUMBER, except increment DESTINATION to
    668    the byte after where the number is stored.  Therefore, DESTINATION
    669    must be an lvalue.  */
    670 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    671 
    672 # define STORE_NUMBER_AND_INCR(destination, number)			\
    673   do {									\
    674     STORE_NUMBER (destination, number);					\
    675     (destination) += OFFSET_ADDRESS_SIZE;				\
    676   } while (0)
    677 
/* Put into DESTINATION the number stored at SOURCE.  DESTINATION must be
   an lvalue; both arguments are evaluated more than once in the BYTE
   variant, so they must not have side effects.  */
/* In the BYTE build the number occupies two contiguous bytes, low-order
   byte first, with the second byte carrying the sign.  In the WCHAR build
   the number is held in a single element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* The sign-extended high byte is scaled by multiplication rather than by
   `<< 8': left-shifting a negative value is undefined behavior in C.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
  } while (0)
# endif
    694 
    695 # ifdef DEBUG
    696 static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
    697 static void
    698 PREFIX(extract_number) (int *dest, UCHAR_T *source)
    699 {
    700 #  ifdef WCHAR
    701   *dest = *source;
    702 #  else /* BYTE */
    703   int temp = SIGN_EXTEND_CHAR (*(source + 1));
    704   *dest = *source & 0377;
    705   *dest += temp << 8;
    706 #  endif
    707 }
    708 
    709 #  ifndef EXTRACT_MACROS /* To debug the macros.  */
    710 #   undef EXTRACT_NUMBER
    711 #   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
    712 #  endif /* not EXTRACT_MACROS */
    713 
    714 # endif /* DEBUG */
    715 
    716 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
    717    SOURCE must be an lvalue.  */
    718 
    719 # define EXTRACT_NUMBER_AND_INCR(destination, source)			\
    720   do {									\
    721     EXTRACT_NUMBER (destination, source);				\
    722     (source) += OFFSET_ADDRESS_SIZE; 					\
    723   } while (0)
    724 
    725 # ifdef DEBUG
    726 static void PREFIX(extract_number_and_incr) (int *destination,
    727                                              UCHAR_T **source);
    728 static void
    729 PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
    730 {
    731   PREFIX(extract_number) (destination, *source);
    732   *source += OFFSET_ADDRESS_SIZE;
    733 }
    734 
    735 #  ifndef EXTRACT_MACROS
    736 #   undef EXTRACT_NUMBER_AND_INCR
    737 #   define EXTRACT_NUMBER_AND_INCR(dest, src) \
    738   PREFIX(extract_number_and_incr) (&dest, &src)
    739 #  endif /* not EXTRACT_MACROS */
    740 
    741 # endif /* DEBUG */
    742 
    743 
    744 
    746 /* If DEBUG is defined, Regex prints many voluminous messages about what
    747    it is doing (if the variable `debug' is nonzero).  If linked with the
    748    main program in `iregex.c', you can enter patterns and strings
    749    interactively.  And if linked with the main program in `main.c' and
    750    the other test files, you can run the already-written tests.  */
    751 
    752 # ifdef DEBUG
    753 
    754 #  ifndef DEFINED_ONCE
    755 
    756 /* We use standard I/O for debugging.  */
    757 #   include <stdio.h>
    758 
    759 /* It is useful to test things that ``must'' be true when debugging.  */
    760 #   include <assert.h>
    761 
    762 static int debug;
    763 
    764 #   define DEBUG_STATEMENT(e) e
    765 #   define DEBUG_PRINT1(x) if (debug) printf (x)
    766 #   define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
    767 #   define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
    768 #   define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
    769 #  endif /* not DEFINED_ONCE */
    770 
    771 #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
    772   if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
    773 #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
    774   if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
    775 
    776 
    777 /* Print the fastmap in human-readable form.  */
    778 
    779 #  ifndef DEFINED_ONCE
    780 void
    781 print_fastmap (char *fastmap)
    782 {
    783   unsigned was_a_range = 0;
    784   unsigned i = 0;
    785 
    786   while (i < (1 << BYTEWIDTH))
    787     {
    788       if (fastmap[i++])
    789 	{
    790 	  was_a_range = 0;
    791           putchar (i - 1);
    792           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
    793             {
    794               was_a_range = 1;
    795               i++;
    796             }
    797 	  if (was_a_range)
    798             {
    799               printf ("-");
    800               putchar (i - 1);
    801             }
    802         }
    803     }
    804   putchar ('\n');
    805 }
    806 #  endif /* not DEFINED_ONCE */
    807 
    808 
    809 /* Print a compiled pattern string in human-readable form, starting at
    810    the START pointer into it and ending just before the pointer END.  */
    811 
    812 void
    813 PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
    814 {
    815   int mcnt, mcnt2;
    816   UCHAR_T *p1;
    817   UCHAR_T *p = start;
    818   UCHAR_T *pend = end;
    819 
    820   if (start == NULL)
    821     {
    822       printf ("(null)\n");
    823       return;
    824     }
    825 
    826   /* Loop over pattern commands.  */
    827   while (p < pend)
    828     {
    829 #  ifdef _LIBC
    830       printf ("%td:\t", p - start);
    831 #  else
    832       printf ("%ld:\t", (long int) (p - start));
    833 #  endif
    834 
    835       switch ((re_opcode_t) *p++)
    836 	{
    837         case no_op:
    838           printf ("/no_op");
    839           break;
    840 
    841 	case exactn:
    842 	  mcnt = *p++;
    843           printf ("/exactn/%d", mcnt);
    844           do
    845 	    {
    846               putchar ('/');
    847 	      PUT_CHAR (*p++);
    848             }
    849           while (--mcnt);
    850           break;
    851 
    852 #  ifdef MBS_SUPPORT
    853 	case exactn_bin:
    854 	  mcnt = *p++;
    855 	  printf ("/exactn_bin/%d", mcnt);
    856           do
    857 	    {
    858 	      printf("/%lx", (long int) *p++);
    859             }
    860           while (--mcnt);
    861           break;
    862 #  endif /* MBS_SUPPORT */
    863 
    864 	case start_memory:
    865           mcnt = *p++;
    866           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
    867           break;
    868 
    869 	case stop_memory:
    870           mcnt = *p++;
    871 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
    872           break;
    873 
    874 	case duplicate:
    875 	  printf ("/duplicate/%ld", (long int) *p++);
    876 	  break;
    877 
    878 	case anychar:
    879 	  printf ("/anychar");
    880 	  break;
    881 
    882 	case charset:
    883         case charset_not:
    884           {
    885 #  ifdef WCHAR
    886 	    int i, length;
    887 	    wchar_t *workp = p;
    888 	    printf ("/charset [%s",
    889 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
    890 	    p += 5;
    891 	    length = *workp++; /* the length of char_classes */
    892 	    for (i=0 ; i<length ; i++)
    893 	      printf("[:%lx:]", (long int) *p++);
    894 	    length = *workp++; /* the length of collating_symbol */
    895 	    for (i=0 ; i<length ;)
    896 	      {
    897 		printf("[.");
    898 		while(*p != 0)
    899 		  PUT_CHAR((i++,*p++));
    900 		i++,p++;
    901 		printf(".]");
    902 	      }
    903 	    length = *workp++; /* the length of equivalence_class */
    904 	    for (i=0 ; i<length ;)
    905 	      {
    906 		printf("[=");
    907 		while(*p != 0)
    908 		  PUT_CHAR((i++,*p++));
    909 		i++,p++;
    910 		printf("=]");
    911 	      }
    912 	    length = *workp++; /* the length of char_range */
    913 	    for (i=0 ; i<length ; i++)
    914 	      {
    915 		wchar_t range_start = *p++;
    916 		wchar_t range_end = *p++;
    917 		printf("%C-%C", range_start, range_end);
    918 	      }
    919 	    length = *workp++; /* the length of char */
    920 	    for (i=0 ; i<length ; i++)
    921 	      printf("%C", *p++);
    922 	    putchar (']');
    923 #  else
    924             register int c, last = -100;
    925 	    register int in_range = 0;
    926 
    927 	    printf ("/charset [%s",
    928 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
    929 
    930             assert (p + *p < pend);
    931 
    932             for (c = 0; c < 256; c++)
    933 	      if (c / 8 < *p
    934 		  && (p[1 + (c/8)] & (1 << (c % 8))))
    935 		{
    936 		  /* Are we starting a range?  */
    937 		  if (last + 1 == c && ! in_range)
    938 		    {
    939 		      putchar ('-');
    940 		      in_range = 1;
    941 		    }
    942 		  /* Have we broken a range?  */
    943 		  else if (last + 1 != c && in_range)
    944               {
    945 		      putchar (last);
    946 		      in_range = 0;
    947 		    }
    948 
    949 		  if (! in_range)
    950 		    putchar (c);
    951 
    952 		  last = c;
    953               }
    954 
    955 	    if (in_range)
    956 	      putchar (last);
    957 
    958 	    putchar (']');
    959 
    960 	    p += 1 + *p;
    961 #  endif /* WCHAR */
    962 	  }
    963 	  break;
    964 
    965 	case begline:
    966 	  printf ("/begline");
    967           break;
    968 
    969 	case endline:
    970           printf ("/endline");
    971           break;
    972 
    973 	case on_failure_jump:
    974           PREFIX(extract_number_and_incr) (&mcnt, &p);
    975 #  ifdef _LIBC
    976   	  printf ("/on_failure_jump to %td", p + mcnt - start);
    977 #  else
    978   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
    979 #  endif
    980           break;
    981 
    982 	case on_failure_keep_string_jump:
    983           PREFIX(extract_number_and_incr) (&mcnt, &p);
    984 #  ifdef _LIBC
    985   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
    986 #  else
    987   	  printf ("/on_failure_keep_string_jump to %ld",
    988 		  (long int) (p + mcnt - start));
    989 #  endif
    990           break;
    991 
    992 	case dummy_failure_jump:
    993           PREFIX(extract_number_and_incr) (&mcnt, &p);
    994 #  ifdef _LIBC
    995   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
    996 #  else
    997   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
    998 #  endif
    999           break;
   1000 
   1001 	case push_dummy_failure:
   1002           printf ("/push_dummy_failure");
   1003           break;
   1004 
   1005         case maybe_pop_jump:
   1006           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1007 #  ifdef _LIBC
   1008   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
   1009 #  else
   1010   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
   1011 #  endif
   1012 	  break;
   1013 
   1014         case pop_failure_jump:
   1015 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1016 #  ifdef _LIBC
   1017   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
   1018 #  else
   1019   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
   1020 #  endif
   1021 	  break;
   1022 
   1023         case jump_past_alt:
   1024 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1025 #  ifdef _LIBC
   1026   	  printf ("/jump_past_alt to %td", p + mcnt - start);
   1027 #  else
   1028   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
   1029 #  endif
   1030 	  break;
   1031 
   1032         case jump:
   1033 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1034 #  ifdef _LIBC
   1035   	  printf ("/jump to %td", p + mcnt - start);
   1036 #  else
   1037   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
   1038 #  endif
   1039 	  break;
   1040 
   1041         case succeed_n:
   1042           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1043 	  p1 = p + mcnt;
   1044           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1045 #  ifdef _LIBC
   1046 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
   1047 #  else
   1048 	  printf ("/succeed_n to %ld, %d times",
   1049 		  (long int) (p1 - start), mcnt2);
   1050 #  endif
   1051           break;
   1052 
   1053         case jump_n:
   1054           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1055 	  p1 = p + mcnt;
   1056           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1057 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
   1058           break;
   1059 
   1060         case set_number_at:
   1061           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1062 	  p1 = p + mcnt;
   1063           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1064 #  ifdef _LIBC
   1065 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
   1066 #  else
   1067 	  printf ("/set_number_at location %ld to %d",
   1068 		  (long int) (p1 - start), mcnt2);
   1069 #  endif
   1070           break;
   1071 
   1072         case wordbound:
   1073 	  printf ("/wordbound");
   1074 	  break;
   1075 
   1076 	case notwordbound:
   1077 	  printf ("/notwordbound");
   1078           break;
   1079 
   1080 	case wordbeg:
   1081 	  printf ("/wordbeg");
   1082 	  break;
   1083 
   1084 	case wordend:
   1085 	  printf ("/wordend");
   1086 	  break;
   1087 
   1088 #  ifdef emacs
   1089 	case before_dot:
   1090 	  printf ("/before_dot");
   1091           break;
   1092 
   1093 	case at_dot:
   1094 	  printf ("/at_dot");
   1095           break;
   1096 
   1097 	case after_dot:
   1098 	  printf ("/after_dot");
   1099           break;
   1100 
   1101 	case syntaxspec:
   1102           printf ("/syntaxspec");
   1103 	  mcnt = *p++;
   1104 	  printf ("/%d", mcnt);
   1105           break;
   1106 
   1107 	case notsyntaxspec:
   1108           printf ("/notsyntaxspec");
   1109 	  mcnt = *p++;
   1110 	  printf ("/%d", mcnt);
   1111 	  break;
   1112 #  endif /* emacs */
   1113 
   1114 	case wordchar:
   1115 	  printf ("/wordchar");
   1116           break;
   1117 
   1118 	case notwordchar:
   1119 	  printf ("/notwordchar");
   1120           break;
   1121 
   1122 	case begbuf:
   1123 	  printf ("/begbuf");
   1124           break;
   1125 
   1126 	case endbuf:
   1127 	  printf ("/endbuf");
   1128           break;
   1129 
   1130         default:
   1131           printf ("?%ld", (long int) *(p-1));
   1132 	}
   1133 
   1134       putchar ('\n');
   1135     }
   1136 
   1137 #  ifdef _LIBC
   1138   printf ("%td:\tend of pattern.\n", p - start);
   1139 #  else
   1140   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
   1141 #  endif
   1142 }
   1143 
   1144 
   1145 void
   1146 PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
   1147 {
   1148   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
   1149 
   1150   PREFIX(print_partial_compiled_pattern) (buffer, buffer
   1151 				  + bufp->used / sizeof(UCHAR_T));
   1152   printf ("%ld bytes used/%ld bytes allocated.\n",
   1153 	  bufp->used, bufp->allocated);
   1154 
   1155   if (bufp->fastmap_accurate && bufp->fastmap)
   1156     {
   1157       printf ("fastmap: ");
   1158       print_fastmap (bufp->fastmap);
   1159     }
   1160 
   1161 #  ifdef _LIBC
   1162   printf ("re_nsub: %Zd\t", bufp->re_nsub);
   1163 #  else
   1164   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
   1165 #  endif
   1166   printf ("regs_alloc: %d\t", bufp->regs_allocated);
   1167   printf ("can_be_null: %d\t", bufp->can_be_null);
   1168   printf ("newline_anchor: %d\n", bufp->newline_anchor);
   1169   printf ("no_sub: %d\t", bufp->no_sub);
   1170   printf ("not_bol: %d\t", bufp->not_bol);
   1171   printf ("not_eol: %d\t", bufp->not_eol);
   1172   printf ("syntax: %lx\n", bufp->syntax);
   1173   /* Perhaps we should print the translate table?  */
   1174 }
   1175 
   1176 
        /* Print to stdout the text from position WHERE to the end of the
           two-part string made of STRING1 (SIZE1 elements) followed by
           STRING2 (SIZE2 elements).

           If FIRST_STRING_P says WHERE lies in the first string, the rest of
           STRING1 is printed and output continues from the start of STRING2;
           otherwise output starts at WHERE inside STRING2.  The STRING2 part
           is cut off with "..." once 101 characters of it have been printed;
           the STRING1 part is never cut off.  A null WHERE prints
           "(null)".  */
   1177 void
   1178 PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
   1179                              int size1, const CHAR_T *string2, int size2)
   1180 {
   1181   int this_char;
   1182 
   1183   if (where == NULL)
   1184     printf ("(null)");
   1185   else
   1186     {
   1187       int cnt;
   1188 
   1189       if (FIRST_STRING_P (where))
   1190         {
   1191           for (this_char = where - string1; this_char < size1; this_char++)
   1192 	    PUT_CHAR (string1[this_char]);
   1193 
        	  /* Carry on from the very start of the second string.  */
   1194           where = string2;
   1195         }
   1196 
   1197       cnt = 0;
   1198       for (this_char = where - string2; this_char < size2; this_char++)
   1199 	{
   1200 	  PUT_CHAR (string2[this_char]);
        	  /* Keep the debugging output bounded.  */
   1201 	  if (++cnt > 100)
   1202 	    {
   1203 	      fputs ("...", stdout);
   1204 	      break;
   1205 	    }
   1206 	}
   1207     }
   1208 }
   1209 
   1210 #  ifndef DEFINED_ONCE
   1211 void
   1212 printchar (int c)
   1213 {
   1214   putc (c, stderr);
   1215 }
   1216 #  endif
   1217 
   1218 # else /* not DEBUG */
   1219 
   1220 #  ifndef DEFINED_ONCE
   1221 #   undef assert
   1222 #   define assert(e)
   1223 
   1224 #   define DEBUG_STATEMENT(e)
   1225 #   define DEBUG_PRINT1(x)
   1226 #   define DEBUG_PRINT2(x1, x2)
   1227 #   define DEBUG_PRINT3(x1, x2, x3)
   1228 #   define DEBUG_PRINT4(x1, x2, x3, x4)
   1229 #  endif /* not DEFINED_ONCE */
   1230 #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
   1231 #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   1232 
   1233 # endif /* not DEBUG */
   1234 
   1235 
   1236 
   1238 # ifdef WCHAR
   1239 /* Convert the multibyte string SRC (LEN bytes) to a wide character
   1240    string stored in DEST.  The correspondence between the two is
   1241    written to OFFSET_BUFFER (see below), and IS_BINARY records for each
   1242    wchar_t whether it is binary data.  Invalid multibyte sequences are
   1243    treated as binary data.  OFFSET_BUFFER and IS_BINARY are assumed to
   1244    be already allocated with enough space.
           Returns the number of wide characters stored in DEST.  */
   1245 
   1246 static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
   1247 				  size_t len, int *offset_buffer,
   1248 				  char *is_binary);
   1249 static size_t
   1250 convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
   1251                     int *offset_buffer, char *is_binary)
   1252      /* OFFSET_BUFFER holds the correspondence between src (char string)
   1253 	and dest (wchar_t string) for optimization.
   1254 	e.g. src  = "xxxyzz"
   1255              dest = {'X', 'Y', 'Z'}
   1256 	      (each "xxx", "y" and "zz" represent one multibyte character
   1257 	       corresponding to 'X', 'Y' and 'Z'.)
   1258 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
   1259 	  	        = {0, 3, 4, 6}
   1260      */
   1261 {
   1262   wchar_t *pdest = dest;
   1263   const unsigned char *psrc = src;
   1264   size_t wc_count = 0;
   1265 
   1266   mbstate_t mbs;
   1267   int i, consumed;
   1268   size_t mb_remain = len;
   1269   size_t mb_count = 0;
   1270 
   1271   /* Initialize the conversion state.  */
   1272   memset (&mbs, 0, sizeof (mbstate_t));
   1273 
   1274   offset_buffer[0] = 0;
          /* One wide character is produced per iteration; CONSUMED is the
             number of source bytes it accounts for.  */
   1275   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
   1276 	 psrc += consumed)
   1277     {
   1278 #ifdef _LIBC
   1279       consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
   1280 #else
   1281       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
   1282 #endif
   1283 
   1284       if (consumed <= 0)
   1285 	/* Failed to convert; maybe src contains binary data.
   1286 	   So we consume 1 byte manually.
        	   NOTE(review): this relies on mbrtowc's size_t error returns
        	   becoming negative when stored in the int CONSUMED; a return
        	   of 0 is handled the same way -- confirm both are intended.  */
   1287 	{
   1288 	  *pdest = *psrc;
   1289 	  consumed = 1;
   1290 	  is_binary[wc_count] = TRUE;
   1291 	}
   1292       else
   1293 	is_binary[wc_count] = FALSE;
   1294       /* In sjis encoding, we use yen sign as escape character in
   1295 	 place of reverse solidus. So we convert 0x5c(yen sign in
   1296 	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
   1297 	 solidus in UCS2).  */
   1298       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
   1299 	*pdest = (wchar_t) *psrc;
   1300 
              /* Entry N+1 is the byte offset just past wide character N.  */
   1301       offset_buffer[wc_count + 1] = mb_count += consumed;
   1302     }
   1303 
   1304   /* Fill the remainder of the buffer with a sentinel.  */
   1305   for (i = wc_count + 1 ; i <= len ; i++)
   1306     offset_buffer[i] = mb_count + 1;
   1307 
   1308   return wc_count;
   1309 }
   1310 
   1311 # endif /* WCHAR */
   1312 
   1313 #else /* not INSIDE_RECURSION */
   1314 
   1315 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
   1316    also be assigned to arbitrarily: each pattern buffer stores its own
   1317    syntax, so it can be changed between regex compilations.  */
   1318 /* This has no initializer because initialized variables in Emacs
   1319    become read-only after dumping.  */
   1320 reg_syntax_t re_syntax_options;
   1321 
   1322 
   1323 /* Specify the precise syntax of regexps for compilation.  This provides
   1324    for compatibility for various utilities which historically have
   1325    different, incompatible syntaxes.
   1326 
   1327    The argument SYNTAX is a bit mask comprised of the various bits
   1328    defined in regex.h.  We return the old syntax.  */
   1329 
   1330 reg_syntax_t
   1331 re_set_syntax (reg_syntax_t syntax)
   1332 {
   1333   reg_syntax_t ret = re_syntax_options;
   1334 
   1335   re_syntax_options = syntax;
   1336 # ifdef DEBUG
   1337   if (syntax & RE_DEBUG)
   1338     debug = 1;
   1339   else if (debug) /* was on but now is not */
   1340     debug = 0;
   1341 # endif /* DEBUG */
   1342   return ret;
   1343 }
   1344 # ifdef _LIBC
   1345 weak_alias (__re_set_syntax, re_set_syntax)
   1346 # endif
   1347 
   1348 /* This table gives an error message for each of the error codes listed
   1350    in regex.h.  The order here has to be the same as there; the trailing
   1351    comment on each entry names the code it belongs to.  POSIX doesn't
   1352    require that we do anything for REG_NOERROR, but why not be nice?
           Each string is wrapped in gettext_noop.  */
   1353 
   1354 static const char *re_error_msgid[] =
   1355   {
   1356     gettext_noop ("Success"),	/* REG_NOERROR */
   1357     gettext_noop ("No match"),	/* REG_NOMATCH */
   1358     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
   1359     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
   1360     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
   1361     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
   1362     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
   1363     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
   1364     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
   1365     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
   1366     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
   1367     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
   1368     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
   1369     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
   1370     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
   1371     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
   1372     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
   1373   };
   1374 
   1375 #endif /* INSIDE_RECURSION */
   1377 
   1378 #ifndef DEFINED_ONCE
   1379 /* Avoiding alloca during matching, to placate r_alloc.  */
   1380 
   1381 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   1382    searching and matching functions should not call alloca.  On some
   1383    systems, alloca is implemented in terms of malloc, and if we're
   1384    using the relocating allocator routines, then malloc could cause a
   1385    relocation, which might (if the strings being searched are in the
   1386    ralloc heap) shift the data out from underneath the regexp
   1387    routines.
   1388 
   1389    Here's another reason to avoid allocation: Emacs
   1390    processes input from X in a signal handler; processing X input may
   1391    call malloc; if input arrives while a matching routine is calling
   1392    malloc, then we're scrod.  But Emacs can't just block input while
   1393    calling matching routines; then we don't notice interrupts when
   1394    they come in.  So, Emacs blocks input around all regexp calls
   1395    except the matching calls, which it leaves unprotected, in the
   1396    faith that they will not malloc.  */
   1397 
   1398 /* Normally, this is fine.  */
   1399 # define MATCH_MAY_ALLOCATE
   1400 
   1401 /* When using GNU C, we are not REALLY using the C alloca, no matter
   1402    what config.h may say.  So don't take precautions for it.  */
   1403 # ifdef __GNUC__
   1404 #  undef C_ALLOCA
   1405 # endif
   1406 
   1407 /* The match routines may not allocate if (1) they would do it with malloc
   1408    and (2) it's not safe for them to use malloc.
   1409    Note that if REL_ALLOC is defined, matching would not use malloc for the
   1410    failure stack, but we would still use it for the register vectors;
   1411    so REL_ALLOC should not affect this.  */
   1412 # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
   1413 #  undef MATCH_MAY_ALLOCATE
   1414 # endif
   1415 #endif /* not DEFINED_ONCE */
   1416 
   1417 #ifdef INSIDE_RECURSION
   1419 /* Failure stack declarations and macros; both re_compile_fastmap and
   1420    re_match_2 use a failure stack.  These have to be macros because of
   1421    REGEX_ALLOCATE_STACK.  */
   1422 
   1423 
   1424 /* Number of failure points for which to initially allocate space
   1425    when matching.  If this number is exceeded, we allocate more
   1426    space, so it is not a hard limit.  */
   1427 # ifndef INIT_FAILURE_ALLOC
   1428 #  define INIT_FAILURE_ALLOC 5
   1429 # endif
   1430 
   1431 /* Roughly the maximum number of failure points on the stack.  Would be
   1432    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   1433    This is a variable only so users of regex can assign to it; we never
   1434    change it ourselves.
           The two branches below differ only in integer width: long int and
           unsigned long int when INT_IS_16BIT is defined, int and unsigned
           otherwise.  */
   1435 
   1436 # ifdef INT_IS_16BIT
   1437 
   1438 #  ifndef DEFINED_ONCE
   1439 #   if defined MATCH_MAY_ALLOCATE
   1440 /* 4400 was enough to cause a crash on Alpha OSF/1,
   1441    whose default stack limit is 2mb.  */
   1442 long int re_max_failures = 4000;
   1443 #   else
   1444 long int re_max_failures = 2000;
   1445 #   endif
   1446 #  endif
   1447 
        /* One failure-stack slot; it holds either a pointer or an integer.  */
   1448 union PREFIX(fail_stack_elt)
   1449 {
   1450   UCHAR_T *pointer;
   1451   long int integer;
   1452 };
   1453 
   1454 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
   1455 
        /* The failure stack: STACK is the slot array, SIZE the number of
           slots allocated, AVAIL the index of the first unused slot.  */
   1456 typedef struct
   1457 {
   1458   PREFIX(fail_stack_elt_t) *stack;
   1459   unsigned long int size;
   1460   unsigned long int avail;		/* Offset of next open position.  */
   1461 } PREFIX(fail_stack_type);
   1462 
   1463 # else /* not INT_IS_16BIT */
   1464 
   1465 #  ifndef DEFINED_ONCE
   1466 #   if defined MATCH_MAY_ALLOCATE
   1467 /* 4400 was enough to cause a crash on Alpha OSF/1,
   1468    whose default stack limit is 2mb.  */
   1469 int re_max_failures = 4000;
   1470 #   else
   1471 int re_max_failures = 2000;
   1472 #   endif
   1473 #  endif
   1474 
        /* One failure-stack slot; it holds either a pointer or an integer.  */
   1475 union PREFIX(fail_stack_elt)
   1476 {
   1477   UCHAR_T *pointer;
   1478   int integer;
   1479 };
   1480 
   1481 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
   1482 
        /* The failure stack: STACK is the slot array, SIZE the number of
           slots allocated, AVAIL the index of the first unused slot.  */
   1483 typedef struct
   1484 {
   1485   PREFIX(fail_stack_elt_t) *stack;
   1486   unsigned size;
   1487   unsigned avail;			/* Offset of next open position.  */
   1488 } PREFIX(fail_stack_type);
   1489 
   1490 # endif /* INT_IS_16BIT */
   1491 
   1492 # ifndef DEFINED_ONCE
        /* Predicates on the failure stack.  FAIL_STACK_EMPTY and
           FAIL_STACK_FULL use the variable `fail_stack' of the enclosing
           scope; FAIL_STACK_PTR_EMPTY uses the pointer `fail_stack_ptr'.
           "Full" means every allocated slot is in use.  */
   1493 #  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
   1494 #  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
   1495 #  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
   1496 # endif
   1497 
   1498 
   1499 /* Define macros to initialize and free the failure stack.
   1500    Do `return -2' if the alloc fails.
           Both use the variable `fail_stack' of the enclosing scope.

           With MATCH_MAY_ALLOCATE, INIT_FAIL_STACK allocates room for
           INIT_FAILURE_ALLOC elements through REGEX_ALLOCATE_STACK and
           marks the stack empty, and RESET_FAIL_STACK hands the storage to
           REGEX_FREE_STACK.  Without it, INIT_FAIL_STACK only marks the
           stack empty and RESET_FAIL_STACK expands to nothing.  */
   1501 
   1502 # ifdef MATCH_MAY_ALLOCATE
   1503 #  define INIT_FAIL_STACK()						\
   1504   do {									\
   1505     fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
   1506       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
   1507 									\
   1508     if (fail_stack.stack == NULL)				\
   1509       return -2;							\
   1510 									\
   1511     fail_stack.size = INIT_FAILURE_ALLOC;			\
   1512     fail_stack.avail = 0;					\
   1513   } while (0)
   1514 
   1515 #  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
   1516 # else
   1517 #  define INIT_FAIL_STACK()						\
   1518   do {									\
   1519     fail_stack.avail = 0;					\
   1520   } while (0)
   1521 
   1522 #  define RESET_FAIL_STACK()
   1523 # endif
   1524 
   1525 
   1526 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
   1527 
   1528    Return 1 if succeeds, and 0 if either ran out of memory
   1529    allocating space for it or it was already too large.
   1530 
   1531    REGEX_REALLOCATE_STACK requires `destination' be declared.
           "Too large" means the current size already exceeds
           re_max_failures * MAX_FAILURE_ITEMS; that test comes first and
           leaves the stack untouched.  The result of the reallocation is
           stored into the `stack' member before it is tested, so after an
           allocation failure `stack' is null and `size' is unchanged.  */
   1532 
   1533 # define DOUBLE_FAIL_STACK(fail_stack)					\
   1534   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   1535    ? 0									\
   1536    : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
   1537         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
   1538           (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
   1539           ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
   1540 									\
   1541       (fail_stack).stack == NULL					\
   1542       ? 0								\
   1543       : ((fail_stack).size <<= 1, 					\
   1544          1)))
   1545 
   1546 
   1547 /* Push pointer POINTER on FAIL_STACK.
   1548    Return 1 if was able to do so and 0 if ran out of memory allocating
   1549    space to do so.
           The stack is doubled first when it is full.  Note that the
           fullness test, FAIL_STACK_FULL, always looks at the variable
           named `fail_stack' rather than at the FAIL_STACK argument, so the
           two only agree when the argument is that same variable.  */
   1550 # define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
   1551   ((FAIL_STACK_FULL ()							\
   1552     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   1553    ? 0									\
   1554    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
   1555       1))
   1556 
   1557 /* Push a pointer value onto the failure stack.
   1558    Assumes the variable `fail_stack'.  Probably should only
   1559    be called from within `PUSH_FAILURE_POINT'.
           None of the PUSH_FAILURE_* macros checks for room: each simply
           stores into slot `avail' and increments it, so the caller must
           already have made sure that a free slot exists.  */
   1560 # define PUSH_FAILURE_POINTER(item)					\
   1561   fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
   1562 
   1563 /* This pushes an integer-valued item onto the failure stack.
   1564    Assumes the variable `fail_stack'.  Probably should only
   1565    be called from within `PUSH_FAILURE_POINT'.  */
   1566 # define PUSH_FAILURE_INT(item)					\
   1567   fail_stack.stack[fail_stack.avail++].integer = (item)
   1568 
   1569 /* Push a fail_stack_elt_t value onto the failure stack.
   1570    Assumes the variable `fail_stack'.  Probably should only
   1571    be called from within `PUSH_FAILURE_POINT'.  */
   1572 # define PUSH_FAILURE_ELT(item)					\
   1573   fail_stack.stack[fail_stack.avail++] =  (item)
   1574 
   1575 /* These three POP... operations complement the three PUSH... operations.
   1576    All assume that `fail_stack' is nonempty.
           Each decrements `avail' and yields the slot it then indexes, read
           as a pointer, as an integer, or as a whole element.  */
   1577 # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
   1578 # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
   1579 # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
   1580 
   1581 /* Used to omit pushing failure point id's when we're not debugging.
           With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP
           stores the popped integer through ITEM_ADDR; otherwise both
           expand to nothing, and their arguments are not evaluated.  */
   1582 # ifdef DEBUG
   1583 #  define DEBUG_PUSH PUSH_FAILURE_INT
   1584 #  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
   1585 # else
   1586 #  define DEBUG_PUSH(item)
   1587 #  define DEBUG_POP(item_addr)
   1588 # endif
   1589 
   1590 
   1591 /* Push the information about the state we will need
   1592    if we ever fail back to it.
   1593 
   1594    Requires variables fail_stack, regstart, regend, reg_info, and
   1595    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   1596    be declared.
           The expansion also reads lowest_active_reg and highest_active_reg,
           and its debugging calls name bufp, pend, string1, size1, string2
           and size2.
   1597 
   1598    Does `return FAILURE_CODE' if runs out of memory.

           The stack is first doubled until NUM_FAILURE_ITEMS slots are free.
           Then, in this order, the macro pushes: for every register from
           lowest_active_reg through highest_active_reg its regstart entry,
           its regend entry and its reg_info word; lowest_active_reg;
           highest_active_reg; PATTERN_PLACE; STRING_PLACE; and, through
           DEBUG_PUSH, the failure id.  */
   1599 
   1600 # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
   1601   do {									\
   1602     char *destination;							\
   1603     /* Must be int, so when we don't save any registers, the arithmetic	\
   1604        of 0 + -1 isn't done as unsigned.  */				\
   1605     /* Can't be int, since there is not a shred of a guarantee that int	\
   1606        is wide enough to hold a value of something to which pointer can	\
   1607        be assigned */							\
   1608     active_reg_t this_reg;						\
   1609     									\
   1610     DEBUG_STATEMENT (failure_id++);					\
   1611     DEBUG_STATEMENT (nfailure_points_pushed++);				\
   1612     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
   1613     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
   1614     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
   1615 									\
   1616     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
   1617     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
   1618 									\
   1619     /* Ensure we have enough space allocated for what we will push.  */	\
   1620     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
   1621       {									\
   1622         if (!DOUBLE_FAIL_STACK (fail_stack))				\
   1623           return failure_code;						\
   1624 									\
   1625         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
   1626 		       (fail_stack).size);				\
   1627         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
   1628       }									\
   1629 									\
   1630     /* Push the info, starting with the registers.  */			\
   1631     DEBUG_PRINT1 ("\n");						\
   1632 									\
   1633     if (1)								\
   1634       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
   1635 	   this_reg++)							\
   1636 	{								\
   1637 	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
   1638 	  DEBUG_STATEMENT (num_regs_pushed++);				\
   1639 									\
   1640 	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
   1641 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
   1642 									\
   1643 	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
   1644 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
   1645 									\
   1646 	  DEBUG_PRINT2 ("    info: %p\n      ",				\
   1647 			reg_info[this_reg].word.pointer);		\
   1648 	  DEBUG_PRINT2 (" match_null=%d",				\
   1649 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
   1650 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
   1651 	  DEBUG_PRINT2 (" matched_something=%d",			\
   1652 			MATCHED_SOMETHING (reg_info[this_reg]));	\
   1653 	  DEBUG_PRINT2 (" ever_matched=%d",				\
   1654 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
   1655 	  DEBUG_PRINT1 ("\n");						\
   1656 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
   1657 	}								\
   1658 									\
   1659     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
   1660     PUSH_FAILURE_INT (lowest_active_reg);				\
   1661 									\
   1662     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
   1663     PUSH_FAILURE_INT (highest_active_reg);				\
   1664 									\
   1665     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
   1666     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
   1667     PUSH_FAILURE_POINTER (pattern_place);				\
   1668 									\
   1669     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
   1670     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
   1671 				 size2);				\
   1672     DEBUG_PRINT1 ("'\n");						\
   1673     PUSH_FAILURE_POINTER (string_place);				\
   1674 									\
   1675     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
   1676     DEBUG_PUSH (failure_id);						\
   1677   } while (0)
   1678 
   1679 # ifndef DEFINED_ONCE
   1680 /* This is the number of items that are pushed and popped on the stack
   1681    for each register: its start position, its end position and its
           `reg_info' word.  */
   1682 #  define NUM_REG_ITEMS  3
   1683 
   1684 /* Individual items aside from the registers: the lowest and highest
           active register numbers, the pattern position and the string
           position.  */
   1685 #  ifdef DEBUG
   1686 #   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
   1687 #  else
   1688 #   define NUM_NONREG_ITEMS 4
   1689 #  endif
   1690 
   1691 /* We push at most this many items on the stack.  */
   1692 /* We used to use (num_regs - 1), which is the number of registers
   1693    this regexp will save; but that was changed to 5
   1694    to avoid stack overflow for a regexp with lots of parens.  */
   1695 #  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
   1696 
   1697 /* We actually push this many items.  The `0 ? 0 :' conditional always
           selects its second operand, so every register from
           `lowest_active_reg' through `highest_active_reg' is counted.
           Both of those variables must be in scope where this is used.  */
   1698 #  define NUM_FAILURE_ITEMS				\
   1699   (((0							\
   1700      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
   1701     * NUM_REG_ITEMS)					\
   1702    + NUM_NONREG_ITEMS)
   1703 
   1704 /* How many items can still be added to the stack without overflowing it.
           Reads the `size' and `avail' members of `fail_stack'.  */
   1705 #  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
   1706 # endif /* not DEFINED_ONCE */
   1707 
   1708 
   1709 /* Pops what PUSH_FAILURE_POINT pushes.
   1710 
   1711    We restore into the parameters, all of which should be lvalues:
   1712      STR -- the saved data position.
   1713      PAT -- the saved pattern position.
   1714      LOW_REG, HIGH_REG -- the lowest and highest active registers.
   1715      REGSTART, REGEND -- arrays of string positions.
   1716      REG_INFO -- array of information about each subexpression.
   1717 
           Items come off the stack in the reverse of the order they were
           pushed: the failure id (debugging builds only), the string
           position, the pattern position, the highest and then the lowest
           active register number, and finally, for each register from
           HIGH_REG down to LOW_REG, its info word, end and start.

           A saved string position of NULL leaves STR unchanged.  The macro
           expands to a bare braced block (not `do ... while (0)') and clears
           `set_regs_matched_done' before finishing.  Because the register
           loop is guarded by `if (1)', the `else' branch that would reset
           registers above HIGH_REG is never executed.

   1718    Also assumes the variables `fail_stack' and (if debugging), `bufp',
   1719    `pend', `string1', `size1', `string2', and `size2'.  */
   1720 # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
   1721 {									\
   1722   DEBUG_STATEMENT (unsigned failure_id;)				\
   1723   active_reg_t this_reg;						\
   1724   const UCHAR_T *string_temp;						\
   1725 									\
   1726   assert (!FAIL_STACK_EMPTY ());					\
   1727 									\
   1728   /* Remove failure points and point to how many regs pushed.  */	\
   1729   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
   1730   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
   1731   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
   1732 									\
   1733   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
   1734 									\
   1735   DEBUG_POP (&failure_id);						\
   1736   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
   1737 									\
   1738   /* If the saved string location is NULL, it came from an		\
   1739      on_failure_keep_string_jump opcode, and we want to throw away the	\
   1740      saved NULL, thus retaining our current position in the string.  */	\
   1741   string_temp = POP_FAILURE_POINTER ();					\
   1742   if (string_temp != NULL)						\
   1743     str = (const CHAR_T *) string_temp;					\
   1744 									\
   1745   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   1746   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   1747   DEBUG_PRINT1 ("'\n");							\
   1748 									\
   1749   pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
   1750   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
   1751   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
   1752 									\
   1753   /* Restore register info.  */						\
   1754   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1755   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
   1756 									\
   1757   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1758   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
   1759 									\
   1760   if (1)								\
   1761     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
   1762       {									\
   1763 	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
   1764 									\
   1765 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
   1766 	DEBUG_PRINT2 ("      info: %p\n",				\
   1767 		      reg_info[this_reg].word.pointer);			\
   1768 									\
   1769 	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1770 	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
   1771 									\
   1772 	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1773 	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
   1774       }									\
   1775   else									\
   1776     {									\
   1777       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
   1778 	{								\
   1779 	  reg_info[this_reg].word.integer = 0;				\
   1780 	  regend[this_reg] = 0;						\
   1781 	  regstart[this_reg] = 0;					\
   1782 	}								\
   1783       highest_active_reg = high_reg;					\
   1784     }									\
   1785 									\
   1786   set_regs_matched_done = 0;						\
   1787   DEBUG_STATEMENT (nfailure_points_popped++);				\
   1788 } /* POP_FAILURE_POINT */
   1789 
   1790 /* Structure for per-register (a.k.a. per-group) information.
   1792    Other register information, such as the
   1793    starting and ending positions (which are addresses), and the list of
   1794    inner groups (which is a bits list) are maintained in separate
   1795    variables.
   1796 
   1797    We are making a (strictly speaking) nonportable assumption here: that
   1798    the compiler will pack our bit fields into something that fits into
   1799    the type of `word', i.e., is something that fits into one item on the
   1800    failure stack.  */
   1801 
   1802 
   1803 /* Declarations and macros for re_match_2.  */
   1804 
           /* `word' and `bits' are two views of the same storage: the failure
              stack code saves and restores a register's information through
              `word' as a single stack item, while the matcher reads and
              writes the individual flags through `bits'.  */
   1805 typedef union
   1806 {
   1807   PREFIX(fail_stack_elt_t) word;
   1808   struct
   1809   {
   1810       /* This field is one if this group can match the empty string,
   1811          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
   1812 # define MATCH_NULL_UNSET_VALUE 3
   1813     unsigned match_null_string_p : 2;
           /* One-bit flags; they are read and set through the IS_ACTIVE,
              MATCHED_SOMETHING and EVER_MATCHED_SOMETHING accessors.  */
   1814     unsigned is_active : 1;
   1815     unsigned matched_something : 1;
   1816     unsigned ever_matched_something : 1;
   1817   } bits;
   1818 } PREFIX(register_info_type);
   1819 
   1820 # ifndef DEFINED_ONCE
           /* Accessors for the flag bits of a register_info_type R.  Each one
              expands to the bit-field itself, so it can be assigned to as well
              as read.  */
   1821 #  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
   1822 #  define IS_ACTIVE(R)  ((R).bits.is_active)
   1823 #  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
   1824 #  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
   1825 
   1826 
   1827 /* Call this when have matched a real character; it sets `matched' flags
   1828    for the subexpressions which we are currently inside.  Also records
   1829    that those subexprs have matched.
           The work is skipped while `set_regs_matched_done' is nonzero; the
           macro sets that flag itself after the first pass.  It uses the
           variables `reg_info', `lowest_active_reg' and `highest_active_reg'
           of the enclosing scope.  */
   1830 #  define SET_REGS_MATCHED()						\
   1831   do									\
   1832     {									\
   1833       if (!set_regs_matched_done)					\
   1834 	{								\
   1835 	  active_reg_t r;						\
   1836 	  set_regs_matched_done = 1;					\
   1837 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
   1838 	    {								\
   1839 	      MATCHED_SOMETHING (reg_info[r])				\
   1840 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
   1841 		= 1;							\
   1842 	    }								\
   1843 	}								\
   1844     }									\
   1845   while (0)
   1846 # endif /* not DEFINED_ONCE */
   1847 
   1848 /* Registers are set to a sentinel when they haven't yet matched.
           The sentinel is the address of a private dummy object;
           REG_UNSET (E) tests whether the register value E equals it.  */
   1849 static CHAR_T PREFIX(reg_unset_dummy);
   1850 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
   1851 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
   1852 
   1853 /* Subroutine declarations and macros for regex_compile.
           These are forward declarations only; the definitions are not in
           this part of the file.  The store_op and insert_op routines are the
           targets of the STORE_JUMP and INSERT_JUMP macro families.  */
   1854 static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
   1855 static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
   1856                                int arg1, int arg2);
   1857 static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
   1858                                 int arg, UCHAR_T *end);
   1859 static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
   1860                                 int arg1, int arg2, UCHAR_T *end);
   1861 static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
   1862                                          const CHAR_T *p,
   1863                                          reg_syntax_t syntax);
   1864 static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
   1865                                          const CHAR_T *pend,
   1866                                          reg_syntax_t syntax);
           /* The range compiler has a different name and signature in each
              build: the wide-character one takes an extra CHAR_SET argument,
              and `insert_space' is declared for the wide-character build
              only.  */
   1867 # ifdef WCHAR
   1868 static reg_errcode_t wcs_compile_range (CHAR_T range_start,
   1869                                         const CHAR_T **p_ptr,
   1870                                         const CHAR_T *pend,
   1871                                         char *translate,
   1872                                         reg_syntax_t syntax,
   1873                                         UCHAR_T *b,
   1874                                         CHAR_T *char_set);
   1875 static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
   1876 # else /* BYTE */
   1877 static reg_errcode_t byte_compile_range (unsigned int range_start,
   1878                                          const char **p_ptr,
   1879                                          const char *pend,
   1880                                          char *translate,
   1881                                          reg_syntax_t syntax,
   1882                                          unsigned char *b);
   1883 # endif /* WCHAR */
   1884 
   1885 /* Fetch the next character in the uncompiled pattern---translating it
   1886    if necessary.  Also cast from a signed character in the constant
   1887    string passed to us by the user to an unsigned char that we can use
   1888    as an array index (in, e.g., `translate').
           Uses the variables `p', `pend' and `translate' of the enclosing
           function and advances `p'.  When the pattern is exhausted
           (p == pend) it makes the enclosing function return REG_EEND, so it
           may only be used in a function that returns an error code.  */
   1889 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1890    because it is impossible to allocate 4GB array for some encodings
   1891    which have 4 byte character_set like UCS4.  */
   1892 # ifndef PATFETCH
   1893 #  ifdef WCHAR
   1894 #   define PATFETCH(c)							\
   1895   do {if (p == pend) return REG_EEND;					\
   1896     c = (UCHAR_T) *p++;							\
   1897     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
   1898   } while (0)
   1899 #  else /* BYTE */
   1900 #   define PATFETCH(c)							\
   1901   do {if (p == pend) return REG_EEND;					\
   1902     c = (unsigned char) *p++;						\
   1903     if (translate) c = (unsigned char) translate[c];			\
   1904   } while (0)
   1905 #  endif /* WCHAR */
   1906 # endif
   1907 
   1908 /* Fetch the next character in the uncompiled pattern, with no
   1909    translation.  Like PATFETCH, it advances `p' and makes the enclosing
           function return REG_EEND when `p' has reached `pend'.  */
   1910 # define PATFETCH_RAW(c)						\
   1911   do {if (p == pend) return REG_EEND;					\
   1912     c = (UCHAR_T) *p++; 	       					\
   1913   } while (0)
   1914 
   1915 /* Go backwards one character in the pattern.  This only decrements `p';
           no check against the start of the pattern is made.  */
   1916 # define PATUNFETCH p--
   1917 
   1918 
   1919 /* If `translate' is non-null, return translate[D], else just D.  We
   1920    cast the subscript to translate because some data is declared as
   1921    `char *', to avoid warnings when a string constant is passed.  But
   1922    when we use a character as a subscript we must make it unsigned.
           The translated value is cast to `char'.  D may be evaluated more
           than once, so it should not have side effects.  */
   1923 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1924    because it is impossible to allocate 4GB array for some encodings
   1925    which have 4 byte character_set like UCS4.  */
   1926 
   1927 # ifndef TRANSLATE
   1928 #  ifdef WCHAR
   1929 #   define TRANSLATE(d) \
   1930   ((translate && ((UCHAR_T) (d)) <= 0xff) \
   1931    ? (char) translate[(unsigned char) (d)] : (d))
   1932 # else /* BYTE */
   1933 #   define TRANSLATE(d) \
   1934   (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
   1935 #  endif /* WCHAR */
   1936 # endif
   1937 
   1938 
   1939 /* Macros for outputting the compiled pattern into `buffer'.  */
   1940 
   1941 /* If the buffer isn't allocated when it comes in, use this.
           The value is a byte count: room for 32 UCHAR_T elements.  */
   1942 # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
   1943 
   1944 /* Make sure we have at least N more elements of space in buffer (bytes
           in the BYTE build, CHAR_T units in the WCHAR build).  EXTEND_BUFFER
           is invoked repeatedly until the space from the start of the buffer
           to `b' plus the N requested elements fits in `bufp->allocated';
           EXTEND_BUFFER may make the enclosing function return.  */
   1945 # ifdef WCHAR
   1946 #  define GET_BUFFER_SPACE(n)						\
   1947     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
   1948             + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
   1949       EXTEND_BUFFER ()
   1950 # else /* BYTE */
   1951 #  define GET_BUFFER_SPACE(n)						\
   1952     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
   1953       EXTEND_BUFFER ()
   1954 # endif /* WCHAR */
   1955 
   1956 /* Make sure we have one more element of buffer space and then add C to
           it.  C is converted to UCHAR_T and stored at `b', which is then
           advanced.  */
   1957 # define BUF_PUSH(c)							\
   1958   do {									\
   1959     GET_BUFFER_SPACE (1);						\
   1960     *b++ = (UCHAR_T) (c);						\
   1961   } while (0)
   1962 
   1963 
   1964 /* Ensure we have two more elements of buffer space and then append C1
           and C2, in that order.  */
   1965 # define BUF_PUSH_2(c1, c2)						\
   1966   do {									\
   1967     GET_BUFFER_SPACE (2);						\
   1968     *b++ = (UCHAR_T) (c1);						\
   1969     *b++ = (UCHAR_T) (c2);						\
   1970   } while (0)
   1971 
   1972 
   1973 /* As with BUF_PUSH_2, except for three elements.  */
   1974 # define BUF_PUSH_3(c1, c2, c3)						\
   1975   do {									\
   1976     GET_BUFFER_SPACE (3);						\
   1977     *b++ = (UCHAR_T) (c1);						\
   1978     *b++ = (UCHAR_T) (c2);						\
   1979     *b++ = (UCHAR_T) (c3);						\
   1980   } while (0)
   1981 
   1982 /* Store a jump with opcode OP at LOC to location TO.  We store a
   1983    relative address offset by the size of the jump itself: one opcode
           plus OFFSET_ADDRESS_SIZE for the address.  The offset is passed
           to the store routine as an `int'.  */
   1984 # define STORE_JUMP(op, loc, to) \
   1985  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
   1986 
   1987 /* Likewise, for a two-argument jump.  */
   1988 # define STORE_JUMP2(op, loc, to, arg) \
   1989   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
   1990 
   1991 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
   1992 # define INSERT_JUMP(op, loc, to) \
   1993   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
   1994 
   1995 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
   1996 # define INSERT_JUMP2(op, loc, to, arg) \
   1997   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
   1998 	      arg, b)
   1999 
   2000 /* This is not an arbitrary limit: the arguments which represent offsets
   2001    into the pattern are two bytes long.  So if 2^16 bytes turns out to
   2002    be too small, many things would have to change.  */
   2003 /* Any other compiler which, like MSC, has allocation limit below 2^16
   2004    bytes will have to use approach similar to what was done below for
   2005    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   2006    reallocating to 0 bytes.  Such thing is not going to work too well.
   2007    You have been warned!!  */
   2008 # ifndef DEFINED_ONCE
   2009 #  if defined _MSC_VER  && !defined WIN32
   2010 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   2011    The REALLOC define eliminates a flurry of conversion warnings,
   2012    but is not required. */
   2013 #   define MAX_BUF_SIZE  65500L
   2014 #   define REALLOC(p,s) realloc ((p), (size_t) (s))
   2015 #  else
   2016 #   define MAX_BUF_SIZE (1L << 16)
   2017 #   define REALLOC(p,s) realloc ((p), (s))
   2018 #  endif
   2019 
   2020 /* Extend the buffer by doubling its current size via realloc and
   2021    reset the pointers that pointed into the old block to point to the
   2022    correct places in the new one.  If extending the buffer results in it
   2023    being larger than MAX_BUF_SIZE, then flag memory exhausted.
           The helper macros below adjust a single pointer P; they use a
           variable named `incr' (the distance the buffer moved), which
           EXTEND_BUFFER declares before invoking them.  */
   2024 #  if __BOUNDED_POINTERS__
   2025 #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
   2026 #   define MOVE_BUFFER_POINTER(P) \
   2027   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
   2028 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
   2029   else						\
   2030     {						\
   2031       SET_HIGH_BOUND (b);			\
   2032       SET_HIGH_BOUND (begalt);			\
   2033       if (fixup_alt_jump)			\
   2034 	SET_HIGH_BOUND (fixup_alt_jump);	\
   2035       if (laststart)				\
   2036 	SET_HIGH_BOUND (laststart);		\
   2037       if (pending_exact)			\
   2038 	SET_HIGH_BOUND (pending_exact);		\
   2039     }
   2040 #  else
   2041 #   define MOVE_BUFFER_POINTER(P) (P) += incr
   2042 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
   2043 #  endif
   2044 # endif /* not DEFINED_ONCE */
   2045 
   2046 # ifdef WCHAR
   2047 #  define EXTEND_BUFFER()						\
   2048   do {									\
   2049     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
   2050     int wchar_count;							\
   2051     if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
   2052       return REG_ESIZE;							\
   2053     bufp->allocated <<= 1;						\
   2054     if (bufp->allocated > MAX_BUF_SIZE)					\
   2055       bufp->allocated = MAX_BUF_SIZE;					\
   2056     /* How many characters the new buffer can have?  */			\
   2057     wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
   2058     if (wchar_count == 0) wchar_count = 1;				\
   2059     /* Truncate the buffer to CHAR_T align.  */			\
   2060     bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
   2061     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
   2062     bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
   2063     if (COMPILED_BUFFER_VAR == NULL)					\
   2064       return REG_ESPACE;						\
   2065     /* If the buffer moved, move all the pointers into it.  */		\
   2066     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2067       {									\
   2068 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2069 	MOVE_BUFFER_POINTER (b);					\
   2070 	MOVE_BUFFER_POINTER (begalt);					\
   2071 	if (fixup_alt_jump)						\
   2072 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2073 	if (laststart)							\
   2074 	  MOVE_BUFFER_POINTER (laststart);				\
   2075 	if (pending_exact)						\
   2076 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2077       }									\
   2078     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2079   } while (0)
   2080 # else /* BYTE */
   2081 #  define EXTEND_BUFFER()						\
   2082   do {									\
   2083     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
   2084     if (bufp->allocated == MAX_BUF_SIZE)				\
   2085       return REG_ESIZE;							\
   2086     bufp->allocated <<= 1;						\
   2087     if (bufp->allocated > MAX_BUF_SIZE)					\
   2088       bufp->allocated = MAX_BUF_SIZE;					\
   2089     bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
   2090 						bufp->allocated);	\
   2091     if (COMPILED_BUFFER_VAR == NULL)					\
   2092       return REG_ESPACE;						\
   2093     /* If the buffer moved, move all the pointers into it.  */		\
   2094     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2095       {									\
   2096 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2097 	MOVE_BUFFER_POINTER (b);					\
   2098 	MOVE_BUFFER_POINTER (begalt);					\
   2099 	if (fixup_alt_jump)						\
   2100 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2101 	if (laststart)							\
   2102 	  MOVE_BUFFER_POINTER (laststart);				\
   2103 	if (pending_exact)						\
   2104 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2105       }									\
   2106     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2107   } while (0)
   2108 # endif /* WCHAR */
   2109 
   2110 # ifndef DEFINED_ONCE
   2111 /* Since we have one byte reserved for the register number argument to
   2112    {start,stop}_memory, the maximum number of groups we can report
   2113    things about is what fits in that byte.  */
   2114 #  define MAX_REGNUM 255
   2115 
   2116 /* But patterns can have more than `MAX_REGNUM' registers.  We just
   2117    ignore the excess.  */
   2118 typedef unsigned regnum_t;
   2119 
   2120 
   2121 /* Macros for the compile stack.  */
   2122 
   2123 /* Since offsets can go either forwards or backwards, this type needs to
   2124    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
   2125 /* int may be not enough when sizeof(int) == 2.  */
   2126 typedef long pattern_offset_t;
   2127 
           /* One entry of the compile stack.  The positions are kept as
              offsets rather than pointers.  NOTE(review): presumably one entry
              is saved per open group and the offsets are relative to the start
              of the compiled buffer -- confirm against the code that pushes
              them, which is outside this excerpt.  */
   2128 typedef struct
   2129 {
   2130   pattern_offset_t begalt_offset;
   2131   pattern_offset_t fixup_alt_jump;
   2132   pattern_offset_t inner_group_offset;
   2133   pattern_offset_t laststart_offset;
   2134   regnum_t regnum;
   2135 } compile_stack_elt_t;
   2136 
   2137 
           /* The compile stack itself: an array of `size' entries, of which
              the first `avail' are in use.  */
   2138 typedef struct
   2139 {
   2140   compile_stack_elt_t *stack;
   2141   unsigned size;
   2142   unsigned avail;			/* Offset of next open position.  */
   2143 } compile_stack_type;
   2144 
   2145 
           /* Number of entries allocated for a fresh compile stack.  */
   2146 #  define INIT_COMPILE_STACK_SIZE 32
   2147 
           /* Both tests refer to a variable named `compile_stack' in the
              enclosing scope.  */
   2148 #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
   2149 #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
   2150 
   2151 /* The next available element.  */
   2152 #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
   2153 
   2154 # endif /* not DEFINED_ONCE */
   2155 
   2156 /* Set the bit for character C in a list.  */
   2157 # ifndef DEFINED_ONCE
   2158 #  define SET_LIST_BIT(c)                               \
   2159   (b[((unsigned char) (c)) / BYTEWIDTH]               \
   2160    |= 1 << (((unsigned char) c) % BYTEWIDTH))
   2161 # endif /* DEFINED_ONCE */
   2162 
   2163 /* Get the next unsigned number in the uncompiled pattern.
           Characters are fetched with PATFETCH into the variable `c' until
           the pattern ends or a non-digit has been fetched; that non-digit
           is left in `c' and has already been consumed.  Digits are
           accumulated into NUM only while NUM <= RE_DUP_MAX, which keeps NUM
           from growing without bound on long digit strings.  A negative NUM
           on entry is reset to zero when the first digit arrives; if no
           digit is read, NUM is left unchanged.  The macro expands to a bare
           braced block and NUM is evaluated several times.  */
   2164 # define GET_UNSIGNED_NUMBER(num) \
   2165   {									\
   2166     while (p != pend)							\
   2167       {									\
   2168 	PATFETCH (c);							\
   2169 	if (c < '0' || c > '9')						\
   2170 	  break;							\
   2171 	if (num <= RE_DUP_MAX)						\
   2172 	  {								\
   2173 	    if (num < 0)						\
   2174 	      num = 0;							\
   2175 	    num = num * 10 + c - '0';					\
   2176 	  }								\
   2177       }									\
   2178   }
   2179 
   2180 # ifndef DEFINED_ONCE
   2181 #  if defined _LIBC || WIDE_CHAR_SUPPORT
   2182 /* The GNU C library provides support for user-defined character classes
   2183    and the functions from ISO C amendment 1.  */
   2184 #   ifdef CHARCLASS_NAME_MAX
   2185 #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
   2186 #   else
   2187 /* This shouldn't happen but some implementation might still have this
   2188    problem.  Use a reasonable default value.  */
   2189 #    define CHAR_CLASS_MAX_LENGTH 256
   2190 #   endif
   2191 
           /* With wide character support, a class name is valid when the
              wctype lookup returns a nonzero value for it.  */
   2192 #   ifdef _LIBC
   2193 #    define IS_CHAR_CLASS(string) __wctype (string)
   2194 #   else
   2195 #    define IS_CHAR_CLASS(string) wctype (string)
   2196 #   endif
   2197 #  else
   2198 #   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
   2199 
           /* Without wide character support only this fixed list of class
              names is recognized.  */
   2200 #   define IS_CHAR_CLASS(string)					\
   2201    (STREQ (string, "alpha") || STREQ (string, "upper")			\
   2202     || STREQ (string, "lower") || STREQ (string, "digit")		\
   2203     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   2204     || STREQ (string, "space") || STREQ (string, "print")		\
   2205     || STREQ (string, "punct") || STREQ (string, "graph")		\
   2206     || STREQ (string, "cntrl") || STREQ (string, "blank"))
   2207 #  endif
   2208 # endif /* not DEFINED_ONCE */
   2209 
   2210 # ifndef MATCH_MAY_ALLOCATE
   2212 
   2213 /* If we cannot allocate large objects within re_match_2_internal,
   2214    we make the fail stack and register vectors global.
   2215    The fail stack, we grow to the maximum size when a regexp
   2216    is compiled.
   2217    The register vectors, we adjust in size each time we
   2218    compile a regexp, according to the number of registers it needs.  */
   2219 
   2220 static PREFIX(fail_stack_type) fail_stack;
   2221 
   2222 /* Size with which the following vectors are currently allocated.
   2223    That is so we can make them bigger as needed,
   2224    but never make them smaller.
           The size and the string-position vectors are declared only when
           DEFINED_ONCE is defined, whereas the two register-info vectors
           below carry the PREFIX and are declared unconditionally.  */
   2225 #  ifdef DEFINED_ONCE
   2226 static int regs_allocated_size;
   2227 
   2228 static const char **     regstart, **     regend;
   2229 static const char ** old_regstart, ** old_regend;
   2230 static const char **best_regstart, **best_regend;
   2231 static const char **reg_dummy;
   2232 #  endif /* DEFINED_ONCE */
   2233 
   2234 static PREFIX(register_info_type) *PREFIX(reg_info);
   2235 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
   2236 
   2237 /* Make the register vectors big enough for NUM_REGS registers,
   2238    but don't make them smaller.  */
   2239 
   2240 static void
   2241 PREFIX(regex_grow_registers) (int num_regs)
   2242 {
   2243   if (num_regs > regs_allocated_size)
   2244     {
   2245       RETALLOC_IF (regstart,	 num_regs, const char *);
   2246       RETALLOC_IF (regend,	 num_regs, const char *);
   2247       RETALLOC_IF (old_regstart, num_regs, const char *);
   2248       RETALLOC_IF (old_regend,	 num_regs, const char *);
   2249       RETALLOC_IF (best_regstart, num_regs, const char *);
   2250       RETALLOC_IF (best_regend,	 num_regs, const char *);
   2251       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
   2252       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
   2253       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
   2254 
   2255       regs_allocated_size = num_regs;
   2256     }
   2257 }
   2258 
   2259 # endif /* not MATCH_MAY_ALLOCATE */
   2260 
   2261 # ifndef DEFINED_ONCE
           /* Forward declaration; the definition is not in this part of the
              file.  Note that the compile stack descriptor is passed by
              value.  */
   2263 static boolean group_in_compile_stack (compile_stack_type compile_stack,
   2264                                        regnum_t regnum);
   2265 # endif /* not DEFINED_ONCE */
   2266 
   2267 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   2268    Returns one of error codes defined in `regex.h', or zero for success.
   2269 
   2270    Assumes the `allocated' (and perhaps `buffer') and `translate'
   2271    fields are set in BUFP on entry.
   2272 
   2273    If it succeeds, results are put in BUFP (if it returns an error, the
   2274    contents of BUFP are undefined):
   2275      `buffer' is the compiled pattern;
   2276      `syntax' is set to SYNTAX;
   2277      `used' is set to the length of the compiled pattern;
   2278      `fastmap_accurate' is zero;
   2279      `re_nsub' is the number of subexpressions in PATTERN;
   2280      `not_bol' and `not_eol' are zero;
   2281 
   2282    The `fastmap' and `newline_anchor' fields are neither
   2283    examined nor set.  */
   2284 
   2285 /* Return, freeing storage we allocated.  Makes the enclosing function
           return VALUE after freeing `compile_stack.stack' and, in the
           wide-character build, also `pattern', `mbs_offset' and
           `is_binary'.  The frees are operands of a comma expression that
           precede VALUE, so they are evaluated before VALUE is.  */
   2286 # ifdef WCHAR
   2287 #  define FREE_STACK_RETURN(value)		\
   2288   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
   2289 # else
   2290 #  define FREE_STACK_RETURN(value)		\
   2291   return (free (compile_stack.stack), value)
   2292 # endif /* WCHAR */
   2293 
   2294 static reg_errcode_t
   2295 PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
   2296                        size_t ARG_PREFIX(size), reg_syntax_t syntax,
   2297                        struct re_pattern_buffer *bufp)
   2298 {
   2299   /* We fetch characters from PATTERN here.  Even though PATTERN is
   2300      `char *' (i.e., signed), we declare these variables as unsigned, so
   2301      they can be reliably used as array indices.  */
   2302   register UCHAR_T c, c1;
   2303 
   2304 #ifdef WCHAR
   2305   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
   2306   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
   2307   size_t size;
   2308   /* offset buffer for optimization. See convert_mbs_to_wc.  */
   2309   int *mbs_offset = NULL;
   2310   /* It hold whether each wchar_t is binary data or not.  */
   2311   char *is_binary = NULL;
   2312   /* A flag whether exactn is handling binary data or not.  */
   2313   char is_exactn_bin = FALSE;
   2314 #endif /* WCHAR */
   2315 
   2316   /* A random temporary spot in PATTERN.  */
   2317   const CHAR_T *p1;
   2318 
   2319   /* Points to the end of the buffer, where we should append.  */
   2320   register UCHAR_T *b;
   2321 
   2322   /* Keeps track of unclosed groups.  */
   2323   compile_stack_type compile_stack;
   2324 
   2325   /* Points to the current (ending) position in the pattern.  */
   2326 #ifdef WCHAR
   2327   const CHAR_T *p;
   2328   const CHAR_T *pend;
   2329 #else /* BYTE */
   2330   const CHAR_T *p = pattern;
   2331   const CHAR_T *pend = pattern + size;
   2332 #endif /* WCHAR */
   2333 
   2334   /* How to translate the characters in the pattern.  */
   2335   RE_TRANSLATE_TYPE translate = bufp->translate;
   2336 
   2337   /* Address of the count-byte of the most recently inserted `exactn'
   2338      command.  This makes it possible to tell if a new exact-match
   2339      character can be added to that command or if the character requires
   2340      a new `exactn' command.  */
   2341   UCHAR_T *pending_exact = 0;
   2342 
   2343   /* Address of start of the most recently finished expression.
   2344      This tells, e.g., postfix * where to find the start of its
   2345      operand.  Reset at the beginning of groups and alternatives.  */
   2346   UCHAR_T *laststart = 0;
   2347 
   2348   /* Address of beginning of regexp, or inside of last group.  */
   2349   UCHAR_T *begalt;
   2350 
   2351   /* Address of the place where a forward jump should go to the end of
   2352      the containing expression.  Each alternative of an `or' -- except the
   2353      last -- ends with a forward jump of this sort.  */
   2354   UCHAR_T *fixup_alt_jump = 0;
   2355 
   2356   /* Counts open-groups as they are encountered.  Remembered for the
   2357      matching close-group on the compile stack, so the same register
   2358      number is put in the stop_memory as the start_memory.  */
   2359   regnum_t regnum = 0;
   2360 
   2361 #ifdef WCHAR
   2362   /* Initialize the wchar_t PATTERN and offset_buffer.  */
   2363   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
   2364   mbs_offset = TALLOC(csize + 1, int);
   2365   is_binary = TALLOC(csize + 1, char);
   2366   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
   2367     {
   2368       free(pattern);
   2369       free(mbs_offset);
   2370       free(is_binary);
   2371       return REG_ESPACE;
   2372     }
   2373   pattern[csize] = L'\0';	/* sentinel */
   2374   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
   2375   pend = p + size;
   2376   if (size < 0)
   2377     {
   2378       free(pattern);
   2379       free(mbs_offset);
   2380       free(is_binary);
   2381       return REG_BADPAT;
   2382     }
   2383 #endif
   2384 
   2385 #ifdef DEBUG
   2386   DEBUG_PRINT1 ("\nCompiling pattern: ");
   2387   if (debug)
   2388     {
   2389       unsigned debug_count;
   2390 
   2391       for (debug_count = 0; debug_count < size; debug_count++)
   2392         PUT_CHAR (pattern[debug_count]);
   2393       putchar ('\n');
   2394     }
   2395 #endif /* DEBUG */
   2396 
   2397   /* Initialize the compile stack.  */
   2398   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
   2399   if (compile_stack.stack == NULL)
   2400     {
   2401 #ifdef WCHAR
   2402       free(pattern);
   2403       free(mbs_offset);
   2404       free(is_binary);
   2405 #endif
   2406       return REG_ESPACE;
   2407     }
   2408 
   2409   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   2410   compile_stack.avail = 0;
   2411 
   2412   /* Initialize the pattern buffer.  */
   2413   bufp->syntax = syntax;
   2414   bufp->fastmap_accurate = 0;
   2415   bufp->not_bol = bufp->not_eol = 0;
   2416 
   2417   /* Set `used' to zero, so that if we return an error, the pattern
   2418      printer (for debugging) will think there's no pattern.  We reset it
   2419      at the end.  */
   2420   bufp->used = 0;
   2421 
   2422   /* Always count groups, whether or not bufp->no_sub is set.  */
   2423   bufp->re_nsub = 0;
   2424 
   2425 #if !defined emacs && !defined SYNTAX_TABLE
   2426   /* Initialize the syntax table.  */
   2427    init_syntax_once ();
   2428 #endif
   2429 
   2430   if (bufp->allocated == 0)
   2431     {
   2432       if (bufp->buffer)
   2433 	{ /* If zero allocated, but buffer is non-null, try to realloc
   2434              enough space.  This loses if buffer's address is bogus, but
   2435              that is the user's responsibility.  */
   2436 #ifdef WCHAR
   2437 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
   2438 	     buffer.  */
   2439           free(bufp->buffer);
   2440           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
   2441 					UCHAR_T);
   2442 #else
   2443           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
   2444 #endif /* WCHAR */
   2445         }
   2446       else
   2447         { /* Caller did not allocate a buffer.  Do it for them.  */
   2448           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
   2449 					UCHAR_T);
   2450         }
   2451 
   2452       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
   2453 #ifdef WCHAR
   2454       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
   2455 #endif /* WCHAR */
   2456       bufp->allocated = INIT_BUF_SIZE;
   2457     }
   2458 #ifdef WCHAR
   2459   else
   2460     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
   2461 #endif
   2462 
   2463   begalt = b = COMPILED_BUFFER_VAR;
   2464 
   2465   /* Loop through the uncompiled pattern until we're at the end.  */
   2466   while (p != pend)
   2467     {
   2468       PATFETCH (c);
   2469 
   2470       switch (c)
   2471         {
   2472         case '^':
   2473           {
   2474             if (   /* If at start of pattern, it's an operator.  */
   2475                    p == pattern + 1
   2476                    /* If context independent, it's an operator.  */
   2477                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2478                    /* Otherwise, depends on what's come before.  */
   2479                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
   2480               BUF_PUSH (begline);
   2481             else
   2482               goto normal_char;
   2483           }
   2484           break;
   2485 
   2486 
   2487         case '$':
   2488           {
   2489             if (   /* If at end of pattern, it's an operator.  */
   2490                    p == pend
   2491                    /* If context independent, it's an operator.  */
   2492                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2493                    /* Otherwise, depends on what's next.  */
   2494                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
   2495                BUF_PUSH (endline);
   2496              else
   2497                goto normal_char;
   2498            }
   2499            break;
   2500 
   2501 
   2502 	case '+':
   2503         case '?':
   2504           if ((syntax & RE_BK_PLUS_QM)
   2505               || (syntax & RE_LIMITED_OPS))
   2506             goto normal_char;
   2507         handle_plus:
   2508         case '*':
   2509           /* If there is no previous pattern... */
   2510           if (!laststart)
   2511             {
   2512               if (syntax & RE_CONTEXT_INVALID_OPS)
   2513                 FREE_STACK_RETURN (REG_BADRPT);
   2514               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
   2515                 goto normal_char;
   2516             }
   2517 
   2518           {
   2519             /* Are we optimizing this jump?  */
   2520             boolean keep_string_p = false;
   2521 
   2522             /* 1 means zero (many) matches is allowed.  */
   2523             char zero_times_ok = 0, many_times_ok = 0;
   2524 
   2525             /* If there is a sequence of repetition chars, collapse it
   2526                down to just one (the right one).  We can't combine
   2527                interval operators with these because of, e.g., `a{2}*',
   2528                which should only match an even number of `a's.  */
   2529 
   2530             for (;;)
   2531               {
   2532                 zero_times_ok |= c != '+';
   2533                 many_times_ok |= c != '?';
   2534 
   2535                 if (p == pend)
   2536                   break;
   2537 
   2538                 PATFETCH (c);
   2539 
   2540                 if (c == '*'
   2541                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
   2542                   ;
   2543 
   2544                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
   2545                   {
   2546                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2547 
   2548                     PATFETCH (c1);
   2549                     if (!(c1 == '+' || c1 == '?'))
   2550                       {
   2551                         PATUNFETCH;
   2552                         PATUNFETCH;
   2553                         break;
   2554                       }
   2555 
   2556                     c = c1;
   2557                   }
   2558                 else
   2559                   {
   2560                     PATUNFETCH;
   2561                     break;
   2562                   }
   2563 
   2564                 /* If we get here, we found another repeat character.  */
   2565                }
   2566 
   2567             /* Star, etc. applied to an empty pattern is equivalent
   2568                to an empty pattern.  */
   2569             if (!laststart)
   2570               break;
   2571 
   2572             /* Now we know whether or not zero matches is allowed
   2573                and also whether or not two or more matches is allowed.  */
   2574             if (many_times_ok)
   2575               { /* More than one repetition is allowed, so put in at the
   2576                    end a backward relative jump from `b' to before the next
   2577                    jump we're going to put in below (which jumps from
   2578                    laststart to after this jump).
   2579 
   2580                    But if we are at the `*' in the exact sequence `.*\n',
   2581                    insert an unconditional jump backwards to the .,
   2582                    instead of the beginning of the loop.  This way we only
   2583                    push a failure point once, instead of every time
   2584                    through the loop.  */
   2585                 assert (p - 1 > pattern);
   2586 
   2587                 /* Allocate the space for the jump.  */
   2588                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2589 
   2590                 /* We know we are not at the first character of the pattern,
   2591                    because laststart was nonzero.  And we've already
   2592                    incremented `p', by the way, to be the character after
   2593                    the `*'.  Do we have to do something analogous here
   2594                    for null bytes, because of RE_DOT_NOT_NULL?  */
   2595                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
   2596 		    && zero_times_ok
   2597                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
   2598                     && !(syntax & RE_DOT_NEWLINE))
   2599                   { /* We have .*\n.  */
   2600                     STORE_JUMP (jump, b, laststart);
   2601                     keep_string_p = true;
   2602                   }
   2603                 else
   2604                   /* Anything else.  */
   2605                   STORE_JUMP (maybe_pop_jump, b, laststart -
   2606 			      (1 + OFFSET_ADDRESS_SIZE));
   2607 
   2608                 /* We've added more stuff to the buffer.  */
   2609                 b += 1 + OFFSET_ADDRESS_SIZE;
   2610               }
   2611 
   2612             /* On failure, jump from laststart to b + 3, which will be the
   2613                end of the buffer after this jump is inserted.  */
   2614 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
   2615 	       'b + 3'.  */
   2616             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2617             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
   2618                                        : on_failure_jump,
   2619                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
   2620             pending_exact = 0;
   2621             b += 1 + OFFSET_ADDRESS_SIZE;
   2622 
   2623             if (!zero_times_ok)
   2624               {
   2625                 /* At least one repetition is required, so insert a
   2626                    `dummy_failure_jump' before the initial
   2627                    `on_failure_jump' instruction of the loop. This
   2628                    effects a skip over that instruction the first time
   2629                    we hit that loop.  */
   2630                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2631                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
   2632 			     2 + 2 * OFFSET_ADDRESS_SIZE);
   2633                 b += 1 + OFFSET_ADDRESS_SIZE;
   2634               }
   2635             }
   2636 	  break;
   2637 
   2638 
   2639 	case '.':
   2640           laststart = b;
   2641           BUF_PUSH (anychar);
   2642           break;
   2643 
   2644 
   2645         case '[':
   2646           {
   2647             boolean had_char_class = false;
   2648 #ifdef WCHAR
   2649 	    CHAR_T range_start = 0xffffffff;
   2650 #else
   2651 	    unsigned int range_start = 0xffffffff;
   2652 #endif
   2653             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2654 
   2655 #ifdef WCHAR
   2656 	    /* We assume a charset(_not) structure as a wchar_t array.
   2657 	       charset[0] = (re_opcode_t) charset(_not)
   2658                charset[1] = l (= length of char_classes)
   2659                charset[2] = m (= length of collating_symbols)
   2660                charset[3] = n (= length of equivalence_classes)
   2661 	       charset[4] = o (= length of char_ranges)
   2662 	       charset[5] = p (= length of chars)
   2663 
   2664                charset[6] = char_class (wctype_t)
   2665                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
   2666                          ...
   2667                charset[l+5]  = char_class (wctype_t)
   2668 
   2669                charset[l+6]  = collating_symbol (wchar_t)
   2670                             ...
   2671                charset[l+m+5]  = collating_symbol (wchar_t)
   2672 					ifdef _LIBC we use the index in
   2673 					_NL_COLLATE_SYMB_EXTRAMB instead of
   2674 					wchar_t string.
   2675 
   2676                charset[l+m+6]  = equivalence_classes (wchar_t)
   2677                               ...
   2678                charset[l+m+n+5]  = equivalence_classes (wchar_t)
   2679 					ifdef _LIBC we use the index in
   2680 					_NL_COLLATE_WEIGHT instead of
   2681 					wchar_t string.
   2682 
   2683 	       charset[l+m+n+6] = range_start
   2684 	       charset[l+m+n+7] = range_end
   2685 	                       ...
   2686 	       charset[l+m+n+2o+4] = range_start
   2687 	       charset[l+m+n+2o+5] = range_end
   2688 					ifdef _LIBC we use the value looked up
   2689 					in _NL_COLLATE_COLLSEQ instead of
   2690 					wchar_t character.
   2691 
   2692 	       charset[l+m+n+2o+6] = char
   2693 	                          ...
   2694 	       charset[l+m+n+2o+p+5] = char
   2695 
   2696 	     */
   2697 
   2698 	    /* We need at least 6 spaces: the opcode, the length of
   2699                char_classes, the length of collating_symbols, the length of
   2700                equivalence_classes, the length of char_ranges, the length of
   2701                chars.  */
   2702 	    GET_BUFFER_SPACE (6);
   2703 
   2704 	    /* Save b as laststart.  We also use laststart as the pointer
   2705 	       to the first element of the charset here.
   2706 	       In other words, laststart[i] indicates charset[i].  */
   2707             laststart = b;
   2708 
   2709             /* We test `*p == '^' twice, instead of using an if
   2710                statement, so we only need one BUF_PUSH.  */
   2711             BUF_PUSH (*p == '^' ? charset_not : charset);
   2712             if (*p == '^')
   2713               p++;
   2714 
   2715             /* Push the length of char_classes, the length of
   2716                collating_symbols, the length of equivalence_classes, the
   2717                length of char_ranges and the length of chars.  */
   2718             BUF_PUSH_3 (0, 0, 0);
   2719             BUF_PUSH_2 (0, 0);
   2720 
   2721             /* Remember the first position in the bracket expression.  */
   2722             p1 = p;
   2723 
   2724             /* charset_not matches newline according to a syntax bit.  */
   2725             if ((re_opcode_t) b[-6] == charset_not
   2726                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   2727 	      {
   2728 		BUF_PUSH('\n');
   2729 		laststart[5]++; /* Update the length of characters  */
   2730 	      }
   2731 
   2732             /* Read in characters and ranges, setting map bits.  */
   2733             for (;;)
   2734               {
   2735                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2736 
   2737                 PATFETCH (c);
   2738 
   2739                 /* \ might escape characters inside [...] and [^...].  */
   2740                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   2741                   {
   2742                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2743 
   2744                     PATFETCH (c1);
   2745 		    BUF_PUSH(c1);
   2746 		    laststart[5]++; /* Update the length of chars  */
   2747 		    range_start = c1;
   2748                     continue;
   2749                   }
   2750 
   2751                 /* Could be the end of the bracket expression.  If it's
   2752                    not (i.e., when the bracket expression is `[]' so
   2753                    far), the ']' character bit gets set way below.  */
   2754                 if (c == ']' && p != p1 + 1)
   2755                   break;
   2756 
   2757                 /* Look ahead to see if it's a range when the last thing
   2758                    was a character class.  */
   2759                 if (had_char_class && c == '-' && *p != ']')
   2760                   FREE_STACK_RETURN (REG_ERANGE);
   2761 
   2762                 /* Look ahead to see if it's a range when the last thing
   2763                    was a character: if this is a hyphen not at the
   2764                    beginning or the end of a list, then it's the range
   2765                    operator.  */
   2766                 if (c == '-'
   2767                     && !(p - 2 >= pattern && p[-2] == '[')
   2768                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   2769                     && *p != ']')
   2770                   {
   2771                     reg_errcode_t ret;
   2772 		    /* Allocate the space for range_start and range_end.  */
   2773 		    GET_BUFFER_SPACE (2);
   2774 		    /* Update the pointer to indicate end of buffer.  */
   2775                     b += 2;
   2776                     ret = wcs_compile_range (range_start, &p, pend, translate,
   2777                                          syntax, b, laststart);
   2778                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2779                     range_start = 0xffffffff;
   2780                   }
   2781                 else if (p[0] == '-' && p[1] != ']')
   2782                   { /* This handles ranges made up of characters only.  */
   2783                     reg_errcode_t ret;
   2784 
   2785 		    /* Move past the `-'.  */
   2786                     PATFETCH (c1);
   2787 		    /* Allocate the space for range_start and range_end.  */
   2788 		    GET_BUFFER_SPACE (2);
   2789 		    /* Update the pointer to indicate end of buffer.  */
   2790                     b += 2;
   2791                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
   2792                                          laststart);
   2793                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2794 		    range_start = 0xffffffff;
   2795                   }
   2796 
   2797                 /* See if we're at the beginning of a possible character
   2798                    class.  */
   2799                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   2800                   { /* Leave room for the null.  */
   2801                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   2802 
   2803                     PATFETCH (c);
   2804                     c1 = 0;
   2805 
   2806                     /* If pattern is `[[:'.  */
   2807                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2808 
   2809                     for (;;)
   2810                       {
   2811                         PATFETCH (c);
   2812                         if ((c == ':' && *p == ']') || p == pend)
   2813                           break;
   2814 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   2815 			  str[c1++] = c;
   2816 			else
   2817 			  /* This is in any case an invalid class name.  */
   2818 			  str[0] = '\0';
   2819                       }
   2820                     str[c1] = '\0';
   2821 
   2822                     /* If isn't a word bracketed by `[:' and `:]':
   2823                        undo the ending character, the letters, and leave
   2824                        the leading `:' and `[' (but store them as character).  */
   2825                     if (c == ':' && *p == ']')
   2826                       {
   2827 			wctype_t wt;
   2828 			uintptr_t alignedp;
   2829 
   2830 			/* Query the character class as wctype_t.  */
   2831 			wt = IS_CHAR_CLASS (str);
   2832 			if (wt == 0)
   2833 			  FREE_STACK_RETURN (REG_ECTYPE);
   2834 
   2835                         /* Throw away the ] at the end of the character
   2836                            class.  */
   2837                         PATFETCH (c);
   2838 
   2839                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2840 
   2841 			/* Allocate the space for character class.  */
   2842                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
   2843 			/* Update the pointer to indicate end of buffer.  */
   2844                         b += CHAR_CLASS_SIZE;
   2845 			/* Move data which follow character classes
   2846 			    not to violate the data.  */
   2847                         insert_space(CHAR_CLASS_SIZE,
   2848 				     laststart + 6 + laststart[1],
   2849 				     b - 1);
   2850 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
   2851 				    + __alignof__(wctype_t) - 1)
   2852 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   2853 			/* Store the character class.  */
   2854                         *((wctype_t*)alignedp) = wt;
   2855                         /* Update length of char_classes */
   2856                         laststart[1] += CHAR_CLASS_SIZE;
   2857 
   2858                         had_char_class = true;
   2859                       }
   2860                     else
   2861                       {
   2862                         c1++;
   2863                         while (c1--)
   2864                           PATUNFETCH;
   2865                         BUF_PUSH ('[');
   2866                         BUF_PUSH (':');
   2867                         laststart[5] += 2; /* Update the length of characters  */
   2868 			range_start = ':';
   2869                         had_char_class = false;
   2870                       }
   2871                   }
   2872                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
   2873 							  || *p == '.'))
   2874 		  {
   2875 		    CHAR_T str[128];	/* Should be large enough.  */
   2876 		    CHAR_T delim = *p; /* '=' or '.'  */
   2877 # ifdef _LIBC
   2878 		    uint32_t nrules =
   2879 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   2880 # endif
   2881 		    PATFETCH (c);
   2882 		    c1 = 0;
   2883 
   2884 		    /* If pattern is `[[=' or '[[.'.  */
   2885 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2886 
   2887 		    for (;;)
   2888 		      {
   2889 			PATFETCH (c);
   2890 			if ((c == delim && *p == ']') || p == pend)
   2891 			  break;
   2892 			if (c1 < sizeof (str) - 1)
   2893 			  str[c1++] = c;
   2894 			else
   2895 			  /* This is in any case an invalid class name.  */
   2896 			  str[0] = '\0';
   2897                       }
   2898 		    str[c1] = '\0';
   2899 
   2900 		    if (c == delim && *p == ']' && str[0] != '\0')
   2901 		      {
   2902                         unsigned int i, offset;
   2903 			/* If we have no collation data we use the default
   2904 			   collation in which each character is in a class
   2905 			   by itself.  It also means that ASCII is the
   2906 			   character set and therefore we cannot have character
   2907 			   with more than one byte in the multibyte
   2908 			   representation.  */
   2909 
   2910                         /* If not defined _LIBC, we push the name and
   2911 			   `\0' for the sake of matching performance.  */
   2912 			int datasize = c1 + 1;
   2913 
   2914 # ifdef _LIBC
   2915 			int32_t idx = 0;
   2916 			if (nrules == 0)
   2917 # endif
   2918 			  {
   2919 			    if (c1 != 1)
   2920 			      FREE_STACK_RETURN (REG_ECOLLATE);
   2921 			  }
   2922 # ifdef _LIBC
   2923 			else
   2924 			  {
   2925 			    const int32_t *table;
   2926 			    const int32_t *weights;
   2927 			    const int32_t *extra;
   2928 			    const int32_t *indirect;
   2929 			    wint_t *cp;
   2930 
   2931 			    /* This #include defines a local function!  */
   2932 #  include <locale/weightwc.h>
   2933 
   2934 			    if(delim == '=')
   2935 			      {
   2936 				/* We push the index for equivalence class.  */
   2937 				cp = (wint_t*)str;
   2938 
   2939 				table = (const int32_t *)
   2940 				  _NL_CURRENT (LC_COLLATE,
   2941 					       _NL_COLLATE_TABLEWC);
   2942 				weights = (const int32_t *)
   2943 				  _NL_CURRENT (LC_COLLATE,
   2944 					       _NL_COLLATE_WEIGHTWC);
   2945 				extra = (const int32_t *)
   2946 				  _NL_CURRENT (LC_COLLATE,
   2947 					       _NL_COLLATE_EXTRAWC);
   2948 				indirect = (const int32_t *)
   2949 				  _NL_CURRENT (LC_COLLATE,
   2950 					       _NL_COLLATE_INDIRECTWC);
   2951 
   2952 				idx = findidx ((const wint_t**)&cp);
   2953 				if (idx == 0 || cp < (wint_t*) str + c1)
   2954 				  /* This is no valid character.  */
   2955 				  FREE_STACK_RETURN (REG_ECOLLATE);
   2956 
   2957 				str[0] = (wchar_t)idx;
   2958 			      }
   2959 			    else /* delim == '.' */
   2960 			      {
   2961 				/* We push collation sequence value
   2962 				   for collating symbol.  */
   2963 				int32_t table_size;
   2964 				const int32_t *symb_table;
   2965 				const unsigned char *extra;
   2966 				int32_t idx;
   2967 				int32_t elem;
   2968 				int32_t second;
   2969 				int32_t hash;
   2970 				char char_str[c1];
   2971 
   2972 				/* We have to convert the name to a single-byte
   2973 				   string.  This is possible since the names
   2974 				   consist of ASCII characters and the internal
   2975 				   representation is UCS4.  */
   2976 				for (i = 0; i < c1; ++i)
   2977 				  char_str[i] = str[i];
   2978 
   2979 				table_size =
   2980 				  _NL_CURRENT_WORD (LC_COLLATE,
   2981 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
   2982 				symb_table = (const int32_t *)
   2983 				  _NL_CURRENT (LC_COLLATE,
   2984 					       _NL_COLLATE_SYMB_TABLEMB);
   2985 				extra = (const unsigned char *)
   2986 				  _NL_CURRENT (LC_COLLATE,
   2987 					       _NL_COLLATE_SYMB_EXTRAMB);
   2988 
   2989 				/* Locate the character in the hashing table.  */
   2990 				hash = elem_hash (char_str, c1);
   2991 
   2992 				idx = 0;
   2993 				elem = hash % table_size;
   2994 				second = hash % (table_size - 2);
   2995 				while (symb_table[2 * elem] != 0)
   2996 				  {
   2997 				    /* First compare the hashing value.  */
   2998 				    if (symb_table[2 * elem] == hash
   2999 					&& c1 == extra[symb_table[2 * elem + 1]]
   3000 					&& memcmp (char_str,
   3001 						   &extra[symb_table[2 * elem + 1]
   3002 							 + 1], c1) == 0)
   3003 				      {
   3004 					/* Yep, this is the entry.  */
   3005 					idx = symb_table[2 * elem + 1];
   3006 					idx += 1 + extra[idx];
   3007 					break;
   3008 				      }
   3009 
   3010 				    /* Next entry.  */
   3011 				    elem += second;
   3012 				  }
   3013 
   3014 				if (symb_table[2 * elem] != 0)
   3015 				  {
   3016 				    /* Compute the index of the byte sequence
   3017 				       in the table.  */
   3018 				    idx += 1 + extra[idx];
   3019 				    /* Adjust for the alignment.  */
   3020 				    idx = (idx + 3) & ~3;
   3021 
   3022 				    str[0] = (wchar_t) idx + 4;
   3023 				  }
   3024 				else if (symb_table[2 * elem] == 0 && c1 == 1)
   3025 				  {
   3026 				    /* No valid character.  Match it as a
   3027 				       single byte character.  */
   3028 				    had_char_class = false;
   3029 				    BUF_PUSH(str[0]);
   3030 				    /* Update the length of characters  */
   3031 				    laststart[5]++;
   3032 				    range_start = str[0];
   3033 
   3034 				    /* Throw away the ] at the end of the
   3035 				       collating symbol.  */
   3036 				    PATFETCH (c);
   3037 				    /* Continue with the next bracket item.  */
   3038 				    continue;
   3039 				  }
   3040 				else
   3041 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3042 			      }
   3043 			    datasize = 1;
   3044 			  }
   3045 # endif
   3046                         /* Throw away the ] at the end of the equivalence
   3047                            class (or collating symbol).  */
   3048                         PATFETCH (c);
   3049 
   3050 			/* Allocate the space for the equivalence class
   3051 			   (or collating symbol) (and '\0' if needed).  */
   3052                         GET_BUFFER_SPACE(datasize);
   3053 			/* Update the pointer to indicate end of buffer.  */
   3054                         b += datasize;
   3055 
   3056 			if (delim == '=')
   3057 			  { /* equivalence class  */
   3058 			    /* Calculate the offset of char_ranges,
   3059 			       which is next to equivalence_classes.  */
   3060 			    offset = laststart[1] + laststart[2]
   3061 			      + laststart[3] +6;
   3062 			    /* Insert space.  */
   3063 			    insert_space(datasize, laststart + offset, b - 1);
   3064 
   3065 			    /* Write the equivalence_class and \0.  */
   3066 			    for (i = 0 ; i < datasize ; i++)
   3067 			      laststart[offset + i] = str[i];
   3068 
   3069 			    /* Update the length of equivalence_classes.  */
   3070 			    laststart[3] += datasize;
   3071 			    had_char_class = true;
   3072 			  }
   3073 			else /* delim == '.' */
   3074 			  { /* collating symbol  */
   3075 			    /* Calculate the offset of the equivalence_classes,
   3076 			       which is next to collating_symbols.  */
   3077 			    offset = laststart[1] + laststart[2] + 6;
   3078 			    /* Insert space and write the collating_symbol
   3079 			       and \0.  */
   3080 			    insert_space(datasize, laststart + offset, b-1);
   3081 			    for (i = 0 ; i < datasize ; i++)
   3082 			      laststart[offset + i] = str[i];
   3083 
   3084 			    /* In re_match_2_internal if range_start < -1, we
   3085 			       assume -range_start is the offset of the
   3086 			       collating symbol which is specified as
   3087 			       the character of the range start.  So we assign
   3088 			       -(laststart[1] + laststart[2] + 6) to
   3089 			       range_start.  */
   3090 			    range_start = -(laststart[1] + laststart[2] + 6);
   3091 			    /* Update the length of collating_symbol.  */
   3092 			    laststart[2] += datasize;
   3093 			    had_char_class = false;
   3094 			  }
   3095 		      }
   3096                     else
   3097                       {
   3098                         c1++;
   3099                         while (c1--)
   3100                           PATUNFETCH;
   3101                         BUF_PUSH ('[');
   3102                         BUF_PUSH (delim);
   3103                         laststart[5] += 2; /* Update the length of characters  */
   3104 			range_start = delim;
   3105                         had_char_class = false;
   3106                       }
   3107 		  }
   3108                 else
   3109                   {
   3110                     had_char_class = false;
   3111 		    BUF_PUSH(c);
   3112 		    laststart[5]++;  /* Update the length of characters  */
   3113 		    range_start = c;
   3114                   }
   3115 	      }
   3116 
   3117 #else /* BYTE */
   3118             /* Ensure that we have enough space to push a charset: the
   3119                opcode, the length count, and the bitset; 34 bytes in all.  */
   3120 	    GET_BUFFER_SPACE (34);
   3121 
   3122             laststart = b;
   3123 
   3124             /* We test `*p == '^' twice, instead of using an if
   3125                statement, so we only need one BUF_PUSH.  */
   3126             BUF_PUSH (*p == '^' ? charset_not : charset);
   3127             if (*p == '^')
   3128               p++;
   3129 
   3130             /* Remember the first position in the bracket expression.  */
   3131             p1 = p;
   3132 
   3133             /* Push the number of bytes in the bitmap.  */
   3134             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
   3135 
   3136             /* Clear the whole map.  */
   3137             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
   3138 
   3139             /* charset_not matches newline according to a syntax bit.  */
   3140             if ((re_opcode_t) b[-2] == charset_not
   3141                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   3142               SET_LIST_BIT ('\n');
   3143 
   3144             /* Read in characters and ranges, setting map bits.  */
   3145             for (;;)
   3146               {
   3147                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3148 
   3149                 PATFETCH (c);
   3150 
   3151                 /* \ might escape characters inside [...] and [^...].  */
   3152                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   3153                   {
   3154                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3155 
   3156                     PATFETCH (c1);
   3157                     SET_LIST_BIT (c1);
   3158 		    range_start = c1;
   3159                     continue;
   3160                   }
   3161 
   3162                 /* Could be the end of the bracket expression.  If it's
   3163                    not (i.e., when the bracket expression is `[]' so
   3164                    far), the ']' character bit gets set way below.  */
   3165                 if (c == ']' && p != p1 + 1)
   3166                   break;
   3167 
   3168                 /* Look ahead to see if it's a range when the last thing
   3169                    was a character class.  */
   3170                 if (had_char_class && c == '-' && *p != ']')
   3171                   FREE_STACK_RETURN (REG_ERANGE);
   3172 
   3173                 /* Look ahead to see if it's a range when the last thing
   3174                    was a character: if this is a hyphen not at the
   3175                    beginning or the end of a list, then it's the range
   3176                    operator.  */
   3177                 if (c == '-'
   3178                     && !(p - 2 >= pattern && p[-2] == '[')
   3179                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   3180                     && *p != ']')
   3181                   {
   3182                     reg_errcode_t ret
   3183                       = byte_compile_range (range_start, &p, pend, translate,
   3184 					    syntax, b);
   3185                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3186 		    range_start = 0xffffffff;
   3187                   }
   3188 
   3189                 else if (p[0] == '-' && p[1] != ']')
   3190                   { /* This handles ranges made up of characters only.  */
   3191                     reg_errcode_t ret;
   3192 
   3193 		    /* Move past the `-'.  */
   3194                     PATFETCH (c1);
   3195 
   3196                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
   3197                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3198 		    range_start = 0xffffffff;
   3199                   }
   3200 
   3201                 /* See if we're at the beginning of a possible character
   3202                    class.  */
   3203 
   3204                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   3205                   { /* Leave room for the null.  */
   3206                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   3207 
   3208                     PATFETCH (c);
   3209                     c1 = 0;
   3210 
   3211                     /* If pattern is `[[:'.  */
   3212                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3213 
   3214                     for (;;)
   3215                       {
   3216                         PATFETCH (c);
   3217                         if ((c == ':' && *p == ']') || p == pend)
   3218                           break;
   3219 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   3220 			  str[c1++] = c;
   3221 			else
   3222 			  /* This is in any case an invalid class name.  */
   3223 			  str[0] = '\0';
   3224                       }
   3225                     str[c1] = '\0';
   3226 
   3227                     /* If isn't a word bracketed by `[:' and `:]':
   3228                        undo the ending character, the letters, and leave
   3229                        the leading `:' and `[' (but set bits for them).  */
   3230                     if (c == ':' && *p == ']')
   3231                       {
   3232 # if defined _LIBC || WIDE_CHAR_SUPPORT
   3233                         boolean is_lower = STREQ (str, "lower");
   3234                         boolean is_upper = STREQ (str, "upper");
   3235 			wctype_t wt;
   3236                         int ch;
   3237 
   3238 			wt = IS_CHAR_CLASS (str);
   3239 			if (wt == 0)
   3240 			  FREE_STACK_RETURN (REG_ECTYPE);
   3241 
   3242                         /* Throw away the ] at the end of the character
   3243                            class.  */
   3244                         PATFETCH (c);
   3245 
   3246                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3247 
   3248                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
   3249 			  {
   3250 #  ifdef _LIBC
   3251 			    if (__iswctype (__btowc (ch), wt))
   3252 			      SET_LIST_BIT (ch);
   3253 #  else
   3254 			    if (iswctype (btowc (ch), wt))
   3255 			      SET_LIST_BIT (ch);
   3256 #  endif
   3257 
   3258 			    if (translate && (is_upper || is_lower)
   3259 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3260 			      SET_LIST_BIT (ch);
   3261 			  }
   3262 
   3263                         had_char_class = true;
   3264 # else
   3265                         int ch;
   3266                         boolean is_alnum = STREQ (str, "alnum");
   3267                         boolean is_alpha = STREQ (str, "alpha");
   3268                         boolean is_blank = STREQ (str, "blank");
   3269                         boolean is_cntrl = STREQ (str, "cntrl");
   3270                         boolean is_digit = STREQ (str, "digit");
   3271                         boolean is_graph = STREQ (str, "graph");
   3272                         boolean is_lower = STREQ (str, "lower");
   3273                         boolean is_print = STREQ (str, "print");
   3274                         boolean is_punct = STREQ (str, "punct");
   3275                         boolean is_space = STREQ (str, "space");
   3276                         boolean is_upper = STREQ (str, "upper");
   3277                         boolean is_xdigit = STREQ (str, "xdigit");
   3278 
   3279                         if (!IS_CHAR_CLASS (str))
   3280 			  FREE_STACK_RETURN (REG_ECTYPE);
   3281 
   3282                         /* Throw away the ] at the end of the character
   3283                            class.  */
   3284                         PATFETCH (c);
   3285 
   3286                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3287 
   3288                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
   3289                           {
   3290 			    /* This was split into 3 if's to
   3291 			       avoid an arbitrary limit in some compiler.  */
   3292                             if (   (is_alnum  && ISALNUM (ch))
   3293                                 || (is_alpha  && ISALPHA (ch))
   3294                                 || (is_blank  && ISBLANK (ch))
   3295                                 || (is_cntrl  && ISCNTRL (ch)))
   3296 			      SET_LIST_BIT (ch);
   3297 			    if (   (is_digit  && ISDIGIT (ch))
   3298                                 || (is_graph  && ISGRAPH (ch))
   3299                                 || (is_lower  && ISLOWER (ch))
   3300                                 || (is_print  && ISPRINT (ch)))
   3301 			      SET_LIST_BIT (ch);
   3302 			    if (   (is_punct  && ISPUNCT (ch))
   3303                                 || (is_space  && ISSPACE (ch))
   3304                                 || (is_upper  && ISUPPER (ch))
   3305                                 || (is_xdigit && ISXDIGIT (ch)))
   3306 			      SET_LIST_BIT (ch);
   3307 			    if (   translate && (is_upper || is_lower)
   3308 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3309 			      SET_LIST_BIT (ch);
   3310                           }
   3311                         had_char_class = true;
   3312 # endif	/* libc || wctype.h */
   3313                       }
   3314                     else
   3315                       {
   3316                         c1++;
   3317                         while (c1--)
   3318                           PATUNFETCH;
   3319                         SET_LIST_BIT ('[');
   3320                         SET_LIST_BIT (':');
   3321 			range_start = ':';
   3322                         had_char_class = false;
   3323                       }
   3324                   }
   3325                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
   3326 		  {
   3327 		    unsigned char str[MB_LEN_MAX + 1];
   3328 # ifdef _LIBC
   3329 		    uint32_t nrules =
   3330 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3331 # endif
   3332 
   3333 		    PATFETCH (c);
   3334 		    c1 = 0;
   3335 
   3336 		    /* If pattern is `[[='.  */
   3337 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3338 
   3339 		    for (;;)
   3340 		      {
   3341 			PATFETCH (c);
   3342 			if ((c == '=' && *p == ']') || p == pend)
   3343 			  break;
   3344 			if (c1 < MB_LEN_MAX)
   3345 			  str[c1++] = c;
   3346 			else
   3347 			  /* This is in any case an invalid class name.  */
   3348 			  str[0] = '\0';
   3349                       }
   3350 		    str[c1] = '\0';
   3351 
   3352 		    if (c == '=' && *p == ']' && str[0] != '\0')
   3353 		      {
   3354 			/* If we have no collation data we use the default
   3355 			   collation in which each character is in a class
   3356 			   by itself.  It also means that ASCII is the
   3357 			   character set and therefore we cannot have character
   3358 			   with more than one byte in the multibyte
   3359 			   representation.  */
   3360 # ifdef _LIBC
   3361 			if (nrules == 0)
   3362 # endif
   3363 			  {
   3364 			    if (c1 != 1)
   3365 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3366 
   3367 			    /* Throw away the ] at the end of the equivalence
   3368 			       class.  */
   3369 			    PATFETCH (c);
   3370 
   3371 			    /* Set the bit for the character.  */
   3372 			    SET_LIST_BIT (str[0]);
   3373 			  }
   3374 # ifdef _LIBC
   3375 			else
   3376 			  {
   3377 			    /* Try to match the byte sequence in `str' against
   3378 			       those known to the collate implementation.
   3379 			       First find out whether the bytes in `str' are
   3380 			       actually from exactly one character.  */
   3381 			    const int32_t *table;
   3382 			    const unsigned char *weights;
   3383 			    const unsigned char *extra;
   3384 			    const int32_t *indirect;
   3385 			    int32_t idx;
   3386 			    const unsigned char *cp = str;
   3387 			    int ch;
   3388 
   3389 			    /* This #include defines a local function!  */
   3390 #  include <locale/weight.h>
   3391 
   3392 			    table = (const int32_t *)
   3393 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3394 			    weights = (const unsigned char *)
   3395 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3396 			    extra = (const unsigned char *)
   3397 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3398 			    indirect = (const int32_t *)
   3399 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3400 
   3401 			    idx = findidx (&cp);
   3402 			    if (idx == 0 || cp < str + c1)
   3403 			      /* This is no valid character.  */
   3404 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3405 
   3406 			    /* Throw away the ] at the end of the equivalence
   3407 			       class.  */
   3408 			    PATFETCH (c);
   3409 
   3410 			    /* Now we have to go through the whole table
   3411 			       and find all characters which have the same
   3412 			       first level weight.
   3413 
   3414 			       XXX Note that this is not entirely correct.
   3415 			       we would have to match multibyte sequences
   3416 			       but this is not possible with the current
   3417 			       implementation.  */
   3418 			    for (ch = 1; ch < 256; ++ch)
   3419 			      /* XXX This test would have to be changed if we
   3420 				 would allow matching multibyte sequences.  */
   3421 			      if (table[ch] > 0)
   3422 				{
   3423 				  int32_t idx2 = table[ch];
   3424 				  size_t len = weights[idx2];
   3425 
   3426 				  /* Test whether the lengths match.  */
   3427 				  if (weights[idx] == len)
   3428 				    {
   3429 				      /* They do.  Now compare the bytes of
   3430 					 the weight.  */
   3431 				      size_t cnt = 0;
   3432 
   3433 				      while (cnt < len
   3434 					     && (weights[idx + 1 + cnt]
   3435 						 == weights[idx2 + 1 + cnt]))
   3436 					++cnt;
   3437 
   3438 				      if (cnt == len)
   3439 					/* They match.  Mark the character as
   3440 					   acceptable.  */
   3441 					SET_LIST_BIT (ch);
   3442 				    }
   3443 				}
   3444 			  }
   3445 # endif
   3446 			had_char_class = true;
   3447 		      }
   3448                     else
   3449                       {
   3450                         c1++;
   3451                         while (c1--)
   3452                           PATUNFETCH;
   3453                         SET_LIST_BIT ('[');
   3454                         SET_LIST_BIT ('=');
   3455 			range_start = '=';
   3456                         had_char_class = false;
   3457                       }
   3458 		  }
   3459                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
   3460 		  {
   3461 		    unsigned char str[128];	/* Should be large enough.  */
   3462 # ifdef _LIBC
   3463 		    uint32_t nrules =
   3464 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3465 # endif
   3466 
   3467 		    PATFETCH (c);
   3468 		    c1 = 0;
   3469 
   3470 		    /* If pattern is `[[.'.  */
   3471 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3472 
   3473 		    for (;;)
   3474 		      {
   3475 			PATFETCH (c);
   3476 			if ((c == '.' && *p == ']') || p == pend)
   3477 			  break;
   3478 			if (c1 < sizeof (str))
   3479 			  str[c1++] = c;
   3480 			else
   3481 			  /* This is in any case an invalid class name.  */
   3482 			  str[0] = '\0';
   3483                       }
   3484 		    str[c1] = '\0';
   3485 
   3486 		    if (c == '.' && *p == ']' && str[0] != '\0')
   3487 		      {
   3488 			/* If we have no collation data we use the default
   3489 			   collation in which each character is the name
   3490 			   for its own class which contains only the one
   3491 			   character.  It also means that ASCII is the
   3492 			   character set and therefore we cannot have character
   3493 			   with more than one byte in the multibyte
   3494 			   representation.  */
   3495 # ifdef _LIBC
   3496 			if (nrules == 0)
   3497 # endif
   3498 			  {
   3499 			    if (c1 != 1)
   3500 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3501 
   3502 			    /* Throw away the ] at the end of the collating
   3503 			       symbol.  */
   3504 			    PATFETCH (c);
   3505 
   3506 			    /* Set the bit for the character.  */
   3507 			    SET_LIST_BIT (str[0]);
   3508 			    range_start = ((const unsigned char *) str)[0];
   3509 			  }
   3510 # ifdef _LIBC
   3511 			else
   3512 			  {
   3513 			    /* Try to match the byte sequence in `str' against
   3514 			       those known to the collate implementation.
   3515 			       First find out whether the bytes in `str' are
   3516 			       actually from exactly one character.  */
   3517 			    int32_t table_size;
   3518 			    const int32_t *symb_table;
   3519 			    const unsigned char *extra;
   3520 			    int32_t idx;
   3521 			    int32_t elem;
   3522 			    int32_t second;
   3523 			    int32_t hash;
   3524 
   3525 			    table_size =
   3526 			      _NL_CURRENT_WORD (LC_COLLATE,
   3527 						_NL_COLLATE_SYMB_HASH_SIZEMB);
   3528 			    symb_table = (const int32_t *)
   3529 			      _NL_CURRENT (LC_COLLATE,
   3530 					   _NL_COLLATE_SYMB_TABLEMB);
   3531 			    extra = (const unsigned char *)
   3532 			      _NL_CURRENT (LC_COLLATE,
   3533 					   _NL_COLLATE_SYMB_EXTRAMB);
   3534 
   3535 			    /* Locate the character in the hashing table.  */
   3536 			    hash = elem_hash (str, c1);
   3537 
   3538 			    idx = 0;
   3539 			    elem = hash % table_size;
   3540 			    second = hash % (table_size - 2);
   3541 			    while (symb_table[2 * elem] != 0)
   3542 			      {
   3543 				/* First compare the hashing value.  */
   3544 				if (symb_table[2 * elem] == hash
   3545 				    && c1 == extra[symb_table[2 * elem + 1]]
   3546 				    && memcmp (str,
   3547 					       &extra[symb_table[2 * elem + 1]
   3548 						     + 1],
   3549 					       c1) == 0)
   3550 				  {
   3551 				    /* Yep, this is the entry.  */
   3552 				    idx = symb_table[2 * elem + 1];
   3553 				    idx += 1 + extra[idx];
   3554 				    break;
   3555 				  }
   3556 
   3557 				/* Next entry.  */
   3558 				elem += second;
   3559 			      }
   3560 
   3561 			    if (symb_table[2 * elem] == 0)
   3562 			      /* This is no valid character.  */
   3563 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3564 
   3565 			    /* Throw away the ] at the end of the collating
   3566 			       symbol.  */
   3567 			    PATFETCH (c);
   3568 
   3569 			    /* Now add the multibyte character(s) we found
   3570 			       to the accept list.
   3571 
   3572 			       XXX Note that this is not entirely correct.
   3573 			       we would have to match multibyte sequences
   3574 			       but this is not possible with the current
   3575 			       implementation.  Also, we have to match
   3576 			       collating symbols, which expand to more than
   3577 			       one byte, as a whole and not allow the
   3578 			       individual bytes.  */
   3579 			    c1 = extra[idx++];
   3580 			    if (c1 == 1)
   3581 			      range_start = extra[idx];
   3582 			    while (c1-- > 0)
   3583 			      {
   3584 				SET_LIST_BIT (extra[idx]);
   3585 				++idx;
   3586 			      }
   3587 			  }
   3588 # endif
   3589 			had_char_class = false;
   3590 		      }
   3591                     else
   3592                       {
   3593                         c1++;
   3594                         while (c1--)
   3595                           PATUNFETCH;
   3596                         SET_LIST_BIT ('[');
   3597                         SET_LIST_BIT ('.');
   3598 			range_start = '.';
   3599                         had_char_class = false;
   3600                       }
   3601 		  }
   3602                 else
   3603                   {
   3604                     had_char_class = false;
   3605                     SET_LIST_BIT (c);
   3606 		    range_start = c;
   3607                   }
   3608               }
   3609 
   3610             /* Discard any (non)matching list bytes that are all 0 at the
   3611                end of the map.  Decrease the map-length byte too.  */
   3612             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
   3613               b[-1]--;
   3614             b += b[-1];
   3615 #endif /* WCHAR */
   3616           }
   3617           break;
   3618 
   3619 
   3620 	case '(':
   3621           if (syntax & RE_NO_BK_PARENS)
   3622             goto handle_open;
   3623           else
   3624             goto normal_char;
   3625 
   3626 
   3627         case ')':
   3628           if (syntax & RE_NO_BK_PARENS)
   3629             goto handle_close;
   3630           else
   3631             goto normal_char;
   3632 
   3633 
   3634         case '\n':
   3635           if (syntax & RE_NEWLINE_ALT)
   3636             goto handle_alt;
   3637           else
   3638             goto normal_char;
   3639 
   3640 
   3641 	case '|':
   3642           if (syntax & RE_NO_BK_VBAR)
   3643             goto handle_alt;
   3644           else
   3645             goto normal_char;
   3646 
   3647 
   3648         case '{':
   3649            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
   3650              goto handle_interval;
   3651            else
   3652              goto normal_char;
   3653 
   3654 
   3655         case '\\':
   3656           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3657 
   3658           /* Do not translate the character after the \, so that we can
   3659              distinguish, e.g., \B from \b, even if we normally would
   3660              translate, e.g., B to b.  */
   3661           PATFETCH_RAW (c);
   3662 
   3663           switch (c)
   3664             {
   3665             case '(':
   3666               if (syntax & RE_NO_BK_PARENS)
   3667                 goto normal_backslash;
   3668 
   3669             handle_open:
   3670               bufp->re_nsub++;
   3671               regnum++;
   3672 
   3673               if (COMPILE_STACK_FULL)
   3674                 {
   3675                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
   3676                             compile_stack_elt_t);
   3677                   if (compile_stack.stack == NULL) return REG_ESPACE;
   3678 
   3679                   compile_stack.size <<= 1;
   3680                 }
   3681 
   3682               /* These are the values to restore when we hit end of this
   3683                  group.  They are all relative offsets, so that if the
   3684                  whole pattern moves because of realloc, they will still
   3685                  be valid.  */
   3686               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
   3687               COMPILE_STACK_TOP.fixup_alt_jump
   3688                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
   3689               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
   3690               COMPILE_STACK_TOP.regnum = regnum;
   3691 
   3692               /* We will eventually replace the 0 with the number of
   3693                  groups inner to this one.  But do not push a
   3694                  start_memory for groups beyond the last one we can
   3695                  represent in the compiled pattern.  */
   3696               if (regnum <= MAX_REGNUM)
   3697                 {
   3698                   COMPILE_STACK_TOP.inner_group_offset = b
   3699 		    - COMPILED_BUFFER_VAR + 2;
   3700                   BUF_PUSH_3 (start_memory, regnum, 0);
   3701                 }
   3702 
   3703               compile_stack.avail++;
   3704 
   3705               fixup_alt_jump = 0;
   3706               laststart = 0;
   3707               begalt = b;
   3708 	      /* If we've reached MAX_REGNUM groups, then this open
   3709 		 won't actually generate any code, so we'll have to
   3710 		 clear pending_exact explicitly.  */
   3711 	      pending_exact = 0;
   3712               break;
   3713 
   3714 
   3715             case ')':
   3716               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
   3717 
   3718               if (COMPILE_STACK_EMPTY)
   3719 		{
   3720 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3721 		    goto normal_backslash;
   3722 		  else
   3723 		    FREE_STACK_RETURN (REG_ERPAREN);
   3724 		}
   3725 
   3726             handle_close:
   3727               if (fixup_alt_jump)
   3728                 { /* Push a dummy failure point at the end of the
   3729                      alternative for a possible future
   3730                      `pop_failure_jump' to pop.  See comments at
   3731                      `push_dummy_failure' in `re_match_2'.  */
   3732                   BUF_PUSH (push_dummy_failure);
   3733 
   3734                   /* We allocated space for this jump when we assigned
   3735                      to `fixup_alt_jump', in the `handle_alt' case below.  */
   3736                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
   3737                 }
   3738 
   3739               /* See similar code for backslashed left paren above.  */
   3740               if (COMPILE_STACK_EMPTY)
   3741 		{
   3742 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3743 		    goto normal_char;
   3744 		  else
   3745 		    FREE_STACK_RETURN (REG_ERPAREN);
   3746 		}
   3747 
   3748               /* Since we just checked for an empty stack above, this
   3749                  ``can't happen''.  */
   3750               assert (compile_stack.avail != 0);
   3751               {
   3752                 /* We don't just want to restore into `regnum', because
   3753                    later groups should continue to be numbered higher,
   3754                    as in `(ab)c(de)' -- the second group is #2.  */
   3755                 regnum_t this_group_regnum;
   3756 
   3757                 compile_stack.avail--;
   3758                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
   3759                 fixup_alt_jump
   3760                   = COMPILE_STACK_TOP.fixup_alt_jump
   3761                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
   3762                     : 0;
   3763                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
   3764                 this_group_regnum = COMPILE_STACK_TOP.regnum;
   3765 		/* If we've reached MAX_REGNUM groups, then this open
   3766 		   won't actually generate any code, so we'll have to
   3767 		   clear pending_exact explicitly.  */
   3768 		pending_exact = 0;
   3769 
   3770                 /* We're at the end of the group, so now we know how many
   3771                    groups were inside this one.  */
   3772                 if (this_group_regnum <= MAX_REGNUM)
   3773                   {
   3774 		    UCHAR_T *inner_group_loc
   3775                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
   3776 
   3777                     *inner_group_loc = regnum - this_group_regnum;
   3778                     BUF_PUSH_3 (stop_memory, this_group_regnum,
   3779                                 regnum - this_group_regnum);
   3780                   }
   3781               }
   3782               break;
   3783 
   3784 
   3785             case '|':					/* `\|'.  */
   3786               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
   3787                 goto normal_backslash;
   3788             handle_alt:
   3789               if (syntax & RE_LIMITED_OPS)
   3790                 goto normal_char;
   3791 
   3792               /* Insert before the previous alternative a jump which
   3793                  jumps to this alternative if the former fails.  */
   3794               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3795               INSERT_JUMP (on_failure_jump, begalt,
   3796 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
   3797               pending_exact = 0;
   3798               b += 1 + OFFSET_ADDRESS_SIZE;
   3799 
   3800               /* The alternative before this one has a jump after it
   3801                  which gets executed if it gets matched.  Adjust that
   3802                  jump so it will jump to this alternative's analogous
   3803                  jump (put in below, which in turn will jump to the next
   3804                  (if any) alternative's such jump, etc.).  The last such
   3805                  jump jumps to the correct final destination.  A picture:
   3806                           _____ _____
   3807                           |   | |   |
   3808                           |   v |   v
   3809                          a | b   | c
   3810 
   3811                  If we are at `b', then fixup_alt_jump right now points to a
   3812                  three-byte space after `a'.  We'll put in the jump, set
   3813                  fixup_alt_jump to right after `b', and leave behind three
   3814                  bytes which we'll fill in when we get to after `c'.  */
   3815 
   3816               if (fixup_alt_jump)
   3817                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   3818 
   3819               /* Mark and leave space for a jump after this alternative,
   3820                  to be filled in later either by next alternative or
   3821                  when know we're at the end of a series of alternatives.  */
   3822               fixup_alt_jump = b;
   3823               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3824               b += 1 + OFFSET_ADDRESS_SIZE;
   3825 
   3826               laststart = 0;
   3827               begalt = b;
   3828               break;
   3829 
   3830 
   3831             case '{':
   3832               /* If \{ is a literal.  */
   3833               if (!(syntax & RE_INTERVALS)
   3834                      /* If we're at `\{' and it's not the open-interval
   3835                         operator.  */
   3836 		  || (syntax & RE_NO_BK_BRACES))
   3837                 goto normal_backslash;
   3838 
   3839             handle_interval:
   3840               {
   3841                 /* If got here, then the syntax allows intervals.  */
   3842 
   3843                 /* At least (most) this many matches must be made.  */
   3844                 int lower_bound = -1, upper_bound = -1;
   3845 
   3846 		/* Place in the uncompiled pattern (i.e., just after
   3847 		   the '{') to go back to if the interval is invalid.  */
   3848 		const CHAR_T *beg_interval = p;
   3849 
   3850                 if (p == pend)
   3851 		  goto invalid_interval;
   3852 
   3853                 GET_UNSIGNED_NUMBER (lower_bound);
   3854 
   3855                 if (c == ',')
   3856                   {
   3857                     GET_UNSIGNED_NUMBER (upper_bound);
   3858 		    if (upper_bound < 0)
   3859 		      upper_bound = RE_DUP_MAX;
   3860                   }
   3861                 else
   3862                   /* Interval such as `{1}' => match exactly once. */
   3863                   upper_bound = lower_bound;
   3864 
   3865                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
   3866 		  goto invalid_interval;
   3867 
   3868                 if (!(syntax & RE_NO_BK_BRACES))
   3869                   {
   3870 		    if (c != '\\' || p == pend)
   3871 		      goto invalid_interval;
   3872                     PATFETCH (c);
   3873                   }
   3874 
   3875                 if (c != '}')
   3876 		  goto invalid_interval;
   3877 
   3878                 /* If it's invalid to have no preceding re.  */
   3879                 if (!laststart)
   3880                   {
   3881 		    if (syntax & RE_CONTEXT_INVALID_OPS
   3882 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
   3883                       FREE_STACK_RETURN (REG_BADRPT);
   3884                     else if (syntax & RE_CONTEXT_INDEP_OPS)
   3885                       laststart = b;
   3886                     else
   3887                       goto unfetch_interval;
   3888                   }
   3889 
   3890                 /* We just parsed a valid interval.  */
   3891 
   3892                 if (RE_DUP_MAX < upper_bound)
   3893 		  FREE_STACK_RETURN (REG_BADBR);
   3894 
   3895                 /* If the upper bound is zero, don't want to succeed at
   3896                    all; jump from `laststart' to `b + 3', which will be
   3897 		   the end of the buffer after we insert the jump.  */
   3898 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
   3899 		   instead of 'b + 3'.  */
   3900                  if (upper_bound == 0)
   3901                    {
   3902                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3903                      INSERT_JUMP (jump, laststart, b + 1
   3904 				  + OFFSET_ADDRESS_SIZE);
   3905                      b += 1 + OFFSET_ADDRESS_SIZE;
   3906                    }
   3907 
   3908                  /* Otherwise, we have a nontrivial interval.  When
   3909                     we're all done, the pattern will look like:
   3910                       set_number_at <jump count> <upper bound>
   3911                       set_number_at <succeed_n count> <lower bound>
   3912                       succeed_n <after jump addr> <succeed_n count>
   3913                       <body of loop>
   3914                       jump_n <succeed_n addr> <jump count>
   3915                     (The upper bound and `jump_n' are omitted if
   3916                     `upper_bound' is 1, though.)  */
   3917                  else
   3918                    { /* If the upper bound is > 1, we need to insert
   3919                         more at the end of the loop.  */
   3920                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
   3921 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
   3922 
   3923                      GET_BUFFER_SPACE (nbytes);
   3924 
   3925                      /* Initialize lower bound of the `succeed_n', even
   3926                         though it will be set during matching by its
   3927                         attendant `set_number_at' (inserted next),
   3928                         because `re_compile_fastmap' needs to know.
   3929                         Jump to the `jump_n' we might insert below.  */
   3930                      INSERT_JUMP2 (succeed_n, laststart,
   3931                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
   3932 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
   3933 				   , lower_bound);
   3934                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3935 
   3936                      /* Code to initialize the lower bound.  Insert
   3937                         before the `succeed_n'.  The `5' is the last two
   3938                         bytes of this `set_number_at', plus 3 bytes of
   3939                         the following `succeed_n'.  */
   3940 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
   3941 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
   3942 			of the following `succeed_n'.  */
   3943                      PREFIX(insert_op2) (set_number_at, laststart, 1
   3944 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
   3945                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3946 
   3947                      if (upper_bound > 1)
   3948                        { /* More than one repetition is allowed, so
   3949                             append a backward jump to the `succeed_n'
   3950                             that starts this interval.
   3951 
   3952                             When we've reached this during matching,
   3953                             we'll have matched the interval once, so
   3954                             jump back only `upper_bound - 1' times.  */
   3955                          STORE_JUMP2 (jump_n, b, laststart
   3956 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
   3957                                       upper_bound - 1);
   3958                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3959 
   3960                          /* The location we want to set is the second
   3961                             parameter of the `jump_n'; that is `b-2' as
   3962                             an absolute address.  `laststart' will be
   3963                             the `set_number_at' we're about to insert;
   3964                             `laststart+3' the number to set, the source
   3965                             for the relative address.  But we are
   3966                             inserting into the middle of the pattern --
   3967                             so everything is getting moved up by 5.
   3968                             Conclusion: (b - 2) - (laststart + 3) + 5,
   3969                             i.e., b - laststart.
   3970 
   3971                             We insert this at the beginning of the loop
   3972                             so that if we fail during matching, we'll
   3973                             reinitialize the bounds.  */
   3974                          PREFIX(insert_op2) (set_number_at, laststart,
   3975 					     b - laststart,
   3976 					     upper_bound - 1, b);
   3977                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3978                        }
   3979                    }
   3980                 pending_exact = 0;
   3981 		break;
   3982 
   3983 	      invalid_interval:
   3984 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
   3985 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
   3986 	      unfetch_interval:
   3987 		/* Match the characters as literals.  */
   3988 		p = beg_interval;
   3989 		c = '{';
   3990 		if (syntax & RE_NO_BK_BRACES)
   3991 		  goto normal_char;
   3992 		else
   3993 		  goto normal_backslash;
   3994 	      }
   3995 
   3996 #ifdef emacs
   3997             /* There is no way to specify the before_dot and after_dot
   3998                operators.  rms says this is ok.  --karl  */
   3999             case '=':
   4000               BUF_PUSH (at_dot);
   4001               break;
   4002 
   4003             case 's':
   4004               laststart = b;
   4005               PATFETCH (c);
   4006               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
   4007               break;
   4008 
   4009             case 'S':
   4010               laststart = b;
   4011               PATFETCH (c);
   4012               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
   4013               break;
   4014 #endif /* emacs */
   4015 
   4016 
   4017             case 'w':
   4018 	      if (syntax & RE_NO_GNU_OPS)
   4019 		goto normal_char;
   4020               laststart = b;
   4021               BUF_PUSH (wordchar);
   4022               break;
   4023 
   4024 
   4025             case 'W':
   4026 	      if (syntax & RE_NO_GNU_OPS)
   4027 		goto normal_char;
   4028               laststart = b;
   4029               BUF_PUSH (notwordchar);
   4030               break;
   4031 
   4032 
   4033             case '<':
   4034 	      if (syntax & RE_NO_GNU_OPS)
   4035 		goto normal_char;
   4036               BUF_PUSH (wordbeg);
   4037               break;
   4038 
   4039             case '>':
   4040 	      if (syntax & RE_NO_GNU_OPS)
   4041 		goto normal_char;
   4042               BUF_PUSH (wordend);
   4043               break;
   4044 
   4045             case 'b':
   4046 	      if (syntax & RE_NO_GNU_OPS)
   4047 		goto normal_char;
   4048               BUF_PUSH (wordbound);
   4049               break;
   4050 
   4051             case 'B':
   4052 	      if (syntax & RE_NO_GNU_OPS)
   4053 		goto normal_char;
   4054               BUF_PUSH (notwordbound);
   4055               break;
   4056 
   4057             case '`':
   4058 	      if (syntax & RE_NO_GNU_OPS)
   4059 		goto normal_char;
   4060               BUF_PUSH (begbuf);
   4061               break;
   4062 
   4063             case '\'':
   4064 	      if (syntax & RE_NO_GNU_OPS)
   4065 		goto normal_char;
   4066               BUF_PUSH (endbuf);
   4067               break;
   4068 
   4069             case '1': case '2': case '3': case '4': case '5':
   4070             case '6': case '7': case '8': case '9':
   4071               if (syntax & RE_NO_BK_REFS)
   4072                 goto normal_char;
   4073 
   4074               c1 = c - '0';
   4075 
   4076               if (c1 > regnum)
   4077                 FREE_STACK_RETURN (REG_ESUBREG);
   4078 
   4079               /* Can't back reference to a subexpression if inside of it.  */
   4080               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
   4081                 goto normal_char;
   4082 
   4083               laststart = b;
   4084               BUF_PUSH_2 (duplicate, c1);
   4085               break;
   4086 
   4087 
   4088             case '+':
   4089             case '?':
   4090               if (syntax & RE_BK_PLUS_QM)
   4091                 goto handle_plus;
   4092               else
   4093                 goto normal_backslash;
   4094 
   4095             default:
   4096             normal_backslash:
   4097               /* You might think it would be useful for \ to mean
   4098                  not to translate; but if we don't translate it
   4099                  it will never match anything.  */
   4100               c = TRANSLATE (c);
   4101               goto normal_char;
   4102             }
   4103           break;
   4104 
   4105 
   4106 	default:
   4107         /* Expects the character in `c'.  */
   4108 	normal_char:
   4109 	      /* If no exactn currently being built.  */
   4110           if (!pending_exact
   4111 #ifdef WCHAR
   4112 	      /* If last exactn handle binary(or character) and
   4113 		 new exactn handle character(or binary).  */
   4114 	      || is_exactn_bin != is_binary[p - 1 - pattern]
   4115 #endif /* WCHAR */
   4116 
   4117               /* If last exactn not at current position.  */
   4118               || pending_exact + *pending_exact + 1 != b
   4119 
   4120               /* We have only one byte following the exactn for the count.  */
   4121 	      || *pending_exact == (1 << BYTEWIDTH) - 1
   4122 
   4123               /* If followed by a repetition operator.  */
   4124               || *p == '*' || *p == '^'
   4125 	      || ((syntax & RE_BK_PLUS_QM)
   4126 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
   4127 		  : (*p == '+' || *p == '?'))
   4128 	      || ((syntax & RE_INTERVALS)
   4129                   && ((syntax & RE_NO_BK_BRACES)
   4130 		      ? *p == '{'
   4131                       : (p[0] == '\\' && p[1] == '{'))))
   4132 	    {
   4133 	      /* Start building a new exactn.  */
   4134 
   4135               laststart = b;
   4136 
   4137 #ifdef WCHAR
   4138 	      /* Is this exactn binary data or character? */
   4139 	      is_exactn_bin = is_binary[p - 1 - pattern];
   4140 	      if (is_exactn_bin)
   4141 		  BUF_PUSH_2 (exactn_bin, 0);
   4142 	      else
   4143 		  BUF_PUSH_2 (exactn, 0);
   4144 #else
   4145 	      BUF_PUSH_2 (exactn, 0);
   4146 #endif /* WCHAR */
   4147 	      pending_exact = b - 1;
   4148             }
   4149 
   4150 	  BUF_PUSH (c);
   4151           (*pending_exact)++;
   4152 	  break;
   4153         } /* switch (c) */
   4154     } /* while p != pend */
   4155 
   4156 
   4157   /* Through the pattern now.  */
   4158 
   4159   if (fixup_alt_jump)
   4160     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   4161 
   4162   if (!COMPILE_STACK_EMPTY)
   4163     FREE_STACK_RETURN (REG_EPAREN);
   4164 
   4165   /* If we don't want backtracking, force success
   4166      the first time we reach the end of the compiled pattern.  */
   4167   if (syntax & RE_NO_POSIX_BACKTRACKING)
   4168     BUF_PUSH (succeed);
   4169 
   4170 #ifdef WCHAR
   4171   free (pattern);
   4172   free (mbs_offset);
   4173   free (is_binary);
   4174 #endif
   4175   free (compile_stack.stack);
   4176 
   4177   /* We have succeeded; set the length of the buffer.  */
   4178 #ifdef WCHAR
   4179   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
   4180 #else
   4181   bufp->used = b - bufp->buffer;
   4182 #endif
   4183 
   4184 #ifdef DEBUG
   4185   if (debug)
   4186     {
   4187       DEBUG_PRINT1 ("\nCompiled pattern: \n");
   4188       PREFIX(print_compiled_pattern) (bufp);
   4189     }
   4190 #endif /* DEBUG */
   4191 
   4192 #ifndef MATCH_MAY_ALLOCATE
   4193   /* Initialize the failure stack to the largest possible stack.  This
   4194      isn't necessary unless we're trying to avoid calling alloca in
   4195      the search and match routines.  */
   4196   {
   4197     int num_regs = bufp->re_nsub + 1;
   4198 
   4199     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
   4200        is strictly greater than re_max_failures, the largest possible stack
   4201        is 2 * re_max_failures failure points.  */
   4202     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
   4203       {
   4204 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
   4205 
   4206 # ifdef emacs
   4207 	if (! fail_stack.stack)
   4208 	  fail_stack.stack
   4209 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
   4210 				    * sizeof (PREFIX(fail_stack_elt_t)));
   4211 	else
   4212 	  fail_stack.stack
   4213 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
   4214 				     (fail_stack.size
   4215 				      * sizeof (PREFIX(fail_stack_elt_t))));
   4216 # else /* not emacs */
   4217 	if (! fail_stack.stack)
   4218 	  fail_stack.stack
   4219 	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
   4220 				   * sizeof (PREFIX(fail_stack_elt_t)));
   4221 	else
   4222 	  fail_stack.stack
   4223 	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
   4224 					    (fail_stack.size
   4225 				     * sizeof (PREFIX(fail_stack_elt_t))));
   4226 # endif /* not emacs */
   4227       }
   4228 
   4229    PREFIX(regex_grow_registers) (num_regs);
   4230   }
   4231 #endif /* not MATCH_MAY_ALLOCATE */
   4232 
   4233   return REG_NOERROR;
   4234 } /* regex_compile */
   4235 
   4236 /* Subroutines for `regex_compile'.  */
   4237 
   4238 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
   4239 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4240 
   4241 static void
   4242 PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
   4243 {
   4244   *loc = (UCHAR_T) op;
   4245   STORE_NUMBER (loc + 1, arg);
   4246 }
   4247 
   4248 
   4249 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4250 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4251 
   4252 static void
   4253 PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
   4254 {
   4255   *loc = (UCHAR_T) op;
   4256   STORE_NUMBER (loc + 1, arg1);
   4257   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
   4258 }
   4259 
   4260 
   4261 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
   4262    for OP followed by two-byte integer parameter ARG.  */
   4263 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4264 
   4265 static void
   4266 PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
   4267 {
   4268   register UCHAR_T *pfrom = end;
   4269   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
   4270 
   4271   while (pfrom != loc)
   4272     *--pto = *--pfrom;
   4273 
   4274   PREFIX(store_op1) (op, loc, arg);
   4275 }
   4276 
   4277 
   4278 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4279 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4280 
   4281 static void
   4282 PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
   4283                     int arg2, UCHAR_T *end)
   4284 {
   4285   register UCHAR_T *pfrom = end;
   4286   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
   4287 
   4288   while (pfrom != loc)
   4289     *--pto = *--pfrom;
   4290 
   4291   PREFIX(store_op2) (op, loc, arg1, arg2);
   4292 }
   4293 
   4294 
   4295 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
   4296    after an alternative or a begin-subexpression.  We assume there is at
   4297    least one character before the ^.  */
   4298 
   4299 static boolean
   4300 PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
   4301                           reg_syntax_t syntax)
   4302 {
   4303   const CHAR_T *prev = p - 2;
   4304   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
   4305 
   4306   return
   4307        /* After a subexpression?  */
   4308        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
   4309        /* After an alternative?  */
   4310     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
   4311 }
   4312 
   4313 
   4314 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
   4315    at least one character after the $, i.e., `P < PEND'.  */
   4316 
   4317 static boolean
   4318 PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
   4319                           reg_syntax_t syntax)
   4320 {
   4321   const CHAR_T *next = p;
   4322   boolean next_backslash = *next == '\\';
   4323   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
   4324 
   4325   return
   4326        /* Before a subexpression?  */
   4327        (syntax & RE_NO_BK_PARENS ? *next == ')'
   4328         : next_backslash && next_next && *next_next == ')')
   4329        /* Before an alternative?  */
   4330     || (syntax & RE_NO_BK_VBAR ? *next == '|'
   4331         : next_backslash && next_next && *next_next == '|');
   4332 }
   4333 
   4334 #else /* not INSIDE_RECURSION */
   4335 
   4336 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
   4337    false if it's not.  */
   4338 
   4339 static boolean
   4340 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
   4341 {
   4342   int this_element;
   4343 
   4344   for (this_element = compile_stack.avail - 1;
   4345        this_element >= 0;
   4346        this_element--)
   4347     if (compile_stack.stack[this_element].regnum == regnum)
   4348       return true;
   4349 
   4350   return false;
   4351 }
   4352 #endif /* not INSIDE_RECURSION */
   4353 
   4354 #ifdef INSIDE_RECURSION
   4355 
   4356 #ifdef WCHAR
   4357 /* This insert space, which size is "num", into the pattern at "loc".
   4358    "end" must point the end of the allocated buffer.  */
   4359 static void
   4360 insert_space (int num, CHAR_T *loc, CHAR_T *end)
   4361 {
   4362   register CHAR_T *pto = end;
   4363   register CHAR_T *pfrom = end - num;
   4364 
   4365   while (pfrom >= loc)
   4366     *pto-- = *pfrom--;
   4367 }
   4368 #endif /* WCHAR */
   4369 
   4370 #ifdef WCHAR
   4371 static reg_errcode_t
   4372 wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
   4373                    const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
   4374                    reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
   4375 {
   4376   const CHAR_T *p = *p_ptr;
   4377   CHAR_T range_start, range_end;
   4378   reg_errcode_t ret;
   4379 # ifdef _LIBC
   4380   uint32_t nrules;
   4381   uint32_t start_val, end_val;
   4382 # endif
   4383   if (p == pend)
   4384     return REG_ERANGE;
   4385 
   4386 # ifdef _LIBC
   4387   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   4388   if (nrules != 0)
   4389     {
   4390       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
   4391 						       _NL_COLLATE_COLLSEQWC);
   4392       const unsigned char *extra = (const unsigned char *)
   4393 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   4394 
   4395       if (range_start_char < -1)
   4396 	{
   4397 	  /* range_start is a collating symbol.  */
   4398 	  int32_t *wextra;
   4399 	  /* Retreive the index and get collation sequence value.  */
   4400 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
   4401 	  start_val = wextra[1 + *wextra];
   4402 	}
   4403       else
   4404 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
   4405 
   4406       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
   4407 
   4408       /* Report an error if the range is empty and the syntax prohibits
   4409 	 this.  */
   4410       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4411 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
   4412 
   4413       /* Insert space to the end of the char_ranges.  */
   4414       insert_space(2, b - char_set[5] - 2, b - 1);
   4415       *(b - char_set[5] - 2) = (wchar_t)start_val;
   4416       *(b - char_set[5] - 1) = (wchar_t)end_val;
   4417       char_set[4]++; /* ranges_index */
   4418     }
   4419   else
   4420 # endif
   4421     {
   4422       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
   4423 	range_start_char;
   4424       range_end = TRANSLATE (p[0]);
   4425       /* Report an error if the range is empty and the syntax prohibits
   4426 	 this.  */
   4427       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4428 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
   4429 
   4430       /* Insert space to the end of the char_ranges.  */
   4431       insert_space(2, b - char_set[5] - 2, b - 1);
   4432       *(b - char_set[5] - 2) = range_start;
   4433       *(b - char_set[5] - 1) = range_end;
   4434       char_set[4]++; /* ranges_index */
   4435     }
   4436   /* Have to increment the pointer into the pattern string, so the
   4437      caller isn't still at the ending character.  */
   4438   (*p_ptr)++;
   4439 
   4440   return ret;
   4441 }
   4442 #else /* BYTE */
   4443 /* Read the ending character of a range (in a bracket expression) from the
   4444    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
   4445    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
   4446    Then we set the translation of all bits between the starting and
   4447    ending characters (inclusive) in the compiled pattern B.
   4448 
   4449    Return an error code.
   4450 
   4451    We use these short variable names so we can use the same macros as
   4452    `regex_compile' itself.  */
   4453 
   4454 static reg_errcode_t
   4455 byte_compile_range (unsigned int range_start_char, const char **p_ptr,
   4456                     const char *pend, RE_TRANSLATE_TYPE translate,
   4457                     reg_syntax_t syntax, unsigned char *b)
   4458 {
   4459   unsigned this_char;
   4460   const char *p = *p_ptr;
   4461   reg_errcode_t ret;
   4462 # if _LIBC
   4463   const unsigned char *collseq;
   4464   unsigned int start_colseq;
   4465   unsigned int end_colseq;
   4466 # else
   4467   unsigned end_char;
   4468 # endif
   4469 
   4470   if (p == pend)
   4471     return REG_ERANGE;
   4472 
   4473   /* Have to increment the pointer into the pattern string, so the
   4474      caller isn't still at the ending character.  */
   4475   (*p_ptr)++;
   4476 
   4477   /* Report an error if the range is empty and the syntax prohibits this.  */
   4478   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
   4479 
   4480 # if _LIBC
   4481   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
   4482 						 _NL_COLLATE_COLLSEQMB);
   4483 
   4484   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
   4485   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
   4486   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
   4487     {
   4488       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
   4489 
   4490       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
   4491 	{
   4492 	  SET_LIST_BIT (TRANSLATE (this_char));
   4493 	  ret = REG_NOERROR;
   4494 	}
   4495     }
   4496 # else
   4497   /* Here we see why `this_char' has to be larger than an `unsigned
   4498      char' -- we would otherwise go into an infinite loop, since all
   4499      characters <= 0xff.  */
   4500   range_start_char = TRANSLATE (range_start_char);
   4501   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
   4502      and some compilers cast it to int implicitly, so following for_loop
   4503      may fall to (almost) infinite loop.
   4504      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
   4505      To avoid this, we cast p[0] to unsigned int and truncate it.  */
   4506   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
   4507 
   4508   for (this_char = range_start_char; this_char <= end_char; ++this_char)
   4509     {
   4510       SET_LIST_BIT (TRANSLATE (this_char));
   4511       ret = REG_NOERROR;
   4512     }
   4513 # endif
   4514 
   4515   return ret;
   4516 }
   4517 #endif /* WCHAR */
   4518 
   4519 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   4521    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   4522    characters can start a string that matches the pattern.  This fastmap
   4523    is used by re_search to skip quickly over impossible starting points.
   4524 
   4525    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   4526    area as BUFP->fastmap.
   4527 
   4528    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   4529    the pattern buffer.
   4530 
   4531    Returns 0 if we succeed, -2 if an internal error.   */
   4532 
   4533 #ifdef WCHAR
   4534 /* local function for re_compile_fastmap.
   4535    truncate wchar_t character to char.  */
   4536 static unsigned char truncate_wchar (CHAR_T c);
   4537 
   4538 static unsigned char
   4539 truncate_wchar (CHAR_T c)
   4540 {
   4541   unsigned char buf[MB_CUR_MAX];
   4542   mbstate_t state;
   4543   int retval;
   4544   memset (&state, '\0', sizeof (state));
   4545 # ifdef _LIBC
   4546   retval = __wcrtomb (buf, c, &state);
   4547 # else
   4548   retval = wcrtomb (buf, c, &state);
   4549 # endif
   4550   return retval > 0 ? buf[0] : (unsigned char) c;
   4551 }
   4552 #endif /* WCHAR */
   4553 
   4554 static int
   4555 PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
   4556 {
   4557   int j, k;
   4558 #ifdef MATCH_MAY_ALLOCATE
   4559   PREFIX(fail_stack_type) fail_stack;
   4560 #endif
   4561 #ifndef REGEX_MALLOC
   4562   char *destination;
   4563 #endif
   4564 
   4565   register char *fastmap = bufp->fastmap;
   4566 
   4567 #ifdef WCHAR
   4568   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   4569      pattern to (char*) in regex_compile.  */
   4570   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
   4571   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
   4572 #else /* BYTE */
   4573   UCHAR_T *pattern = bufp->buffer;
   4574   register UCHAR_T *pend = pattern + bufp->used;
   4575 #endif /* WCHAR */
   4576   UCHAR_T *p = pattern;
   4577 
   4578 #ifdef REL_ALLOC
   4579   /* This holds the pointer to the failure stack, when
   4580      it is allocated relocatably.  */
   4581   fail_stack_elt_t *failure_stack_ptr;
   4582 #endif
   4583 
   4584   /* Assume that each path through the pattern can be null until
   4585      proven otherwise.  We set this false at the bottom of switch
   4586      statement, to which we get only if a particular path doesn't
   4587      match the empty string.  */
   4588   boolean path_can_be_null = true;
   4589 
   4590   /* We aren't doing a `succeed_n' to begin with.  */
   4591   boolean succeed_n_p = false;
   4592 
   4593   assert (fastmap != NULL && p != NULL);
   4594 
   4595   INIT_FAIL_STACK ();
   4596   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
   4597   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
   4598   bufp->can_be_null = 0;
   4599 
   4600   while (1)
   4601     {
   4602       if (p == pend || *p == (UCHAR_T) succeed)
   4603 	{
   4604 	  /* We have reached the (effective) end of pattern.  */
   4605 	  if (!FAIL_STACK_EMPTY ())
   4606 	    {
   4607 	      bufp->can_be_null |= path_can_be_null;
   4608 
   4609 	      /* Reset for next path.  */
   4610 	      path_can_be_null = true;
   4611 
   4612 	      p = fail_stack.stack[--fail_stack.avail].pointer;
   4613 
   4614 	      continue;
   4615 	    }
   4616 	  else
   4617 	    break;
   4618 	}
   4619 
   4620       /* We should never be about to go beyond the end of the pattern.  */
   4621       assert (p < pend);
   4622 
   4623       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   4624 	{
   4625 
   4626         /* I guess the idea here is to simply not bother with a fastmap
   4627            if a backreference is used, since it's too hard to figure out
   4628            the fastmap for the corresponding group.  Setting
   4629            `can_be_null' stops `re_search_2' from using the fastmap, so
   4630            that is all we do.  */
   4631 	case duplicate:
   4632 	  bufp->can_be_null = 1;
   4633           goto done;
   4634 
   4635 
   4636       /* Following are the cases which match a character.  These end
   4637          with `break'.  */
   4638 
   4639 #ifdef WCHAR
   4640 	case exactn:
   4641           fastmap[truncate_wchar(p[1])] = 1;
   4642 	  break;
   4643 #else /* BYTE */
   4644 	case exactn:
   4645           fastmap[p[1]] = 1;
   4646 	  break;
   4647 #endif /* WCHAR */
   4648 #ifdef MBS_SUPPORT
   4649 	case exactn_bin:
   4650 	  fastmap[p[1]] = 1;
   4651 	  break;
   4652 #endif
   4653 
   4654 #ifdef WCHAR
   4655         /* It is hard to distinguish fastmap from (multi byte) characters
   4656            which depends on current locale.  */
   4657         case charset:
   4658 	case charset_not:
   4659 	case wordchar:
   4660 	case notwordchar:
   4661           bufp->can_be_null = 1;
   4662           goto done;
   4663 #else /* BYTE */
   4664         case charset:
   4665           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4666 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
   4667               fastmap[j] = 1;
   4668 	  break;
   4669 
   4670 
   4671 	case charset_not:
   4672 	  /* Chars beyond end of map must be allowed.  */
   4673 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
   4674             fastmap[j] = 1;
   4675 
   4676 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4677 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
   4678               fastmap[j] = 1;
   4679           break;
   4680 
   4681 
   4682 	case wordchar:
   4683 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4684 	    if (SYNTAX (j) == Sword)
   4685 	      fastmap[j] = 1;
   4686 	  break;
   4687 
   4688 
   4689 	case notwordchar:
   4690 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4691 	    if (SYNTAX (j) != Sword)
   4692 	      fastmap[j] = 1;
   4693 	  break;
   4694 #endif /* WCHAR */
   4695 
   4696         case anychar:
   4697 	  {
   4698 	    int fastmap_newline = fastmap['\n'];
   4699 
   4700 	    /* `.' matches anything ...  */
   4701 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
   4702 	      fastmap[j] = 1;
   4703 
   4704 	    /* ... except perhaps newline.  */
   4705 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
   4706 	      fastmap['\n'] = fastmap_newline;
   4707 
   4708 	    /* Return if we have already set `can_be_null'; if we have,
   4709 	       then the fastmap is irrelevant.  Something's wrong here.  */
   4710 	    else if (bufp->can_be_null)
   4711 	      goto done;
   4712 
   4713 	    /* Otherwise, have to check alternative paths.  */
   4714 	    break;
   4715 	  }
   4716 
   4717 #ifdef emacs
   4718         case syntaxspec:
   4719 	  k = *p++;
   4720 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4721 	    if (SYNTAX (j) == (enum syntaxcode) k)
   4722 	      fastmap[j] = 1;
   4723 	  break;
   4724 
   4725 
   4726 	case notsyntaxspec:
   4727 	  k = *p++;
   4728 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4729 	    if (SYNTAX (j) != (enum syntaxcode) k)
   4730 	      fastmap[j] = 1;
   4731 	  break;
   4732 
   4733 
   4734       /* All cases after this match the empty string.  These end with
   4735          `continue'.  */
   4736 
   4737 
   4738 	case before_dot:
   4739 	case at_dot:
   4740 	case after_dot:
   4741           continue;
   4742 #endif /* emacs */
   4743 
   4744 
   4745         case no_op:
   4746         case begline:
   4747         case endline:
   4748 	case begbuf:
   4749 	case endbuf:
   4750 	case wordbound:
   4751 	case notwordbound:
   4752 	case wordbeg:
   4753 	case wordend:
   4754         case push_dummy_failure:
   4755           continue;
   4756 
   4757 
   4758 	case jump_n:
   4759         case pop_failure_jump:
   4760 	case maybe_pop_jump:
   4761 	case jump:
   4762         case jump_past_alt:
   4763 	case dummy_failure_jump:
   4764           EXTRACT_NUMBER_AND_INCR (j, p);
   4765 	  p += j;
   4766 	  if (j > 0)
   4767 	    continue;
   4768 
   4769           /* Jump backward implies we just went through the body of a
   4770              loop and matched nothing.  Opcode jumped to should be
   4771              `on_failure_jump' or `succeed_n'.  Just treat it like an
   4772              ordinary jump.  For a * loop, it has pushed its failure
   4773              point already; if so, discard that as redundant.  */
   4774           if ((re_opcode_t) *p != on_failure_jump
   4775 	      && (re_opcode_t) *p != succeed_n)
   4776 	    continue;
   4777 
   4778           p++;
   4779           EXTRACT_NUMBER_AND_INCR (j, p);
   4780           p += j;
   4781 
   4782           /* If what's on the stack is where we are now, pop it.  */
   4783           if (!FAIL_STACK_EMPTY ()
   4784 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
   4785             fail_stack.avail--;
   4786 
   4787           continue;
   4788 
   4789 
   4790         case on_failure_jump:
   4791         case on_failure_keep_string_jump:
   4792 	handle_on_failure_jump:
   4793           EXTRACT_NUMBER_AND_INCR (j, p);
   4794 
   4795           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
   4796              end of the pattern.  We don't want to push such a point,
   4797              since when we restore it above, entering the switch will
   4798              increment `p' past the end of the pattern.  We don't need
   4799              to push such a point since we obviously won't find any more
   4800              fastmap entries beyond `pend'.  Such a pattern can match
   4801              the null string, though.  */
   4802           if (p + j < pend)
   4803             {
   4804               if (!PUSH_PATTERN_OP (p + j, fail_stack))
   4805 		{
   4806 		  RESET_FAIL_STACK ();
   4807 		  return -2;
   4808 		}
   4809             }
   4810           else
   4811             bufp->can_be_null = 1;
   4812 
   4813           if (succeed_n_p)
   4814             {
   4815               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
   4816               succeed_n_p = false;
   4817 	    }
   4818 
   4819           continue;
   4820 
   4821 
   4822 	case succeed_n:
   4823           /* Get to the number of times to succeed.  */
   4824           p += OFFSET_ADDRESS_SIZE;
   4825 
   4826           /* Increment p past the n for when k != 0.  */
   4827           EXTRACT_NUMBER_AND_INCR (k, p);
   4828           if (k == 0)
   4829 	    {
   4830               p -= 2 * OFFSET_ADDRESS_SIZE;
   4831   	      succeed_n_p = true;  /* Spaghetti code alert.  */
   4832               goto handle_on_failure_jump;
   4833             }
   4834           continue;
   4835 
   4836 
   4837 	case set_number_at:
   4838           p += 2 * OFFSET_ADDRESS_SIZE;
   4839           continue;
   4840 
   4841 
   4842 	case start_memory:
   4843         case stop_memory:
   4844 	  p += 2;
   4845 	  continue;
   4846 
   4847 
   4848 	default:
   4849           abort (); /* We have listed all the cases.  */
   4850         } /* switch *p++ */
   4851 
   4852       /* Getting here means we have found the possible starting
   4853          characters for one path of the pattern -- and that the empty
   4854          string does not match.  We need not follow this path further.
   4855          Instead, look at the next alternative (remembered on the
   4856          stack), or quit if no more.  The test at the top of the loop
   4857          does these things.  */
   4858       path_can_be_null = false;
   4859       p = pend;
   4860     } /* while p */
   4861 
   4862   /* Set `can_be_null' for the last path (also the first path, if the
   4863      pattern is empty).  */
   4864   bufp->can_be_null |= path_can_be_null;
   4865 
   4866  done:
   4867   RESET_FAIL_STACK ();
   4868   return 0;
   4869 }
   4870 
   4871 #else /* not INSIDE_RECURSION */
   4872 
/* Public entry point for fastmap construction.  BUFP is handed to the
   wide-character implementation when the current locale's MB_CUR_MAX
   is not 1 (and MBS_SUPPORT is compiled in), otherwise to the
   single-byte implementation.  The chosen implementation's return
   value is passed back unchanged.  */
int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif

  return byte_re_compile_fastmap (bufp);
}
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
   4886 
   4887 
   4889 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   4890    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   4891    this memory for recording register information.  STARTS and ENDS
   4892    must be allocated using the malloc library routine, and must each
   4893    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   4894 
   4895    If NUM_REGS == 0, then subsequent matches should allocate their own
   4896    register data.
   4897 
   4898    Unless this function is called, the first search or match using
   4899    PATTERN_BUFFER will allocate its own register data, without
   4900    freeing the old data.  */
   4901 
   4902 void
   4903 re_set_registers (struct re_pattern_buffer *bufp,
   4904                   struct re_registers *regs, unsigned num_regs,
   4905                   regoff_t *starts, regoff_t *ends)
   4906 {
   4907   if (num_regs)
   4908     {
   4909       bufp->regs_allocated = REGS_REALLOCATE;
   4910       regs->num_regs = num_regs;
   4911       regs->start = starts;
   4912       regs->end = ends;
   4913     }
   4914   else
   4915     {
   4916       bufp->regs_allocated = REGS_UNALLOCATED;
   4917       regs->num_regs = 0;
   4918       regs->start = regs->end = (regoff_t *) 0;
   4919     }
   4920 }
   4921 #ifdef _LIBC
   4922 weak_alias (__re_set_registers, re_set_registers)
   4923 #endif
   4924 
   4925 /* Searching routines.  */
   4927 
/* Single-string convenience form of re_search_2: the first string is
   empty, STRING (SIZE bytes) is searched as the second string, and the
   stop position is fixed at SIZE, so the caller cannot choose where
   matching stops.  Returns whatever re_search_2 returns.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  const int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
		      regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
   4941 
   4942 
/* Search the virtual concatenation of STRING1 and STRING2 (SIZE1 and
   SIZE2 long, respectively) for the compiled pattern in BUFP->buffer.
   A match is tried first at index STARTPOS, then at STARTPOS + 1, and
   so on.

   RANGE says how far to scan: RANGE = 0 tries only STARTPOS, and in
   general the last start tried is STARTPOS + RANGE.

   REGS receives the indices, relative to the virtual concatenation, of
   what the whole pattern and its subexpressions matched.

   Matching is not considered one past the index STOP in the virtual
   concatenation.

   The result is the position at which the match was found, -1 if there
   is no match, or -2 on error (such as failure stack overflow).

   This wrapper only chooses between the wide-character and the
   single-byte implementation, based on MB_CUR_MAX.  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			    range, regs, stop);
# endif

  return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			   range, regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
   4981 
   4982 #endif /* not INSIDE_RECURSION */
   4983 
   4984 #ifdef INSIDE_RECURSION
   4985 
/* FREE_VAR (VAR): release VAR when it is non-null (through REGEX_FREE
   when MATCH_MAY_ALLOCATE is defined, through free otherwise) and then
   reset VAR to NULL.

   The expansion is wrapped in do { } while (0) so that it is exactly
   one statement.  Written as two bare statements, `if (c) FREE_VAR (p);'
   would reset P even when C is false, and `if (c) FREE_VAR (p); else'
   would not compile.  The explicit null test before free is kept from
   the original definition.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      REGEX_FREE (var);							\
    var = NULL;								\
  } while (0)
#else
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      free (var);							\
    var = NULL;								\
  } while (0)
#endif
   4991 
   4992 #ifdef WCHAR
/* Size threshold used by the wide-character search: conversion buffers
   for a string longer than this are obtained with TALLOC, shorter ones
   with REGEX_TALLOC (see the allocation code in the search routine).  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-character copy and the offset table of both search
   strings.  The release method follows the size test used when the
   buffers were obtained: FREE_VAR for a string of at most
   MAX_ALLOCA_SIZE, plain free for a longer one.  Expects size1, size2,
   wcs_string1/2 and mbs_offset1/2 to be in scope at the call site.  */
# define FREE_WCS_BUFFERS() \
  do {									      \
    if (size1 <= MAX_ALLOCA_SIZE)					      \
      {									      \
	FREE_VAR (wcs_string1);						      \
	FREE_VAR (mbs_offset1);						      \
      }									      \
    else								      \
      {									      \
	free (wcs_string1);						      \
	free (mbs_offset1);						      \
      }									      \
    if (size2 <= MAX_ALLOCA_SIZE)					      \
      {									      \
	FREE_VAR (wcs_string2);						      \
	FREE_VAR (mbs_offset2);						      \
      }									      \
    else								      \
      {									      \
	free (wcs_string2);						      \
	free (mbs_offset2);						      \
      }									      \
  } while (0)
   5018 
   5019 #endif
   5020 
   5021 
   5022 static int
   5023 PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
   5024                      int size1, const char *string2, int size2,
   5025                      int startpos, int range,
   5026                      struct re_registers *regs, int stop)
   5027 {
   5028   int val;
   5029   register char *fastmap = bufp->fastmap;
   5030   register RE_TRANSLATE_TYPE translate = bufp->translate;
   5031   int total_size = size1 + size2;
   5032   int endpos = startpos + range;
   5033 #ifdef WCHAR
   5034   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
   5035   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
   5036   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
   5037   int wcs_size1 = 0, wcs_size2 = 0;
   5038   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
   5039   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
   5040   /* They hold whether each wchar_t is binary data or not.  */
   5041   char *is_binary = NULL;
   5042 #endif /* WCHAR */
   5043 
   5044   /* Check for out-of-range STARTPOS.  */
   5045   if (startpos < 0 || startpos > total_size)
   5046     return -1;
   5047 
   5048   /* Fix up RANGE if it might eventually take us outside
   5049      the virtual concatenation of STRING1 and STRING2.
   5050      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
   5051   if (endpos < 0)
   5052     range = 0 - startpos;
   5053   else if (endpos > total_size)
   5054     range = total_size - startpos;
   5055 
   5056   /* If the search isn't to be a backwards one, don't waste time in a
   5057      search for a pattern that must be anchored.  */
   5058   if (bufp->used > 0 && range > 0
   5059       && ((re_opcode_t) bufp->buffer[0] == begbuf
   5060 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
   5061 	  || ((re_opcode_t) bufp->buffer[0] == begline
   5062 	      && !bufp->newline_anchor)))
   5063     {
   5064       if (startpos > 0)
   5065 	return -1;
   5066       else
   5067 	range = 1;
   5068     }
   5069 
   5070 #ifdef emacs
   5071   /* In a forward search for something that starts with \=.
   5072      don't keep searching past point.  */
   5073   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
   5074     {
   5075       range = PT - startpos;
   5076       if (range <= 0)
   5077 	return -1;
   5078     }
   5079 #endif /* emacs */
   5080 
   5081   /* Update the fastmap now if not correct already.  */
   5082   if (fastmap && !bufp->fastmap_accurate)
   5083     if (re_compile_fastmap (bufp) == -2)
   5084       return -2;
   5085 
   5086 #ifdef WCHAR
   5087   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
   5088      fill them with converted string.  */
   5089   if (size1 != 0)
   5090     {
   5091       if (size1 > MAX_ALLOCA_SIZE)
   5092 	{
   5093 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
   5094 	  mbs_offset1 = TALLOC (size1 + 1, int);
   5095 	  is_binary = TALLOC (size1 + 1, char);
   5096 	}
   5097       else
   5098 	{
   5099 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
   5100 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
   5101 	  is_binary = REGEX_TALLOC (size1 + 1, char);
   5102 	}
   5103       if (!wcs_string1 || !mbs_offset1 || !is_binary)
   5104 	{
   5105 	  if (size1 > MAX_ALLOCA_SIZE)
   5106 	    {
   5107 	      free (wcs_string1);
   5108 	      free (mbs_offset1);
   5109 	      free (is_binary);
   5110 	    }
   5111 	  else
   5112 	    {
   5113 	      FREE_VAR (wcs_string1);
   5114 	      FREE_VAR (mbs_offset1);
   5115 	      FREE_VAR (is_binary);
   5116 	    }
   5117 	  return -2;
   5118 	}
   5119       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
   5120 				     mbs_offset1, is_binary);
   5121       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
   5122       if (size1 > MAX_ALLOCA_SIZE)
   5123 	free (is_binary);
   5124       else
   5125 	FREE_VAR (is_binary);
   5126     }
   5127   if (size2 != 0)
   5128     {
   5129       if (size2 > MAX_ALLOCA_SIZE)
   5130 	{
   5131 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
   5132 	  mbs_offset2 = TALLOC (size2 + 1, int);
   5133 	  is_binary = TALLOC (size2 + 1, char);
   5134 	}
   5135       else
   5136 	{
   5137 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
   5138 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
   5139 	  is_binary = REGEX_TALLOC (size2 + 1, char);
   5140 	}
   5141       if (!wcs_string2 || !mbs_offset2 || !is_binary)
   5142 	{
   5143 	  FREE_WCS_BUFFERS ();
   5144 	  if (size2 > MAX_ALLOCA_SIZE)
   5145 	    free (is_binary);
   5146 	  else
   5147 	    FREE_VAR (is_binary);
   5148 	  return -2;
   5149 	}
   5150       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
   5151 				     mbs_offset2, is_binary);
   5152       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
   5153       if (size2 > MAX_ALLOCA_SIZE)
   5154 	free (is_binary);
   5155       else
   5156 	FREE_VAR (is_binary);
   5157     }
   5158 #endif /* WCHAR */
   5159 
   5160 
   5161   /* Loop through the string, looking for a place to start matching.  */
   5162   for (;;)
   5163     {
   5164       /* If a fastmap is supplied, skip quickly over characters that
   5165          cannot be the start of a match.  If the pattern can match the
   5166          null string, however, we don't need to skip characters; we want
   5167          the first null string.  */
   5168       if (fastmap && startpos < total_size && !bufp->can_be_null)
   5169 	{
   5170 	  if (range > 0)	/* Searching forwards.  */
   5171 	    {
   5172 	      register const char *d;
   5173 	      register int lim = 0;
   5174 	      int irange = range;
   5175 
   5176               if (startpos < size1 && startpos + range >= size1)
   5177                 lim = range - (size1 - startpos);
   5178 
   5179 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
   5180 
   5181               /* Written out as an if-else to avoid testing `translate'
   5182                  inside the loop.  */
   5183 	      if (translate)
   5184                 while (range > lim
   5185                        && !fastmap[(unsigned char)
   5186 				   translate[(unsigned char) *d++]])
   5187                   range--;
   5188 	      else
   5189                 while (range > lim && !fastmap[(unsigned char) *d++])
   5190                   range--;
   5191 
   5192 	      startpos += irange - range;
   5193 	    }
   5194 	  else				/* Searching backwards.  */
   5195 	    {
   5196 	      register CHAR_T c = (size1 == 0 || startpos >= size1
   5197 				      ? string2[startpos - size1]
   5198 				      : string1[startpos]);
   5199 
   5200 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
   5201 		goto advance;
   5202 	    }
   5203 	}
   5204 
   5205       /* If can't match the null string, and that's all we have left, fail.  */
   5206       if (range >= 0 && startpos == total_size && fastmap
   5207           && !bufp->can_be_null)
   5208        {
   5209 #ifdef WCHAR
   5210          FREE_WCS_BUFFERS ();
   5211 #endif
   5212          return -1;
   5213        }
   5214 
   5215 #ifdef WCHAR
   5216       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
   5217 				     size2, startpos, regs, stop,
   5218 				     wcs_string1, wcs_size1,
   5219 				     wcs_string2, wcs_size2,
   5220 				     mbs_offset1, mbs_offset2);
   5221 #else /* BYTE */
   5222       val = byte_re_match_2_internal (bufp, string1, size1, string2,
   5223 				      size2, startpos, regs, stop);
   5224 #endif /* BYTE */
   5225 
   5226 #ifndef REGEX_MALLOC
   5227 # ifdef C_ALLOCA
   5228       alloca (0);
   5229 # endif
   5230 #endif
   5231 
   5232       if (val >= 0)
   5233 	{
   5234 #ifdef WCHAR
   5235 	  FREE_WCS_BUFFERS ();
   5236 #endif
   5237 	  return startpos;
   5238 	}
   5239 
   5240       if (val == -2)
   5241 	{
   5242 #ifdef WCHAR
   5243 	  FREE_WCS_BUFFERS ();
   5244 #endif
   5245 	  return -2;
   5246 	}
   5247 
   5248     advance:
   5249       if (!range)
   5250         break;
   5251       else if (range > 0)
   5252         {
   5253           range--;
   5254           startpos++;
   5255         }
   5256       else
   5257         {
   5258           range++;
   5259           startpos--;
   5260         }
   5261     }
   5262 #ifdef WCHAR
   5263   FREE_WCS_BUFFERS ();
   5264 #endif
   5265   return -1;
   5266 }
   5267 
#ifdef WCHAR
/* Convert PTR, a pointer into one of the wchar_t search strings
   `string1' and `string2', into a multibyte string offset.  The
   mbs_offset tables supply the offset (see convert_mbs_to_wcs); a null
   table yields 0.  Offsets into string2 have csize1 added.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t) (FIRST_STRING_P (ptr)					\
	       ? (mbs_offset1 != NULL ? mbs_offset1[(ptr) - string1] : 0) \
	       : ((mbs_offset2 != NULL ? mbs_offset2[(ptr) - string2] : 0) \
		  + csize1)))
#else /* BYTE */
/* Convert PTR, a pointer into one of the search strings `string1' and
   `string2', into an offset: the distance from the start of string1,
   or size1 plus the distance from the start of string2.  */
# define POINTER_TO_OFFSET(ptr)			\
  ((regoff_t) (FIRST_STRING_P (ptr)		\
	       ? (ptr) - string1		\
	       : (ptr) - string2 + size1))
#endif /* WCHAR */
   5286 
   5287 /* Macros for dealing with the split strings in re_match_2.  */
   5288 
/* True while the current scanning limit `dend' is the end of the
   matchable part of string1.  */
#define MATCHING_IN_FIRST_STRING  (end_match_1 == dend)

/* Call before fetching a character with *d.  As long as `d' has
   reached `dend': jump to `fail' if that limit is already the end of
   string2, otherwise continue at the start of string2 with
   end_match_2 as the new limit.  */
#define PREFETCH()							\
  for (; d == dend; d = string2, dend = end_match_2)			\
    {									\
      /* End of string2 => fail.  */					\
      if (dend == end_match_2)						\
	goto fail;							\
      /* Otherwise end of string1 => the step advances to string2.  */	\
    }
   5303 
/* Tests for the very beginning and the very end of the virtual
   concatenation of `string1' and `string2'.  The beginning is the
   start of string1, or of string2 when size1 is zero (a lone string is
   `string2'); AT_STRINGS_BEG is also true whenever size2 is zero.  The
   end is `end2'.  */
#define AT_STRINGS_BEG(d) ((size1 ? string1 : string2) == (d) || size2 == 0)
#define AT_STRINGS_END(d) (end2 == (d))
   5308 
   5309 
/* Test whether D points to a word-constituent character.  Two
   positions are special: at `end1' (just past string1) the first
   character of string2 is examined, and at `string2 - 1' (just before
   string2) the last character of string1 is examined.  */
#ifdef WCHAR
/* Use the internationalized API instead of SYNTAX: a character counts
   when iswalnum accepts it or when it is an underscore.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t) ((d) == end1					\
		       ? *string2					\
		       : ((d) == string2 - 1 ? *(end1 - 1) : *(d))))	\
   || ((d) == end1							\
       ? *string2							\
       : ((d) == string2 - 1 ? *(end1 - 1) : *(d))) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d)							\
  (Sword == SYNTAX ((d) == end1						\
		    ? *string2						\
		    : ((d) == string2 - 1 ? *(end1 - 1) : *(d))))
#endif /* WCHAR */
   5327 
/* Compiled out because of a compiler bug -- see the comment at case
   wordbound.  Kept for reference only.  */
#if 0
/* True at either end of the strings, or where the character at D and
   the one before it differ in being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_END (d) || AT_STRINGS_BEG (d)				\
   || WORDCHAR_P (d) != WORDCHAR_P (d - 1))
#endif
   5336 
/* Free everything we malloc.

   FREE_VARIABLES has one definition per build flavour:
     - with MATCH_MAY_ALLOCATE it releases the failure stack and every
       register array;
     - with WCHAR it additionally releases string1/2 and mbs_offset1/2,
       but only when cant_free_wcs_buf is zero;
     - with neither, it does nothing.  */
#ifdef WCHAR
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# endif /* not MATCH_MAY_ALLOCATE */
#else /* BYTE */
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* not MATCH_MAY_ALLOCATE */
#endif /* WCHAR */
   5391 
/* Sentinels meaning "no register is active".  Constraints:
     - neither may be a valid register value; register numbers occupy
       one byte in the pattern, so at most 255 registers exist and any
       number above 255 is free;
     - the two must differ by 1, because of NUM_FAILURE_ITEMS above;
     - the "lowest" sentinel must exceed the "highest" one, so that no
       registers are saved when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (1 + NO_HIGHEST_ACTIVE_REG)
   5401 
   5402 #else /* not INSIDE_RECURSION */
   5404 /* Matching routines.  */
   5405 
#ifndef emacs   /* Emacs never uses this.  */
/* Single-string form of re_match_2: STRING (SIZE bytes) is passed as
   the second string with an empty first string, matching starts at POS
   and the stop position is SIZE.  The internal matcher's result is
   returned unchanged.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int result;
  const int stop = size;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    /* No pre-converted wide-character buffers are supplied.  */
    result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
				      pos, regs, stop,
				      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
				       pos, regs, stop);

  /* Force an alloca cleanup now that the matcher has returned.  */
# if !defined REGEX_MALLOC && defined C_ALLOCA
  alloca (0);
# endif
  return result;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
#endif /* not emacs */
   5434 
   5435 #endif /* not INSIDE_RECURSION */
   5436 
   5437 #ifdef INSIDE_RECURSION
   5438 static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
   5439                                                   UCHAR_T *end,
   5440 					PREFIX(register_info_type) *reg_info);
   5441 static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
   5442                                                 UCHAR_T *end,
   5443 					PREFIX(register_info_type) *reg_info);
   5444 static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
   5445                                                       UCHAR_T *end,
   5446 					PREFIX(register_info_type) *reg_info);
   5447 static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
   5448                                    int len, char *translate);
   5449 #else /* not INSIDE_RECURSION */
   5450 
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
   SIZE2, respectively).  Matching starts at POS and stops at STOP.

   When REGS is non-null, offsets for the substring each group matched
   may be stored in REGS, depending on the `no_sub' field of BUFP.
   NOTE(review): the previous wording tied this to `no_sub' being
   nonzero; the test itself is in the internal matcher and is not
   visible here -- confirm the polarity.  See the documentation for
   exactly how many groups are filled.

   Returns -1 if there is no match, -2 on an internal error (such as
   the failure stack overflowing), and otherwise the length of the
   matched substring.  */

int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int result;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    /* No pre-converted wide-character buffers are supplied.  */
    result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
				      pos, regs, stop,
				      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
				       pos, regs, stop);

  /* Force an alloca cleanup now that the matcher has returned.  */
#if !defined REGEX_MALLOC && defined C_ALLOCA
  alloca (0);
#endif
  return result;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
   5490 
   5491 #endif /* not INSIDE_RECURSION */
   5492 
   5493 #ifdef INSIDE_RECURSION
   5494 
   5495 #ifdef WCHAR
static int count_mbs_length (int *, int);

/* Count how many wchar_t characters the substring (from 0 to LENGTH)
   of a multibyte string occupies, using the OFFSET_BUFFER that
   corresponds to that string (see convert_mbs_to_wcs).

   The answer is the index I for which OFFSET_BUFFER[I] == LENGTH.  It
   is found by binary search over indices 0..LENGTH, which relies on
   the entries being in ascending order with OFFSET_BUFFER[I] >= I.
   OFFSET_BUFFER[LENGTH] is read, so the buffer must have at least
   LENGTH + 1 entries.

   Returns that index; 0 when OFFSET_BUFFER is NULL; -1 when LENGTH is
   negative or no entry equals LENGTH.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Start with upper at length (because for all i,
     offset_buffer[i] >= i).  */
  lower = 0;
  upper = length;

  for (;;)
    {
      /* Written as lower + half the gap so that the midpoint cannot
         overflow `int', which lower + upper could for a very large
         LENGTH.  */
      int middle = lower + (upper - lower) / 2;

      /* The interval cannot shrink any further: no exact entry.  */
      if (middle == lower || middle == upper)
	break;

      if (offset_buffer[middle] > length)
	upper = middle;
      else if (offset_buffer[middle] < length)
	lower = middle;
      else
	return middle;
    }

  return -1;
}
   5539 #endif /* WCHAR */
   5540 
   5541 /* This is a separate function so that we can force an alloca cleanup
   5542    afterwards.  */
   5543 #ifdef WCHAR
   5544 static int
   5545 wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
   5546                          const char *cstring1, int csize1,
   5547                          const char *cstring2, int csize2,
   5548                          int pos,
   5549 			 struct re_registers *regs,
   5550                          int stop,
   5551      /* string1 == string2 == NULL means string1/2, size1/2 and
   5552 	mbs_offset1/2 need setting up in this function.  */
   5553      /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
   5554                          wchar_t *string1, int size1,
   5555                          wchar_t *string2, int size2,
   5556      /* offset buffer for optimization. See convert_mbs_to_wcs.  */
   5557 			 int *mbs_offset1, int *mbs_offset2)
   5558 #else /* BYTE */
   5559 static int
   5560 byte_re_match_2_internal (struct re_pattern_buffer *bufp,
   5561                           const char *string1, int size1,
   5562                           const char *string2, int size2,
   5563                           int pos,
   5564 			  struct re_registers *regs, int stop)
   5565 #endif /* BYTE */
   5566 {
   5567   /* General temporaries.  */
   5568   int mcnt;
   5569   UCHAR_T *p1;
   5570 #ifdef WCHAR
   5571   /* They hold whether each wchar_t is binary data or not.  */
   5572   char *is_binary = NULL;
   5573   /* If true, we can't free string1/2, mbs_offset1/2.  */
   5574   int cant_free_wcs_buf = 1;
   5575 #endif /* WCHAR */
   5576 
   5577   /* Just past the end of the corresponding string.  */
   5578   const CHAR_T *end1, *end2;
   5579 
   5580   /* Pointers into string1 and string2, just past the last characters in
   5581      each to consider matching.  */
   5582   const CHAR_T *end_match_1, *end_match_2;
   5583 
   5584   /* Where we are in the data, and the end of the current string.  */
   5585   const CHAR_T *d, *dend;
   5586 
   5587   /* Where we are in the pattern, and the end of the pattern.  */
   5588 #ifdef WCHAR
   5589   UCHAR_T *pattern, *p;
   5590   register UCHAR_T *pend;
   5591 #else /* BYTE */
   5592   UCHAR_T *p = bufp->buffer;
   5593   register UCHAR_T *pend = p + bufp->used;
   5594 #endif /* WCHAR */
   5595 
   5596   /* Mark the opcode just after a start_memory, so we can test for an
   5597      empty subpattern when we get to the stop_memory.  */
   5598   UCHAR_T *just_past_start_mem = 0;
   5599 
   5600   /* We use this to map every character in the string.  */
   5601   RE_TRANSLATE_TYPE translate = bufp->translate;
   5602 
   5603   /* Failure point stack.  Each place that can handle a failure further
   5604      down the line pushes a failure point on this stack.  It consists of
   5605      restart, regend, and reg_info for all registers corresponding to
   5606      the subexpressions we're currently inside, plus the number of such
   5607      registers, and, finally, two char *'s.  The first char * is where
   5608      to resume scanning the pattern; the second one is where to resume
   5609      scanning the strings.  If the latter is zero, the failure point is
   5610      a ``dummy''; if a failure happens and the failure point is a dummy,
   5611      it gets discarded and the next one is tried.  */
   5612 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5613   PREFIX(fail_stack_type) fail_stack;
   5614 #endif
   5615 #ifdef DEBUG
   5616   static unsigned failure_id;
   5617   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
   5618 #endif
   5619 
   5620 #ifdef REL_ALLOC
   5621   /* This holds the pointer to the failure stack, when
   5622      it is allocated relocatably.  */
   5623   fail_stack_elt_t *failure_stack_ptr;
   5624 #endif
   5625 
   5626   /* We fill all the registers internally, independent of what we
   5627      return, for use in backreferences.  The number here includes
   5628      an element for register zero.  */
   5629   size_t num_regs = bufp->re_nsub + 1;
   5630 
   5631   /* The currently active registers.  */
   5632   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   5633   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   5634 
   5635   /* Information on the contents of registers. These are pointers into
   5636      the input strings; they record just what was matched (on this
   5637      attempt) by a subexpression part of the pattern, that is, the
   5638      regnum-th regstart pointer points to where in the pattern we began
   5639      matching and the regnum-th regend points to right after where we
   5640      stopped matching the regnum-th subexpression.  (The zeroth register
   5641      keeps track of what the whole pattern matches.)  */
   5642 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5643   const CHAR_T **regstart, **regend;
   5644 #endif
   5645 
   5646   /* If a group that's operated upon by a repetition operator fails to
   5647      match anything, then the register for its start will need to be
   5648      restored because it will have been set to wherever in the string we
   5649      are when we last see its open-group operator.  Similarly for a
   5650      register's end.  */
   5651 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5652   const CHAR_T **old_regstart, **old_regend;
   5653 #endif
   5654 
   5655   /* The is_active field of reg_info helps us keep track of which (possibly
   5656      nested) subexpressions we are currently in. The matched_something
   5657      field of reg_info[reg_num] helps us tell whether or not we have
   5658      matched any of the pattern so far this time through the reg_num-th
   5659      subexpression.  These two fields get reset each time through any
   5660      loop their register is in.  */
   5661 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5662   PREFIX(register_info_type) *reg_info;
   5663 #endif
   5664 
   5665   /* The following record the register info as found in the above
   5666      variables when we find a match better than any we've seen before.
   5667      This happens as we backtrack through the failure points, which in
   5668      turn happens only if we have not yet matched the entire string. */
   5669   unsigned best_regs_set = false;
   5670 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5671   const CHAR_T **best_regstart, **best_regend;
   5672 #endif
   5673 
   5674   /* Logically, this is `best_regend[0]'.  But we don't want to have to
   5675      allocate space for that if we're not allocating space for anything
   5676      else (see below).  Also, we never need info about register 0 for
   5677      any of the other register vectors, and it seems rather a kludge to
   5678      treat `best_regend' differently than the rest.  So we keep track of
   5679      the end of the best match so far in a separate variable.  We
   5680      initialize this to NULL so that when we backtrack the first time
   5681      and need to test it, it's not garbage.  */
   5682   const CHAR_T *match_end = NULL;
   5683 
   5684   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   5685   int set_regs_matched_done = 0;
   5686 
   5687   /* Used when we pop values we don't care about.  */
   5688 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5689   const CHAR_T **reg_dummy;
   5690   PREFIX(register_info_type) *reg_info_dummy;
   5691 #endif
   5692 
   5693 #ifdef DEBUG
   5694   /* Counts the total number of registers pushed.  */
   5695   unsigned num_regs_pushed = 0;
   5696 #endif
   5697 
   5698   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
   5699 
   5700   INIT_FAIL_STACK ();
   5701 
   5702 #ifdef MATCH_MAY_ALLOCATE
   5703   /* Do not bother to initialize all the register variables if there are
   5704      no groups in the pattern, as it takes a fair amount of time.  If
   5705      there are groups, we include space for register 0 (the whole
   5706      pattern), even though we never use it, since it simplifies the
   5707      array indexing.  We should fix this.  */
   5708   if (bufp->re_nsub)
   5709     {
   5710       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5711       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5712       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5713       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5714       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5715       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5716       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5717       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
   5718       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5719 
   5720       if (!(regstart && regend && old_regstart && old_regend && reg_info
   5721             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
   5722         {
   5723           FREE_VARIABLES ();
   5724           return -2;
   5725         }
   5726     }
   5727   else
   5728     {
   5729       /* We must initialize all our variables to NULL, so that
   5730          `FREE_VARIABLES' doesn't try to free them.  */
   5731       regstart = regend = old_regstart = old_regend = best_regstart
   5732         = best_regend = reg_dummy = NULL;
   5733       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
   5734     }
   5735 #endif /* MATCH_MAY_ALLOCATE */
   5736 
   5737   /* The starting position is bogus.  */
   5738 #ifdef WCHAR
   5739   if (pos < 0 || pos > csize1 + csize2)
   5740 #else /* BYTE */
   5741   if (pos < 0 || pos > size1 + size2)
   5742 #endif
   5743     {
   5744       FREE_VARIABLES ();
   5745       return -1;
   5746     }
   5747 
   5748 #ifdef WCHAR
   5749   /* Allocate wchar_t array for string1 and string2 and
   5750      fill them with converted string.  */
   5751   if (string1 == NULL && string2 == NULL)
   5752     {
   5753       /* We need to set up the buffers here.  */
   5754 
   5755       /* We must free wcs buffers in this function.  */
   5756       cant_free_wcs_buf = 0;
   5757 
   5758       if (csize1 != 0)
   5759 	{
   5760 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
   5761 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
   5762 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
   5763 	  if (!string1 || !mbs_offset1 || !is_binary)
   5764 	    {
   5765 	      FREE_VAR (string1);
   5766 	      FREE_VAR (mbs_offset1);
   5767 	      FREE_VAR (is_binary);
   5768 	      return -2;
   5769 	    }
   5770 	}
   5771       if (csize2 != 0)
   5772 	{
   5773 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
   5774 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
   5775 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
   5776 	  if (!string2 || !mbs_offset2 || !is_binary)
   5777 	    {
   5778 	      FREE_VAR (string1);
   5779 	      FREE_VAR (mbs_offset1);
   5780 	      FREE_VAR (string2);
   5781 	      FREE_VAR (mbs_offset2);
   5782 	      FREE_VAR (is_binary);
   5783 	      return -2;
   5784 	    }
   5785 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
   5786 				     mbs_offset2, is_binary);
   5787 	  string2[size2] = L'\0'; /* for a sentinel  */
   5788 	  FREE_VAR (is_binary);
   5789 	}
   5790     }
   5791 
   5792   /* We need to cast pattern to (wchar_t*), because we cast this compiled
   5793      pattern to (char*) in regex_compile.  */
   5794   p = pattern = (CHAR_T*)bufp->buffer;
   5795   pend = (CHAR_T*)(bufp->buffer + bufp->used);
   5796 
   5797 #endif /* WCHAR */
   5798 
   5799   /* Initialize subexpression text positions to -1 to mark ones that no
   5800      start_memory/stop_memory has been seen for. Also initialize the
   5801      register information struct.  */
   5802   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5803     {
   5804       regstart[mcnt] = regend[mcnt]
   5805         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
   5806 
   5807       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
   5808       IS_ACTIVE (reg_info[mcnt]) = 0;
   5809       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   5810       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   5811     }
   5812 
   5813   /* We move `string1' into `string2' if the latter's empty -- but not if
   5814      `string1' is null.  */
   5815   if (size2 == 0 && string1 != NULL)
   5816     {
   5817       string2 = string1;
   5818       size2 = size1;
   5819       string1 = 0;
   5820       size1 = 0;
   5821 #ifdef WCHAR
   5822       mbs_offset2 = mbs_offset1;
   5823       csize2 = csize1;
   5824       mbs_offset1 = NULL;
   5825       csize1 = 0;
   5826 #endif
   5827     }
   5828   end1 = string1 + size1;
   5829   end2 = string2 + size2;
   5830 
   5831   /* Compute where to stop matching, within the two strings.  */
   5832 #ifdef WCHAR
   5833   if (stop <= csize1)
   5834     {
   5835       mcnt = count_mbs_length(mbs_offset1, stop);
   5836       end_match_1 = string1 + mcnt;
   5837       end_match_2 = string2;
   5838     }
   5839   else
   5840     {
   5841       if (stop > csize1 + csize2)
   5842 	stop = csize1 + csize2;
   5843       end_match_1 = end1;
   5844       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
   5845       end_match_2 = string2 + mcnt;
   5846     }
   5847   if (mcnt < 0)
   5848     { /* count_mbs_length return error.  */
   5849       FREE_VARIABLES ();
   5850       return -1;
   5851     }
   5852 #else
   5853   if (stop <= size1)
   5854     {
   5855       end_match_1 = string1 + stop;
   5856       end_match_2 = string2;
   5857     }
   5858   else
   5859     {
   5860       end_match_1 = end1;
   5861       end_match_2 = string2 + stop - size1;
   5862     }
   5863 #endif /* WCHAR */
   5864 
   5865   /* `p' scans through the pattern as `d' scans through the data.
   5866      `dend' is the end of the input string that `d' points within.  `d'
   5867      is advanced into the following input string whenever necessary, but
   5868      this happens before fetching; therefore, at the beginning of the
   5869      loop, `d' can be pointing at the end of a string, but it cannot
   5870      equal `string2'.  */
   5871 #ifdef WCHAR
   5872   if (size1 > 0 && pos <= csize1)
   5873     {
   5874       mcnt = count_mbs_length(mbs_offset1, pos);
   5875       d = string1 + mcnt;
   5876       dend = end_match_1;
   5877     }
   5878   else
   5879     {
   5880       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
   5881       d = string2 + mcnt;
   5882       dend = end_match_2;
   5883     }
   5884 
   5885   if (mcnt < 0)
   5886     { /* count_mbs_length return error.  */
   5887       FREE_VARIABLES ();
   5888       return -1;
   5889     }
   5890 #else
   5891   if (size1 > 0 && pos <= size1)
   5892     {
   5893       d = string1 + pos;
   5894       dend = end_match_1;
   5895     }
   5896   else
   5897     {
   5898       d = string2 + pos - size1;
   5899       dend = end_match_2;
   5900     }
   5901 #endif /* WCHAR */
   5902 
   5903   DEBUG_PRINT1 ("The compiled pattern is:\n");
   5904   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   5905   DEBUG_PRINT1 ("The string to match is: `");
   5906   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
   5907   DEBUG_PRINT1 ("'\n");
   5908 
   5909   /* This loops over pattern commands.  It exits by returning from the
   5910      function if the match is complete, or it drops through if the match
   5911      fails at this starting point in the input data.  */
   5912   for (;;)
   5913     {
   5914 #ifdef _LIBC
   5915       DEBUG_PRINT2 ("\n%p: ", p);
   5916 #else
   5917       DEBUG_PRINT2 ("\n0x%x: ", p);
   5918 #endif
   5919 
   5920       if (p == pend)
   5921 	{ /* End of pattern means we might have succeeded.  */
   5922           DEBUG_PRINT1 ("end of pattern ... ");
   5923 
   5924 	  /* If we haven't matched the entire string, and we want the
   5925              longest match, try backtracking.  */
   5926           if (d != end_match_2)
   5927 	    {
   5928 	      /* 1 if this match ends in the same string (string1 or string2)
   5929 		 as the best previous match.  */
   5930 	      boolean same_str_p;
   5931 
   5932 	      /* 1 if this match is the best seen so far.  */
   5933 	      boolean best_match_p;
   5934 
   5935               same_str_p = (FIRST_STRING_P (match_end)
   5936                             == MATCHING_IN_FIRST_STRING);
   5937 
   5938 	      /* AIX compiler got confused when this was combined
   5939 		 with the previous declaration.  */
   5940 	      if (same_str_p)
   5941 		best_match_p = d > match_end;
   5942 	      else
   5943 		best_match_p = !MATCHING_IN_FIRST_STRING;
   5944 
   5945               DEBUG_PRINT1 ("backtracking.\n");
   5946 
   5947               if (!FAIL_STACK_EMPTY ())
   5948                 { /* More failure points to try.  */
   5949 
   5950                   /* If exceeds best match so far, save it.  */
   5951                   if (!best_regs_set || best_match_p)
   5952                     {
   5953                       best_regs_set = true;
   5954                       match_end = d;
   5955 
   5956                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
   5957 
   5958                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5959                         {
   5960                           best_regstart[mcnt] = regstart[mcnt];
   5961                           best_regend[mcnt] = regend[mcnt];
   5962                         }
   5963                     }
   5964                   goto fail;
   5965                 }
   5966 
   5967               /* If no failure points, don't restore garbage.  And if
   5968                  last match is real best match, don't restore second
   5969                  best one. */
   5970               else if (best_regs_set && !best_match_p)
   5971                 {
   5972   	        restore_best_regs:
   5973                   /* Restore best match.  It may happen that `dend ==
   5974                      end_match_1' while the restored d is in string2.
   5975                      For example, the pattern `x.*y.*z' against the
   5976                      strings `x-' and `y-z-', if the two strings are
   5977                      not consecutive in memory.  */
   5978                   DEBUG_PRINT1 ("Restoring best registers.\n");
   5979 
   5980                   d = match_end;
   5981                   dend = ((d >= string1 && d <= end1)
   5982 		           ? end_match_1 : end_match_2);
   5983 
   5984 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5985 		    {
   5986 		      regstart[mcnt] = best_regstart[mcnt];
   5987 		      regend[mcnt] = best_regend[mcnt];
   5988 		    }
   5989                 }
   5990             } /* d != end_match_2 */
   5991 
   5992 	succeed_label:
   5993           DEBUG_PRINT1 ("Accepting match.\n");
   5994           /* If caller wants register contents data back, do it.  */
   5995           if (regs && !bufp->no_sub)
   5996 	    {
   5997 	      /* Have the register data arrays been allocated?  */
   5998               if (bufp->regs_allocated == REGS_UNALLOCATED)
   5999                 { /* No.  So allocate them with malloc.  We need one
   6000                      extra element beyond `num_regs' for the `-1' marker
   6001                      GNU code uses.  */
   6002                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
   6003                   regs->start = TALLOC (regs->num_regs, regoff_t);
   6004                   regs->end = TALLOC (regs->num_regs, regoff_t);
   6005                   if (regs->start == NULL || regs->end == NULL)
   6006 		    {
   6007 		      FREE_VARIABLES ();
   6008 		      return -2;
   6009 		    }
   6010                   bufp->regs_allocated = REGS_REALLOCATE;
   6011                 }
   6012               else if (bufp->regs_allocated == REGS_REALLOCATE)
   6013                 { /* Yes.  If we need more elements than were already
   6014                      allocated, reallocate them.  If we need fewer, just
   6015                      leave it alone.  */
   6016                   if (regs->num_regs < num_regs + 1)
   6017                     {
   6018                       regs->num_regs = num_regs + 1;
   6019                       RETALLOC (regs->start, regs->num_regs, regoff_t);
   6020                       RETALLOC (regs->end, regs->num_regs, regoff_t);
   6021                       if (regs->start == NULL || regs->end == NULL)
   6022 			{
   6023 			  FREE_VARIABLES ();
   6024 			  return -2;
   6025 			}
   6026                     }
   6027                 }
   6028               else
   6029 		{
   6030 		  /* These braces fend off an "empty body in an else-statement"
   6031 		     warning under GCC when assert expands to nothing.  */
   6032 		  assert (bufp->regs_allocated == REGS_FIXED);
   6033 		}
   6034 
   6035               /* Convert the pointer data in `regstart' and `regend' to
   6036                  indices.  Register zero has to be set differently,
   6037                  since we haven't kept track of any info for it.  */
   6038               if (regs->num_regs > 0)
   6039                 {
   6040                   regs->start[0] = pos;
   6041 #ifdef WCHAR
   6042 		  if (MATCHING_IN_FIRST_STRING)
   6043 		    regs->end[0] = mbs_offset1 != NULL ?
   6044 					mbs_offset1[d-string1] : 0;
   6045 		  else
   6046 		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
   6047 					     mbs_offset2[d-string2] : 0);
   6048 #else
   6049                   regs->end[0] = (MATCHING_IN_FIRST_STRING
   6050 				  ? ((regoff_t) (d - string1))
   6051 			          : ((regoff_t) (d - string2 + size1)));
   6052 #endif /* WCHAR */
   6053                 }
   6054 
   6055               /* Go through the first `min (num_regs, regs->num_regs)'
   6056                  registers, since that is all we initialized.  */
   6057 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
   6058 		   mcnt++)
   6059 		{
   6060                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
   6061                     regs->start[mcnt] = regs->end[mcnt] = -1;
   6062                   else
   6063                     {
   6064 		      regs->start[mcnt]
   6065 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
   6066                       regs->end[mcnt]
   6067 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
   6068                     }
   6069 		}
   6070 
   6071               /* If the regs structure we return has more elements than
   6072                  were in the pattern, set the extra elements to -1.  If
   6073                  we (re)allocated the registers, this is the case,
   6074                  because we always allocate enough to have at least one
   6075                  -1 at the end.  */
   6076               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
   6077                 regs->start[mcnt] = regs->end[mcnt] = -1;
   6078 	    } /* regs && !bufp->no_sub */
   6079 
   6080           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
   6081                         nfailure_points_pushed, nfailure_points_popped,
   6082                         nfailure_points_pushed - nfailure_points_popped);
   6083           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
   6084 
   6085 #ifdef WCHAR
   6086 	  if (MATCHING_IN_FIRST_STRING)
   6087 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
   6088 	  else
   6089 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
   6090 			csize1;
   6091           mcnt -= pos;
   6092 #else
   6093           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
   6094 			    ? string1
   6095 			    : string2 - size1);
   6096 #endif /* WCHAR */
   6097 
   6098           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
   6099 
   6100           FREE_VARIABLES ();
   6101           return mcnt;
   6102         }
   6103 
   6104       /* Otherwise match next pattern command.  */
   6105       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   6106 	{
   6107         /* Ignore these.  Used to ignore the n of succeed_n's which
   6108            currently have n == 0.  */
   6109         case no_op:
   6110           DEBUG_PRINT1 ("EXECUTING no_op.\n");
   6111           break;
   6112 
   6113 	case succeed:
   6114           DEBUG_PRINT1 ("EXECUTING succeed.\n");
   6115 	  goto succeed_label;
   6116 
   6117         /* Match the next n pattern characters exactly.  The following
   6118            byte in the pattern defines n, and the n bytes after that
   6119            are the characters to match.  */
   6120 	case exactn:
   6121 #ifdef MBS_SUPPORT
   6122 	case exactn_bin:
   6123 #endif
   6124 	  mcnt = *p++;
   6125           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
   6126 
   6127           /* This is written out as an if-else so we don't waste time
   6128              testing `translate' inside the loop.  */
   6129           if (translate)
   6130 	    {
   6131 	      do
   6132 		{
   6133 		  PREFETCH ();
   6134 #ifdef WCHAR
   6135 		  if (*d <= 0xff)
   6136 		    {
   6137 		      if ((UCHAR_T) translate[(unsigned char) *d++]
   6138 			  != (UCHAR_T) *p++)
   6139 			goto fail;
   6140 		    }
   6141 		  else
   6142 		    {
   6143 		      if (*d++ != (CHAR_T) *p++)
   6144 			goto fail;
   6145 		    }
   6146 #else
   6147 		  if ((UCHAR_T) translate[(unsigned char) *d++]
   6148 		      != (UCHAR_T) *p++)
   6149                     goto fail;
   6150 #endif /* WCHAR */
   6151 		}
   6152 	      while (--mcnt);
   6153 	    }
   6154 	  else
   6155 	    {
   6156 	      do
   6157 		{
   6158 		  PREFETCH ();
   6159 		  if (*d++ != (CHAR_T) *p++) goto fail;
   6160 		}
   6161 	      while (--mcnt);
   6162 	    }
   6163 	  SET_REGS_MATCHED ();
   6164           break;
   6165 
   6166 
   6167         /* Match any character except possibly a newline or a null.  */
   6168 	case anychar:
   6169           DEBUG_PRINT1 ("EXECUTING anychar.\n");
   6170 
   6171           PREFETCH ();
   6172 
   6173           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
   6174               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
   6175 	    goto fail;
   6176 
   6177           SET_REGS_MATCHED ();
   6178           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
   6179           d++;
   6180 	  break;
   6181 
   6182 
   6183 	case charset:
   6184 	case charset_not:
   6185 	  {
   6186 	    register UCHAR_T c;
   6187 #ifdef WCHAR
   6188 	    unsigned int i, char_class_length, coll_symbol_length,
   6189               equiv_class_length, ranges_length, chars_length, length;
   6190 	    CHAR_T *workp, *workp2, *charset_top;
   6191 #define WORK_BUFFER_SIZE 128
   6192             CHAR_T str_buf[WORK_BUFFER_SIZE];
   6193 # ifdef _LIBC
   6194 	    uint32_t nrules;
   6195 # endif /* _LIBC */
   6196 #endif /* WCHAR */
   6197 	    boolean negate = (re_opcode_t) *(p - 1) == charset_not;
   6198 
   6199             DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
   6200 	    PREFETCH ();
   6201 	    c = TRANSLATE (*d); /* The character to match.  */
   6202 #ifdef WCHAR
   6203 # ifdef _LIBC
   6204 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   6205 # endif /* _LIBC */
   6206 	    charset_top = p - 1;
   6207 	    char_class_length = *p++;
   6208 	    coll_symbol_length = *p++;
   6209 	    equiv_class_length = *p++;
   6210 	    ranges_length = *p++;
   6211 	    chars_length = *p++;
   6212 	    /* p points charset[6], so the address of the next instruction
   6213 	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
   6214 	       where l=length of char_classes, m=length of collating_symbol,
   6215 	       n=equivalence_class, o=length of char_range,
   6216 	       p'=length of character.  */
   6217 	    workp = p;
   6218 	    /* Update p to indicate the next instruction.  */
   6219 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
   6220               2*ranges_length + chars_length;
   6221 
   6222             /* match with char_class?  */
   6223 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
   6224 	      {
   6225 		wctype_t wctype;
   6226 		uintptr_t alignedp = ((uintptr_t)workp
   6227 				      + __alignof__(wctype_t) - 1)
   6228 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   6229 		wctype = *((wctype_t*)alignedp);
   6230 		workp += CHAR_CLASS_SIZE;
   6231 # ifdef _LIBC
   6232 		if (__iswctype((wint_t)c, wctype))
   6233 		  goto char_set_matched;
   6234 # else
   6235 		if (iswctype((wint_t)c, wctype))
   6236 		  goto char_set_matched;
   6237 # endif
   6238 	      }
   6239 
   6240             /* match with collating_symbol?  */
   6241 # ifdef _LIBC
   6242 	    if (nrules != 0)
   6243 	      {
   6244 		const unsigned char *extra = (const unsigned char *)
   6245 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   6246 
   6247 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
   6248 		     workp++)
   6249 		  {
   6250 		    int32_t *wextra;
   6251 		    wextra = (int32_t*)(extra + *workp++);
   6252 		    for (i = 0; i < *wextra; ++i)
   6253 		      if (TRANSLATE(d[i]) != wextra[1 + i])
   6254 			break;
   6255 
   6256 		    if (i == *wextra)
   6257 		      {
   6258 			/* Update d, however d will be incremented at
   6259 			   char_set_matched:, we decrement d here.  */
   6260 			d += i - 1;
   6261 			goto char_set_matched;
   6262 		      }
   6263 		  }
   6264 	      }
   6265 	    else /* (nrules == 0) */
   6266 # endif
   6267 	      /* If we can't look up collation data, we use wcscoll
   6268 		 instead.  */
   6269 	      {
   6270 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
   6271 		  {
   6272 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6273 # ifdef _LIBC
   6274 		    length = __wcslen (workp);
   6275 # else
   6276 		    length = wcslen (workp);
   6277 # endif
   6278 
   6279 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6280 		       any substring of the string never match with the
   6281 		       collating symbol.  */
   6282 # ifdef _LIBC
   6283 		    if (__wcscoll (workp, d) > 0)
   6284 # else
   6285 		    if (wcscoll (workp, d) > 0)
   6286 # endif
   6287 		      {
   6288 			workp += length + 1;
   6289 			continue;
   6290 		      }
   6291 
   6292 		    /* First, we compare the collating symbol with
   6293 		       the first character of the string.
   6294 		       If it doesn't match, we add the next character to
   6295 		       the compare buffer in turn.  */
   6296 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
   6297 		      {
   6298 			int match;
   6299 			if (d == dend)
   6300 			  {
   6301 			    if (dend == end_match_2)
   6302 			      break;
   6303 			    d = string2;
   6304 			    dend = end_match_2;
   6305 			  }
   6306 
   6307 			/* add next character to the compare buffer.  */
   6308 			str_buf[i] = TRANSLATE(*d);
   6309 			str_buf[i+1] = '\0';
   6310 
   6311 # ifdef _LIBC
   6312 			match = __wcscoll (workp, str_buf);
   6313 # else
   6314 			match = wcscoll (workp, str_buf);
   6315 # endif
   6316 			if (match == 0)
   6317 			  goto char_set_matched;
   6318 
   6319 			if (match < 0)
   6320 			  /* (str_buf > workp) indicate (str_buf + X > workp),
   6321 			     because for all X (str_buf + X > str_buf).
   6322 			     So we don't need continue this loop.  */
   6323 			  break;
   6324 
   6325 			/* Otherwise(str_buf < workp),
   6326 			   (str_buf+next_character) may equals (workp).
   6327 			   So we continue this loop.  */
   6328 		      }
   6329 		    /* not matched */
   6330 		    d = backup_d;
   6331 		    dend = backup_dend;
   6332 		    workp += length + 1;
   6333 		  }
   6334               }
   6335             /* match with equivalence_class?  */
   6336 # ifdef _LIBC
   6337 	    if (nrules != 0)
   6338 	      {
   6339                 const CHAR_T *backup_d = d, *backup_dend = dend;
   6340 		/* Try to match the equivalence class against
   6341 		   those known to the collate implementation.  */
   6342 		const int32_t *table;
   6343 		const int32_t *weights;
   6344 		const int32_t *extra;
   6345 		const int32_t *indirect;
   6346 		int32_t idx, idx2;
   6347 		wint_t *cp;
   6348 		size_t len;
   6349 
   6350 		/* This #include defines a local function!  */
   6351 #  include <locale/weightwc.h>
   6352 
   6353 		table = (const int32_t *)
   6354 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
   6355 		weights = (const wint_t *)
   6356 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
   6357 		extra = (const wint_t *)
   6358 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
   6359 		indirect = (const int32_t *)
   6360 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
   6361 
   6362 		/* Write 1 collating element to str_buf, and
   6363 		   get its index.  */
   6364 		idx2 = 0;
   6365 
   6366 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
   6367 		  {
   6368 		    cp = (wint_t*)str_buf;
   6369 		    if (d == dend)
   6370 		      {
   6371 			if (dend == end_match_2)
   6372 			  break;
   6373 			d = string2;
   6374 			dend = end_match_2;
   6375 		      }
   6376 		    str_buf[i] = TRANSLATE(*(d+i));
   6377 		    str_buf[i+1] = '\0'; /* sentinel */
   6378 		    idx2 = findidx ((const wint_t**)&cp);
   6379 		  }
   6380 
   6381 		/* Update d, however d will be incremented at
   6382 		   char_set_matched:, we decrement d here.  */
   6383 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
   6384 		if (d >= dend)
   6385 		  {
   6386 		    if (dend == end_match_2)
   6387 			d = dend;
   6388 		    else
   6389 		      {
   6390 			d = string2;
   6391 			dend = end_match_2;
   6392 		      }
   6393 		  }
   6394 
   6395 		len = weights[idx2];
   6396 
   6397 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
   6398 		     workp++)
   6399 		  {
   6400 		    idx = (int32_t)*workp;
   6401 		    /* We already checked idx != 0 in regex_compile. */
   6402 
   6403 		    if (idx2 != 0 && len == weights[idx])
   6404 		      {
   6405 			int cnt = 0;
   6406 			while (cnt < len && (weights[idx + 1 + cnt]
   6407 					     == weights[idx2 + 1 + cnt]))
   6408 			  ++cnt;
   6409 
   6410 			if (cnt == len)
   6411 			  goto char_set_matched;
   6412 		      }
   6413 		  }
   6414 		/* not matched */
   6415                 d = backup_d;
   6416                 dend = backup_dend;
   6417 	      }
   6418 	    else /* (nrules == 0) */
   6419 # endif
   6420 	      /* If we can't look up collation data, we use wcscoll
   6421 		 instead.  */
   6422 	      {
   6423 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
   6424 		  {
   6425 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6426 # ifdef _LIBC
   6427 		    length = __wcslen (workp);
   6428 # else
   6429 		    length = wcslen (workp);
   6430 # endif
   6431 
   6432 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6433 		       any substring of the string never match with the
   6434 		       collating symbol.  */
   6435 # ifdef _LIBC
   6436 		    if (__wcscoll (workp, d) > 0)
   6437 # else
   6438 		    if (wcscoll (workp, d) > 0)
   6439 # endif
   6440 		      {
   6441 			workp += length + 1;
   6442 			break;
   6443 		      }
   6444 
   6445 		    /* First, we compare the equivalence class with
   6446 		       the first character of the string.
   6447 		       If it doesn't match, we add the next character to
   6448 		       the compare buffer in turn.  */
   6449 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
   6450 		      {
   6451 			int match;
   6452 			if (d == dend)
   6453 			  {
   6454 			    if (dend == end_match_2)
   6455 			      break;
   6456 			    d = string2;
   6457 			    dend = end_match_2;
   6458 			  }
   6459 
   6460 			/* add next character to the compare buffer.  */
   6461 			str_buf[i] = TRANSLATE(*d);
   6462 			str_buf[i+1] = '\0';
   6463 
   6464 # ifdef _LIBC
   6465 			match = __wcscoll (workp, str_buf);
   6466 # else
   6467 			match = wcscoll (workp, str_buf);
   6468 # endif
   6469 
   6470 			if (match == 0)
   6471 			  goto char_set_matched;
   6472 
   6473 			if (match < 0)
   6474 			/* (str_buf > workp) implies (str_buf + X > workp),
   6475 			   because for all X (str_buf + X > str_buf).
   6476 			   So we don't need to continue this loop.  */
   6477 			  break;
   6478 
   6479 			/* Otherwise (str_buf < workp),
   6480 			   (str_buf+next_character) may equal (workp).
   6481 			   So we continue this loop.  */
   6482 		      }
   6483 		    /* not matched */
   6484 		    d = backup_d;
   6485 		    dend = backup_dend;
   6486 		    workp += length + 1;
   6487 		  }
   6488 	      }
   6489 
   6490             /* match with char_range?  */
   6491 # ifdef _LIBC
   6492 	    if (nrules != 0)
   6493 	      {
   6494 		uint32_t collseqval;
   6495 		const char *collseq = (const char *)
   6496 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   6497 
   6498 		collseqval = collseq_table_lookup (collseq, c);
   6499 
   6500 		for (; workp < p - chars_length ;)
   6501 		  {
   6502 		    uint32_t start_val, end_val;
   6503 
   6504 		    /* We already compute the collation sequence value
   6505 		       of the characters (or collating symbols).  */
   6506 		    start_val = (uint32_t) *workp++; /* range_start */
   6507 		    end_val = (uint32_t) *workp++; /* range_end */
   6508 
   6509 		    if (start_val <= collseqval && collseqval <= end_val)
   6510 		      goto char_set_matched;
   6511 		  }
   6512 	      }
   6513 	    else
   6514 # endif
   6515 	      {
   6516 		/* We set range_start_char at str_buf[0], range_end_char
   6517 		   at str_buf[4], and compared char at str_buf[2].  */
   6518 		str_buf[1] = 0;
   6519 		str_buf[2] = c;
   6520 		str_buf[3] = 0;
   6521 		str_buf[5] = 0;
   6522 		for (; workp < p - chars_length ;)
   6523 		  {
   6524 		    wchar_t *range_start_char, *range_end_char;
   6525 
   6526 		    /* match if (range_start_char <= c <= range_end_char).  */
   6527 
   6528 		    /* If range_start(or end) < 0, we assume -range_start(end)
   6529 		       is the offset of the collating symbol which is specified
   6530 		       as the character of the range start(end).  */
   6531 
   6532 		    /* range_start */
   6533 		    if (*workp < 0)
   6534 		      range_start_char = charset_top - (*workp++);
   6535 		    else
   6536 		      {
   6537 			str_buf[0] = *workp++;
   6538 			range_start_char = str_buf;
   6539 		      }
   6540 
   6541 		    /* range_end */
   6542 		    if (*workp < 0)
   6543 		      range_end_char = charset_top - (*workp++);
   6544 		    else
   6545 		      {
   6546 			str_buf[4] = *workp++;
   6547 			range_end_char = str_buf + 4;
   6548 		      }
   6549 
   6550 # ifdef _LIBC
   6551 		    if (__wcscoll (range_start_char, str_buf+2) <= 0
   6552 			&& __wcscoll (str_buf+2, range_end_char) <= 0)
   6553 # else
   6554 		    if (wcscoll (range_start_char, str_buf+2) <= 0
   6555 			&& wcscoll (str_buf+2, range_end_char) <= 0)
   6556 # endif
   6557 		      goto char_set_matched;
   6558 		  }
   6559 	      }
   6560 
   6561             /* match with char?  */
   6562 	    for (; workp < p ; workp++)
   6563 	      if (c == *workp)
   6564 		goto char_set_matched;
   6565 
   6566 	    negate = !negate;
   6567 
   6568 	  char_set_matched:
   6569 	    if (negate) goto fail;
   6570 #else
   6571             /* Cast to `unsigned' instead of `unsigned char' in case the
   6572                bit list is a full 32 bytes long.  */
   6573 	    if (c < (unsigned) (*p * BYTEWIDTH)
   6574 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   6575 	      negate = !negate;
   6576 
   6577 	    p += 1 + *p;
   6578 
   6579 	    if (!negate) goto fail;
   6580 #undef WORK_BUFFER_SIZE
   6581 #endif /* WCHAR */
   6582 	    SET_REGS_MATCHED ();
   6583             d++;
   6584 	    break;
   6585 	  }
   6586 
   6587 
   6588         /* The beginning of a group is represented by start_memory.
   6589            The arguments are the register number in the next byte, and the
   6590            number of groups inner to this one in the next.  The text
   6591            matched within the group is recorded (in the internal
   6592            registers data structure) under the register number.  */
   6593         case start_memory:
   6594 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
   6595 			(long int) *p, (long int) p[1]);
   6596 
   6597           /* Find out if this group can match the empty string.  */
   6598 	  p1 = p;		/* To send to group_match_null_string_p.  */
   6599 
   6600           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
   6601             REG_MATCH_NULL_STRING_P (reg_info[*p])
   6602               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
   6603 
   6604           /* Save the position in the string where we were the last time
   6605              we were at this open-group operator in case the group is
   6606              operated upon by a repetition operator, e.g., with `(a*)*b'
   6607              against `ab'; then we want to ignore where we are now in
   6608              the string in case this attempt to match fails.  */
   6609           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6610                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
   6611                              : regstart[*p];
   6612 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
   6613 			 POINTER_TO_OFFSET (old_regstart[*p]));
   6614 
   6615           regstart[*p] = d;
   6616 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
   6617 
   6618           IS_ACTIVE (reg_info[*p]) = 1;
   6619           MATCHED_SOMETHING (reg_info[*p]) = 0;
   6620 
   6621 	  /* Clear this whenever we change the register activity status.  */
   6622 	  set_regs_matched_done = 0;
   6623 
   6624           /* This is the new highest active register.  */
   6625           highest_active_reg = *p;
   6626 
   6627           /* If nothing was active before, this is the new lowest active
   6628              register.  */
   6629           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6630             lowest_active_reg = *p;
   6631 
   6632           /* Move past the register number and inner group count.  */
   6633           p += 2;
   6634 	  just_past_start_mem = p;
   6635 
   6636           break;
   6637 
   6638 
   6639         /* The stop_memory opcode represents the end of a group.  Its
   6640            arguments are the same as start_memory's: the register
   6641            number, and the number of inner groups.  */
   6642 	case stop_memory:
   6643 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
   6644 			(long int) *p, (long int) p[1]);
   6645 
   6646           /* We need to save the string position the last time we were at
   6647              this close-group operator in case the group is operated
   6648              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
   6649              against `aba'; then we want to ignore where we are now in
   6650              the string in case this attempt to match fails.  */
   6651           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6652                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
   6653 			   : regend[*p];
   6654 	  DEBUG_PRINT2 ("      old_regend: %d\n",
   6655 			 POINTER_TO_OFFSET (old_regend[*p]));
   6656 
   6657           regend[*p] = d;
   6658 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
   6659 
   6660           /* This register isn't active anymore.  */
   6661           IS_ACTIVE (reg_info[*p]) = 0;
   6662 
   6663 	  /* Clear this whenever we change the register activity status.  */
   6664 	  set_regs_matched_done = 0;
   6665 
   6666           /* If this was the only register active, nothing is active
   6667              anymore.  */
   6668           if (lowest_active_reg == highest_active_reg)
   6669             {
   6670               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6671               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6672             }
   6673           else
   6674             { /* We must scan for the new highest active register, since
   6675                  it isn't necessarily one less than now: consider
   6676                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
   6677                  new highest active register is 1.  */
   6678               UCHAR_T r = *p - 1;
   6679               while (r > 0 && !IS_ACTIVE (reg_info[r]))
   6680                 r--;
   6681 
   6682               /* If we end up at register zero, that means that we saved
   6683                  the registers as the result of an `on_failure_jump', not
   6684                  a `start_memory', and we jumped past the innermost
   6685                  `stop_memory'.  For example, in ((.)*) we save
   6686                  registers 1 and 2 as a result of the *, but when we pop
   6687                  back to the second ), we are at the stop_memory 1.
   6688                  Thus, nothing is active.  */
   6689 	      if (r == 0)
   6690                 {
   6691                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6692                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6693                 }
   6694               else
   6695                 highest_active_reg = r;
   6696             }
   6697 
   6698           /* If just failed to match something this time around with a
   6699              group that's operated on by a repetition operator, try to
   6700              force exit from the ``loop'', and restore the register
   6701              information for this group that we had before trying this
   6702              last match.  */
   6703           if ((!MATCHED_SOMETHING (reg_info[*p])
   6704                || just_past_start_mem == p - 1)
   6705 	      && (p + 2) < pend)
   6706             {
   6707               boolean is_a_jump_n = false;
   6708 
   6709               p1 = p + 2;
   6710               mcnt = 0;
   6711               switch ((re_opcode_t) *p1++)
   6712                 {
   6713                   case jump_n:
   6714 		    is_a_jump_n = true;
   6715                   case pop_failure_jump:
   6716 		  case maybe_pop_jump:
   6717 		  case jump:
   6718 		  case dummy_failure_jump:
   6719                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6720 		    if (is_a_jump_n)
   6721 		      p1 += OFFSET_ADDRESS_SIZE;
   6722                     break;
   6723 
   6724                   default:
   6725                     /* do nothing */ ;
   6726                 }
   6727 	      p1 += mcnt;
   6728 
   6729               /* If the next operation is a jump backwards in the pattern
   6730 	         to an on_failure_jump right before the start_memory
   6731                  corresponding to this stop_memory, exit from the loop
   6732                  by forcing a failure after pushing on the stack the
   6733                  on_failure_jump's jump in the pattern, and d.  */
   6734               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
   6735                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
   6736 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
   6737 		{
   6738                   /* If this group ever matched anything, then restore
   6739                      what its registers were before trying this last
   6740                      failed match, e.g., with `(a*)*b' against `ab' for
   6741                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
   6742                      against `aba' for regend[3].
   6743 
   6744                      Also restore the registers for inner groups for,
   6745                      e.g., `((a*)(b*))*' against `aba' (register 3 would
   6746                      otherwise get trashed).  */
   6747 
   6748                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
   6749 		    {
   6750 		      unsigned r;
   6751 
   6752                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
   6753 
   6754 		      /* Restore this and inner groups' (if any) registers.  */
   6755                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
   6756 			   r++)
   6757                         {
   6758                           regstart[r] = old_regstart[r];
   6759 
   6760                           /* xx why this test?  */
   6761                           if (old_regend[r] >= regstart[r])
   6762                             regend[r] = old_regend[r];
   6763                         }
   6764                     }
   6765 		  p1++;
   6766                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6767                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
   6768 
   6769                   goto fail;
   6770                 }
   6771             }
   6772 
   6773           /* Move past the register number and the inner group count.  */
   6774           p += 2;
   6775           break;
   6776 
   6777 
   6778 	/* \<digit> has been turned into a `duplicate' command which is
   6779            followed by the numeric value of <digit> as the register number.  */
   6780         case duplicate:
   6781 	  {
   6782 	    register const CHAR_T *d2, *dend2;
   6783 	    int regno = *p++;   /* Get which register to match against.  */
   6784 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
   6785 
   6786 	    /* Can't back reference a group which we've never matched.  */
   6787             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
   6788               goto fail;
   6789 
   6790             /* Where in input to try to start matching.  */
   6791             d2 = regstart[regno];
   6792 
   6793             /* Where to stop matching; if both the place to start and
   6794                the place to stop matching are in the same string, then
   6795                set to the place to stop, otherwise, for now have to use
   6796                the end of the first string.  */
   6797 
   6798             dend2 = ((FIRST_STRING_P (regstart[regno])
   6799 		      == FIRST_STRING_P (regend[regno]))
   6800 		     ? regend[regno] : end_match_1);
   6801 	    for (;;)
   6802 	      {
   6803 		/* If necessary, advance to next segment in register
   6804                    contents.  */
   6805 		while (d2 == dend2)
   6806 		  {
   6807 		    if (dend2 == end_match_2) break;
   6808 		    if (dend2 == regend[regno]) break;
   6809 
   6810                     /* End of string1 => advance to string2. */
   6811                     d2 = string2;
   6812                     dend2 = regend[regno];
   6813 		  }
   6814 		/* At end of register contents => success */
   6815 		if (d2 == dend2) break;
   6816 
   6817 		/* If necessary, advance to next segment in data.  */
   6818 		PREFETCH ();
   6819 
   6820 		/* How many characters left in this segment to match.  */
   6821 		mcnt = dend - d;
   6822 
   6823 		/* Want how many consecutive characters we can match in
   6824                    one shot, so, if necessary, adjust the count.  */
   6825                 if (mcnt > dend2 - d2)
   6826 		  mcnt = dend2 - d2;
   6827 
   6828 		/* Compare that many; failure if mismatch, else move
   6829                    past them.  */
   6830 		if (translate
   6831                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
   6832                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
   6833 		  goto fail;
   6834 		d += mcnt, d2 += mcnt;
   6835 
   6836 		/* Do this because we've matched some characters.  */
   6837 		SET_REGS_MATCHED ();
   6838 	      }
   6839 	  }
   6840 	  break;
   6841 
   6842 
   6843         /* begline matches the empty string at the beginning of the string
   6844            (unless `not_bol' is set in `bufp'), and, if
   6845            `newline_anchor' is set, after newlines.  */
   6846 	case begline:
   6847           DEBUG_PRINT1 ("EXECUTING begline.\n");
   6848 
   6849           if (AT_STRINGS_BEG (d))
   6850             {
   6851               if (!bufp->not_bol) break;
   6852             }
   6853           else if (d[-1] == '\n' && bufp->newline_anchor)
   6854             {
   6855               break;
   6856             }
   6857           /* In all other cases, we fail.  */
   6858           goto fail;
   6859 
   6860 
   6861         /* endline is the dual of begline.  */
   6862 	case endline:
   6863           DEBUG_PRINT1 ("EXECUTING endline.\n");
   6864 
   6865           if (AT_STRINGS_END (d))
   6866             {
   6867               if (!bufp->not_eol) break;
   6868             }
   6869 
   6870           /* We have to ``prefetch'' the next character.  */
   6871           else if ((d == end1 ? *string2 : *d) == '\n'
   6872                    && bufp->newline_anchor)
   6873             {
   6874               break;
   6875             }
   6876           goto fail;
   6877 
   6878 
   6879 	/* Match at the very beginning of the data.  */
   6880         case begbuf:
   6881           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
   6882           if (AT_STRINGS_BEG (d))
   6883             break;
   6884           goto fail;
   6885 
   6886 
   6887 	/* Match at the very end of the data.  */
   6888         case endbuf:
   6889           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
   6890 	  if (AT_STRINGS_END (d))
   6891 	    break;
   6892           goto fail;
   6893 
   6894 
   6895         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
   6896            pushes NULL as the value for the string on the stack.  Then
   6897            `pop_failure_point' will keep the current value for the
   6898            string, instead of restoring it.  To see why, consider
   6899            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
   6900            then the . fails against the \n.  But the next thing we want
   6901            to do is match the \n against the \n; if we restored the
   6902            string value, we would be back at the foo.
   6903 
   6904            Because this is used only in specific cases, we don't need to
   6905            check all the things that `on_failure_jump' does, to make
   6906            sure the right things get saved on the stack.  Hence we don't
   6907            share its code.  The only reason to push anything on the
   6908            stack at all is that otherwise we would have to change
   6909            `anychar's code to do something besides goto fail in this
   6910            case; that seems worse than this.  */
   6911         case on_failure_keep_string_jump:
   6912           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
   6913 
   6914           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6915 #ifdef _LIBC
   6916           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
   6917 #else
   6918           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
   6919 #endif
   6920 
   6921           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
   6922           break;
   6923 
   6924 
   6925 	/* Uses of on_failure_jump:
   6926 
   6927            Each alternative starts with an on_failure_jump that points
   6928            to the beginning of the next alternative.  Each alternative
   6929            except the last ends with a jump that in effect jumps past
   6930            the rest of the alternatives.  (They really jump to the
   6931            ending jump of the following alternative, because tensioning
   6932            these jumps is a hassle.)
   6933 
   6934            Repeats start with an on_failure_jump that points past both
   6935            the repetition text and either the following jump or
   6936            pop_failure_jump back to this on_failure_jump.  */
   6937 	case on_failure_jump:
   6938         on_failure:
   6939           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
   6940 
   6941           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6942 #ifdef _LIBC
   6943           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
   6944 #else
   6945           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
   6946 #endif
   6947 
   6948           /* If this on_failure_jump comes right before a group (i.e.,
   6949              the original * applied to a group), save the information
   6950              for that group and all inner ones, so that if we fail back
   6951              to this point, the group's information will be correct.
   6952              For example, in \(a*\)*\1, we need the preceding group,
   6953              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
   6954 
   6955           /* We can't use `p' to check ahead because we push
   6956              a failure point to `p + mcnt' after we do this.  */
   6957           p1 = p;
   6958 
   6959           /* We need to skip no_op's before we look for the
   6960              start_memory in case this on_failure_jump is happening as
   6961              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
   6962              against aba.  */
   6963           while (p1 < pend && (re_opcode_t) *p1 == no_op)
   6964             p1++;
   6965 
   6966           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
   6967             {
   6968               /* We have a new highest active register now.  This will
   6969                  get reset at the start_memory we are about to get to,
   6970                  but we will have saved all the registers relevant to
   6971                  this repetition op, as described above.  */
   6972               highest_active_reg = *(p1 + 1) + *(p1 + 2);
   6973               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6974                 lowest_active_reg = *(p1 + 1);
   6975             }
   6976 
   6977           DEBUG_PRINT1 (":\n");
   6978           PUSH_FAILURE_POINT (p + mcnt, d, -2);
   6979           break;
   6980 
   6981 
   6982         /* A smart repeat ends with `maybe_pop_jump'.
   6983 	   We change it to either `pop_failure_jump' or `jump'.  */
   6984         case maybe_pop_jump:
   6985           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6986           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
   6987           {
   6988 	    register UCHAR_T *p2 = p;
   6989 
   6990             /* Compare the beginning of the repeat with what follows
   6991                its end in the pattern.  If we can establish that there
   6992                is nothing that they would both match, i.e., nothing
   6993                that would force us to backtrack (as in, e.g., `a*a'),
   6994                then we can change to pop_failure_jump, because we'll
   6995                never have to backtrack.
   6996 
   6997                This is not true in the case of alternatives: in
   6998                `(a|ab)*' we do need to backtrack to the `ab' alternative
   6999                (e.g., if the string was `ab').  But instead of trying to
   7000                detect that here, the alternative has put on a dummy
   7001                failure point which is what we will end up popping.  */
   7002 
   7003 	    /* Skip over open/close-group commands.
   7004 	       If what follows this loop is a ...+ construct,
   7005 	       look at what begins its body, since we will have to
   7006 	       match at least one of that.  */
   7007 	    while (1)
   7008 	      {
   7009 		if (p2 + 2 < pend
   7010 		    && ((re_opcode_t) *p2 == stop_memory
   7011 			|| (re_opcode_t) *p2 == start_memory))
   7012 		  p2 += 3;
   7013 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
   7014 			 && (re_opcode_t) *p2 == dummy_failure_jump)
   7015 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
   7016 		else
   7017 		  break;
   7018 	      }
   7019 
   7020 	    p1 = p + mcnt;
   7021 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
   7022 	       to the `maybe_pop_jump' of this case.  Examine what
   7023 	       follows.  */
   7024 
   7025             /* If we're at the end of the pattern, we can change.  */
   7026             if (p2 == pend)
   7027 	      {
   7028 		/* Consider what happens when matching ":\(.*\)"
   7029 		   against ":/".  I don't really understand this code
   7030 		   yet.  */
   7031   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7032 		  pop_failure_jump;
   7033                 DEBUG_PRINT1
   7034                   ("  End of pattern: change to `pop_failure_jump'.\n");
   7035               }
   7036 
   7037             else if ((re_opcode_t) *p2 == exactn
   7038 #ifdef MBS_SUPPORT
   7039 		     || (re_opcode_t) *p2 == exactn_bin
   7040 #endif
   7041 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
   7042 	      {
   7043 		register UCHAR_T c
   7044                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
   7045 
   7046                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
   7047 #ifdef MBS_SUPPORT
   7048 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
   7049 #endif
   7050 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
   7051                   {
   7052   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7053 		      pop_failure_jump;
   7054 #ifdef WCHAR
   7055 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
   7056 				    (wint_t) c,
   7057 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
   7058 #else
   7059 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   7060 				    (char) c,
   7061 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
   7062 #endif
   7063                   }
   7064 
   7065 #ifndef WCHAR
   7066 		else if ((re_opcode_t) p1[3] == charset
   7067 			 || (re_opcode_t) p1[3] == charset_not)
   7068 		  {
   7069 		    int negate = (re_opcode_t) p1[3] == charset_not;
   7070 
   7071 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
   7072 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   7073 		      negate = !negate;
   7074 
   7075                     /* `negate' is equal to 1 if c would match, which means
   7076                         that we can't change to pop_failure_jump.  */
   7077 		    if (!negate)
   7078                       {
   7079   		        p[-3] = (unsigned char) pop_failure_jump;
   7080                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7081                       }
   7082 		  }
   7083 #endif /* not WCHAR */
   7084 	      }
   7085 #ifndef WCHAR
   7086             else if ((re_opcode_t) *p2 == charset)
   7087 	      {
   7088 		/* We win if the first character of the loop is not part
   7089                    of the charset.  */
   7090                 if ((re_opcode_t) p1[3] == exactn
   7091  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
   7092  			  && (p2[2 + p1[5] / BYTEWIDTH]
   7093  			      & (1 << (p1[5] % BYTEWIDTH)))))
   7094 		  {
   7095 		    p[-3] = (unsigned char) pop_failure_jump;
   7096 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7097                   }
   7098 
   7099 		else if ((re_opcode_t) p1[3] == charset_not)
   7100 		  {
   7101 		    int idx;
   7102 		    /* We win if the charset_not inside the loop
   7103 		       lists every character listed in the charset after.  */
   7104 		    for (idx = 0; idx < (int) p2[1]; idx++)
   7105 		      if (! (p2[2 + idx] == 0
   7106 			     || (idx < (int) p1[4]
   7107 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
   7108 			break;
   7109 
   7110 		    if (idx == p2[1])
   7111                       {
   7112   		        p[-3] = (unsigned char) pop_failure_jump;
   7113                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7114                       }
   7115 		  }
   7116 		else if ((re_opcode_t) p1[3] == charset)
   7117 		  {
   7118 		    int idx;
   7119 		    /* We win if the charset inside the loop
   7120 		       has no overlap with the one after the loop.  */
   7121 		    for (idx = 0;
   7122 			 idx < (int) p2[1] && idx < (int) p1[4];
   7123 			 idx++)
   7124 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
   7125 			break;
   7126 
   7127 		    if (idx == p2[1] || idx == p1[4])
   7128                       {
   7129   		        p[-3] = (unsigned char) pop_failure_jump;
   7130                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7131                       }
   7132 		  }
   7133 	      }
   7134 #endif /* not WCHAR */
   7135 	  }
   7136 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
   7137 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
   7138 	    {
   7139 	      p[-1] = (UCHAR_T) jump;
   7140               DEBUG_PRINT1 ("  Match => jump.\n");
   7141 	      goto unconditional_jump;
   7142 	    }
   7143         /* Note fall through.  */
   7144 
   7145 
   7146 	/* The end of a simple repeat has a pop_failure_jump back to
   7147            its matching on_failure_jump, where the latter will push a
   7148            failure point.  The pop_failure_jump takes off failure
   7149            points put on by this pop_failure_jump's matching
   7150            on_failure_jump; we got through the pattern to here from the
   7151            matching on_failure_jump, so didn't fail.  */
   7152         case pop_failure_jump:
   7153           {
   7154             /* We need to pass separate storage for the lowest and
   7155                highest registers, even though we don't care about the
   7156                actual values.  Otherwise, we will restore only one
   7157                register from the stack, since lowest will == highest in
   7158                `pop_failure_point'.  */
   7159             active_reg_t dummy_low_reg, dummy_high_reg;
   7160             UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL;
   7161             const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL;
   7162 
   7163             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
   7164             POP_FAILURE_POINT (sdummy, pdummy,
   7165                                dummy_low_reg, dummy_high_reg,
   7166                                reg_dummy, reg_dummy, reg_info_dummy);
   7167           }
   7168 	  /* Note fall through.  */
   7169 
   7170 	unconditional_jump:
   7171 #ifdef _LIBC
   7172 	  DEBUG_PRINT2 ("\n%p: ", p);
   7173 #else
   7174 	  DEBUG_PRINT2 ("\n0x%x: ", p);
   7175 #endif
   7176           /* Note fall through.  */
   7177 
   7178         /* Unconditionally jump (without popping any failure points).  */
   7179         case jump:
   7180 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
   7181           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
   7182 	  p += mcnt;				/* Do the jump.  */
   7183 #ifdef _LIBC
   7184           DEBUG_PRINT2 ("(to %p).\n", p);
   7185 #else
   7186           DEBUG_PRINT2 ("(to 0x%x).\n", p);
   7187 #endif
   7188 	  break;
   7189 
   7190 
   7191         /* We need this opcode so we can detect where alternatives end
   7192            in `group_match_null_string_p' et al.  */
   7193         case jump_past_alt:
   7194           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
   7195           goto unconditional_jump;
   7196 
   7197 
   7198         /* Normally, the on_failure_jump pushes a failure point, which
   7199            then gets popped at pop_failure_jump.  We will end up at
   7200            pop_failure_jump, also, and with a pattern of, say, `a+', we
   7201            are skipping over the on_failure_jump, so we have to push
   7202            something meaningless for pop_failure_jump to pop.  */
   7203         case dummy_failure_jump:
   7204           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
   7205           /* It doesn't matter what we push for the string here.  What
   7206              the code at `fail' tests is the value for the pattern.  */
   7207           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7208           goto unconditional_jump;
   7209 
   7210 
   7211         /* At the end of an alternative, we need to push a dummy failure
   7212            point in case we are followed by a `pop_failure_jump', because
   7213            we don't want the failure point for the alternative to be
   7214            popped.  For example, matching `(a|ab)*' against `aab'
   7215            requires that we match the `ab' alternative.  */
   7216         case push_dummy_failure:
   7217           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
   7218           /* See comments just above at `dummy_failure_jump' about the
   7219              two zeroes.  */
   7220           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7221           break;
   7222 
   7223         /* Have to succeed matching what follows at least n times.
   7224            After that, handle like `on_failure_jump'.  */
   7225         case succeed_n:
   7226           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7227           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
   7228 
   7229           assert (mcnt >= 0);
   7230           /* Originally, this is how many times we HAVE to succeed.  */
   7231           if (mcnt > 0)
   7232             {
   7233                mcnt--;
   7234 	       p += OFFSET_ADDRESS_SIZE;
   7235                STORE_NUMBER_AND_INCR (p, mcnt);
   7236 #ifdef _LIBC
   7237                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
   7238 			     , mcnt);
   7239 #else
   7240                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
   7241 			     , mcnt);
   7242 #endif
   7243             }
   7244 	  else if (mcnt == 0)
   7245             {
   7246 #ifdef _LIBC
   7247               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
   7248 			    p + OFFSET_ADDRESS_SIZE);
   7249 #else
   7250               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
   7251 			    p + OFFSET_ADDRESS_SIZE);
   7252 #endif /* _LIBC */
   7253 
   7254 #ifdef WCHAR
   7255 	      p[1] = (UCHAR_T) no_op;
   7256 #else
   7257 	      p[2] = (UCHAR_T) no_op;
   7258               p[3] = (UCHAR_T) no_op;
   7259 #endif /* WCHAR */
   7260               goto on_failure;
   7261             }
   7262           break;
   7263 
   7264         case jump_n:
   7265           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7266           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
   7267 
   7268           /* Originally, this is how many times we CAN jump.  */
   7269           if (mcnt)
   7270             {
   7271                mcnt--;
   7272                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
   7273 
   7274 #ifdef _LIBC
   7275                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7276 			     mcnt);
   7277 #else
   7278                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7279 			     mcnt);
   7280 #endif /* _LIBC */
   7281 	       goto unconditional_jump;
   7282             }
   7283           /* If don't have to jump any more, skip over the rest of command.  */
   7284 	  else
   7285 	    p += 2 * OFFSET_ADDRESS_SIZE;
   7286           break;
   7287 
   7288 	case set_number_at:
   7289 	  {
   7290             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
   7291 
   7292             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7293             p1 = p + mcnt;
   7294             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7295 #ifdef _LIBC
   7296             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
   7297 #else
   7298             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
   7299 #endif
   7300 	    STORE_NUMBER (p1, mcnt);
   7301             break;
   7302           }
   7303 
   7304 #if 0
   7305 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
   7306 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   7307 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
   7308 	   macro and introducing temporary variables works around the bug.  */
   7309 
   7310 	case wordbound:
   7311 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7312 	  if (AT_WORD_BOUNDARY (d))
   7313 	    break;
   7314 	  goto fail;
   7315 
   7316 	case notwordbound:
   7317 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7318 	  if (AT_WORD_BOUNDARY (d))
   7319 	    goto fail;
   7320 	  break;
   7321 #else
   7322 	case wordbound:
   7323 	{
   7324 	  boolean prevchar, thischar;
   7325 
   7326 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7327 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7328 	    break;
   7329 
   7330 	  prevchar = WORDCHAR_P (d - 1);
   7331 	  thischar = WORDCHAR_P (d);
   7332 	  if (prevchar != thischar)
   7333 	    break;
   7334 	  goto fail;
   7335 	}
   7336 
   7337       case notwordbound:
   7338 	{
   7339 	  boolean prevchar, thischar;
   7340 
   7341 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7342 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7343 	    goto fail;
   7344 
   7345 	  prevchar = WORDCHAR_P (d - 1);
   7346 	  thischar = WORDCHAR_P (d);
   7347 	  if (prevchar != thischar)
   7348 	    goto fail;
   7349 	  break;
   7350 	}
   7351 #endif
   7352 
   7353 	case wordbeg:
   7354           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
   7355 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
   7356 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
   7357 	    break;
   7358           goto fail;
   7359 
   7360 	case wordend:
   7361           DEBUG_PRINT1 ("EXECUTING wordend.\n");
   7362 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
   7363               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
   7364 	    break;
   7365           goto fail;
   7366 
   7367 #ifdef emacs
   7368   	case before_dot:
   7369           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
   7370  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
   7371   	    goto fail;
   7372   	  break;
   7373 
   7374   	case at_dot:
   7375           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
   7376  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
   7377   	    goto fail;
   7378   	  break;
   7379 
   7380   	case after_dot:
   7381           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
   7382           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
   7383   	    goto fail;
   7384   	  break;
   7385 
   7386 	case syntaxspec:
   7387           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
   7388 	  mcnt = *p++;
   7389 	  goto matchsyntax;
   7390 
   7391         case wordchar:
   7392           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
   7393 	  mcnt = (int) Sword;
   7394         matchsyntax:
   7395 	  PREFETCH ();
   7396 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7397 	  d++;
   7398 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
   7399 	    goto fail;
   7400           SET_REGS_MATCHED ();
   7401 	  break;
   7402 
   7403 	case notsyntaxspec:
   7404           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
   7405 	  mcnt = *p++;
   7406 	  goto matchnotsyntax;
   7407 
   7408         case notwordchar:
   7409           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
   7410 	  mcnt = (int) Sword;
   7411         matchnotsyntax:
   7412 	  PREFETCH ();
   7413 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7414 	  d++;
   7415 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
   7416 	    goto fail;
   7417 	  SET_REGS_MATCHED ();
   7418           break;
   7419 
   7420 #else /* not emacs */
   7421 	case wordchar:
   7422           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
   7423 	  PREFETCH ();
   7424           if (!WORDCHAR_P (d))
   7425             goto fail;
   7426 	  SET_REGS_MATCHED ();
   7427           d++;
   7428 	  break;
   7429 
   7430 	case notwordchar:
   7431           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
   7432 	  PREFETCH ();
   7433 	  if (WORDCHAR_P (d))
   7434             goto fail;
   7435           SET_REGS_MATCHED ();
   7436           d++;
   7437 	  break;
   7438 #endif /* not emacs */
   7439 
   7440         default:
   7441           abort ();
   7442 	}
   7443       continue;  /* Successfully executed one pattern command; keep going.  */
   7444 
   7445 
   7446     /* We goto here if a matching operation fails. */
   7447     fail:
   7448       if (!FAIL_STACK_EMPTY ())
   7449 	{ /* A restart point is known.  Restore to that state.  */
   7450           DEBUG_PRINT1 ("\nFAIL:\n");
   7451           POP_FAILURE_POINT (d, p,
   7452                              lowest_active_reg, highest_active_reg,
   7453                              regstart, regend, reg_info);
   7454 
   7455           /* If this failure point is a dummy, try the next one.  */
   7456           if (!p)
   7457 	    goto fail;
   7458 
   7459           /* If we failed to the end of the pattern, don't examine *p.  */
   7460 	  assert (p <= pend);
   7461           if (p < pend)
   7462             {
   7463               boolean is_a_jump_n = false;
   7464 
   7465               /* If failed to a backwards jump that's part of a repetition
   7466                  loop, need to pop this failure point and use the next one.  */
   7467               switch ((re_opcode_t) *p)
   7468                 {
   7469                 case jump_n:
   7470                   is_a_jump_n = true;
   7471                 case maybe_pop_jump:
   7472                 case pop_failure_jump:
   7473                 case jump:
   7474                   p1 = p + 1;
   7475                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7476                   p1 += mcnt;
   7477 
   7478                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
   7479                       || (!is_a_jump_n
   7480                           && (re_opcode_t) *p1 == on_failure_jump))
   7481                     goto fail;
   7482                   break;
   7483                 default:
   7484                   /* do nothing */ ;
   7485                 }
   7486             }
   7487 
   7488           if (d >= string1 && d <= end1)
   7489 	    dend = end_match_1;
   7490         }
   7491       else
   7492         break;   /* Matching at this starting point really fails.  */
   7493     } /* for (;;) */
   7494 
   7495   if (best_regs_set)
   7496     goto restore_best_regs;
   7497 
   7498   FREE_VARIABLES ();
   7499 
   7500   return -1;         			/* Failure to match.  */
   7501 } /* re_match_2 */
   7502 
   7503 /* Subroutine definitions for re_match_2.  */
   7505 
   7506 
   7507 /* We are passed P pointing to a register number after a start_memory.
   7508 
   7509    Return true if the pattern up to the corresponding stop_memory can
   7510    match the empty string, and false otherwise.
   7511 
   7512    If we find the matching stop_memory, sets P to point to one past its number.
   7513    Otherwise, sets P to an undefined byte less than or equal to END.
   7514 
   7515    We don't handle duplicates properly (yet).  */
   7516 
static boolean
PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
                                   PREFIX(register_info_type) *reg_info)
{
  /* Scratch value for the numbers decoded from the compiled pattern
     (jump offsets / alternative lengths).  */
  int mcnt;
  /* Point to after the args to the start_memory.  */
  UCHAR_T *p1 = *p + 2;

  while (p1 < end)
    {
      /* Skip over opcodes that can match nothing, and return true or
         false, as appropriate, when we get to one that can't, or to the
         matching stop_memory.  */

      switch ((re_opcode_t) *p1)
        {
        /* Could be either a loop or a series of alternatives.  */
        case on_failure_jump:
          p1++;
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);

          /* If the next operation is not a jump backwards in the
             pattern.  (For a negative offset nothing more is skipped
             here: P1 is simply left just past the jump's number.)  */

          if (mcnt >= 0)
            {
              /* Go through the on_failure_jumps of the alternatives,
                 seeing if any of the alternatives cannot match nothing.
                 The last alternative starts with only a jump,
                 whereas the rest start with on_failure_jump and end
                 with a jump, e.g., here is the pattern for `a|b|c':

                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
                 /exactn/1/c

                 So, we have to first go through the first (n-1)
                 alternatives and then deal with the last one separately.  */


              /* Deal with the first (n-1) alternatives, which start
                 with an on_failure_jump (see above) that jumps to right
                 past a jump_past_alt.  */

              while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
                     jump_past_alt)
                {
                  /* `mcnt' holds how many bytes long the alternative
                     is, including the ending `jump_past_alt' and
                     its number.  */

                  /* The range handed down stops at the jump_past_alt
                     opcode, so only the alternative's own body is
                     examined.  */
                  if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
                                                (1 + OFFSET_ADDRESS_SIZE),
                                                reg_info))
                    return false;

                  /* Move to right after this alternative, including the
                     jump_past_alt.  */
                  p1 += mcnt;

                  /* Break if it's the beginning of an n-th alternative
                     that doesn't begin with an on_failure_jump.  */
                  if ((re_opcode_t) *p1 != on_failure_jump)
                    break;

                  /* Still have to check that it's not an n-th
                     alternative that starts with an on_failure_jump.  */
                  p1++;
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                  if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
                      jump_past_alt)
                    {
                      /* Get to the beginning of the n-th alternative.  */
                      p1 -= 1 + OFFSET_ADDRESS_SIZE;
                      break;
                    }
                }

              /* Deal with the last alternative: go back and get number
                 of the `jump_past_alt' just before it.  `mcnt' contains
                 the length of the alternative.  */
              EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);

              if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
                return false;

              p1 += mcnt;	/* Get past the n-th alternative.  */
            } /* if mcnt >= 0 */
          break;


        case stop_memory:
          /* The byte following the opcode must be the same register
             number that *P pointed at on entry.  */
          assert (p1[1] == **p);
          /* Report back the position two bytes past the stop_memory
             opcode.  */
          *p = p1 + 2;
          return true;


        default:
          /* Any other opcode is delegated; on success the helper has
             already moved P1 past the opcode it accepted.  */
          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
            return false;
        }
    } /* while p1 < end */

  /* Ran up to END without ever reaching a stop_memory.  */
  return false;
} /* group_match_null_string_p */
   7622 
   7623 
   7624 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   7625    It expects P to be the first byte of a single alternative and END one
   7626    byte past the last. The alternative can contain groups.  */
   7627 
   7628 static boolean
   7629 PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
   7630                                  PREFIX(register_info_type) *reg_info)
   7631 {
   7632   int mcnt;
   7633   UCHAR_T *p1 = p;
   7634 
   7635   while (p1 < end)
   7636     {
   7637       /* Skip over opcodes that can match nothing, and break when we get
   7638          to one that can't.  */
   7639 
   7640       switch ((re_opcode_t) *p1)
   7641         {
   7642 	/* It's a loop.  */
   7643         case on_failure_jump:
   7644           p1++;
   7645           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7646           p1 += mcnt;
   7647           break;
   7648 
   7649 	default:
   7650           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7651             return false;
   7652         }
   7653     }  /* while p1 < end */
   7654 
   7655   return true;
   7656 } /* alt_match_null_string_p */
   7657 
   7658 
   7659 /* Deals with the ops common to group_match_null_string_p and
   7660    alt_match_null_string_p.
   7661 
   7662    Sets P to one after the op and its arguments, if any.  */
   7663 
   7664 static boolean
   7665 PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
   7666                                        PREFIX(register_info_type) *reg_info)
   7667 {
   7668   int mcnt;
   7669   boolean ret;
   7670   int reg_no;
   7671   UCHAR_T *p1 = *p;
   7672 
   7673   switch ((re_opcode_t) *p1++)
   7674     {
   7675     case no_op:
   7676     case begline:
   7677     case endline:
   7678     case begbuf:
   7679     case endbuf:
   7680     case wordbeg:
   7681     case wordend:
   7682     case wordbound:
   7683     case notwordbound:
   7684 #ifdef emacs
   7685     case before_dot:
   7686     case at_dot:
   7687     case after_dot:
   7688 #endif
   7689       break;
   7690 
   7691     case start_memory:
   7692       reg_no = *p1;
   7693       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
   7694       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
   7695 
   7696       /* Have to set this here in case we're checking a group which
   7697          contains a group and a back reference to it.  */
   7698 
   7699       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
   7700         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
   7701 
   7702       if (!ret)
   7703         return false;
   7704       break;
   7705 
   7706     /* If this is an optimized succeed_n for zero times, make the jump.  */
   7707     case jump:
   7708       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7709       if (mcnt >= 0)
   7710         p1 += mcnt;
   7711       else
   7712         return false;
   7713       break;
   7714 
   7715     case succeed_n:
   7716       /* Get to the number of times to succeed.  */
   7717       p1 += OFFSET_ADDRESS_SIZE;
   7718       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7719 
   7720       if (mcnt == 0)
   7721         {
   7722           p1 -= 2 * OFFSET_ADDRESS_SIZE;
   7723           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7724           p1 += mcnt;
   7725         }
   7726       else
   7727         return false;
   7728       break;
   7729 
   7730     case duplicate:
   7731       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
   7732         return false;
   7733       break;
   7734 
   7735     case set_number_at:
   7736       p1 += 2 * OFFSET_ADDRESS_SIZE;
   7737 
   7738     default:
   7739       /* All other opcodes mean we cannot match the empty string.  */
   7740       return false;
   7741   }
   7742 
   7743   *p = p1;
   7744   return true;
   7745 } /* common_op_match_null_string_p */
   7746 
   7747 
   7748 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   7749    bytes; nonzero otherwise.  */
   7750 
   7751 static int
   7752 PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
   7753                         RE_TRANSLATE_TYPE translate)
   7754 {
   7755   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
   7756   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
   7757   while (len)
   7758     {
   7759 #ifdef WCHAR
   7760       if (((*p1<=0xff)?translate[*p1++]:*p1++)
   7761 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
   7762 	return 1;
   7763 #else /* BYTE */
   7764       if (translate[*p1++] != translate[*p2++]) return 1;
   7765 #endif /* WCHAR */
   7766       len--;
   7767     }
   7768   return 0;
   7769 }
   7770 
   7771 
   7773 #else /* not INSIDE_RECURSION */
   7774 
   7775 /* Entry points for GNU code.  */
   7776 
   7777 /* re_compile_pattern is the GNU regular expression compiler: it
   7778    compiles PATTERN (of length SIZE) and puts the result in BUFP.
   7779    Returns 0 if the pattern was valid, otherwise an error string.
   7780 
   7781    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
   7782    are set in BUFP on entry.
   7783 
   7784    We call regex_compile to do the actual compilation.  */
   7785 
   7786 const char *
   7787 re_compile_pattern (const char *pattern, size_t length,
   7788                     struct re_pattern_buffer *bufp)
   7789 {
   7790   reg_errcode_t ret;
   7791 
   7792   /* GNU code is written to assume at least RE_NREGS registers will be set
   7793      (and at least one extra will be -1).  */
   7794   bufp->regs_allocated = REGS_UNALLOCATED;
   7795 
   7796   /* And GNU code determines whether or not to get register information
   7797      by passing null for the REGS argument to re_match, etc., not by
   7798      setting no_sub.  */
   7799   bufp->no_sub = 0;
   7800 
   7801   /* Match anchors at newline.  */
   7802   bufp->newline_anchor = 1;
   7803 
   7804 # ifdef MBS_SUPPORT
   7805   if (MB_CUR_MAX != 1)
   7806     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
   7807   else
   7808 # endif
   7809     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
   7810 
   7811   if (!ret)
   7812     return NULL;
   7813   return gettext (re_error_msgid[(int) ret]);
   7814 }
   7815 #ifdef _LIBC
   7816 weak_alias (__re_compile_pattern, re_compile_pattern)
   7817 #endif
   7818 
   7819 /* Entry points compatible with 4.2 BSD regex library.  We don't define
   7821    them unless specifically requested.  */
   7822 
   7823 #if defined _REGEX_RE_COMP || defined _LIBC
   7824 
   7825 /* BSD has one and only one pattern buffer.  */
   7826 static struct re_pattern_buffer re_comp_buf;
   7827 
   7828 char *
   7829 #ifdef _LIBC
   7830 /* Make these definitions weak in libc, so POSIX programs can redefine
   7831    these names if they don't use our functions, and still use
   7832    regcomp/regexec below without link errors.  */
   7833 weak_function
   7834 #endif
   7835 re_comp (const char *s)
   7836 {
   7837   reg_errcode_t ret;
   7838 
   7839   if (!s)
   7840     {
   7841       if (!re_comp_buf.buffer)
   7842 	return (char *) gettext ("No previous regular expression");
   7843       return 0;
   7844     }
   7845 
   7846   if (!re_comp_buf.buffer)
   7847     {
   7848       re_comp_buf.buffer = (unsigned char *) malloc (200);
   7849       if (re_comp_buf.buffer == NULL)
   7850         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
   7851       re_comp_buf.allocated = 200;
   7852 
   7853       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
   7854       if (re_comp_buf.fastmap == NULL)
   7855 	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
   7856     }
   7857 
   7858   /* Since `re_exec' always passes NULL for the `regs' argument, we
   7859      don't need to initialize the pattern buffer fields which affect it.  */
   7860 
   7861   /* Match anchors at newlines.  */
   7862   re_comp_buf.newline_anchor = 1;
   7863 
   7864 # ifdef MBS_SUPPORT
   7865   if (MB_CUR_MAX != 1)
   7866     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   7867   else
   7868 # endif
   7869     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   7870 
   7871   if (!ret)
   7872     return NULL;
   7873 
   7874   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
   7875   return (char *) gettext (re_error_msgid[(int) ret]);
   7876 }
   7877 
   7878 
   7879 int
   7880 #ifdef _LIBC
   7881 weak_function
   7882 #endif
   7883 re_exec (const char *s)
   7884 {
   7885   const int len = strlen (s);
   7886   return
   7887     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
   7888 }
   7889 
   7890 #endif /* _REGEX_RE_COMP */
   7891 
   7892 /* POSIX.2 functions.  Don't define these for Emacs.  */
   7894 
   7895 #ifndef emacs
   7896 
   7897 /* regcomp takes a regular expression as a string and compiles it.
   7898 
   7899    PREG is a regex_t *.  We do not expect any fields to be initialized,
   7900    since POSIX says we shouldn't.  Thus, we set
   7901 
   7902      `buffer' to the compiled pattern;
   7903      `used' to the length of the compiled pattern;
   7904      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
   7905        REG_EXTENDED bit in CFLAGS is set; otherwise, to
   7906        RE_SYNTAX_POSIX_BASIC;
   7907      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
   7908      `fastmap' to an allocated space for the fastmap;
   7909      `fastmap_accurate' to zero;
   7910      `re_nsub' to the number of subexpressions in PATTERN.
   7911 
   7912    PATTERN is the address of the pattern string.
   7913 
   7914    CFLAGS is a series of bits which affect compilation.
   7915 
   7916      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
   7917      use POSIX basic syntax.
   7918 
   7919      If REG_NEWLINE is set, then . and [^...] don't match newline.
   7920      Also, regexec will try a match beginning after every newline.
   7921 
   7922      If REG_ICASE is set, then we consider upper- and lowercase
   7923      versions of letters to be equivalent when matching.
   7924 
   7925      If REG_NOSUB is set, then when PREG is passed to regexec, that
   7926      routine will report only success or failure, and nothing about the
   7927      registers.
   7928 
   7929    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
   7930    the return codes and their meanings.)  */
   7931 
   7932 int
   7933 regcomp (regex_t *preg, const char *pattern, int cflags)
   7934 {
   7935   reg_errcode_t ret;
   7936   reg_syntax_t syntax
   7937     = (cflags & REG_EXTENDED) ?
   7938       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
   7939 
   7940   /* regex_compile will allocate the space for the compiled pattern.  */
   7941   preg->buffer = 0;
   7942   preg->allocated = 0;
   7943   preg->used = 0;
   7944 
   7945   /* Try to allocate space for the fastmap.  */
   7946   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
   7947 
   7948   if (cflags & REG_ICASE)
   7949     {
   7950       int i;
   7951 
   7952       preg->translate
   7953 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
   7954 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
   7955       if (preg->translate == NULL)
   7956         return (int) REG_ESPACE;
   7957 
   7958       /* Map uppercase characters to corresponding lowercase ones.  */
   7959       for (i = 0; i < CHAR_SET_SIZE; i++)
   7960         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
   7961     }
   7962   else
   7963     preg->translate = NULL;
   7964 
   7965   /* If REG_NEWLINE is set, newlines are treated differently.  */
   7966   if (cflags & REG_NEWLINE)
   7967     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
   7968       syntax &= ~RE_DOT_NEWLINE;
   7969       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
   7970       /* It also changes the matching behavior.  */
   7971       preg->newline_anchor = 1;
   7972     }
   7973   else
   7974     preg->newline_anchor = 0;
   7975 
   7976   preg->no_sub = !!(cflags & REG_NOSUB);
   7977 
   7978   /* POSIX says a null character in the pattern terminates it, so we
   7979      can use strlen here in compiling the pattern.  */
   7980 # ifdef MBS_SUPPORT
   7981   if (MB_CUR_MAX != 1)
   7982     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
   7983   else
   7984 # endif
   7985     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
   7986 
   7987   /* POSIX doesn't distinguish between an unmatched open-group and an
   7988      unmatched close-group: both are REG_EPAREN.  */
   7989   if (ret == REG_ERPAREN) ret = REG_EPAREN;
   7990 
   7991   if (ret == REG_NOERROR && preg->fastmap)
   7992     {
   7993       /* Compute the fastmap now, since regexec cannot modify the pattern
   7994 	 buffer.  */
   7995       if (re_compile_fastmap (preg) == -2)
   7996 	{
   7997 	  /* Some error occurred while computing the fastmap, just forget
   7998 	     about it.  */
   7999 	  free (preg->fastmap);
   8000 	  preg->fastmap = NULL;
   8001 	}
   8002     }
   8003 
   8004   return (int) ret;
   8005 }
   8006 #ifdef _LIBC
   8007 weak_alias (__regcomp, regcomp)
   8008 #endif
   8009 
   8010 
   8011 /* regexec searches for a given pattern, specified by PREG, in the
   8012    string STRING.
   8013 
   8014    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   8015    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   8016    least NMATCH elements, and we set them to the offsets of the
   8017    corresponding matched substrings.
   8018 
   8019    EFLAGS specifies `execution flags' which affect matching: if
   8020    REG_NOTBOL is set, then ^ does not match at the beginning of the
   8021    string; if REG_NOTEOL is set, then $ does not match at the end.
   8022 
   8023    We return 0 if we find a match and REG_NOMATCH if not.  */
   8024 
   8025 int
   8026 regexec (const regex_t *preg, const char *string, size_t nmatch,
   8027          regmatch_t pmatch[], int eflags)
   8028 {
   8029   int ret;
   8030   struct re_registers regs;
   8031   regex_t private_preg;
   8032   int len = strlen (string);
   8033   boolean want_reg_info = !preg->no_sub && nmatch > 0;
   8034 
   8035   private_preg = *preg;
   8036 
   8037   private_preg.not_bol = !!(eflags & REG_NOTBOL);
   8038   private_preg.not_eol = !!(eflags & REG_NOTEOL);
   8039 
   8040   /* The user has told us exactly how many registers to return
   8041      information about, via `nmatch'.  We have to pass that on to the
   8042      matching routines.  */
   8043   private_preg.regs_allocated = REGS_FIXED;
   8044 
   8045   if (want_reg_info)
   8046     {
   8047       regs.num_regs = nmatch;
   8048       regs.start = TALLOC (nmatch * 2, regoff_t);
   8049       if (regs.start == NULL)
   8050         return (int) REG_NOMATCH;
   8051       regs.end = regs.start + nmatch;
   8052     }
   8053 
   8054   /* Perform the searching operation.  */
   8055   ret = re_search (&private_preg, string, len,
   8056                    /* start: */ 0, /* range: */ len,
   8057                    want_reg_info ? &regs : (struct re_registers *) 0);
   8058 
   8059   /* Copy the register information to the POSIX structure.  */
   8060   if (want_reg_info)
   8061     {
   8062       if (ret >= 0)
   8063         {
   8064           unsigned r;
   8065 
   8066           for (r = 0; r < nmatch; r++)
   8067             {
   8068               pmatch[r].rm_so = regs.start[r];
   8069               pmatch[r].rm_eo = regs.end[r];
   8070             }
   8071         }
   8072 
   8073       /* If we needed the temporary register info, free the space now.  */
   8074       free (regs.start);
   8075     }
   8076 
   8077   /* We want zero return to mean success, unlike `re_search'.  */
   8078   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
   8079 }
   8080 #ifdef _LIBC
   8081 weak_alias (__regexec, regexec)
   8082 #endif
   8083 
   8084 
   8085 /* Returns a message corresponding to an error code, ERRCODE, returned
   8086    from either regcomp or regexec.   We don't use PREG here.  */
   8087 
   8088 size_t
   8089 regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
   8090           char *errbuf, size_t errbuf_size)
   8091 {
   8092   const char *msg;
   8093   size_t msg_size;
   8094 
   8095   if (errcode < 0
   8096       || errcode >= (int) (sizeof (re_error_msgid)
   8097 			   / sizeof (re_error_msgid[0])))
   8098     /* Only error codes returned by the rest of the code should be passed
   8099        to this routine.  If we are given anything else, or if other regex
   8100        code generates an invalid error code, then the program has a bug.
   8101        Dump core so we can fix it.  */
   8102     abort ();
   8103 
   8104   msg = gettext (re_error_msgid[errcode]);
   8105 
   8106   msg_size = strlen (msg) + 1; /* Includes the null.  */
   8107 
   8108   if (errbuf_size != 0)
   8109     {
   8110       if (msg_size > errbuf_size)
   8111         {
   8112 #if defined HAVE_MEMPCPY || defined _LIBC
   8113 	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
   8114 #else
   8115           memcpy (errbuf, msg, errbuf_size - 1);
   8116           errbuf[errbuf_size - 1] = 0;
   8117 #endif
   8118         }
   8119       else
   8120         memcpy (errbuf, msg, msg_size);
   8121     }
   8122 
   8123   return msg_size;
   8124 }
   8125 #ifdef _LIBC
   8126 weak_alias (__regerror, regerror)
   8127 #endif
   8128 
   8129 
   8130 /* Free dynamically allocated space used by PREG.  */
   8131 
   8132 void
   8133 regfree (regex_t *preg)
   8134 {
   8135   if (preg->buffer != NULL)
   8136     free (preg->buffer);
   8137   preg->buffer = NULL;
   8138 
   8139   preg->allocated = 0;
   8140   preg->used = 0;
   8141 
   8142   if (preg->fastmap != NULL)
   8143     free (preg->fastmap);
   8144   preg->fastmap = NULL;
   8145   preg->fastmap_accurate = 0;
   8146 
   8147   if (preg->translate != NULL)
   8148     free (preg->translate);
   8149   preg->translate = NULL;
   8150 }
   8151 #ifdef _LIBC
   8152 weak_alias (__regfree, regfree)
   8153 #endif
   8154 
   8155 #endif /* not emacs  */
   8156 
   8157 #endif /* not INSIDE_RECURSION */
   8158 
   8159 
/* Remove the helper macros set up earlier in this file, presumably so
   that a later pass over the file can define them afresh (the guards
   on INSIDE_RECURSION suggest the file is processed more than once --
   confirm against the including code).  Undefining a name that is not
   currently defined is harmless.  */

/* Number storage/extraction helpers.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

/* Debugging output helpers.  */
#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

/* Failure stack helpers.  */
#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

/* Pattern fetching and translation helpers.  */
#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

/* Compiled-buffer construction helpers.  */
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

/* Matcher helpers.  The third name is spelled MATCHING_IN_FIRST_STRING
   here; the misspelling "FRST" would name a macro that does not
   exist.  */
# undef POINTER_TO_OFFSET
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-type selection macros.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Marker left defined once this point has been reached.  */
# define DEFINED_ONCE
   8228