Home | History | Annotate | Line # | Download | only in lib
      1 /*	$NetBSD: regex.c,v 1.3 2020/09/26 11:39:17 mlelstv Exp $	*/
      2 
      3 /* Extended regular expression matching and search library,
      4    version 0.12.
      5    (Implements POSIX draft P1003.2/D11.2, except for some of the
      6    internationalization features.)
      7    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
      8 
      9    The GNU C Library is free software; you can redistribute it and/or
     10    modify it under the terms of the GNU Library General Public License as
     11    published by the Free Software Foundation; either version 2 of the
     12    License, or (at your option) any later version.
     13 
     14    The GNU C Library is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17    Library General Public License for more details.
     18 
     19    You should have received a copy of the GNU Library General Public
     20    License along with the GNU C Library; see the file COPYING.LIB.  If not,
     21    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     22    Boston, MA 02111-1307, USA.  */
     23 
     24 /* AIX requires this to be the first thing in the file. */
     25 #if defined _AIX && !defined REGEX_MALLOC
     26   #pragma alloca
     27 #endif
     28 
     29 #undef	_GNU_SOURCE
     30 #define _GNU_SOURCE
     31 
     32 #ifdef HAVE_CONFIG_H
     33 # include <config.h>
     34 #endif
     35 
     36 #ifndef PARAMS
     37 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
     38 #  define PARAMS(args) args
     39 # else
     40 #  define PARAMS(args) ()
     41 # endif  /* GCC.  */
     42 #endif  /* Not PARAMS.  */
     43 
     44 #if defined STDC_HEADERS && !defined emacs
     45 # include <stddef.h>
     46 #else
     47 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
     48 # include <sys/types.h>
     49 #endif
     50 
     51 #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
     52 
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
     55 #if defined _LIBC || WIDE_CHAR_SUPPORT
     56 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
     57 # include <wchar.h>
     58 # include <wctype.h>
     59 #endif
     60 
/* Character-type configuration.  With MBS_SUPPORT the pattern is
   handled as wchar_t elements; otherwise it is handled as plain
   bytes.  */
#ifdef MBS_SUPPORT
# define CHAR_TYPE wchar_t
# define US_CHAR_TYPE wchar_t/* unsigned character type */
# define COMPILED_BUFFER_VAR wc_buffer
/* Number of CHAR_TYPE elements by which STORE_NUMBER_AND_INCR and
   EXTRACT_NUMBER_AND_INCR advance.  */
# define OFFSET_ADDRESS_SIZE 1
# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1)
/* Print C with putchar in a single-byte locale, and as a wide
   character otherwise.  The argument is parenthesized before the cast
   so that a compound expression is converted as a whole.  */
# define PUT_CHAR(c) \
  do {									      \
    if (MB_CUR_MAX == 1)						      \
      putchar (c);							      \
    else								      \
      printf ("%C", (wint_t) (c)); /* Should we use wide stream??  */	      \
  } while (0)
# define TRUE 1
# define FALSE 0
#else
# define CHAR_TYPE char
# define US_CHAR_TYPE unsigned char /* unsigned character type */
# define COMPILED_BUFFER_VAR bufp->buffer
/* Numbers in the compiled pattern take two bytes here.  */
# define OFFSET_ADDRESS_SIZE 2
# define PUT_CHAR(c) putchar (c)
#endif /* MBS_SUPPORT */
     84 
     85 #ifdef _LIBC
     86 /* We have to keep the namespace clean.  */
     87 # define regfree(preg) __regfree (preg)
     88 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
     89 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
     90 # define regerror(errcode, preg, errbuf, errbuf_size) \
     91 	__regerror(errcode, preg, errbuf, errbuf_size)
     92 # define re_set_registers(bu, re, nu, st, en) \
     93 	__re_set_registers (bu, re, nu, st, en)
     94 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
     95 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     96 # define re_match(bufp, string, size, pos, regs) \
     97 	__re_match (bufp, string, size, pos, regs)
     98 # define re_search(bufp, string, size, startpos, range, regs) \
     99 	__re_search (bufp, string, size, startpos, range, regs)
    100 # define re_compile_pattern(pattern, length, bufp) \
    101 	__re_compile_pattern (pattern, length, bufp)
    102 # define re_set_syntax(syntax) __re_set_syntax (syntax)
    103 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
    104 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
    105 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
    106 
    107 # define btowc __btowc
    108 
    109 /* We are also using some library internals.  */
    110 # include <locale/localeinfo.h>
    111 # include <locale/elem-hash.h>
    112 # include <langinfo.h>
    113 # include <locale/coll-lookup.h>
    114 #endif
    115 
    116 /* This is for other GNU distributions with internationalized messages.  */
    117 #if HAVE_LIBINTL_H || defined _LIBC
    118 # include <libintl.h>
    119 # ifdef _LIBC
    120 #  undef gettext
    121 #  define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
    122 # endif
    123 #else
    124 # define gettext(msgid) (msgid)
    125 #endif
    126 
    127 #ifndef gettext_noop
    128 /* This define is so xgettext can find the internationalizable
    129    strings.  */
    130 # define gettext_noop(String) String
    131 #endif
    132 
    133 /* The `emacs' switch turns on certain matching commands
    134    that make sense only in Emacs. */
    135 #ifdef emacs
    136 
    137 # include "lisp.h"
    138 # include "buffer.h"
    139 # include "syntax.h"
    140 
    141 #else  /* not emacs */
    142 
    143 /* If we are not linking with Emacs proper,
    144    we can't use the relocating allocator
    145    even if config.h says that we can.  */
    146 # undef REL_ALLOC
    147 
    148 # if defined STDC_HEADERS || defined _LIBC
    149 #  include <stdlib.h>
    150 # else
    151 char *malloc ();
    152 char *realloc ();
    153 # endif
    154 
    155 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    156    If nothing else has been done, use the method below.  */
    157 # ifdef INHIBIT_STRING_HEADER
    158 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
    159 #   if !defined bzero && !defined bcopy
    160 #    undef INHIBIT_STRING_HEADER
    161 #   endif
    162 #  endif
    163 # endif
    164 
    165 /* This is the normal way of making sure we have a bcopy and a bzero.
    166    This is used in most programs--a few other programs avoid this
    167    by defining INHIBIT_STRING_HEADER.  */
    168 # ifndef INHIBIT_STRING_HEADER
    169 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
    170 #   include <string.h>
    171 #   ifndef bzero
    172 #    ifndef _LIBC
    173 #     define bzero(s, n)	(memset (s, '\0', n), (s))
    174 #    else
    175 #     define bzero(s, n)	__bzero (s, n)
    176 #    endif
    177 #   endif
    178 #  else
    179 #   include <strings.h>
    180 #   ifndef memcmp
    181 #    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
    182 #   endif
    183 #   ifndef memcpy
    184 #    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
    185 #   endif
    186 #  endif
    187 # endif
    188 
    189 /* Define the syntax stuff for \<, \>, etc.  */
    190 
    191 /* This must be nonzero for the wordchar and notwordchar pattern
    192    commands in re_match_2.  */
    193 # ifndef Sword
    194 #  define Sword 1
    195 # endif
    196 
    197 # ifdef SWITCH_ENUM_BUG
    198 #  define SWITCH_ENUM_CAST(x) ((int)(x))
    199 # else
    200 #  define SWITCH_ENUM_CAST(x) (x)
    201 # endif
    202 
    203 #endif /* not emacs */
    204 
    205 #if defined _LIBC || HAVE_LIMITS_H
    206 # include <limits.h>
    207 #endif
    208 
    209 #ifndef MB_LEN_MAX
    210 # define MB_LEN_MAX 1
    211 #endif
    212 
    213 /* Get the interface, including the syntax bits.  */
    215 #include <regex.h>
    216 
    217 /* isalpha etc. are used for the character classes.  */
    218 #include <ctype.h>
    219 
    220 /* Jim Meyering writes:
    221 
    222    "... Some ctype macros are valid only for character codes that
    223    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
    224    using /bin/cc or gcc but without giving an ansi option).  So, all
    225    ctype uses should be through macros like ISPRINT...  If
    226    STDC_HEADERS is defined, then autoconf has verified that the ctype
    227    macros don't need to be guarded with references to isascii. ...
    228    Defining isascii to 1 should let any compiler worth its salt
    229    eliminate the && through constant folding."
    230    Solaris defines some of these symbols so we must undefine them first.  */
    231 
    232 #undef ISASCII
    233 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
    234 # define ISASCII(c) 1
    235 #else
    236 # define ISASCII(c) isascii(c)
    237 #endif
    238 
    239 #ifdef isblank
    240 # define ISBLANK(c) (ISASCII (c) && isblank (c))
    241 #else
    242 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
    243 #endif
    244 #ifdef isgraph
    245 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
    246 #else
    247 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
    248 #endif
    249 
    250 #undef ISPRINT
    251 #define ISPRINT(c) (ISASCII (c) && isprint (c))
    252 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
    253 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
    254 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
    255 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
    256 #define ISLOWER(c) (ISASCII (c) && islower (c))
    257 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
    258 #define ISSPACE(c) (ISASCII (c) && isspace (c))
    259 #define ISUPPER(c) (ISASCII (c) && isupper (c))
    260 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
    261 
    262 #ifdef _tolower
    263 # define TOLOWER(c) _tolower(c)
    264 #else
    265 # define TOLOWER(c) tolower(c)
    266 #endif
    267 
    268 #ifndef NULL
    269 # define NULL (void *)0
    270 #endif
    271 
    272 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
    273    since ours (we hope) works properly with all combinations of
    274    machines, compilers, `char' and `unsigned char' argument types.
    275    (Per Bothner suggested the basic approach.)  */
    276 #undef SIGN_EXTEND_CHAR
    277 #if __STDC__
    278 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
    279 #else  /* not __STDC__ */
    280 /* As in Harbison and Steele.  */
    281 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
    282 #endif
    283 
    284 #ifndef emacs
    286 /* How many characters in the character set.  */
    287 # define CHAR_SET_SIZE 256
    288 
    289 # ifdef SYNTAX_TABLE
    290 
    291 extern char *re_syntax_table;
    292 
    293 # else /* not SYNTAX_TABLE */
    294 
    295 static char re_syntax_table[CHAR_SET_SIZE];
    296 
    297 static void init_syntax_once PARAMS ((void));
    298 
    299 static void
    300 init_syntax_once ()
    301 {
    302    register int c;
    303    static int done = 0;
    304 
    305    if (done)
    306      return;
    307    bzero (re_syntax_table, sizeof re_syntax_table);
    308 
    309    for (c = 0; c < CHAR_SET_SIZE; ++c)
    310      if (ISALNUM (c))
    311 	re_syntax_table[c] = Sword;
    312 
    313    re_syntax_table['_'] = Sword;
    314 
    315    done = 1;
    316 }
    317 
    318 # endif /* not SYNTAX_TABLE */
    319 
    320 # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
    321 
    322 #endif /* emacs */
    323 
    324 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    326    use `alloca' instead of `malloc'.  This is because using malloc in
    327    re_search* or re_match* could cause memory leaks when C-g is used in
    328    Emacs; also, malloc is slower and causes storage fragmentation.  On
    329    the other hand, malloc is more portable, and easier to debug.
    330 
    331    Because we sometimes use alloca, some routines have to be macros,
    332    not functions -- `alloca'-allocated space disappears at the end of the
    333    function it is called in.  */
    334 
    335 #ifdef REGEX_MALLOC
    336 
    337 # define REGEX_ALLOCATE malloc
    338 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
    339 # define REGEX_FREE free
    340 
    341 #else /* not REGEX_MALLOC  */
    342 
    343 /* Emacs already defines alloca, sometimes.  */
    344 # ifndef alloca
    345 
    346 /* Make alloca work the best possible way.  */
    347 #  ifdef __GNUC__
    348 #   define alloca __builtin_alloca
    349 #  else /* not __GNUC__ */
    350 #   if HAVE_ALLOCA_H
    351 #    include <alloca.h>
    352 #   endif /* HAVE_ALLOCA_H */
    353 #  endif /* not __GNUC__ */
    354 
    355 # endif /* not alloca */
    356 
    357 # define REGEX_ALLOCATE alloca
    358 
    359 /* Assumes a `char *destination' variable.  */
    360 # define REGEX_REALLOCATE(source, osize, nsize)				\
    361   (destination = (char *) alloca (nsize),				\
    362    memcpy (destination, source, osize))
    363 
    364 /* No need to do anything to free, after alloca.  */
    365 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
    366 
    367 #endif /* not REGEX_MALLOC */
    368 
    369 /* Define how to allocate the failure stack.  */
    370 
    371 #if defined REL_ALLOC && defined REGEX_MALLOC
    372 
    373 # define REGEX_ALLOCATE_STACK(size)				\
    374   r_alloc (&failure_stack_ptr, (size))
    375 # define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
    376   r_re_alloc (&failure_stack_ptr, (nsize))
    377 # define REGEX_FREE_STACK(ptr)					\
    378   r_alloc_free (&failure_stack_ptr)
    379 
    380 #else /* not using relocating allocator */
    381 
    382 # ifdef REGEX_MALLOC
    383 
    384 #  define REGEX_ALLOCATE_STACK malloc
    385 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
    386 #  define REGEX_FREE_STACK free
    387 
    388 # else /* not REGEX_MALLOC */
    389 
#  define REGEX_ALLOCATE_STACK alloca

#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything after alloca.  Expand to a void
   expression, like REGEX_FREE, rather than to nothing, so the macro
   stays a well-formed statement or expression wherever it is used.  */
#  define REGEX_FREE_STACK(arg) ((void)0)
    396 
    397 # endif /* not REGEX_MALLOC */
    398 #endif /* not using relocating allocator */
    399 
    400 
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
    404 #define FIRST_STRING_P(ptr) 					\
    405   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
    406 
    407 /* (Re)Allocate N items of type T using malloc, or fail.  */
    408 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
    409 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
    410 #define RETALLOC_IF(addr, n, t) \
    411   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
    412 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
    413 
    414 #define BYTEWIDTH 8 /* In bits.  */
    415 
    416 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
    417 
    418 #undef MAX
    419 #undef MIN
    420 #define MAX(a, b) ((a) > (b) ? (a) : (b))
    421 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    422 
    423 typedef char boolean;
    424 #define false 0
    425 #define true 1
    426 
    427 static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
    428 					const char *string1, int size1,
    429 					const char *string2, int size2,
    430 					int pos,
    431 					struct re_registers *regs,
    432 					int stop));
    433 
    434 /* These are the command codes that appear in compiled regular
    436    expressions.  Some opcodes are followed by argument bytes.  A
    437    command code can specify any interpretation whatsoever for its
    438    arguments.  Zero bytes may appear in the compiled regular expression.  */
    439 
    440 typedef enum
    441 {
    442   no_op = 0,
    443 
    444   /* Succeed right away--no more backtracking.  */
    445   succeed,
    446 
    447         /* Followed by one byte giving n, then by n literal bytes.  */
    448   exactn,
    449 
    450 #ifdef MBS_SUPPORT
    451 	/* Same as exactn, but contains binary data.  */
    452   exactn_bin,
    453 #endif
    454 
    455         /* Matches any (more or less) character.  */
    456   anychar,
    457 
    458         /* Matches any one char belonging to specified set.  First
    459            following byte is number of bitmap bytes.  Then come bytes
    460            for a bitmap saying which chars are in.  Bits in each byte
    461            are ordered low-bit-first.  A character is in the set if its
    462            bit is 1.  A character too large to have a bit in the map is
    463            automatically not in the set.  */
    464         /* ifdef MBS_SUPPORT, following element is length of character
    465 	   classes, length of collating symbols, length of equivalence
    466 	   classes, length of character ranges, and length of characters.
    467 	   Next, character class element, collating symbols elements,
    468 	   equivalence class elements, range elements, and character
    469 	   elements follow.
    470 	   See regex_compile function.  */
    471   charset,
    472 
    473         /* Same parameters as charset, but match any character that is
    474            not one of those specified.  */
    475   charset_not,
    476 
    477         /* Start remembering the text that is matched, for storing in a
    478            register.  Followed by one byte with the register number, in
    479            the range 0 to one less than the pattern buffer's re_nsub
    480            field.  Then followed by one byte with the number of groups
    481            inner to this one.  (This last has to be part of the
    482            start_memory only because we need it in the on_failure_jump
    483            of re_match_2.)  */
    484   start_memory,
    485 
    486         /* Stop remembering the text that is matched and store it in a
    487            memory register.  Followed by one byte with the register
    488            number, in the range 0 to one less than `re_nsub' in the
    489            pattern buffer, and one byte with the number of inner groups,
    490            just like `start_memory'.  (We need the number of inner
    491            groups here because we don't have any easy way of finding the
    492            corresponding start_memory when we're at a stop_memory.)  */
    493   stop_memory,
    494 
    495         /* Match a duplicate of something remembered. Followed by one
    496            byte containing the register number.  */
    497   duplicate,
    498 
    499         /* Fail unless at beginning of line.  */
    500   begline,
    501 
    502         /* Fail unless at end of line.  */
    503   endline,
    504 
    505         /* Succeeds if at beginning of buffer (if emacs) or at beginning
    506            of string to be matched (if not).  */
    507   begbuf,
    508 
    509         /* Analogously, for end of buffer/string.  */
    510   endbuf,
    511 
    512         /* Followed by two byte relative address to which to jump.  */
    513   jump,
    514 
    515 	/* Same as jump, but marks the end of an alternative.  */
    516   jump_past_alt,
    517 
    518         /* Followed by two-byte relative address of place to resume at
    519            in case of failure.  */
    520         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    521   on_failure_jump,
    522 
    523         /* Like on_failure_jump, but pushes a placeholder instead of the
    524            current string position when executed.  */
    525   on_failure_keep_string_jump,
    526 
    527         /* Throw away latest failure point and then jump to following
    528            two-byte relative address.  */
    529         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    530   pop_failure_jump,
    531 
    532         /* Change to pop_failure_jump if know won't have to backtrack to
    533            match; otherwise change to jump.  This is used to jump
    534            back to the beginning of a repeat.  If what follows this jump
    535            clearly won't match what the repeat does, such that we can be
    536            sure that there is no use backtracking out of repetitions
    537            already matched, then we change it to a pop_failure_jump.
    538            Followed by two-byte address.  */
    539         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    540   maybe_pop_jump,
    541 
    542         /* Jump to following two-byte address, and push a dummy failure
    543            point. This failure point will be thrown away if an attempt
    544            is made to use it for a failure.  A `+' construct makes this
    545            before the first repeat.  Also used as an intermediary kind
    546            of jump when compiling an alternative.  */
    547         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    548   dummy_failure_jump,
    549 
    550 	/* Push a dummy failure point and continue.  Used at the end of
    551 	   alternatives.  */
    552   push_dummy_failure,
    553 
    554         /* Followed by two-byte relative address and two-byte number n.
    555            After matching N times, jump to the address upon failure.  */
    556         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    557   succeed_n,
    558 
    559         /* Followed by two-byte relative address, and two-byte number n.
    560            Jump to the address N times, then fail.  */
    561         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    562   jump_n,
    563 
    564         /* Set the following two-byte relative address to the
    565            subsequent two-byte number.  The address *includes* the two
    566            bytes of number.  */
    567         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    568   set_number_at,
    569 
    570   wordchar,	/* Matches any word-constituent character.  */
    571   notwordchar,	/* Matches any char that is not a word-constituent.  */
    572 
    573   wordbeg,	/* Succeeds if at word beginning.  */
    574   wordend,	/* Succeeds if at word end.  */
    575 
    576   wordbound,	/* Succeeds if at a word boundary.  */
    577   notwordbound	/* Succeeds if not at a word boundary.  */
    578 
    579 #ifdef emacs
    580   ,before_dot,	/* Succeeds if before point.  */
    581   at_dot,	/* Succeeds if at point.  */
    582   after_dot,	/* Succeeds if after point.  */
    583 
    584 	/* Matches any character whose syntax is specified.  Followed by
    585            a byte which contains a syntax code, e.g., Sword.  */
    586   syntaxspec,
    587 
    588 	/* Matches any character whose syntax is not that specified.  */
    589   notsyntaxspec
    590 #endif /* emacs */
    591 } re_opcode_t;
    592 
    593 /* Common operations on the compiled pattern.  */
    595 
    596 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
    597 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    598 
    599 #ifdef MBS_SUPPORT
    600 # define STORE_NUMBER(destination, number)				\
    601   do {									\
    602     *(destination) = (US_CHAR_TYPE)(number);				\
    603   } while (0)
    604 #else
    605 # define STORE_NUMBER(destination, number)				\
    606   do {									\
    607     (destination)[0] = (number) & 0377;					\
    608     (destination)[1] = (number) >> 8;					\
    609   } while (0)
    610 #endif /* MBS_SUPPORT */
    611 
    612 /* Same as STORE_NUMBER, except increment DESTINATION to
    613    the byte after where the number is stored.  Therefore, DESTINATION
    614    must be an lvalue.  */
    615 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
    616 
    617 #define STORE_NUMBER_AND_INCR(destination, number)			\
    618   do {									\
    619     STORE_NUMBER (destination, number);					\
    620     (destination) += OFFSET_ADDRESS_SIZE;				\
    621   } while (0)
    622 
/* Put into DESTINATION the number stored at SOURCE.
   With MBS_SUPPORT the number occupies a single element.  Otherwise it
   occupies two contiguous bytes: the first is the low-order byte and
   the second is the sign-extended high-order byte.  */

#ifdef MBS_SUPPORT
# define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
#else
/* The high byte is scaled by multiplying rather than by shifting left:
   it may be negative after sign extension, and left-shifting a negative
   value is undefined behavior.  */
# define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
  } while (0)
#endif
    639 
    640 #ifdef DEBUG
    641 static void extract_number _RE_ARGS ((int *dest, US_CHAR_TYPE *source));
    642 static void
    643 extract_number (dest, source)
    644     int *dest;
    645     US_CHAR_TYPE *source;
    646 {
    647 #ifdef MBS_SUPPORT
    648   *dest = *source;
    649 #else
    650   int temp = SIGN_EXTEND_CHAR (*(source + 1));
    651   *dest = *source & 0377;
    652   *dest += temp << 8;
    653 #endif
    654 }
    655 
    656 # ifndef EXTRACT_MACROS /* To debug the macros.  */
    657 #  undef EXTRACT_NUMBER
    658 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
    659 # endif /* not EXTRACT_MACROS */
    660 
    661 #endif /* DEBUG */
    662 
    663 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
    664    SOURCE must be an lvalue.  */
    665 
    666 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
    667   do {									\
    668     EXTRACT_NUMBER (destination, source);				\
    669     (source) += OFFSET_ADDRESS_SIZE; 					\
    670   } while (0)
    671 
    672 #ifdef DEBUG
    673 static void extract_number_and_incr _RE_ARGS ((int *destination,
    674 					       US_CHAR_TYPE **source));
    675 static void
    676 extract_number_and_incr (destination, source)
    677     int *destination;
    678     US_CHAR_TYPE **source;
    679 {
    680   extract_number (destination, *source);
    681   *source += OFFSET_ADDRESS_SIZE;
    682 }
    683 
    684 # ifndef EXTRACT_MACROS
    685 #  undef EXTRACT_NUMBER_AND_INCR
    686 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
    687   extract_number_and_incr (&dest, &src)
    688 # endif /* not EXTRACT_MACROS */
    689 
    690 #endif /* DEBUG */
    691 
    692 /* If DEBUG is defined, Regex prints many voluminous messages about what
    694    it is doing (if the variable `debug' is nonzero).  If linked with the
    695    main program in `iregex.c', you can enter patterns and strings
    696    interactively.  And if linked with the main program in `main.c' and
    697    the other test files, you can run the already-written tests.  */
    698 
    699 #ifdef DEBUG
    700 
    701 /* We use standard I/O for debugging.  */
    702 # include <stdio.h>
    703 
    704 /* It is useful to test things that ``must'' be true when debugging.  */
    705 # include <assert.h>
    706 
/* Debugging output is produced only while this is nonzero.  */
static int debug;

/* DEBUG_STATEMENT passes its argument through unchanged.  The printing
   macros below act only when `debug' is nonzero.  Each one is wrapped
   in do { ... } while (0) so that it behaves as a single statement: a
   bare `if (debug) ...' expansion would capture an `else' written
   after the macro call.  */
# define DEBUG_STATEMENT(e) e
# define DEBUG_PRINT1(x)						\
  do { if (debug) printf (x); } while (0)
# define DEBUG_PRINT2(x1, x2)						\
  do { if (debug) printf (x1, x2); } while (0)
# define DEBUG_PRINT3(x1, x2, x3)					\
  do { if (debug) printf (x1, x2, x3); } while (0)
# define DEBUG_PRINT4(x1, x2, x3, x4)					\
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
/* The first argument is accepted but not used.  */
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
    718 
    719 
    720 /* Print the fastmap in human-readable form.  */
    721 
    722 void
    723 print_fastmap (fastmap)
    724     char *fastmap;
    725 {
    726   unsigned was_a_range = 0;
    727   unsigned i = 0;
    728 
    729   while (i < (1 << BYTEWIDTH))
    730     {
    731       if (fastmap[i++])
    732 	{
    733 	  was_a_range = 0;
    734           putchar (i - 1);
    735           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
    736             {
    737               was_a_range = 1;
    738               i++;
    739             }
    740 	  if (was_a_range)
    741             {
    742               printf ("-");
    743               putchar (i - 1);
    744             }
    745         }
    746     }
    747   putchar ('\n');
    748 }
    749 
    750 
    751 /* Print a compiled pattern string in human-readable form, starting at
    752    the START pointer into it and ending just before the pointer END.  */
    753 
    754 void
    755 print_partial_compiled_pattern (start, end)
    756     US_CHAR_TYPE *start;
    757     US_CHAR_TYPE *end;
    758 {
    759   int mcnt, mcnt2;
    760   US_CHAR_TYPE *p1;
    761   US_CHAR_TYPE *p = start;
    762   US_CHAR_TYPE *pend = end;
    763 
    764   if (start == NULL)
    765     {
    766       printf ("(null)\n");
    767       return;
    768     }
    769 
    770   /* Loop over pattern commands.  */
    771   while (p < pend)
    772     {
    773 #ifdef _LIBC
    774       printf ("%td:\t", p - start);
    775 #else
    776       printf ("%ld:\t", (long int) (p - start));
    777 #endif
    778 
    779       switch ((re_opcode_t) *p++)
    780 	{
    781         case no_op:
    782           printf ("/no_op");
    783           break;
    784 
    785 	case exactn:
    786 	  mcnt = *p++;
    787           printf ("/exactn/%d", mcnt);
    788           do
    789 	    {
    790               putchar ('/');
    791 	      PUT_CHAR (*p++);
    792             }
    793           while (--mcnt);
    794           break;
    795 
    796 #ifdef MBS_SUPPORT
    797 	case exactn_bin:
    798 	  mcnt = *p++;
    799 	  printf ("/exactn_bin/%d", mcnt);
    800           do
    801 	    {
    802 	      printf("/%lx", (long int) *p++);
    803             }
    804           while (--mcnt);
    805           break;
    806 #endif /* MBS_SUPPORT */
    807 
    808 	case start_memory:
    809           mcnt = *p++;
    810           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
    811           break;
    812 
    813 	case stop_memory:
    814           mcnt = *p++;
    815 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
    816           break;
    817 
    818 	case duplicate:
    819 	  printf ("/duplicate/%ld", (long int) *p++);
    820 	  break;
    821 
    822 	case anychar:
    823 	  printf ("/anychar");
    824 	  break;
    825 
    826 	case charset:
    827         case charset_not:
    828           {
    829 #ifdef MBS_SUPPORT
    830 	    int i, length;
    831 	    wchar_t *workp = p;
    832 	    printf ("/charset [%s",
    833 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
    834 	    p += 5;
    835 	    length = *workp++; /* the length of char_classes */
    836 	    for (i=0 ; i<length ; i++)
    837 	      printf("[:%lx:]", (long int) *p++);
    838 	    length = *workp++; /* the length of collating_symbol */
    839 	    for (i=0 ; i<length ;)
    840 	      {
    841 		printf("[.");
    842 		while(*p != 0)
    843 		  PUT_CHAR((i++,*p++));
    844 		i++,p++;
    845 		printf(".]");
    846 	      }
    847 	    length = *workp++; /* the length of equivalence_class */
    848 	    for (i=0 ; i<length ;)
    849 	      {
    850 		printf("[=");
    851 		while(*p != 0)
    852 		  PUT_CHAR((i++,*p++));
    853 		i++,p++;
    854 		printf("=]");
    855 	      }
    856 	    length = *workp++; /* the length of char_range */
    857 	    for (i=0 ; i<length ; i++)
    858 	      {
    859 		wchar_t range_start = *p++;
    860 		wchar_t range_end = *p++;
    861 		if (MB_CUR_MAX == 1)
    862 		  printf("%c-%c", (char) range_start, (char) range_end);
    863 		else
    864 		  printf("%C-%C", (wint_t) range_start, (wint_t) range_end);
    865 	      }
    866 	    length = *workp++; /* the length of char */
    867 	    for (i=0 ; i<length ; i++)
    868 	      if (MB_CUR_MAX == 1)
    869 		putchar (*p++);
    870 	      else
    871 		printf("%C", (wint_t) *p++);
    872 	    putchar (']');
    873 #else
    874             register int c, last = -100;
    875 	    register int in_range = 0;
    876 
    877 	    printf ("/charset [%s",
    878 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
    879 
    880             assert (p + *p < pend);
    881 
    882             for (c = 0; c < 256; c++)
    883 	      if (c / 8 < *p
    884 		  && (p[1 + (c/8)] & (1 << (c % 8))))
    885 		{
    886 		  /* Are we starting a range?  */
    887 		  if (last + 1 == c && ! in_range)
    888 		    {
    889 		      putchar ('-');
    890 		      in_range = 1;
    891 		    }
    892 		  /* Have we broken a range?  */
    893 		  else if (last + 1 != c && in_range)
    894               {
    895 		      putchar (last);
    896 		      in_range = 0;
    897 		    }
    898 
    899 		  if (! in_range)
    900 		    putchar (c);
    901 
    902 		  last = c;
    903               }
    904 
    905 	    if (in_range)
    906 	      putchar (last);
    907 
    908 	    putchar (']');
    909 
    910 	    p += 1 + *p;
    911 #endif /* MBS_SUPPORT */
    912 	  }
    913 	  break;
    914 
    915 	case begline:
    916 	  printf ("/begline");
    917           break;
    918 
    919 	case endline:
    920           printf ("/endline");
    921           break;
    922 
    923 	case on_failure_jump:
    924           extract_number_and_incr (&mcnt, &p);
    925 #ifdef _LIBC
    926   	  printf ("/on_failure_jump to %td", p + mcnt - start);
    927 #else
    928   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
    929 #endif
    930           break;
    931 
    932 	case on_failure_keep_string_jump:
    933           extract_number_and_incr (&mcnt, &p);
    934 #ifdef _LIBC
    935   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
    936 #else
    937   	  printf ("/on_failure_keep_string_jump to %ld",
    938 		  (long int) (p + mcnt - start));
    939 #endif
    940           break;
    941 
    942 	case dummy_failure_jump:
    943           extract_number_and_incr (&mcnt, &p);
    944 #ifdef _LIBC
    945   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
    946 #else
    947   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
    948 #endif
    949           break;
    950 
    951 	case push_dummy_failure:
    952           printf ("/push_dummy_failure");
    953           break;
    954 
    955         case maybe_pop_jump:
    956           extract_number_and_incr (&mcnt, &p);
    957 #ifdef _LIBC
    958   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
    959 #else
    960   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
    961 #endif
    962 	  break;
    963 
    964         case pop_failure_jump:
    965 	  extract_number_and_incr (&mcnt, &p);
    966 #ifdef _LIBC
    967   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
    968 #else
    969   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
    970 #endif
    971 	  break;
    972 
    973         case jump_past_alt:
    974 	  extract_number_and_incr (&mcnt, &p);
    975 #ifdef _LIBC
    976   	  printf ("/jump_past_alt to %td", p + mcnt - start);
    977 #else
    978   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
    979 #endif
    980 	  break;
    981 
    982         case jump:
    983 	  extract_number_and_incr (&mcnt, &p);
    984 #ifdef _LIBC
    985   	  printf ("/jump to %td", p + mcnt - start);
    986 #else
    987   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
    988 #endif
    989 	  break;
    990 
    991         case succeed_n:
    992           extract_number_and_incr (&mcnt, &p);
    993 	  p1 = p + mcnt;
    994           extract_number_and_incr (&mcnt2, &p);
    995 #ifdef _LIBC
    996 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
    997 #else
    998 	  printf ("/succeed_n to %ld, %d times",
    999 		  (long int) (p1 - start), mcnt2);
   1000 #endif
   1001           break;
   1002 
   1003         case jump_n:
   1004           extract_number_and_incr (&mcnt, &p);
   1005 	  p1 = p + mcnt;
   1006           extract_number_and_incr (&mcnt2, &p);
   1007 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
   1008           break;
   1009 
   1010         case set_number_at:
   1011           extract_number_and_incr (&mcnt, &p);
   1012 	  p1 = p + mcnt;
   1013           extract_number_and_incr (&mcnt2, &p);
   1014 #ifdef _LIBC
   1015 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
   1016 #else
   1017 	  printf ("/set_number_at location %ld to %d",
   1018 		  (long int) (p1 - start), mcnt2);
   1019 #endif
   1020           break;
   1021 
   1022         case wordbound:
   1023 	  printf ("/wordbound");
   1024 	  break;
   1025 
   1026 	case notwordbound:
   1027 	  printf ("/notwordbound");
   1028           break;
   1029 
   1030 	case wordbeg:
   1031 	  printf ("/wordbeg");
   1032 	  break;
   1033 
   1034 	case wordend:
   1035 	  printf ("/wordend");
   1036 	  break;
   1037 
   1038 # ifdef emacs
   1039 	case before_dot:
   1040 	  printf ("/before_dot");
   1041           break;
   1042 
   1043 	case at_dot:
   1044 	  printf ("/at_dot");
   1045           break;
   1046 
   1047 	case after_dot:
   1048 	  printf ("/after_dot");
   1049           break;
   1050 
   1051 	case syntaxspec:
   1052           printf ("/syntaxspec");
   1053 	  mcnt = *p++;
   1054 	  printf ("/%d", mcnt);
   1055           break;
   1056 
   1057 	case notsyntaxspec:
   1058           printf ("/notsyntaxspec");
   1059 	  mcnt = *p++;
   1060 	  printf ("/%d", mcnt);
   1061 	  break;
   1062 # endif /* emacs */
   1063 
   1064 	case wordchar:
   1065 	  printf ("/wordchar");
   1066           break;
   1067 
   1068 	case notwordchar:
   1069 	  printf ("/notwordchar");
   1070           break;
   1071 
   1072 	case begbuf:
   1073 	  printf ("/begbuf");
   1074           break;
   1075 
   1076 	case endbuf:
   1077 	  printf ("/endbuf");
   1078           break;
   1079 
   1080         default:
   1081           printf ("?%ld", (long int) *(p-1));
   1082 	}
   1083 
   1084       putchar ('\n');
   1085     }
   1086 
   1087 #ifdef _LIBC
   1088   printf ("%td:\tend of pattern.\n", p - start);
   1089 #else
   1090   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
   1091 #endif
   1092 }
   1093 
   1094 
   1095 void
   1096 print_compiled_pattern (bufp)
   1097     struct re_pattern_buffer *bufp;
   1098 {
   1099   US_CHAR_TYPE *buffer = (US_CHAR_TYPE*) bufp->buffer;
   1100 
   1101   print_partial_compiled_pattern (buffer, buffer
   1102 				  + bufp->used / sizeof(US_CHAR_TYPE));
   1103   printf ("%ld bytes used/%ld bytes allocated.\n",
   1104 	  bufp->used, bufp->allocated);
   1105 
   1106   if (bufp->fastmap_accurate && bufp->fastmap)
   1107     {
   1108       printf ("fastmap: ");
   1109       print_fastmap (bufp->fastmap);
   1110     }
   1111 
   1112 #ifdef _LIBC
   1113   printf ("re_nsub: %Zd\t", bufp->re_nsub);
   1114 #else
   1115   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
   1116 #endif
   1117   printf ("regs_alloc: %d\t", bufp->regs_allocated);
   1118   printf ("can_be_null: %d\t", bufp->can_be_null);
   1119   printf ("newline_anchor: %d\n", bufp->newline_anchor);
   1120   printf ("no_sub: %d\t", bufp->no_sub);
   1121   printf ("not_bol: %d\t", bufp->not_bol);
   1122   printf ("not_eol: %d\t", bufp->not_eol);
   1123   printf ("syntax: %lx\n", bufp->syntax);
   1124   /* Perhaps we should print the translate table?  */
   1125 }
   1126 
   1127 
   1128 void
   1129 print_double_string (where, string1, size1, string2, size2)
   1130     const CHAR_TYPE *where;
   1131     const CHAR_TYPE *string1;
   1132     const CHAR_TYPE *string2;
   1133     int size1;
   1134     int size2;
   1135 {
   1136   ptrdiff_t this_char;
   1137 
   1138   if (where == NULL)
   1139     printf ("(null)");
   1140   else
   1141     {
   1142       if (FIRST_STRING_P (where))
   1143         {
   1144           for (this_char = where - string1; this_char < size1; this_char++)
   1145 	    PUT_CHAR (string1[this_char]);
   1146 
   1147           where = string2;
   1148         }
   1149 
   1150       for (this_char = where - string2; this_char < size2; this_char++)
   1151         PUT_CHAR (string2[this_char]);
   1152     }
   1153 }
   1154 
/* Write the single character C to the standard error stream.  */
void
printchar (c)
     int c;
{
  fputc (c, stderr);
}
   1161 
   1162 #else /* not DEBUG */
   1163 
        /* Non-debugging build: `assert' and every DEBUG_* helper are defined
           with an empty replacement, so uses of them expand to nothing and
           their arguments are never evaluated.  */
   1164 # undef assert
   1165 # define assert(e)
   1166 
   1167 # define DEBUG_STATEMENT(e)
   1168 # define DEBUG_PRINT1(x)
   1169 # define DEBUG_PRINT2(x1, x2)
   1170 # define DEBUG_PRINT3(x1, x2, x3)
   1171 # define DEBUG_PRINT4(x1, x2, x3, x4)
   1172 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
   1173 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   1174 
   1175 #endif /* not DEBUG */
   1176 
   1177 #ifdef MBS_SUPPORT
   1179 /* This  convert a multibyte string to a wide character string.
   1180    And write their correspondances to offset_buffer(see below)
   1181    and write whether each wchar_t is binary data to is_binary.
   1182    This assume invalid multibyte sequences as binary data.
   1183    We assume offset_buffer and is_binary is already allocated
   1184    enough space.  */
   1185 
   1186 static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src,
   1187 				  size_t len, int *offset_buffer,
   1188 				  char *is_binary);
   1189 static size_t
   1190 convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
   1191      CHAR_TYPE *dest;
   1192      const unsigned char* src;
   1193      size_t len; /* the length of multibyte string.  */
   1194 
   1195      /* It hold correspondances between src(char string) and
   1196 	dest(wchar_t string) for optimization.
   1197 	e.g. src  = "xxxyzz"
   1198              dest = {'X', 'Y', 'Z'}
   1199 	      (each "xxx", "y" and "zz" represent one multibyte character
   1200 	       corresponding to 'X', 'Y' and 'Z'.)
   1201 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
   1202 	  	        = {0, 3, 4, 6}
   1203      */
   1204      int *offset_buffer;
   1205      char *is_binary;
   1206 {
   1207   wchar_t *pdest = dest;
   1208   const unsigned char *psrc = src;
   1209   size_t wc_count = 0;
   1210 
   1211   if (MB_CUR_MAX == 1)
   1212     { /* We don't need conversion.  */
   1213       for ( ; wc_count < len ; ++wc_count)
   1214 	{
   1215 	  *pdest++ = *psrc++;
   1216 	  is_binary[wc_count] = FALSE;
   1217 	  offset_buffer[wc_count] = wc_count;
   1218 	}
   1219       offset_buffer[wc_count] = wc_count;
   1220     }
   1221   else
   1222     {
   1223       /* We need conversion.  */
   1224       mbstate_t mbs;
   1225       int consumed;
   1226       size_t mb_remain = len;
   1227       size_t mb_count = 0;
   1228 
   1229       /* Initialize the conversion state.  */
   1230       memset (&mbs, 0, sizeof (mbstate_t));
   1231 
   1232       offset_buffer[0] = 0;
   1233       for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
   1234 	     psrc += consumed)
   1235 	{
   1236 	  consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
   1237 
   1238 	  if (consumed <= 0)
   1239 	    /* failed to convert. maybe src contains binary data.
   1240 	       So we consume 1 byte manualy.  */
   1241 	    {
   1242 	      *pdest = *psrc;
   1243 	      consumed = 1;
   1244 	      is_binary[wc_count] = TRUE;
   1245 	    }
   1246 	  else
   1247 	    is_binary[wc_count] = FALSE;
   1248 	  /* In sjis encoding, we use yen sign as escape character in
   1249 	     place of reverse solidus. So we convert 0x5c(yen sign in
   1250 	     sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
   1251 	     solidus in UCS2).  */
   1252 	  if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
   1253 	    *pdest = (wchar_t) *psrc;
   1254 
   1255 	  offset_buffer[wc_count + 1] = mb_count += consumed;
   1256 	}
   1257     }
   1258 
   1259   return wc_count;
   1260 }
   1261 
   1262 #endif /* MBS_SUPPORT */
   1263 
   1264 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
   1265    also be assigned to directly: each pattern buffer stores its own
   1266    syntax, so it can be changed between regex compilations.  */
   1267 /* This has no initializer because initialized variables in Emacs
   1268    become read-only after dumping.  As a file-scope object without an
           initializer it starts out as zero.  */
   1269 reg_syntax_t re_syntax_options;
   1270 
   1271 
   1272 /* Specify the precise syntax of regexps for compilation.  This provides
   1273    for compatibility for various utilities which historically have
   1274    different, incompatible syntaxes.
   1275 
   1276    The argument SYNTAX is a bit mask comprised of the various bits
   1277    defined in regex.h.  We return the old syntax.  */
   1278 
   1279 reg_syntax_t
   1280 re_set_syntax (syntax)
   1281     reg_syntax_t syntax;
   1282 {
   1283   reg_syntax_t ret = re_syntax_options;
   1284 
   1285   re_syntax_options = syntax;
   1286 #ifdef DEBUG
   1287   if (syntax & RE_DEBUG)
   1288     debug = 1;
   1289   else if (debug) /* was on but now is not */
   1290     debug = 0;
   1291 #endif /* DEBUG */
   1292   return ret;
   1293 }
   1294 #ifdef _LIBC
   1295 weak_alias (__re_set_syntax, re_set_syntax)
   1296 #endif
   1297 
   1298 /* This table gives an error message for each of the error codes listed
   1300    in regex.h.  Obviously the order here has to be same as there.
   1301    POSIX doesn't require that we do anything for REG_NOERROR,
   1302    but why not be nice?

           All messages are packed into a single char array: the adjacent
           string literals are concatenated, with an explicit "\0" after
           every message except the last, which is ended by the array's own
           terminating NUL.  Each REG_*_IDX macro is the byte offset of its
           message, computed as the previous offset plus `sizeof' the
           previous literal; `sizeof' counts that literal's NUL, which is
           what accounts for the "\0" separator.  The text given to `sizeof'
           must therefore be kept identical to the message it follows.
           (gettext_noop is presumably an identity macro that only marks the
           string for translation -- it is not defined in this part of the
           file.)  */
   1303 
   1304 static const char re_error_msgid[] =
   1305   {
   1306 #define REG_NOERROR_IDX	0
   1307     gettext_noop ("Success")	/* REG_NOERROR */
   1308     "\0"
   1309 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
   1310     gettext_noop ("No match")	/* REG_NOMATCH */
   1311     "\0"
   1312 #define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
   1313     gettext_noop ("Invalid regular expression") /* REG_BADPAT */
   1314     "\0"
   1315 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
   1316     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
   1317     "\0"
   1318 #define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
   1319     gettext_noop ("Invalid character class name") /* REG_ECTYPE */
   1320     "\0"
   1321 #define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
   1322     gettext_noop ("Trailing backslash") /* REG_EESCAPE */
   1323     "\0"
   1324 #define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
   1325     gettext_noop ("Invalid back reference") /* REG_ESUBREG */
   1326     "\0"
   1327 #define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
   1328     gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
   1329     "\0"
   1330 #define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
   1331     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
   1332     "\0"
   1333 #define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
   1334     gettext_noop ("Unmatched \\{") /* REG_EBRACE */
   1335     "\0"
   1336 #define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
   1337     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
   1338     "\0"
   1339 #define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
   1340     gettext_noop ("Invalid range end")	/* REG_ERANGE */
   1341     "\0"
   1342 #define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
   1343     gettext_noop ("Memory exhausted") /* REG_ESPACE */
   1344     "\0"
   1345 #define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
   1346     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
   1347     "\0"
   1348 #define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
   1349     gettext_noop ("Premature end of regular expression") /* REG_EEND */
   1350     "\0"
   1351 #define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
   1352     gettext_noop ("Regular expression too big") /* REG_ESIZE */
   1353     "\0"
   1354 #define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
   1355     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
   1356   };
   1357 
        /* Byte offset into re_error_msgid of each message, listed in the same
           order as the messages themselves (REG_NOERROR first).  Presumably
           indexed by the numeric value of the error code -- the users of
           this table are outside this part of the file.  */
   1358 static const size_t re_error_msgid_idx[] =
   1359   {
   1360     REG_NOERROR_IDX,
   1361     REG_NOMATCH_IDX,
   1362     REG_BADPAT_IDX,
   1363     REG_ECOLLATE_IDX,
   1364     REG_ECTYPE_IDX,
   1365     REG_EESCAPE_IDX,
   1366     REG_ESUBREG_IDX,
   1367     REG_EBRACK_IDX,
   1368     REG_EPAREN_IDX,
   1369     REG_EBRACE_IDX,
   1370     REG_BADBR_IDX,
   1371     REG_ERANGE_IDX,
   1372     REG_ESPACE_IDX,
   1373     REG_BADRPT_IDX,
   1374     REG_EEND_IDX,
   1375     REG_ESIZE_IDX,
   1376     REG_ERPAREN_IDX
   1377   };
   1378 
   1379 /* Avoiding alloca during matching, to placate r_alloc.  */
   1381 
   1382 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   1383    searching and matching functions should not call alloca.  On some
   1384    systems, alloca is implemented in terms of malloc, and if we're
   1385    using the relocating allocator routines, then malloc could cause a
   1386    relocation, which might (if the strings being searched are in the
   1387    ralloc heap) shift the data out from underneath the regexp
   1388    routines.
   1389 
   1390    Here's another reason to avoid allocation: Emacs
   1391    processes input from X in a signal handler; processing X input may
   1392    call malloc; if input arrives while a matching routine is calling
   1393    malloc, then we're scrod.  But Emacs can't just block input while
   1394    calling matching routines; then we don't notice interrupts when
   1395    they come in.  So, Emacs blocks input around all regexp calls
   1396    except the matching calls, which it leaves unprotected, in the
   1397    faith that they will not malloc.  */
   1398 
   1399 /* Normally, this is fine.  */
   1400 #define MATCH_MAY_ALLOCATE
   1401 
   1402 /* When using GNU C, we are not REALLY using the C alloca, no matter
   1403    what config.h may say.  So don't take precautions for it.  */
   1404 #ifdef __GNUC__
   1405 # undef C_ALLOCA
   1406 #endif
   1407 
   1408 /* The match routines may not allocate if (1) they would do it with malloc
   1409    and (2) it's not safe for them to use malloc.
   1410    Note that if REL_ALLOC is defined, matching would not use malloc for the
   1411    failure stack, but we would still use it for the register vectors;
   1412    so REL_ALLOC should not affect this.  */
   1413 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
   1414 # undef MATCH_MAY_ALLOCATE
   1415 #endif
   1416 
   1417 
   1418 /* Failure stack declarations and macros; both re_compile_fastmap and
   1420    re_match_2 use a failure stack.  These have to be macros because of
   1421    REGEX_ALLOCATE_STACK.  */
   1422 
   1423 
   1424 /* Number of failure points for which to initially allocate space
   1425    when matching.  If this number is exceeded, we allocate more
   1426    space, so it is not a hard limit.  */
   1427 #ifndef INIT_FAILURE_ALLOC
   1428 # define INIT_FAILURE_ALLOC 5
   1429 #endif
   1430 
   1431 /* Roughly the maximum number of failure points on the stack.  Would be
   1432    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   1433    This is a variable only so users of regex can assign to it; we never
   1434    change it ourselves.

           The two branches below declare the same things and differ only in
           width: when INT_IS_16BIT is defined the limit, the integer member
           of a stack slot and the stack's size/avail counters are `long',
           otherwise they are plain `int' / `unsigned'.  In both branches the
           default limit is 4000 when MATCH_MAY_ALLOCATE is defined and 2000
           when it is not.  */
   1435 
   1436 #ifdef INT_IS_16BIT
   1437 
   1438 # if defined MATCH_MAY_ALLOCATE
   1439 /* 4400 was enough to cause a crash on Alpha OSF/1,
   1440    whose default stack limit is 2mb.  */
   1441 long int re_max_failures = 4000;
   1442 # else
   1443 long int re_max_failures = 2000;
   1444 # endif
   1445 
        /* One slot of the failure stack: either a pointer or an integer.  */
   1446 union fail_stack_elt
   1447 {
   1448   US_CHAR_TYPE *pointer;
   1449   long int integer;
   1450 };
   1451 
   1452 typedef union fail_stack_elt fail_stack_elt_t;
   1453 
        /* The failure stack itself: STACK points to SIZE slots, the first
           AVAIL of which are in use.  */
   1454 typedef struct
   1455 {
   1456   fail_stack_elt_t *stack;
   1457   unsigned long int size;
   1458   unsigned long int avail;		/* Offset of next open position.  */
   1459 } fail_stack_type;
   1460 
   1461 #else /* not INT_IS_16BIT */
   1462 
   1463 # if defined MATCH_MAY_ALLOCATE
   1464 /* 4400 was enough to cause a crash on Alpha OSF/1,
   1465    whose default stack limit is 2mb.  */
   1466 int re_max_failures = 4000;
   1467 # else
   1468 int re_max_failures = 2000;
   1469 # endif
   1470 
        /* One slot of the failure stack: either a pointer or an integer.  */
   1471 union fail_stack_elt
   1472 {
   1473   US_CHAR_TYPE *pointer;
   1474   int integer;
   1475 };
   1476 
   1477 typedef union fail_stack_elt fail_stack_elt_t;
   1478 
        /* The failure stack itself: STACK points to SIZE slots, the first
           AVAIL of which are in use.  */
   1479 typedef struct
   1480 {
   1481   fail_stack_elt_t *stack;
   1482   unsigned size;
   1483   unsigned avail;			/* Offset of next open position.  */
   1484 } fail_stack_type;
   1485 
   1486 #endif /* INT_IS_16BIT */
   1487 
        /* Predicates on the failure stack.  FAIL_STACK_EMPTY and
           FAIL_STACK_FULL operate on a variable named `fail_stack' in the
           scope where they are expanded; FAIL_STACK_PTR_EMPTY operates on a
           pointer named `fail_stack_ptr'.  The stack is full when every
           allocated slot is in use (avail == size).  */
   1488 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
   1489 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
   1490 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
   1491 
   1492 
   1493 /* Define macros to initialize and free the failure stack.
   1494    Do `return -2' if the alloc fails.

           Both macros act on a variable named `fail_stack' in the expanding
           scope.  When MATCH_MAY_ALLOCATE is defined, INIT_FAIL_STACK
           allocates room for INIT_FAILURE_ALLOC slots and, because it may
           execute `return -2', can only be used inside a function for which
           that return statement is valid; RESET_FAIL_STACK hands the storage
           to REGEX_FREE_STACK.  Otherwise INIT_FAIL_STACK merely marks the
           stack as empty (no storage is obtained here) and RESET_FAIL_STACK
           expands to nothing.  */
   1495 
   1496 #ifdef MATCH_MAY_ALLOCATE
   1497 # define INIT_FAIL_STACK()						\
   1498   do {									\
   1499     fail_stack.stack = (fail_stack_elt_t *)				\
   1500       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
   1501 									\
   1502     if (fail_stack.stack == NULL)					\
   1503       return -2;							\
   1504 									\
   1505     fail_stack.size = INIT_FAILURE_ALLOC;				\
   1506     fail_stack.avail = 0;						\
   1507   } while (0)
   1508 
   1509 # define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
   1510 #else
   1511 # define INIT_FAIL_STACK()						\
   1512   do {									\
   1513     fail_stack.avail = 0;						\
   1514   } while (0)
   1515 
   1516 # define RESET_FAIL_STACK()
   1517 #endif
   1518 
   1519 
   1520 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
   1521 
   1522    Return 1 if succeeds, and 0 if either ran out of memory
   1523    allocating space for it or it was already too large.
   1524 
   1525    REGEX_REALLOCATE_STACK requires `destination' be declared.

           The limit is only approximate because the size is compared with
           re_max_failures * MAX_FAILURE_ITEMS before doubling, so the stack
           may end up as much as twice that large.  Note that the result of
           REGEX_REALLOCATE_STACK is stored straight into FAIL_STACK.stack:
           when reallocation fails that member is left NULL, and the size
           member is left unchanged.  The argument is evaluated several
           times, so it must be free of side effects.  */
   1526 
   1527 #define DOUBLE_FAIL_STACK(fail_stack)					\
   1528   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   1529    ? 0									\
   1530    : ((fail_stack).stack = (fail_stack_elt_t *)				\
   1531         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
   1532           (fail_stack).size * sizeof (fail_stack_elt_t),		\
   1533           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
   1534 									\
   1535       (fail_stack).stack == NULL					\
   1536       ? 0								\
   1537       : ((fail_stack).size <<= 1, 					\
   1538          1)))
   1539 
   1540 
   1541 /* Push pointer POINTER on FAIL_STACK.
   1542    Return 1 if was able to do so and 0 if ran out of memory allocating
   1543    space to do so.

           The stack is doubled with DOUBLE_FAIL_STACK when it is full.
           NOTE: the fullness test is FAIL_STACK_FULL (), which always
           inspects the variable named `fail_stack' rather than the
           FAIL_STACK argument, so the macro behaves as described only when
           the argument is that same variable.  */
   1544 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
   1545   ((FAIL_STACK_FULL ()							\
   1546     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   1547    ? 0									\
   1548    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
   1549       1))
   1550 
   1551 /* Push a pointer value onto the failure stack.
   1552    Assumes the variable `fail_stack'.  Probably should only
   1553    be called from within `PUSH_FAILURE_POINT'.  */
   1554 #define PUSH_FAILURE_POINTER(item)					\
   1555   fail_stack.stack[fail_stack.avail++].pointer = (US_CHAR_TYPE *) (item)
   1556 
   1557 /* This pushes an integer-valued item onto the failure stack.
   1558    Assumes the variable `fail_stack'.  Probably should only
   1559    be called from within `PUSH_FAILURE_POINT'.  */
   1560 #define PUSH_FAILURE_INT(item)					\
   1561   fail_stack.stack[fail_stack.avail++].integer = (item)
   1562 
   1563 /* Push a fail_stack_elt_t value onto the failure stack.
   1564    Assumes the variable `fail_stack'.  Probably should only
   1565    be called from within `PUSH_FAILURE_POINT'.  */
   1566 #define PUSH_FAILURE_ELT(item)					\
   1567   fail_stack.stack[fail_stack.avail++] =  (item)
   1568 
   1569 /* These three POP... operations complement the three PUSH... operations.
   1570    All assume that `fail_stack' is nonempty.  */
   1571 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
   1572 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
   1573 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
   1574 
   1575 /* Used to omit pushing failure point id's when we're not debugging.  */
   1576 #ifdef DEBUG
   1577 # define DEBUG_PUSH PUSH_FAILURE_INT
   1578 # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
   1579 #else
   1580 # define DEBUG_PUSH(item)
   1581 # define DEBUG_POP(item_addr)
   1582 #endif
   1583 
   1584 
   1585 /* Push the information about the state we will need
   1586    if we ever fail back to it.
   1587 
   1588    Requires variables fail_stack, regstart, regend, reg_info, and
   1589    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   1590    be declared.
   1591 
   1592    Does `return FAILURE_CODE' if runs out of memory.

           The macro also reads lowest_active_reg and highest_active_reg from
           the expanding scope.  It first doubles the stack until
           NUM_FAILURE_ITEMS slots are free, then pushes, in this order:
             - for every register from lowest_active_reg up to
               highest_active_reg: its regstart entry, its regend entry and
               the `word' member of its reg_info entry;
             - lowest_active_reg, then highest_active_reg;
             - PATTERN_PLACE, then STRING_PLACE;
             - the failure id, but only when DEBUG is defined (DEBUG_PUSH
               expands to nothing otherwise).
           `destination' is declared locally only because
           REGEX_REALLOCATE_STACK requires it.  */
   1593 
   1594 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
   1595   do {									\
   1596     char *destination;							\
   1597     /* Must be int, so when we don't save any registers, the arithmetic	\
   1598        of 0 + -1 isn't done as unsigned.  */				\
   1599     /* Can't be int, since there is not a shred of a guarantee that int	\
   1600        is wide enough to hold a value of something to which pointer can	\
   1601        be assigned */							\
   1602     active_reg_t this_reg;						\
   1603     									\
   1604     DEBUG_STATEMENT (failure_id++);					\
   1605     DEBUG_STATEMENT (nfailure_points_pushed++);				\
   1606     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
   1607     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
   1608     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
   1609 									\
   1610     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
   1611     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
   1612 									\
   1613     /* Ensure we have enough space allocated for what we will push.  */	\
   1614     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
   1615       {									\
   1616         if (!DOUBLE_FAIL_STACK (fail_stack))				\
   1617           return failure_code;						\
   1618 									\
   1619         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
   1620 		       (fail_stack).size);				\
   1621         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
   1622       }									\
   1623 									\
   1624     /* Push the info, starting with the registers.  */			\
   1625     DEBUG_PRINT1 ("\n");						\
   1626 									\
   1627     if (1)								\
   1628       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
   1629 	   this_reg++)							\
   1630 	{								\
   1631 	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
   1632 	  DEBUG_STATEMENT (num_regs_pushed++);				\
   1633 									\
   1634 	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
   1635 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
   1636 									\
   1637 	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
   1638 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
   1639 									\
   1640 	  DEBUG_PRINT2 ("    info: %p\n      ",				\
   1641 			reg_info[this_reg].word.pointer);		\
   1642 	  DEBUG_PRINT2 (" match_null=%d",				\
   1643 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
   1644 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
   1645 	  DEBUG_PRINT2 (" matched_something=%d",			\
   1646 			MATCHED_SOMETHING (reg_info[this_reg]));	\
   1647 	  DEBUG_PRINT2 (" ever_matched=%d",				\
   1648 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
   1649 	  DEBUG_PRINT1 ("\n");						\
   1650 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
   1651 	}								\
   1652 									\
   1653     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
   1654     PUSH_FAILURE_INT (lowest_active_reg);				\
   1655 									\
   1656     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
   1657     PUSH_FAILURE_INT (highest_active_reg);				\
   1658 									\
   1659     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
   1660     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
   1661     PUSH_FAILURE_POINTER (pattern_place);				\
   1662 									\
   1663     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
   1664     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
   1665 				 size2);				\
   1666     DEBUG_PRINT1 ("'\n");						\
   1667     PUSH_FAILURE_POINTER (string_place);				\
   1668 									\
   1669     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
   1670     DEBUG_PUSH (failure_id);						\
   1671   } while (0)
   1672 
   1673 /* This is the number of items that are pushed and popped on the stack
   1674    for each register.  In PUSH_FAILURE_POINT these are the register's
           regstart entry, its regend entry and the `word' of its reg_info
           entry.  */
   1675 #define NUM_REG_ITEMS  3
   1676 
   1677 /* Individual items aside from the registers.  In PUSH_FAILURE_POINT these
           are the lowest and highest active register numbers, the pattern
           position and the string position, plus the failure point id when
           DEBUG is defined.  */
   1678 #ifdef DEBUG
   1679 # define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
   1680 #else
   1681 # define NUM_NONREG_ITEMS 4
   1682 #endif
   1683 
   1684 /* We push at most this many items on the stack.  */
   1685 /* We used to use (num_regs - 1), which is the number of registers
   1686    this regexp will save; but that was changed to 5
   1687    to avoid stack overflow for a regexp with lots of parens.  */
   1688 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
   1689 
   1690 /* We actually push this many items.  The `0 ? 0 : ...' conditional always
           selects its second branch, i.e. the number of registers between
           lowest_active_reg and highest_active_reg inclusive; both variables
           are taken from the expanding scope.  */
   1691 #define NUM_FAILURE_ITEMS				\
   1692   (((0							\
   1693      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
   1694     * NUM_REG_ITEMS)					\
   1695    + NUM_NONREG_ITEMS)
   1696 
   1697 /* How many items can still be added to the stack without overflowing it.  */
   1698 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
   1699 
   1700 
   1701 /* Pops one failure point, undoing in reverse order what the
   1702    failure-point push macro defined just above pushes.
   1703    We restore into the parameters, all of which should be lvalues:
   1704      STR -- the saved data position (left untouched when the saved
                   pointer is NULL).
   1705      PAT -- the saved pattern position.
   1706      LOW_REG, HIGH_REG -- the lowest and highest active registers.
   1707      REGSTART, REGEND -- arrays of string positions.
   1708      REG_INFO -- array of information about each subexpression.
   1709 
   1710    Also assumes the variables `fail_stack', `set_regs_matched_done' and
   1711    (if debugging) `bufp', `pend', `string1', `size1', `string2', `size2'.

           The `if (1)' test is constant, so registers LOW_REG..HIGH_REG are
           always restored from the stack and the `else' arm, which would
           clear the registers above HIGH_REG, is never executed.
           `set_regs_matched_done' is reset to 0 at the end.
           The macro expands to a bare brace block, not `do ... while (0)'.  */
   1712 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
   1713 {									\
   1714   DEBUG_STATEMENT (unsigned failure_id;)				\
   1715   active_reg_t this_reg;						\
   1716   const US_CHAR_TYPE *string_temp;					\
   1717 									\
   1718   assert (!FAIL_STACK_EMPTY ());					\
   1719 									\
   1720   /* Remove failure points and point to how many regs pushed.  */	\
   1721   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
   1722   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
   1723   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
   1724 									\
   1725   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
   1726 									\
   1727   DEBUG_POP (&failure_id);						\
   1728   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
   1729 									\
   1730   /* If the saved string location is NULL, it came from an		\
   1731      on_failure_keep_string_jump opcode, and we want to throw away the	\
   1732      saved NULL, thus retaining our current position in the string.  */	\
   1733   string_temp = POP_FAILURE_POINTER ();					\
   1734   if (string_temp != NULL)						\
   1735     str = (const CHAR_TYPE *) string_temp;				\
   1736 									\
   1737   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   1738   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   1739   DEBUG_PRINT1 ("'\n");							\
   1740 									\
   1741   pat = (US_CHAR_TYPE *) POP_FAILURE_POINTER ();			\
   1742   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
   1743   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
   1744 									\
   1745   /* Restore register info.  */						\
   1746   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1747   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
   1748 									\
   1749   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1750   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
   1751 									\
   1752   if (1)								\
   1753     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
   1754       {									\
   1755 	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
   1756 									\
   1757 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
   1758 	DEBUG_PRINT2 ("      info: %p\n",				\
   1759 		      reg_info[this_reg].word.pointer);			\
   1760 									\
   1761 	regend[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER ();	\
   1762 	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
   1763 									\
   1764 	regstart[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER ();\
   1765 	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
   1766       }									\
   1767   else									\
   1768     {									\
   1769       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
   1770 	{								\
   1771 	  reg_info[this_reg].word.integer = 0;				\
   1772 	  regend[this_reg] = 0;						\
   1773 	  regstart[this_reg] = 0;					\
   1774 	}								\
   1775       highest_active_reg = high_reg;					\
   1776     }									\
   1777 									\
   1778   set_regs_matched_done = 0;						\
   1779   DEBUG_STATEMENT (nfailure_points_popped++);				\
   1780 } /* POP_FAILURE_POINT */
   1781 
   1782 
   1783 /* Structure for per-register (a.k.a. per-group) information.
   1785    Other register information, such as the
   1786    starting and ending positions (which are addresses), and the list of
   1787    inner groups (which is a bits list) are maintained in separate
   1788    variables.
   1789 
   1790    We are making a (strictly speaking) nonportable assumption here: that
   1791    the compiler will pack our bit fields into something that fits into
   1792    the type of `word', i.e., is something that fits into one item on the
   1793    failure stack.  */
   1794 
   1795 
   1796 /* Declarations and macros for re_match_2.  */
   1797 
        /* One register's flags, overlaid on a failure-stack element so that
           the whole set can be pushed and popped through `word'.  */
   1798 typedef union
   1799 {
   1800   fail_stack_elt_t word;
   1801   struct
   1802   {
   1803       /* This field is one if this group can match the empty string,
   1804          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
   1805 #define MATCH_NULL_UNSET_VALUE 3
   1806     unsigned match_null_string_p : 2;
   1807     unsigned is_active : 1;
            /* The next two flags are both set to 1 by SET_REGS_MATCHED.  */
   1808     unsigned matched_something : 1;
   1809     unsigned ever_matched_something : 1;
   1810   } bits;
   1811 } register_info_type;
   1812 
        /* Field accessors for a register_info_type R; each expands to the
           corresponding bit-field, so it can be read or assigned.  */
   1813 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
   1814 #define IS_ACTIVE(R)  ((R).bits.is_active)
   1815 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
   1816 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
   1817 
   1818 
   1819 /* Call this when have matched a real character; it sets `matched' flags
   1820    for the subexpressions which we are currently inside.  Also records
   1821    that those subexprs have matched.

           The work is done only while `set_regs_matched_done' is zero; the
           flag is then set to 1 so later calls are no-ops until something
           (e.g. POP_FAILURE_POINT) clears it again.  Every register from
           `lowest_active_reg' through `highest_active_reg' gets both
           MATCHED_SOMETHING and EVER_MATCHED_SOMETHING set to 1 in
           `reg_info'.  All of these names come from the expanding scope.  */
   1822 #define SET_REGS_MATCHED()						\
   1823   do									\
   1824     {									\
   1825       if (!set_regs_matched_done)					\
   1826 	{								\
   1827 	  active_reg_t r;						\
   1828 	  set_regs_matched_done = 1;					\
   1829 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
   1830 	    {								\
   1831 	      MATCHED_SOMETHING (reg_info[r])				\
   1832 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
   1833 		= 1;							\
   1834 	    }								\
   1835 	}								\
   1836     }									\
   1837   while (0)
   1838 
   1839 /* Registers are set to a sentinel when they haven't yet matched.  The
           sentinel is the address of a file-local dummy object, and
           REG_UNSET (E) is a plain pointer comparison against it.  */
   1840 static CHAR_TYPE reg_unset_dummy;
   1841 #define REG_UNSET_VALUE (&reg_unset_dummy)
   1842 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
   1843 
   1844 /* Subroutine declarations and macros for regex_compile.  */
   1846 
        /* Every prototype's parameter list is wrapped in _RE_ARGS
           (presumably so it can be dropped for pre-ANSI compilers -- confirm
           against the definition of _RE_ARGS).  */
   1847 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
   1848 					      reg_syntax_t syntax,
   1849 					      struct re_pattern_buffer *bufp));
   1850 static void store_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, int arg));
   1851 static void store_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
   1852 				 int arg1, int arg2));
   1853 static void insert_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
   1854 				  int arg, US_CHAR_TYPE *end));
   1855 static void insert_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
   1856 				  int arg1, int arg2, US_CHAR_TYPE *end));
   1857 static boolean at_begline_loc_p _RE_ARGS ((const CHAR_TYPE *pattern,
   1858 					   const CHAR_TYPE *p,
   1859 					   reg_syntax_t syntax));
   1860 static boolean at_endline_loc_p _RE_ARGS ((const CHAR_TYPE *p,
   1861 					   const CHAR_TYPE *pend,
   1862 					   reg_syntax_t syntax));
        /* With MBS_SUPPORT, compile_range takes the range start as a
           CHAR_TYPE plus an extra CHAR_SET argument, and insert_space is
           declared as well; otherwise the range start is an unsigned int.  */
   1863 #ifdef MBS_SUPPORT
   1864 static reg_errcode_t compile_range _RE_ARGS ((CHAR_TYPE range_start,
   1865 					      const CHAR_TYPE **p_ptr,
   1866 					      const CHAR_TYPE *pend,
   1867 					      char *translate,
   1868 					      reg_syntax_t syntax,
   1869 					      US_CHAR_TYPE *b,
   1870 					      CHAR_TYPE *char_set));
   1871 static void insert_space _RE_ARGS ((int num, CHAR_TYPE *loc, CHAR_TYPE *end));
   1872 #else
   1873 static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
   1874 					      const CHAR_TYPE **p_ptr,
   1875 					      const CHAR_TYPE *pend,
   1876 					      char *translate,
   1877 					      reg_syntax_t syntax,
   1878 					      US_CHAR_TYPE *b));
   1879 #endif /* MBS_SUPPORT */
   1880 
   1881 /* Fetch the next character in the uncompiled pattern---translating it
   1882    if necessary.  Also cast from a signed character in the constant
   1883    string passed to us by the user to an unsigned char that we can use
   1884    as an array index (in, e.g., `translate').  */
   1885 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1886    because it is impossible to allocate 4GB array for some encodings
   1887    which have 4 byte character_set like UCS4.  */
        /* Both variants read the expanding function's `p', `pend' and
           `translate', advance `p', and execute `return REG_EEND' in that
           function when the pattern is already exhausted (p == pend).  An
           existing definition of PATFETCH takes precedence.  */
   1888 #ifndef PATFETCH
   1889 # ifdef MBS_SUPPORT
   1890 #  define PATFETCH(c)							\
   1891   do {if (p == pend) return REG_EEND;					\
   1892     c = (US_CHAR_TYPE) *p++;						\
   1893     if (translate && (c <= 0xff)) c = (US_CHAR_TYPE) translate[c];	\
   1894   } while (0)
   1895 # else
   1896 #  define PATFETCH(c)							\
   1897   do {if (p == pend) return REG_EEND;					\
   1898     c = (unsigned char) *p++;						\
   1899     if (translate) c = (unsigned char) translate[c];			\
   1900   } while (0)
   1901 # endif /* MBS_SUPPORT */
   1902 #endif
   1903 
   1904 /* Fetch the next character in the uncompiled pattern, with no
   1905    translation.  Like PATFETCH, this returns REG_EEND from the
           expanding function when p == pend.  */
   1906 #define PATFETCH_RAW(c)							\
   1907   do {if (p == pend) return REG_EEND;					\
   1908     c = (US_CHAR_TYPE) *p++; 						\
   1909   } while (0)
   1910 
   1911 /* Go backwards one character in the pattern.  No check is made that
           `p' is still inside the pattern.  */
   1912 #define PATUNFETCH p--
   1913 
   1914 
   1915 /* If `translate' is non-null, return translate[D], else just D.  We
   1916    cast the subscript to translate because some data is declared as
   1917    `char *', to avoid warnings when a string constant is passed.  But
   1918    when we use a character as a subscript we must make it unsigned.  */
   1919 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1920    because it is impossible to allocate 4GB array for some encodings
   1921    which have 4 byte character_set like UCS4.  */
        /* The translated value is cast to `char'.  D appears more than once
           in the expansion, so it should be free of side effects.  An
           existing definition of TRANSLATE takes precedence.  */
   1922 #ifndef TRANSLATE
   1923 # ifdef MBS_SUPPORT
   1924 #  define TRANSLATE(d) \
   1925   ((translate && ((US_CHAR_TYPE) (d)) <= 0xff) \
   1926    ? (char) translate[(unsigned char) (d)] : (d))
   1927 #else
   1928 #  define TRANSLATE(d) \
   1929   (translate ? (char) translate[(unsigned char) (d)] : (d))
   1930 # endif /* MBS_SUPPORT */
   1931 #endif
   1932 
   1933 
   1934 /* Macros for outputting the compiled pattern into `buffer'.  They use
           the expanding function's `b' (the append position) and `bufp'.  */
   1935 
   1936 /* If the buffer isn't allocated when it comes in, use this.  */
   1937 #define INIT_BUF_SIZE  (32 * sizeof(US_CHAR_TYPE))
   1938 
   1939 /* Make sure we have room for at least N more elements in the buffer,
           calling EXTEND_BUFFER until there is.  With MBS_SUPPORT the test is
           done in bytes (N * sizeof (CHAR_TYPE)); otherwise N is added to the
           element offset of `b' directly.  Expands to a bare `while' loop,
           so EXTEND_BUFFER's `return' statements apply to the expanding
           function.  */
   1940 #ifdef MBS_SUPPORT
   1941 # define GET_BUFFER_SPACE(n)						\
   1942     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
   1943             + (n)*sizeof(CHAR_TYPE)) > bufp->allocated)			\
   1944       EXTEND_BUFFER ()
   1945 #else
   1946 # define GET_BUFFER_SPACE(n)						\
   1947     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
   1948       EXTEND_BUFFER ()
   1949 #endif /* MBS_SUPPORT */
   1950 
   1951 /* Make sure we have room for one more element and then store C,
           converted to US_CHAR_TYPE, at `b', advancing `b'.  */
   1952 #define BUF_PUSH(c)							\
   1953   do {									\
   1954     GET_BUFFER_SPACE (1);						\
   1955     *b++ = (US_CHAR_TYPE) (c);						\
   1956   } while (0)
   1957 
   1958 
   1959 /* Ensure we have room for two more elements and then append C1 and C2.  */
   1960 #define BUF_PUSH_2(c1, c2)						\
   1961   do {									\
   1962     GET_BUFFER_SPACE (2);						\
   1963     *b++ = (US_CHAR_TYPE) (c1);					\
   1964     *b++ = (US_CHAR_TYPE) (c2);					\
   1965   } while (0)
   1966 
   1967 
   1968 /* As with BUF_PUSH_2, except for three elements.  */
   1969 #define BUF_PUSH_3(c1, c2, c3)						\
   1970   do {									\
   1971     GET_BUFFER_SPACE (3);						\
   1972     *b++ = (US_CHAR_TYPE) (c1);					\
   1973     *b++ = (US_CHAR_TYPE) (c2);					\
   1974     *b++ = (US_CHAR_TYPE) (c3);					\
   1975   } while (0)
   1976 
   1977 /* Store a jump with opcode OP at LOC to location TO.  We store a
   1978    relative address offset by the (1 + OFFSET_ADDRESS_SIZE) elements the
           jump itself occupies.  The distance is converted to `int' before
           being handed to store_op1.  */
   1979 #define STORE_JUMP(op, loc, to) \
   1980   store_op1 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
   1981 
   1982 /* Likewise, for a two-argument jump.  ARG is passed through unchanged
           as the second operand of store_op2.  */
   1983 #define STORE_JUMP2(op, loc, to, arg) \
   1984   store_op2 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
   1985 
   1986 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
   1987 #define INSERT_JUMP(op, loc, to) \
   1988   insert_op1 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
   1989 
   1990 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
   1991 #define INSERT_JUMP2(op, loc, to, arg) \
   1992   insert_op2 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
   1993 	      arg, b)
   1994 
   1995 
   1996 /* This is not an arbitrary limit: the arguments which represent offsets
   1997    into the pattern are two bytes long.  So if 2^16 bytes turns out to
   1998    be too small, many things would have to change.  */
   1999 /* Any other compiler which, like MSC, has an allocation limit below 2^16
   2000    bytes will have to use an approach similar to what is done below for
   2001    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   2002    reallocating to 0 bytes, which is not going to work too well.
   2003    You have been warned!!  */
   2004 #if defined _MSC_VER  && !defined WIN32
   2005 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   2006    The REALLOC define eliminates a flurry of conversion warnings,
   2007    but is not required. */
   2008 # define MAX_BUF_SIZE  65500L
   2009 # define REALLOC(p,s) realloc ((p), (size_t) (s))
   2010 #else
        /* Everywhere else the limit is exactly 2^16 and REALLOC is a plain
           call to realloc.  */
   2011 # define MAX_BUF_SIZE (1L << 16)
   2012 # define REALLOC(p,s) realloc ((p), (s))
   2013 #endif
   2014 
   2015 /* Double the buffer's allocated size (capped at MAX_BUF_SIZE) via
   2016    realloc and reset the pointers that pointed into the old block
   2017    (`b', `begalt', and, when non-null, `fixup_alt_jump', `laststart' and
   2018    `pending_exact') to point to the correct places in the new one.

           Error exits, both taken with a plain `return' from the expanding
           function:
             REG_ESIZE  -- the buffer cannot grow any further (it is already
                           at MAX_BUF_SIZE; with MBS_SUPPORT, one more
                           element would exceed MAX_BUF_SIZE).
             REG_ESPACE -- the reallocation returned NULL.

           NOTE(review): these returns do not go through FREE_STACK_RETURN,
           and the reallocation result is stored straight into the buffer
           pointer before it is checked, so the old block appears to become
           unreachable on failure -- confirm against the callers.  */
   2019 #if __BOUNDED_POINTERS__
        /* With bounded pointers, the bounds carried by each pointer have to
           be updated too, even when the block did not move.  */
   2020 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
   2021 # define MOVE_BUFFER_POINTER(P) \
   2022   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
   2023 # define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
   2024   else						\
   2025     {						\
   2026       SET_HIGH_BOUND (b);			\
   2027       SET_HIGH_BOUND (begalt);			\
   2028       if (fixup_alt_jump)			\
   2029 	SET_HIGH_BOUND (fixup_alt_jump);	\
   2030       if (laststart)				\
   2031 	SET_HIGH_BOUND (laststart);		\
   2032       if (pending_exact)			\
   2033 	SET_HIGH_BOUND (pending_exact);		\
   2034     }
   2035 #else
        /* Otherwise a moved pointer is just shifted by `incr', and there is
           nothing to do when the block stayed in place.  */
   2036 # define MOVE_BUFFER_POINTER(P) (P) += incr
   2037 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
   2038 #endif
   2039 
   2040 #ifdef MBS_SUPPORT
   2041 # define EXTEND_BUFFER()						\
   2042   do {									\
   2043     US_CHAR_TYPE *old_buffer = COMPILED_BUFFER_VAR;			\
   2044     int wchar_count;							\
   2045     if (bufp->allocated + sizeof(US_CHAR_TYPE) > MAX_BUF_SIZE)		\
   2046       return REG_ESIZE;							\
   2047     bufp->allocated <<= 1;						\
   2048     if (bufp->allocated > MAX_BUF_SIZE)					\
   2049       bufp->allocated = MAX_BUF_SIZE;					\
   2050     /* How many characters the new buffer can have?  */			\
   2051     wchar_count = bufp->allocated / sizeof(US_CHAR_TYPE);		\
   2052     if (wchar_count == 0) wchar_count = 1;				\
   2053     /* Truncate the buffer to CHAR_TYPE align.  */			\
   2054     bufp->allocated = wchar_count * sizeof(US_CHAR_TYPE);		\
   2055     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, US_CHAR_TYPE);		\
   2056     bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
   2057     if (COMPILED_BUFFER_VAR == NULL)					\
   2058       return REG_ESPACE;						\
   2059     /* If the buffer moved, move all the pointers into it.  */		\
   2060     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2061       {									\
   2062 	ptrdiff_t incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2063 	MOVE_BUFFER_POINTER (b);					\
   2064 	MOVE_BUFFER_POINTER (begalt);					\
   2065 	if (fixup_alt_jump)						\
   2066 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2067 	if (laststart)							\
   2068 	  MOVE_BUFFER_POINTER (laststart);				\
   2069 	if (pending_exact)						\
   2070 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2071       }									\
   2072     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2073   } while (0)
   2074 #else
   2075 # define EXTEND_BUFFER()						\
   2076   do {									\
   2077     US_CHAR_TYPE *old_buffer = COMPILED_BUFFER_VAR;			\
   2078     if (bufp->allocated == MAX_BUF_SIZE)				\
   2079       return REG_ESIZE;							\
   2080     bufp->allocated <<= 1;						\
   2081     if (bufp->allocated > MAX_BUF_SIZE)					\
   2082       bufp->allocated = MAX_BUF_SIZE;					\
   2083     bufp->buffer = (US_CHAR_TYPE *) REALLOC (COMPILED_BUFFER_VAR,	\
   2084 						bufp->allocated);	\
   2085     if (COMPILED_BUFFER_VAR == NULL)					\
   2086       return REG_ESPACE;						\
   2087     /* If the buffer moved, move all the pointers into it.  */		\
   2088     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2089       {									\
   2090 	ptrdiff_t incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2091 	MOVE_BUFFER_POINTER (b);					\
   2092 	MOVE_BUFFER_POINTER (begalt);					\
   2093 	if (fixup_alt_jump)						\
   2094 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2095 	if (laststart)							\
   2096 	  MOVE_BUFFER_POINTER (laststart);				\
   2097 	if (pending_exact)						\
   2098 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2099       }									\
   2100     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2101   } while (0)
   2102 #endif /* MBS_SUPPORT */
   2103 
   2104 /* Since we have one byte reserved for the register number argument to
   2105    {start,stop}_memory, the maximum number of groups we can report
   2106    things about is what fits in that byte.  */
   2107 #define MAX_REGNUM 255
   2108 
   2109 /* But patterns can have more than `MAX_REGNUM' registers.  We just
   2110    ignore the excess.  Group numbers are therefore counted in a plain
           `unsigned', which is wider than the one-byte operand.  */
   2111 typedef unsigned regnum_t;
   2112 
   2113 
   2114 /* Macros for the compile stack.  */
   2115 
   2116 /* Since offsets can go either forwards or backwards, this type needs to
   2117    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
   2118 /* int may be not enough when sizeof(int) == 2.  */
   2119 typedef long pattern_offset_t;
   2120 
        /* One compile-stack entry.  Positions are stored as
           pattern_offset_t offsets rather than as pointers.  */
   2121 typedef struct
   2122 {
   2123   pattern_offset_t begalt_offset;
   2124   pattern_offset_t fixup_alt_jump;
   2125   pattern_offset_t inner_group_offset;
   2126   pattern_offset_t laststart_offset;
   2127   regnum_t regnum;
   2128 } compile_stack_elt_t;
   2129 
   2130 
        /* The stack itself: an array of SIZE entries, of which the first
           AVAIL are in use.  */
   2131 typedef struct
   2132 {
   2133   compile_stack_elt_t *stack;
   2134   unsigned size;
   2135   unsigned avail;			/* Offset of next open position.  */
   2136 } compile_stack_type;
   2137 
   2138 
   2139 #define INIT_COMPILE_STACK_SIZE 32
   2140 
        /* These tests and COMPILE_STACK_TOP all operate on a variable named
           `compile_stack' in the expanding scope.  */
   2141 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
   2142 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
   2143 
   2144 /* The next available element, i.e. the slot at index `avail', one past
           the entries currently in use.  */
   2145 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
   2146 
   2147 
   2148 /* Set the bit for character C in a list.  */
   2149 #define SET_LIST_BIT(c)                               \
   2150   (b[((unsigned char) (c)) / BYTEWIDTH]               \
   2151    |= 1 << (((unsigned char) c) % BYTEWIDTH))
   2152 
   2153 
   2154 /* Get the next unsigned number in the uncompiled pattern.

           Characters are fetched with PATFETCH into `c' until the pattern
           ends or a non-digit is read; that non-digit has already been
           consumed and is left in `c'.  A negative NUM on entry is reset to 0
           before the first digit is accumulated, so a NUM that is still
           negative afterwards means no digit was read.  Once NUM exceeds
           RE_DUP_MAX, further digits are consumed but no longer accumulated.
           Uses `p', `pend' and `c' from the expanding scope, and expands to a
           bare brace block.  */
   2155 #define GET_UNSIGNED_NUMBER(num) 					\
   2156   {									\
   2157     while (p != pend)							\
   2158       {									\
   2159 	PATFETCH (c);							\
   2160 	if (! ('0' <= c && c <= '9'))					\
   2161 	  break;							\
   2162 	if (num <= RE_DUP_MAX)						\
   2163 	  {								\
   2164 	    if (num < 0)						\
   2165 	      num = 0;							\
   2166 	    num = num * 10 + c - '0';					\
   2167 	  }								\
   2168       }									\
   2169   }
   2170 
   2171 #if defined _LIBC || WIDE_CHAR_SUPPORT
   2172 /* The GNU C library provides support for user-defined character classes
   2173    and the functions from ISO C amendment 1.  */
   2174 # ifdef CHARCLASS_NAME_MAX
   2175 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
   2176 # else
   2177 /* This shouldn't happen but some implementation might still have this
   2178    problem.  Use a reasonable default value.  */
   2179 #  define CHAR_CLASS_MAX_LENGTH 256
   2180 # endif
   2181 
        /* A class name is valid when the wctype lookup accepts it.  */
   2182 # ifdef _LIBC
   2183 #  define IS_CHAR_CLASS(string) __wctype (string)
   2184 # else
   2185 #  define IS_CHAR_CLASS(string) wctype (string)
   2186 # endif
   2187 #else
   2188 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
   2189 
        /* Without wide-character support, only these twelve fixed class
           names are recognized.  */
   2190 # define IS_CHAR_CLASS(string)						\
   2191    (STREQ (string, "alpha") || STREQ (string, "upper")			\
   2192     || STREQ (string, "lower") || STREQ (string, "digit")		\
   2193     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   2194     || STREQ (string, "space") || STREQ (string, "print")		\
   2195     || STREQ (string, "punct") || STREQ (string, "graph")		\
   2196     || STREQ (string, "cntrl") || STREQ (string, "blank"))
   2197 #endif
   2198 
   2199 #ifndef MATCH_MAY_ALLOCATE
   2201 
   2202 /* If we cannot allocate large objects within re_match_2_internal,
   2203    we make the fail stack and register vectors global.
   2204    The fail stack, we grow to the maximum size when a regexp
   2205    is compiled.
   2206    The register vectors, we adjust in size each time we
   2207    compile a regexp, according to the number of registers it needs.  */
   2208 
   2209 static fail_stack_type fail_stack;
   2210 
   2211 /* Size with which the following vectors are currently allocated.
   2212    That is so we can make them bigger as needed,
   2213    but never make them smaller.  */
   2214 static int regs_allocated_size;
   2215 
        /* The register vectors themselves; regex_grow_registers resizes all
           nine of them together.
           NOTE(review): these use plain `char' where nearby declarations use
           CHAR_TYPE -- confirm this configuration is never combined with
           MBS_SUPPORT.  */
   2216 static const char **     regstart, **     regend;
   2217 static const char ** old_regstart, ** old_regend;
   2218 static const char **best_regstart, **best_regend;
   2219 static register_info_type *reg_info;
   2220 static const char **reg_dummy;
   2221 static register_info_type *reg_info_dummy;
   2222 
   2223 /* Make the register vectors big enough for NUM_REGS registers,
   2224    but don't make them smaller.  */
   2225 
   2226 static
   2227 regex_grow_registers (num_regs)
   2228      int num_regs;
   2229 {
   2230   if (num_regs > regs_allocated_size)
   2231     {
   2232       RETALLOC_IF (regstart,	 num_regs, const char *);
   2233       RETALLOC_IF (regend,	 num_regs, const char *);
   2234       RETALLOC_IF (old_regstart, num_regs, const char *);
   2235       RETALLOC_IF (old_regend,	 num_regs, const char *);
   2236       RETALLOC_IF (best_regstart, num_regs, const char *);
   2237       RETALLOC_IF (best_regend,	 num_regs, const char *);
   2238       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
   2239       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
   2240       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
   2241 
   2242       regs_allocated_size = num_regs;
   2243     }
   2244 }
   2245 
   2246 #endif /* not MATCH_MAY_ALLOCATE */
   2247 
        /* Declared ahead of regex_compile.  Note that the compile stack
           descriptor is passed by value, not by pointer.  */
   2248 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
   2250 						 compile_stack,
   2251 						 regnum_t regnum));
   2252 
   2253 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   2254    Returns one of error codes defined in `regex.h', or zero for success.
   2255 
   2256    Assumes the `allocated' (and perhaps `buffer') and `translate'
   2257    fields are set in BUFP on entry.
   2258 
   2259    If it succeeds, results are put in BUFP (if it returns an error, the
   2260    contents of BUFP are undefined):
   2261      `buffer' is the compiled pattern;
   2262      `syntax' is set to SYNTAX;
   2263      `used' is set to the length of the compiled pattern;
   2264      `fastmap_accurate' is zero;
   2265      `re_nsub' is the number of subexpressions in PATTERN;
   2266      `not_bol' and `not_eol' are zero;
   2267 
   2268    The `fastmap' and `newline_anchor' fields are neither
   2269    examined nor set.  */
   2270 
   2271 /* Return, freeing storage we allocated.  Expands to a `return'
           statement whose comma expression frees `compile_stack.stack' (and,
           with MBS_SUPPORT, also `pattern', `mbs_offset' and `is_binary')
           before yielding VALUE, so it can only be used where those locals
           are in scope.  */
   2272 #ifdef MBS_SUPPORT
   2273 # define FREE_STACK_RETURN(value)		\
   2274   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
   2275 #else
   2276 # define FREE_STACK_RETURN(value)		\
   2277   return (free (compile_stack.stack), value)
   2278 #endif /* MBS_SUPPORT */
   2279 
   2280 static reg_errcode_t
   2281 #ifdef MBS_SUPPORT
   2282 regex_compile (cpattern, csize, syntax, bufp)
   2283      const char *cpattern;
   2284      size_t csize;
   2285 #else
   2286 regex_compile (pattern, size, syntax, bufp)
   2287      const char *pattern;
   2288      size_t size;
   2289 #endif /* MBS_SUPPORT */
   2290      reg_syntax_t syntax;
   2291      struct re_pattern_buffer *bufp;
   2292 {
   2293   /* We fetch characters from PATTERN here.  Even though PATTERN is
   2294      `char *' (i.e., signed), we declare these variables as unsigned, so
   2295      they can be reliably used as array indices.  */
   2296   register US_CHAR_TYPE c, c1;
   2297 
   2298 #ifdef MBS_SUPPORT
   2299   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
   2300   CHAR_TYPE *pattern, *COMPILED_BUFFER_VAR;
   2301   size_t size;
   2302   /* Offset buffer for optimization.  See convert_mbs_to_wcs.  */
   2303   int *mbs_offset = NULL;
   2304   /* Records whether each wchar_t is binary data or not.  */
   2305   char *is_binary = NULL;
   2306   /* A flag whether exactn is handling binary data or not.  */
   2307   char is_exactn_bin = FALSE;
   2308 #endif /* MBS_SUPPORT */
   2309 
   2310   /* A random temporary spot in PATTERN.  */
   2311   const CHAR_TYPE *p1;
   2312 
   2313   /* Points to the end of the buffer, where we should append.  */
   2314   register US_CHAR_TYPE *b;
   2315 
   2316   /* Keeps track of unclosed groups.  */
   2317   compile_stack_type compile_stack;
   2318 
   2319   /* Points to the current (ending) position in the pattern.  */
   2320 #ifdef MBS_SUPPORT
   2321   const CHAR_TYPE *p;
   2322   const CHAR_TYPE *pend;
   2323 #else
   2324   const CHAR_TYPE *p = pattern;
   2325   const CHAR_TYPE *pend = pattern + size;
   2326 #endif /* MBS_SUPPORT */
   2327 
   2328   /* How to translate the characters in the pattern.  */
   2329   RE_TRANSLATE_TYPE translate = bufp->translate;
   2330 
   2331   /* Address of the count-byte of the most recently inserted `exactn'
   2332      command.  This makes it possible to tell if a new exact-match
   2333      character can be added to that command or if the character requires
   2334      a new `exactn' command.  */
   2335   US_CHAR_TYPE *pending_exact = 0;
   2336 
   2337   /* Address of start of the most recently finished expression.
   2338      This tells, e.g., postfix * where to find the start of its
   2339      operand.  Reset at the beginning of groups and alternatives.  */
   2340   US_CHAR_TYPE *laststart = 0;
   2341 
   2342   /* Address of beginning of regexp, or inside of last group.  */
   2343   US_CHAR_TYPE *begalt;
   2344 
   2345   /* Address of the place where a forward jump should go to the end of
   2346      the containing expression.  Each alternative of an `or' -- except the
   2347      last -- ends with a forward jump of this sort.  */
   2348   US_CHAR_TYPE *fixup_alt_jump = 0;
   2349 
   2350   /* Counts open-groups as they are encountered.  Remembered for the
   2351      matching close-group on the compile stack, so the same register
   2352      number is put in the stop_memory as the start_memory.  */
   2353   regnum_t regnum = 0;
   2354 
   2355 #ifdef MBS_SUPPORT
   2356   /* Initialize the wchar_t PATTERN and offset_buffer.  */
   2357   p = pend = pattern = TALLOC(csize + 1, CHAR_TYPE);
   2358   p[csize] = L'\0';	/* sentinel */
   2359   mbs_offset = TALLOC(csize + 1, int);
   2360   is_binary = TALLOC(csize + 1, char);
   2361   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
   2362     {
   2363       if (pattern) free(pattern);
   2364       if (mbs_offset) free(mbs_offset);
   2365       if (is_binary) free(is_binary);
   2366       return REG_ESPACE;
   2367     }
   2368   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
   2369   pend = p + size;
   2370   if (size < 0)
   2371     {
   2372       if (pattern) free(pattern);
   2373       if (mbs_offset) free(mbs_offset);
   2374       if (is_binary) free(is_binary);
   2375       return REG_BADPAT;
   2376     }
   2377 #endif
   2378 
   2379 #ifdef DEBUG
   2380   DEBUG_PRINT1 ("\nCompiling pattern: ");
   2381   if (debug)
   2382     {
   2383       unsigned debug_count;
   2384 
   2385       for (debug_count = 0; debug_count < size; debug_count++)
   2386         PUT_CHAR (pattern[debug_count]);
   2387       putchar ('\n');
   2388     }
   2389 #endif /* DEBUG */
   2390 
   2391   /* Initialize the compile stack.  */
   2392   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
   2393   if (compile_stack.stack == NULL)
   2394     {
   2395 #ifdef MBS_SUPPORT
   2396       if (pattern) free(pattern);
   2397       if (mbs_offset) free(mbs_offset);
   2398       if (is_binary) free(is_binary);
   2399 #endif
   2400       return REG_ESPACE;
   2401     }
   2402 
   2403   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   2404   compile_stack.avail = 0;
   2405 
   2406   /* Initialize the pattern buffer.  */
   2407   bufp->syntax = syntax;
   2408   bufp->fastmap_accurate = 0;
   2409   bufp->not_bol = bufp->not_eol = 0;
   2410 
   2411   /* Set `used' to zero, so that if we return an error, the pattern
   2412      printer (for debugging) will think there's no pattern.  We reset it
   2413      at the end.  */
   2414   bufp->used = 0;
   2415 
   2416   /* Always count groups, whether or not bufp->no_sub is set.  */
   2417   bufp->re_nsub = 0;
   2418 
   2419 #if !defined emacs && !defined SYNTAX_TABLE
   2420   /* Initialize the syntax table.  */
   2421    init_syntax_once ();
   2422 #endif
   2423 
   2424   if (bufp->allocated == 0)
   2425     {
   2426       if (bufp->buffer)
   2427 	{ /* If zero allocated, but buffer is non-null, try to realloc
   2428              enough space.  This loses if buffer's address is bogus, but
   2429              that is the user's responsibility.  */
   2430 #ifdef MBS_SUPPORT
   2431 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
   2432 	     buffer.  */
   2433           free(bufp->buffer);
   2434           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(US_CHAR_TYPE),
   2435 					US_CHAR_TYPE);
   2436 #else
   2437           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, US_CHAR_TYPE);
   2438 #endif /* MBS_SUPPORT */
   2439         }
   2440       else
   2441         { /* Caller did not allocate a buffer.  Do it for them.  */
   2442           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(US_CHAR_TYPE),
   2443 					US_CHAR_TYPE);
   2444         }
   2445 
   2446       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
   2447 #ifdef MBS_SUPPORT
   2448       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
   2449 #endif /* MBS_SUPPORT */
   2450       bufp->allocated = INIT_BUF_SIZE;
   2451     }
   2452 #ifdef MBS_SUPPORT
   2453   else
   2454     COMPILED_BUFFER_VAR = (US_CHAR_TYPE*) bufp->buffer;
   2455 #endif
   2456 
   2457   begalt = b = COMPILED_BUFFER_VAR;
   2458 
   2459   /* Loop through the uncompiled pattern until we're at the end.  */
   2460   while (p != pend)
   2461     {
   2462       PATFETCH (c);
   2463 
   2464       switch (c)
   2465         {
   2466         case '^':
   2467           {
   2468             if (   /* If at start of pattern, it's an operator.  */
   2469                    p == pattern + 1
   2470                    /* If context independent, it's an operator.  */
   2471                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2472                    /* Otherwise, depends on what's come before.  */
   2473                 || at_begline_loc_p (pattern, p, syntax))
   2474               BUF_PUSH (begline);
   2475             else
   2476               goto normal_char;
   2477           }
   2478           break;
   2479 
   2480 
   2481         case '$':
   2482           {
   2483             if (   /* If at end of pattern, it's an operator.  */
   2484                    p == pend
   2485                    /* If context independent, it's an operator.  */
   2486                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2487                    /* Otherwise, depends on what's next.  */
   2488                 || at_endline_loc_p (p, pend, syntax))
   2489                BUF_PUSH (endline);
   2490              else
   2491                goto normal_char;
   2492            }
   2493            break;
   2494 
   2495 
   2496 	case '+':
   2497         case '?':
   2498           if ((syntax & RE_BK_PLUS_QM)
   2499               || (syntax & RE_LIMITED_OPS))
   2500             goto normal_char;
   2501         handle_plus:
   2502         case '*':
   2503           /* If there is no previous pattern... */
   2504           if (!laststart)
   2505             {
   2506               if (syntax & RE_CONTEXT_INVALID_OPS)
   2507                 FREE_STACK_RETURN (REG_BADRPT);
   2508               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
   2509                 goto normal_char;
   2510             }
   2511 
   2512           {
   2513             /* Are we optimizing this jump?  */
   2514             boolean keep_string_p = false;
   2515 
   2516             /* 1 means zero (many) matches is allowed.  */
   2517             char zero_times_ok = 0, many_times_ok = 0;
   2518 
   2519             /* If there is a sequence of repetition chars, collapse it
   2520                down to just one (the right one).  We can't combine
   2521                interval operators with these because of, e.g., `a{2}*',
   2522                which should only match an even number of `a's.  */
   2523 
   2524             for (;;)
   2525               {
   2526                 zero_times_ok |= c != '+';
   2527                 many_times_ok |= c != '?';
   2528 
   2529                 if (p == pend)
   2530                   break;
   2531 
   2532                 PATFETCH (c);
   2533 
   2534                 if (c == '*'
   2535                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
   2536                   ;
   2537 
   2538                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
   2539                   {
   2540                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2541 
   2542                     PATFETCH (c1);
   2543                     if (!(c1 == '+' || c1 == '?'))
   2544                       {
   2545                         PATUNFETCH;
   2546                         PATUNFETCH;
   2547                         break;
   2548                       }
   2549 
   2550                     c = c1;
   2551                   }
   2552                 else
   2553                   {
   2554                     PATUNFETCH;
   2555                     break;
   2556                   }
   2557 
   2558                 /* If we get here, we found another repeat character.  */
   2559                }
   2560 
   2561             /* Star, etc. applied to an empty pattern is equivalent
   2562                to an empty pattern.  */
   2563             if (!laststart)
   2564               break;
   2565 
   2566             /* Now we know whether or not zero matches is allowed
   2567                and also whether or not two or more matches is allowed.  */
   2568             if (many_times_ok)
   2569               { /* More than one repetition is allowed, so put in at the
   2570                    end a backward relative jump from `b' to before the next
   2571                    jump we're going to put in below (which jumps from
   2572                    laststart to after this jump).
   2573 
   2574                    But if we are at the `*' in the exact sequence `.*\n',
   2575                    insert an unconditional jump backwards to the .,
   2576                    instead of the beginning of the loop.  This way we only
   2577                    push a failure point once, instead of every time
   2578                    through the loop.  */
   2579                 assert (p - 1 > pattern);
   2580 
   2581                 /* Allocate the space for the jump.  */
   2582                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2583 
   2584                 /* We know we are not at the first character of the pattern,
   2585                    because laststart was nonzero.  And we've already
   2586                    incremented `p', by the way, to be the character after
   2587                    the `*'.  Do we have to do something analogous here
   2588                    for null bytes, because of RE_DOT_NOT_NULL?  */
   2589                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
   2590 		    && zero_times_ok
   2591                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
   2592                     && !(syntax & RE_DOT_NEWLINE))
   2593                   { /* We have .*\n.  */
   2594                     STORE_JUMP (jump, b, laststart);
   2595                     keep_string_p = true;
   2596                   }
   2597                 else
   2598                   /* Anything else.  */
   2599                   STORE_JUMP (maybe_pop_jump, b, laststart -
   2600 			      (1 + OFFSET_ADDRESS_SIZE));
   2601 
   2602                 /* We've added more stuff to the buffer.  */
   2603                 b += 1 + OFFSET_ADDRESS_SIZE;
   2604               }
   2605 
   2606             /* On failure, jump from laststart to b + 3, which will be the
   2607                end of the buffer after this jump is inserted.  */
   2608 	    /* ifdef MBS_SUPPORT, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
   2609 	       'b + 3'.  */
   2610             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2611             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
   2612                                        : on_failure_jump,
   2613                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
   2614             pending_exact = 0;
   2615             b += 1 + OFFSET_ADDRESS_SIZE;
   2616 
   2617             if (!zero_times_ok)
   2618               {
   2619                 /* At least one repetition is required, so insert a
   2620                    `dummy_failure_jump' before the initial
   2621                    `on_failure_jump' instruction of the loop. This
   2622                    effects a skip over that instruction the first time
   2623                    we hit that loop.  */
   2624                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2625                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
   2626 			     2 + 2 * OFFSET_ADDRESS_SIZE);
   2627                 b += 1 + OFFSET_ADDRESS_SIZE;
   2628               }
   2629             }
   2630 	  break;
   2631 
   2632 
   2633 	case '.':
   2634           laststart = b;
   2635           BUF_PUSH (anychar);
   2636           break;
   2637 
   2638 
   2639         case '[':
   2640           {
   2641             boolean had_char_class = false;
   2642 #ifdef MBS_SUPPORT
   2643 	    CHAR_TYPE range_start = 0xffffffff;
   2644 #else
   2645 	    unsigned int range_start = 0xffffffff;
   2646 #endif
   2647             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2648 
   2649 #ifdef MBS_SUPPORT
   2650 	    /* We assume a charset(_not) structure as a wchar_t array.
   2651 	       charset[0] = (re_opcode_t) charset(_not)
   2652                charset[1] = l (= length of char_classes)
   2653                charset[2] = m (= length of collating_symbols)
   2654                charset[3] = n (= length of equivalence_classes)
   2655 	       charset[4] = o (= length of char_ranges)
   2656 	       charset[5] = p (= length of chars)
   2657 
   2658                charset[6] = char_class (wctype_t)
   2659                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
   2660                          ...
   2661                charset[l+5]  = char_class (wctype_t)
   2662 
   2663                charset[l+6]  = collating_symbol (wchar_t)
   2664                             ...
   2665                charset[l+m+5]  = collating_symbol (wchar_t)
   2666 					ifdef _LIBC we use the index in
   2667 					_NL_COLLATE_SYMB_EXTRAMB instead of
   2668 					wchar_t string.
   2669 
   2670                charset[l+m+6]  = equivalence_classes (wchar_t)
   2671                               ...
   2672                charset[l+m+n+5]  = equivalence_classes (wchar_t)
   2673 					ifdef _LIBC we use the index in
   2674 					_NL_COLLATE_WEIGHT instead of
   2675 					wchar_t string.
   2676 
   2677 	       charset[l+m+n+6] = range_start
   2678 	       charset[l+m+n+7] = range_end
   2679 	                       ...
   2680 	       charset[l+m+n+2o+4] = range_start
   2681 	       charset[l+m+n+2o+5] = range_end
   2682 					ifdef _LIBC we use the value looked up
   2683 					in _NL_COLLATE_COLLSEQ instead of
   2684 					wchar_t character.
   2685 
   2686 	       charset[l+m+n+2o+6] = char
   2687 	                          ...
   2688 	       charset[l+m+n+2o+p+5] = char
   2689 
   2690 	     */
   2691 
   2692 	    /* We need at least 6 spaces: the opcode, the length of
   2693                char_classes, the length of collating_symbols, the length of
   2694                equivalence_classes, the length of char_ranges, the length of
   2695                chars.  */
   2696 	    GET_BUFFER_SPACE (6);
   2697 
   2698 	    /* Save b as laststart.  We use laststart as the pointer
   2699 	       to the first element of the charset here.
   2700 	       In other words, laststart[i] indicates charset[i].  */
   2701             laststart = b;
   2702 
   2703             /* We test `*p == '^' twice, instead of using an if
   2704                statement, so we only need one BUF_PUSH.  */
   2705             BUF_PUSH (*p == '^' ? charset_not : charset);
   2706             if (*p == '^')
   2707               p++;
   2708 
   2709             /* Push the length of char_classes, the length of
   2710                collating_symbols, the length of equivalence_classes, the
   2711                length of char_ranges and the length of chars.  */
   2712             BUF_PUSH_3 (0, 0, 0);
   2713             BUF_PUSH_2 (0, 0);
   2714 
   2715             /* Remember the first position in the bracket expression.  */
   2716             p1 = p;
   2717 
   2718             /* charset_not matches newline according to a syntax bit.  */
   2719             if ((re_opcode_t) b[-6] == charset_not
   2720                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   2721 	      {
   2722 		BUF_PUSH('\n');
   2723 		laststart[5]++; /* Update the length of characters  */
   2724 	      }
   2725 
   2726             /* Read in characters and ranges, setting map bits.  */
   2727             for (;;)
   2728               {
   2729                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2730 
   2731                 PATFETCH (c);
   2732 
   2733                 /* \ might escape characters inside [...] and [^...].  */
   2734                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   2735                   {
   2736                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2737 
   2738                     PATFETCH (c1);
   2739 		    BUF_PUSH(c1);
   2740 		    laststart[5]++; /* Update the length of chars  */
   2741 		    range_start = c1;
   2742                     continue;
   2743                   }
   2744 
   2745                 /* Could be the end of the bracket expression.  If it's
   2746                    not (i.e., when the bracket expression is `[]' so
   2747                    far), the ']' character bit gets set way below.  */
   2748                 if (c == ']' && p != p1 + 1)
   2749                   break;
   2750 
   2751                 /* Look ahead to see if it's a range when the last thing
   2752                    was a character class.  */
   2753                 if (had_char_class && c == '-' && *p != ']')
   2754                   FREE_STACK_RETURN (REG_ERANGE);
   2755 
   2756                 /* Look ahead to see if it's a range when the last thing
   2757                    was a character: if this is a hyphen not at the
   2758                    beginning or the end of a list, then it's the range
   2759                    operator.  */
   2760                 if (c == '-'
   2761                     && !(p - 2 >= pattern && p[-2] == '[')
   2762                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   2763                     && *p != ']')
   2764                   {
   2765                     reg_errcode_t ret;
   2766 		    /* Allocate the space for range_start and range_end.  */
   2767 		    GET_BUFFER_SPACE (2);
   2768 		    /* Update the pointer to indicate end of buffer.  */
   2769                     b += 2;
   2770                     ret = compile_range (range_start, &p, pend, translate,
   2771                                          syntax, b, laststart);
   2772                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2773                     range_start = 0xffffffff;
   2774                   }
   2775                 else if (p[0] == '-' && p[1] != ']')
   2776                   { /* This handles ranges made up of characters only.  */
   2777                     reg_errcode_t ret;
   2778 
   2779 		    /* Move past the `-'.  */
   2780                     PATFETCH (c1);
   2781 		    /* Allocate the space for range_start and range_end.  */
   2782 		    GET_BUFFER_SPACE (2);
   2783 		    /* Update the pointer to indicate end of buffer.  */
   2784                     b += 2;
   2785                     ret = compile_range (c, &p, pend, translate, syntax, b,
   2786                                          laststart);
   2787                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2788 		    range_start = 0xffffffff;
   2789                   }
   2790 
   2791                 /* See if we're at the beginning of a possible character
   2792                    class.  */
   2793                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   2794                   { /* Leave room for the null.  */
   2795                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   2796 
   2797                     PATFETCH (c);
   2798                     c1 = 0;
   2799 
   2800                     /* If pattern is `[[:'.  */
   2801                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2802 
   2803                     for (;;)
   2804                       {
   2805                         PATFETCH (c);
   2806                         if ((c == ':' && *p == ']') || p == pend)
   2807                           break;
   2808 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   2809 			  str[c1++] = c;
   2810 			else
   2811 			  /* This is in any case an invalid class name.  */
   2812 			  str[0] = '\0';
   2813                       }
   2814                     str[c1] = '\0';
   2815 
   2816                     /* If isn't a word bracketed by `[:' and `:]':
   2817                        undo the ending character, the letters, and leave
   2818                        the leading `:' and `[' (but store them as character).  */
   2819                     if (c == ':' && *p == ']')
   2820                       {
   2821 			wctype_t wt;
   2822 			uintptr_t alignedp;
   2823 
   2824 			/* Query the character class as wctype_t.  */
   2825 			wt = IS_CHAR_CLASS (str);
   2826 			if (wt == 0)
   2827 			  FREE_STACK_RETURN (REG_ECTYPE);
   2828 
   2829                         /* Throw away the ] at the end of the character
   2830                            class.  */
   2831                         PATFETCH (c);
   2832 
   2833                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2834 
   2835 			/* Allocate the space for character class.  */
   2836                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
   2837 			/* Update the pointer to indicate end of buffer.  */
   2838                         b += CHAR_CLASS_SIZE;
   2839 			/* Move the data that follows the character classes
   2840 			    so that it is not overwritten.  */
   2841                         insert_space(CHAR_CLASS_SIZE,
   2842 				     laststart + 6 + laststart[1],
   2843 				     b - 1);
   2844 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
   2845 				    + __alignof__(wctype_t) - 1)
   2846 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   2847 			/* Store the character class.  */
   2848                         *((wctype_t*)alignedp) = wt;
   2849                         /* Update length of char_classes */
   2850                         laststart[1] += CHAR_CLASS_SIZE;
   2851 
   2852                         had_char_class = true;
   2853                       }
   2854                     else
   2855                       {
   2856                         c1++;
   2857                         while (c1--)
   2858                           PATUNFETCH;
   2859                         BUF_PUSH ('[');
   2860                         BUF_PUSH (':');
   2861                         laststart[5] += 2; /* Update the length of characters  */
   2862 			range_start = ':';
   2863                         had_char_class = false;
   2864                       }
   2865                   }
   2866                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
   2867 							  || *p == '.'))
   2868 		  {
   2869 		    CHAR_TYPE str[128];	/* Should be large enough.  */
   2870 		    CHAR_TYPE delim = *p; /* '=' or '.'  */
   2871 # ifdef _LIBC
   2872 		    uint32_t nrules =
   2873 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   2874 # endif
   2875 		    PATFETCH (c);
   2876 		    c1 = 0;
   2877 
   2878 		    /* If pattern is `[[=' or '[[.'.  */
   2879 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2880 
   2881 		    for (;;)
   2882 		      {
   2883 			PATFETCH (c);
   2884 			if ((c == delim && *p == ']') || p == pend)
   2885 			  break;
   2886 			if (c1 < sizeof (str) - 1)
   2887 			  str[c1++] = c;
   2888 			else
   2889 			  /* This is in any case an invalid class name.  */
   2890 			  str[0] = '\0';
   2891                       }
   2892 		    str[c1] = '\0';
   2893 
   2894 		    if (c == delim && *p == ']' && str[0] != '\0')
   2895 		      {
   2896                         unsigned int i, offset;
   2897 			/* If we have no collation data we use the default
   2898 			   collation in which each character is in a class
   2899 			   by itself.  It also means that ASCII is the
   2900 			   character set and therefore we cannot have character
   2901 			   with more than one byte in the multibyte
   2902 			   representation.  */
   2903 
   2904                         /* If not defined _LIBC, we push the name and
   2905 			   `\0' for the sake of matching performance.  */
   2906 			int datasize = c1 + 1;
   2907 
   2908 # ifdef _LIBC
   2909 			int32_t idx = 0;
   2910 			if (nrules == 0)
   2911 # endif
   2912 			  {
   2913 			    if (c1 != 1)
   2914 			      FREE_STACK_RETURN (REG_ECOLLATE);
   2915 			  }
   2916 # ifdef _LIBC
   2917 			else
   2918 			  {
   2919 			    const int32_t *table;
   2920 			    const int32_t *weights;
   2921 			    const int32_t *extra;
   2922 			    const int32_t *indirect;
   2923 			    wint_t *cp;
   2924 
   2925 			    /* This #include defines a local function!  */
   2926 #  include <locale/weightwc.h>
   2927 
   2928 			    if(delim == '=')
   2929 			      {
   2930 				/* We push the index for equivalence class.  */
   2931 				cp = (wint_t*)str;
   2932 
   2933 				table = (const int32_t *)
   2934 				  _NL_CURRENT (LC_COLLATE,
   2935 					       _NL_COLLATE_TABLEWC);
   2936 				weights = (const int32_t *)
   2937 				  _NL_CURRENT (LC_COLLATE,
   2938 					       _NL_COLLATE_WEIGHTWC);
   2939 				extra = (const int32_t *)
   2940 				  _NL_CURRENT (LC_COLLATE,
   2941 					       _NL_COLLATE_EXTRAWC);
   2942 				indirect = (const int32_t *)
   2943 				  _NL_CURRENT (LC_COLLATE,
   2944 					       _NL_COLLATE_INDIRECTWC);
   2945 
   2946 				idx = findidx ((const wint_t**)&cp);
   2947 				if (idx == 0 || cp < (wint_t*) str + c1)
   2948 				  /* This is no valid character.  */
   2949 				  FREE_STACK_RETURN (REG_ECOLLATE);
   2950 
   2951 				str[0] = (wchar_t)idx;
   2952 			      }
   2953 			    else /* delim == '.' */
   2954 			      {
   2955 				/* We push collation sequence value
   2956 				   for collating symbol.  */
   2957 				int32_t table_size;
   2958 				const int32_t *symb_table;
   2959 				const unsigned char *extra;
   2960 				int32_t idx;
   2961 				int32_t elem;
   2962 				int32_t second;
   2963 				int32_t hash;
   2964 				char char_str[c1];
   2965 
   2966 				/* We have to convert the name to a single-byte
   2967 				   string.  This is possible since the names
   2968 				   consist of ASCII characters and the internal
   2969 				   representation is UCS4.  */
   2970 				for (i = 0; i < c1; ++i)
   2971 				  char_str[i] = str[i];
   2972 
   2973 				table_size =
   2974 				  _NL_CURRENT_WORD (LC_COLLATE,
   2975 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
   2976 				symb_table = (const int32_t *)
   2977 				  _NL_CURRENT (LC_COLLATE,
   2978 					       _NL_COLLATE_SYMB_TABLEMB);
   2979 				extra = (const unsigned char *)
   2980 				  _NL_CURRENT (LC_COLLATE,
   2981 					       _NL_COLLATE_SYMB_EXTRAMB);
   2982 
   2983 				/* Locate the character in the hashing table.  */
   2984 				hash = elem_hash (char_str, c1);
   2985 
   2986 				idx = 0;
   2987 				elem = hash % table_size;
   2988 				second = hash % (table_size - 2);
   2989 				while (symb_table[2 * elem] != 0)
   2990 				  {
   2991 				    /* First compare the hashing value.  */
   2992 				    if (symb_table[2 * elem] == hash
   2993 					&& c1 == extra[symb_table[2 * elem + 1]]
   2994 					&& memcmp (str,
   2995 						   &extra[symb_table[2 * elem + 1]
   2996 							 + 1], c1) == 0)
   2997 				      {
   2998 					/* Yep, this is the entry.  */
   2999 					idx = symb_table[2 * elem + 1];
   3000 					idx += 1 + extra[idx];
   3001 					break;
   3002 				      }
   3003 
   3004 				    /* Next entry.  */
   3005 				    elem += second;
   3006 				  }
   3007 
   3008 				if (symb_table[2 * elem] != 0)
   3009 				  {
   3010 				    /* Compute the index of the byte sequence
   3011 				       in the table.  */
   3012 				    idx += 1 + extra[idx];
   3013 				    /* Adjust for the alignment.  */
   3014 				    idx = (idx + 3) & ~4;
   3015 
   3016 				    str[0] = (wchar_t) idx + 4;
   3017 				  }
   3018 				else if (symb_table[2 * elem] == 0 && c1 == 1)
   3019 				  {
   3020 				    /* No valid character.  Match it as a
   3021 				       single byte character.  */
   3022 				    had_char_class = false;
   3023 				    BUF_PUSH(str[0]);
   3024 				    /* Update the length of characters  */
   3025 				    laststart[5]++;
   3026 				    range_start = str[0];
   3027 
   3028 				    /* Throw away the ] at the end of the
   3029 				       collating symbol.  */
   3030 				    PATFETCH (c);
   3031 				    /* Go on to the next bracket element.  */
   3032 				    continue;
   3033 				  }
   3034 				else
   3035 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3036 			      }
   3037 			    datasize = 1;
   3038 			  }
   3039 # endif
   3040                         /* Throw away the ] at the end of the equivalence
   3041                            class (or collating symbol).  */
   3042                         PATFETCH (c);
   3043 
   3044 			/* Allocate the space for the equivalence class
   3045 			   (or collating symbol) (and '\0' if needed).  */
   3046                         GET_BUFFER_SPACE(datasize);
   3047 			/* Update the pointer to indicate end of buffer.  */
   3048                         b += datasize;
   3049 
   3050 			if (delim == '=')
   3051 			  { /* equivalence class  */
   3052 			    /* Calculate the offset of char_ranges,
   3053 			       which is next to equivalence_classes.  */
   3054 			    offset = laststart[1] + laststart[2]
   3055 			      + laststart[3] +6;
   3056 			    /* Insert space.  */
   3057 			    insert_space(datasize, laststart + offset, b - 1);
   3058 
   3059 			    /* Write the equivalence_class and \0.  */
   3060 			    for (i = 0 ; i < datasize ; i++)
   3061 			      laststart[offset + i] = str[i];
   3062 
   3063 			    /* Update the length of equivalence_classes.  */
   3064 			    laststart[3] += datasize;
   3065 			    had_char_class = true;
   3066 			  }
   3067 			else /* delim == '.' */
   3068 			  { /* collating symbol  */
   3069 			    /* Calculate the offset of the equivalence_classes,
   3070 			       which is next to collating_symbols.  */
   3071 			    offset = laststart[1] + laststart[2] + 6;
   3072 			    /* Insert space and write the collating_symbol
   3073 			       and \0.  */
   3074 			    insert_space(datasize, laststart + offset, b-1);
   3075 			    for (i = 0 ; i < datasize ; i++)
   3076 			      laststart[offset + i] = str[i];
   3077 
   3078 			    /* In re_match_2_internal if range_start < -1, we
   3079 			       assume -range_start is the offset of the
   3080 			       collating symbol which is specified as
   3081 			       the character of the range start.  So we assign
   3082 			       -(laststart[1] + laststart[2] + 6) to
   3083 			       range_start.  */
   3084 			    range_start = -(laststart[1] + laststart[2] + 6);
   3085 			    /* Update the length of collating_symbol.  */
   3086 			    laststart[2] += datasize;
   3087 			    had_char_class = false;
   3088 			  }
   3089 		      }
   3090                     else
   3091                       {
   3092                         c1++;
   3093                         while (c1--)
   3094                           PATUNFETCH;
   3095                         BUF_PUSH ('[');
   3096                         BUF_PUSH (delim);
   3097                         laststart[5] += 2; /* Update the length of characters  */
   3098 			range_start = delim;
   3099                         had_char_class = false;
   3100                       }
   3101 		  }
   3102                 else
   3103                   {
   3104                     had_char_class = false;
   3105 		    BUF_PUSH(c);
   3106 		    laststart[5]++;  /* Update the length of characters  */
   3107 		    range_start = c;
   3108                   }
   3109 	      }
   3110 
   3111 #else /* not MBS_SUPPORT */
   3112             /* Ensure that we have enough space to push a charset: the
   3113                opcode, the length count, and the bitset; 34 bytes in all.  */
   3114 	    GET_BUFFER_SPACE (34);
   3115 
   3116             laststart = b;
   3117 
   3118             /* We test `*p == '^' twice, instead of using an if
   3119                statement, so we only need one BUF_PUSH.  */
   3120             BUF_PUSH (*p == '^' ? charset_not : charset);
   3121             if (*p == '^')
   3122               p++;
   3123 
   3124             /* Remember the first position in the bracket expression.  */
   3125             p1 = p;
   3126 
   3127             /* Push the number of bytes in the bitmap.  */
   3128             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
   3129 
   3130             /* Clear the whole map.  */
   3131             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
   3132 
   3133             /* charset_not matches newline according to a syntax bit.  */
   3134             if ((re_opcode_t) b[-2] == charset_not
   3135                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   3136               SET_LIST_BIT ('\n');
   3137 
   3138             /* Read in characters and ranges, setting map bits.  */
   3139             for (;;)
   3140               {
   3141                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3142 
   3143                 PATFETCH (c);
   3144 
   3145                 /* \ might escape characters inside [...] and [^...].  */
   3146                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   3147                   {
   3148                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3149 
   3150                     PATFETCH (c1);
   3151                     SET_LIST_BIT (c1);
   3152 		    range_start = c1;
   3153                     continue;
   3154                   }
   3155 
   3156                 /* Could be the end of the bracket expression.  If it's
   3157                    not (i.e., when the bracket expression is `[]' so
   3158                    far), the ']' character bit gets set way below.  */
   3159                 if (c == ']' && p != p1 + 1)
   3160                   break;
   3161 
   3162                 /* Look ahead to see if it's a range when the last thing
   3163                    was a character class.  */
   3164                 if (had_char_class && c == '-' && *p != ']')
   3165                   FREE_STACK_RETURN (REG_ERANGE);
   3166 
   3167                 /* Look ahead to see if it's a range when the last thing
   3168                    was a character: if this is a hyphen not at the
   3169                    beginning or the end of a list, then it's the range
   3170                    operator.  */
   3171                 if (c == '-'
   3172                     && !(p - 2 >= pattern && p[-2] == '[')
   3173                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   3174                     && *p != ']')
   3175                   {
   3176                     reg_errcode_t ret
   3177                       = compile_range (range_start, &p, pend, translate,
   3178 				       syntax, b);
   3179                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3180 		    range_start = 0xffffffff;
   3181                   }
   3182 
   3183                 else if (p[0] == '-' && p[1] != ']')
   3184                   { /* This handles ranges made up of characters only.  */
   3185                     reg_errcode_t ret;
   3186 
   3187 		    /* Move past the `-'.  */
   3188                     PATFETCH (c1);
   3189 
   3190                     ret = compile_range (c, &p, pend, translate, syntax, b);
   3191                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3192 		    range_start = 0xffffffff;
   3193                   }
   3194 
   3195                 /* See if we're at the beginning of a possible character
   3196                    class.  */
   3197 
   3198                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   3199                   { /* Leave room for the null.  */
   3200                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   3201 
   3202                     PATFETCH (c);
   3203                     c1 = 0;
   3204 
   3205                     /* If pattern is `[[:'.  */
   3206                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3207 
   3208                     for (;;)
   3209                       {
   3210                         PATFETCH (c);
   3211                         if ((c == ':' && *p == ']') || p == pend)
   3212                           break;
   3213 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   3214 			  str[c1++] = c;
   3215 			else
   3216 			  /* This is in any case an invalid class name.  */
   3217 			  str[0] = '\0';
   3218                       }
   3219                     str[c1] = '\0';
   3220 
   3221                     /* If it isn't a word bracketed by `[:' and `:]':
   3222                        undo the ending character, the letters, and leave
   3223                        the leading `:' and `[' (but set bits for them).  */
   3224                     if (c == ':' && *p == ']')
   3225                       {
   3226 # if defined _LIBC || WIDE_CHAR_SUPPORT
   3227                         boolean is_lower = STREQ (str, "lower");
   3228                         boolean is_upper = STREQ (str, "upper");
   3229 			wctype_t wt;
   3230                         int ch;
   3231 
   3232 			wt = IS_CHAR_CLASS (str);
   3233 			if (wt == 0)
   3234 			  FREE_STACK_RETURN (REG_ECTYPE);
   3235 
   3236                         /* Throw away the ] at the end of the character
   3237                            class.  */
   3238                         PATFETCH (c);
   3239 
   3240                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3241 
   3242                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
   3243 			  {
   3244 #  ifdef _LIBC
   3245 			    if (__iswctype (__btowc (ch), wt))
   3246 			      SET_LIST_BIT (ch);
   3247 #  else
   3248 			    if (iswctype (btowc (ch), wt))
   3249 			      SET_LIST_BIT (ch);
   3250 #  endif
   3251 
   3252 			    if (translate && (is_upper || is_lower)
   3253 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3254 			      SET_LIST_BIT (ch);
   3255 			  }
   3256 
   3257                         had_char_class = true;
   3258 # else
   3259                         int ch;
   3260                         boolean is_alnum = STREQ (str, "alnum");
   3261                         boolean is_alpha = STREQ (str, "alpha");
   3262                         boolean is_blank = STREQ (str, "blank");
   3263                         boolean is_cntrl = STREQ (str, "cntrl");
   3264                         boolean is_digit = STREQ (str, "digit");
   3265                         boolean is_graph = STREQ (str, "graph");
   3266                         boolean is_lower = STREQ (str, "lower");
   3267                         boolean is_print = STREQ (str, "print");
   3268                         boolean is_punct = STREQ (str, "punct");
   3269                         boolean is_space = STREQ (str, "space");
   3270                         boolean is_upper = STREQ (str, "upper");
   3271                         boolean is_xdigit = STREQ (str, "xdigit");
   3272 
   3273                         if (!IS_CHAR_CLASS (str))
   3274 			  FREE_STACK_RETURN (REG_ECTYPE);
   3275 
   3276                         /* Throw away the ] at the end of the character
   3277                            class.  */
   3278                         PATFETCH (c);
   3279 
   3280                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3281 
   3282                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
   3283                           {
   3284 			    /* This was split into 3 if's to
   3285 			       avoid an arbitrary limit in some compiler.  */
   3286                             if (   (is_alnum  && ISALNUM (ch))
   3287                                 || (is_alpha  && ISALPHA (ch))
   3288                                 || (is_blank  && ISBLANK (ch))
   3289                                 || (is_cntrl  && ISCNTRL (ch)))
   3290 			      SET_LIST_BIT (ch);
   3291 			    if (   (is_digit  && ISDIGIT (ch))
   3292                                 || (is_graph  && ISGRAPH (ch))
   3293                                 || (is_lower  && ISLOWER (ch))
   3294                                 || (is_print  && ISPRINT (ch)))
   3295 			      SET_LIST_BIT (ch);
   3296 			    if (   (is_punct  && ISPUNCT (ch))
   3297                                 || (is_space  && ISSPACE (ch))
   3298                                 || (is_upper  && ISUPPER (ch))
   3299                                 || (is_xdigit && ISXDIGIT (ch)))
   3300 			      SET_LIST_BIT (ch);
   3301 			    if (   translate && (is_upper || is_lower)
   3302 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3303 			      SET_LIST_BIT (ch);
   3304                           }
   3305                         had_char_class = true;
   3306 # endif	/* libc || wctype.h */
   3307                       }
   3308                     else
   3309                       {
   3310                         c1++;
   3311                         while (c1--)
   3312                           PATUNFETCH;
   3313                         SET_LIST_BIT ('[');
   3314                         SET_LIST_BIT (':');
   3315 			range_start = ':';
   3316                         had_char_class = false;
   3317                       }
   3318                   }
   3319                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
   3320 		  {
   3321 		    unsigned char str[MB_LEN_MAX + 1];
   3322 # ifdef _LIBC
   3323 		    uint32_t nrules =
   3324 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3325 # endif
   3326 
   3327 		    PATFETCH (c);
   3328 		    c1 = 0;
   3329 
   3330 		    /* If pattern is `[[='.  */
   3331 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3332 
   3333 		    for (;;)
   3334 		      {
   3335 			PATFETCH (c);
   3336 			if ((c == '=' && *p == ']') || p == pend)
   3337 			  break;
   3338 			if (c1 < MB_LEN_MAX)
   3339 			  str[c1++] = c;
   3340 			else
   3341 			  /* This is in any case an invalid class name.  */
   3342 			  str[0] = '\0';
   3343                       }
   3344 		    str[c1] = '\0';
   3345 
   3346 		    if (c == '=' && *p == ']' && str[0] != '\0')
   3347 		      {
   3348 			/* If we have no collation data we use the default
   3349 			   collation in which each character is in a class
   3350 			   by itself.  It also means that ASCII is the
   3351 			   character set and therefore we cannot have character
   3352 			   with more than one byte in the multibyte
   3353 			   representation.  */
   3354 # ifdef _LIBC
   3355 			if (nrules == 0)
   3356 # endif
   3357 			  {
   3358 			    if (c1 != 1)
   3359 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3360 
   3361 			    /* Throw away the ] at the end of the equivalence
   3362 			       class.  */
   3363 			    PATFETCH (c);
   3364 
   3365 			    /* Set the bit for the character.  */
   3366 			    SET_LIST_BIT (str[0]);
   3367 			  }
   3368 # ifdef _LIBC
   3369 			else
   3370 			  {
   3371 			    /* Try to match the byte sequence in `str' against
   3372 			       those known to the collate implementation.
   3373 			       First find out whether the bytes in `str' are
   3374 			       actually from exactly one character.  */
   3375 			    const int32_t *table;
   3376 			    const unsigned char *weights;
   3377 			    const unsigned char *extra;
   3378 			    const int32_t *indirect;
   3379 			    int32_t idx;
   3380 			    const unsigned char *cp = str;
   3381 			    int ch;
   3382 
   3383 			    /* This #include defines a local function!  */
   3384 #  include <locale/weight.h>
   3385 
   3386 			    table = (const int32_t *)
   3387 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3388 			    weights = (const unsigned char *)
   3389 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3390 			    extra = (const unsigned char *)
   3391 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3392 			    indirect = (const int32_t *)
   3393 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3394 
   3395 			    idx = findidx (&cp);
   3396 			    if (idx == 0 || cp < str + c1)
   3397 			      /* This is no valid character.  */
   3398 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3399 
   3400 			    /* Throw away the ] at the end of the equivalence
   3401 			       class.  */
   3402 			    PATFETCH (c);
   3403 
   3404 			    /* Now we have to go through the whole table
   3405 			       and find all characters which have the same
   3406 			       first level weight.
   3407 
   3408 			       XXX Note that this is not entirely correct.
   3409 			       we would have to match multibyte sequences
   3410 			       but this is not possible with the current
   3411 			       implementation.  */
   3412 			    for (ch = 1; ch < 256; ++ch)
   3413 			      /* XXX This test would have to be changed if we
   3414 				 would allow matching multibyte sequences.  */
   3415 			      if (table[ch] > 0)
   3416 				{
   3417 				  int32_t idx2 = table[ch];
   3418 				  size_t len = weights[idx2];
   3419 
   3420 				  /* Test whether the lengths match.  */
   3421 				  if (weights[idx] == len)
   3422 				    {
   3423 				      /* They do.  Now compare the bytes of
   3424 					 the weight.  */
   3425 				      size_t cnt = 0;
   3426 
   3427 				      while (cnt < len
   3428 					     && (weights[idx + 1 + cnt]
   3429 						 == weights[idx2 + 1 + cnt]))
   3430 					++cnt;
   3431 
   3432 				      if (cnt == len)
   3433 					/* They match.  Mark the character as
   3434 					   acceptable.  */
   3435 					SET_LIST_BIT (ch);
   3436 				    }
   3437 				}
   3438 			  }
   3439 # endif
   3440 			had_char_class = true;
   3441 		      }
   3442                     else
   3443                       {
   3444                         c1++;
   3445                         while (c1--)
   3446                           PATUNFETCH;
   3447                         SET_LIST_BIT ('[');
   3448                         SET_LIST_BIT ('=');
   3449 			range_start = '=';
   3450                         had_char_class = false;
   3451                       }
   3452 		  }
   3453                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
   3454 		  {
   3455 		    unsigned char str[128];	/* Should be large enough.  */
   3456 # ifdef _LIBC
   3457 		    uint32_t nrules =
   3458 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3459 # endif
   3460 
   3461 		    PATFETCH (c);
   3462 		    c1 = 0;
   3463 
   3464 		    /* If pattern is `[[.'.  */
   3465 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3466 
   3467 		    for (;;)
   3468 		      {
   3469 			PATFETCH (c);
   3470 			if ((c == '.' && *p == ']') || p == pend)
   3471 			  break;
   3472 			if (c1 < sizeof (str))
   3473 			  str[c1++] = c;
   3474 			else
   3475 			  /* This is in any case an invalid class name.  */
   3476 			  str[0] = '\0';
   3477                       }
   3478 		    str[c1] = '\0';
   3479 
   3480 		    if (c == '.' && *p == ']' && str[0] != '\0')
   3481 		      {
   3482 			/* If we have no collation data we use the default
   3483 			   collation in which each character is the name
   3484 			   for its own class which contains only the one
   3485 			   character.  It also means that ASCII is the
   3486 			   character set and therefore we cannot have character
   3487 			   with more than one byte in the multibyte
   3488 			   representation.  */
   3489 # ifdef _LIBC
   3490 			if (nrules == 0)
   3491 # endif
   3492 			  {
   3493 			    if (c1 != 1)
   3494 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3495 
   3496 			    /* Throw away the ] at the end of the collating
   3497 			       symbol.  */
   3498 			    PATFETCH (c);
   3499 
   3500 			    /* Set the bit for the character.  */
   3501 			    SET_LIST_BIT (str[0]);
   3502 			    range_start = ((const unsigned char *) str)[0];
   3503 			  }
   3504 # ifdef _LIBC
   3505 			else
   3506 			  {
   3507 			    /* Try to match the byte sequence in `str' against
   3508 			       those known to the collate implementation.
   3509 			       First find out whether the bytes in `str' are
   3510 			       actually from exactly one character.  */
   3511 			    int32_t table_size;
   3512 			    const int32_t *symb_table;
   3513 			    const unsigned char *extra;
   3514 			    int32_t idx;
   3515 			    int32_t elem;
   3516 			    int32_t second;
   3517 			    int32_t hash;
   3518 
   3519 			    table_size =
   3520 			      _NL_CURRENT_WORD (LC_COLLATE,
   3521 						_NL_COLLATE_SYMB_HASH_SIZEMB);
   3522 			    symb_table = (const int32_t *)
   3523 			      _NL_CURRENT (LC_COLLATE,
   3524 					   _NL_COLLATE_SYMB_TABLEMB);
   3525 			    extra = (const unsigned char *)
   3526 			      _NL_CURRENT (LC_COLLATE,
   3527 					   _NL_COLLATE_SYMB_EXTRAMB);
   3528 
   3529 			    /* Locate the character in the hashing table.  */
   3530 			    hash = elem_hash (str, c1);
   3531 
   3532 			    idx = 0;
   3533 			    elem = hash % table_size;
   3534 			    second = hash % (table_size - 2);
   3535 			    while (symb_table[2 * elem] != 0)
   3536 			      {
   3537 				/* First compare the hashing value.  */
   3538 				if (symb_table[2 * elem] == hash
   3539 				    && c1 == extra[symb_table[2 * elem + 1]]
   3540 				    && memcmp (str,
   3541 					       &extra[symb_table[2 * elem + 1]
   3542 						     + 1],
   3543 					       c1) == 0)
   3544 				  {
   3545 				    /* Yep, this is the entry.  */
   3546 				    idx = symb_table[2 * elem + 1];
   3547 				    idx += 1 + extra[idx];
   3548 				    break;
   3549 				  }
   3550 
   3551 				/* Next entry.  */
   3552 				elem += second;
   3553 			      }
   3554 
   3555 			    if (symb_table[2 * elem] == 0)
   3556 			      /* This is no valid character.  */
   3557 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3558 
   3559 			    /* Throw away the ] at the end of the collating
   3560 			       symbol.  */
   3561 			    PATFETCH (c);
   3562 
   3563 			    /* Now add the multibyte character(s) we found
   3564 			       to the accept list.
   3565 
   3566 			       XXX Note that this is not entirely correct.
   3567 			       we would have to match multibyte sequences
   3568 			       but this is not possible with the current
   3569 			       implementation.  Also, we have to match
   3570 			       collating symbols, which expand to more than
   3571 			       one byte, as a whole and not allow the
   3572 			       individual bytes.  */
   3573 			    c1 = extra[idx++];
   3574 			    if (c1 == 1)
   3575 			      range_start = extra[idx];
   3576 			    while (c1-- > 0)
   3577 			      {
   3578 				SET_LIST_BIT (extra[idx]);
   3579 				++idx;
   3580 			      }
   3581 			  }
   3582 # endif
   3583 			had_char_class = false;
   3584 		      }
   3585                     else
   3586                       {
   3587                         c1++;
   3588                         while (c1--)
   3589                           PATUNFETCH;
   3590                         SET_LIST_BIT ('[');
   3591                         SET_LIST_BIT ('.');
   3592 			range_start = '.';
   3593                         had_char_class = false;
   3594                       }
   3595 		  }
   3596                 else
   3597                   {
   3598                     had_char_class = false;
   3599                     SET_LIST_BIT (c);
   3600 		    range_start = c;
   3601                   }
   3602               }
   3603 
   3604             /* Discard any (non)matching list bytes that are all 0 at the
   3605                end of the map.  Decrease the map-length byte too.  */
   3606             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
   3607               b[-1]--;
   3608             b += b[-1];
   3609 #endif /* MBS_SUPPORT */
   3610           }
   3611           break;
   3612 
   3613 
   3614 	case '(':
   3615           if (syntax & RE_NO_BK_PARENS)
   3616             goto handle_open;
   3617           else
   3618             goto normal_char;
   3619 
   3620 
   3621         case ')':
   3622           if (syntax & RE_NO_BK_PARENS)
   3623             goto handle_close;
   3624           else
   3625             goto normal_char;
   3626 
   3627 
   3628         case '\n':
   3629           if (syntax & RE_NEWLINE_ALT)
   3630             goto handle_alt;
   3631           else
   3632             goto normal_char;
   3633 
   3634 
   3635 	case '|':
   3636           if (syntax & RE_NO_BK_VBAR)
   3637             goto handle_alt;
   3638           else
   3639             goto normal_char;
   3640 
   3641 
   3642         case '{':
   3643            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
   3644              goto handle_interval;
   3645            else
   3646              goto normal_char;
   3647 
   3648 
   3649         case '\\':
   3650           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3651 
   3652           /* Do not translate the character after the \, so that we can
   3653              distinguish, e.g., \B from \b, even if we normally would
   3654              translate, e.g., B to b.  */
   3655           PATFETCH_RAW (c);
   3656 
   3657           switch (c)
   3658             {
   3659             case '(':
   3660               if (syntax & RE_NO_BK_PARENS)
   3661                 goto normal_backslash;
   3662 
   3663             handle_open:
   3664               bufp->re_nsub++;
   3665               regnum++;
   3666 
   3667               if (COMPILE_STACK_FULL)
   3668                 {
   3669                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
   3670                             compile_stack_elt_t);
   3671                   if (compile_stack.stack == NULL) return REG_ESPACE;
   3672 
   3673                   compile_stack.size <<= 1;
   3674                 }
   3675 
   3676               /* These are the values to restore when we hit end of this
   3677                  group.  They are all relative offsets, so that if the
   3678                  whole pattern moves because of realloc, they will still
   3679                  be valid.  */
   3680               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
   3681               COMPILE_STACK_TOP.fixup_alt_jump
   3682                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
   3683               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
   3684               COMPILE_STACK_TOP.regnum = regnum;
   3685 
   3686               /* We will eventually replace the 0 with the number of
   3687                  groups inner to this one.  But do not push a
   3688                  start_memory for groups beyond the last one we can
   3689                  represent in the compiled pattern.  */
   3690               if (regnum <= MAX_REGNUM)
   3691                 {
   3692                   COMPILE_STACK_TOP.inner_group_offset = b
   3693 		    - COMPILED_BUFFER_VAR + 2;
   3694                   BUF_PUSH_3 (start_memory, regnum, 0);
   3695                 }
   3696 
   3697               compile_stack.avail++;
   3698 
   3699               fixup_alt_jump = 0;
   3700               laststart = 0;
   3701               begalt = b;
   3702 	      /* If we've reached MAX_REGNUM groups, then this open
   3703 		 won't actually generate any code, so we'll have to
   3704 		 clear pending_exact explicitly.  */
   3705 	      pending_exact = 0;
   3706               break;
   3707 
   3708 
   3709             case ')':
   3710               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
   3711 
   3712               if (COMPILE_STACK_EMPTY)
   3713 		{
   3714 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3715 		    goto normal_backslash;
   3716 		  else
   3717 		    FREE_STACK_RETURN (REG_ERPAREN);
   3718 		}
   3719 
   3720             handle_close:
   3721               if (fixup_alt_jump)
   3722                 { /* Push a dummy failure point at the end of the
   3723                      alternative for a possible future
   3724                      `pop_failure_jump' to pop.  See comments at
   3725                      `push_dummy_failure' in `re_match_2'.  */
   3726                   BUF_PUSH (push_dummy_failure);
   3727 
   3728                   /* We allocated space for this jump when we assigned
   3729                      to `fixup_alt_jump', in the `handle_alt' case below.  */
   3730                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
   3731                 }
   3732 
   3733               /* See similar code for backslashed left paren above.  */
   3734               if (COMPILE_STACK_EMPTY)
   3735 		{
   3736 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3737 		    goto normal_char;
   3738 		  else
   3739 		    FREE_STACK_RETURN (REG_ERPAREN);
   3740 		}
   3741 
   3742               /* Since we just checked for an empty stack above, this
   3743                  ``can't happen''.  */
   3744               assert (compile_stack.avail != 0);
   3745               {
   3746                 /* We don't just want to restore into `regnum', because
   3747                    later groups should continue to be numbered higher,
   3748                    as in `(ab)c(de)' -- the second group is #2.  */
   3749                 regnum_t this_group_regnum;
   3750 
   3751                 compile_stack.avail--;
   3752                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
   3753                 fixup_alt_jump
   3754                   = COMPILE_STACK_TOP.fixup_alt_jump
   3755                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
   3756                     : 0;
   3757                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
   3758                 this_group_regnum = COMPILE_STACK_TOP.regnum;
   3759 		/* If we've reached MAX_REGNUM groups, then this close
   3760 		   won't actually generate any code, so we'll have to
   3761 		   clear pending_exact explicitly.  */
   3762 		pending_exact = 0;
   3763 
   3764                 /* We're at the end of the group, so now we know how many
   3765                    groups were inside this one.  */
   3766                 if (this_group_regnum <= MAX_REGNUM)
   3767                   {
   3768 		    US_CHAR_TYPE *inner_group_loc
   3769                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
   3770 
   3771                     *inner_group_loc = regnum - this_group_regnum;
   3772                     BUF_PUSH_3 (stop_memory, this_group_regnum,
   3773                                 regnum - this_group_regnum);
   3774                   }
   3775               }
   3776               break;
   3777 
   3778 
   3779             case '|':					/* `\|'.  */
   3780               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
   3781                 goto normal_backslash;
   3782             handle_alt:
   3783               if (syntax & RE_LIMITED_OPS)
   3784                 goto normal_char;
   3785 
   3786               /* Insert before the previous alternative a jump which
   3787                  jumps to this alternative if the former fails.  */
   3788               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3789               INSERT_JUMP (on_failure_jump, begalt,
   3790 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
   3791               pending_exact = 0;
   3792               b += 1 + OFFSET_ADDRESS_SIZE;
   3793 
   3794               /* The alternative before this one has a jump after it
   3795                  which gets executed if it gets matched.  Adjust that
   3796                  jump so it will jump to this alternative's analogous
   3797                  jump (put in below, which in turn will jump to the next
   3798                  (if any) alternative's such jump, etc.).  The last such
   3799                  jump jumps to the correct final destination.  A picture:
   3800                           _____ _____
   3801                           |   | |   |
   3802                           |   v |   v
   3803                          a | b   | c
   3804 
   3805                  If we are at `b', then fixup_alt_jump right now points to a
   3806                  three-byte space after `a'.  We'll put in the jump, set
   3807                  fixup_alt_jump to right after `b', and leave behind three
   3808                  bytes which we'll fill in when we get to after `c'.  */
   3809 
   3810               if (fixup_alt_jump)
   3811                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   3812 
   3813               /* Mark and leave space for a jump after this alternative,
   3814                  to be filled in later either by next alternative or
   3815                  when we know we're at the end of a series of alternatives.  */
   3816               fixup_alt_jump = b;
   3817               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3818               b += 1 + OFFSET_ADDRESS_SIZE;
   3819 
   3820               laststart = 0;
   3821               begalt = b;
   3822               break;
   3823 
   3824 
   3825             case '{':
   3826               /* If \{ is a literal.  */
   3827               if (!(syntax & RE_INTERVALS)
   3828                      /* If we're at `\{' and it's not the open-interval
   3829                         operator.  */
   3830 		  || (syntax & RE_NO_BK_BRACES))
   3831                 goto normal_backslash;
   3832 
   3833             handle_interval:
   3834               {
   3835                 /* If got here, then the syntax allows intervals.  */
   3836 
   3837                 /* At least (most) this many matches must be made.  */
   3838                 int lower_bound = -1, upper_bound = -1;
   3839 
   3840 		/* Place in the uncompiled pattern (i.e., just after
   3841 		   the '{') to go back to if the interval is invalid.  */
   3842 		const CHAR_TYPE *beg_interval = p;
   3843 
   3844                 if (p == pend)
   3845 		  goto invalid_interval;
   3846 
   3847                 GET_UNSIGNED_NUMBER (lower_bound);
   3848 
   3849                 if (c == ',')
   3850                   {
   3851                     GET_UNSIGNED_NUMBER (upper_bound);
   3852 		    if (upper_bound < 0)
   3853 		      upper_bound = RE_DUP_MAX;
   3854                   }
   3855                 else
   3856                   /* Interval such as `{1}' => match exactly once. */
   3857                   upper_bound = lower_bound;
   3858 
   3859                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
   3860 		  goto invalid_interval;
   3861 
   3862                 if (!(syntax & RE_NO_BK_BRACES))
   3863                   {
   3864 		    if (c != '\\' || p == pend)
   3865 		      goto invalid_interval;
   3866                     PATFETCH (c);
   3867                   }
   3868 
   3869                 if (c != '}')
   3870 		  goto invalid_interval;
   3871 
   3872                 /* If it's invalid to have no preceding re.  */
   3873                 if (!laststart)
   3874                   {
   3875 		    if (syntax & RE_CONTEXT_INVALID_OPS
   3876 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
   3877                       FREE_STACK_RETURN (REG_BADRPT);
   3878                     else if (syntax & RE_CONTEXT_INDEP_OPS)
   3879                       laststart = b;
   3880                     else
   3881                       goto unfetch_interval;
   3882                   }
   3883 
   3884                 /* We just parsed a valid interval.  */
   3885 
   3886                 if (RE_DUP_MAX < upper_bound)
   3887 		  FREE_STACK_RETURN (REG_BADBR);
   3888 
   3889                 /* If the upper bound is zero, don't want to succeed at
   3890                    all; jump from `laststart' to `b + 3', which will be
   3891 		   the end of the buffer after we insert the jump.  */
   3892 		/* ifdef MBS_SUPPORT, 'b + 1 + OFFSET_ADDRESS_SIZE'
   3893 		   instead of 'b + 3'.  */
   3894                  if (upper_bound == 0)
   3895                    {
   3896                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3897                      INSERT_JUMP (jump, laststart, b + 1
   3898 				  + OFFSET_ADDRESS_SIZE);
   3899                      b += 1 + OFFSET_ADDRESS_SIZE;
   3900                    }
   3901 
   3902                  /* Otherwise, we have a nontrivial interval.  When
   3903                     we're all done, the pattern will look like:
   3904                       set_number_at <jump count> <upper bound>
   3905                       set_number_at <succeed_n count> <lower bound>
   3906                       succeed_n <after jump addr> <succeed_n count>
   3907                       <body of loop>
   3908                       jump_n <succeed_n addr> <jump count>
   3909                     (The upper bound and `jump_n' are omitted if
   3910                     `upper_bound' is 1, though.)  */
   3911                  else
   3912                    { /* If the upper bound is > 1, we need to insert
   3913                         more at the end of the loop.  */
   3914                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
   3915 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
   3916 
   3917                      GET_BUFFER_SPACE (nbytes);
   3918 
   3919                      /* Initialize lower bound of the `succeed_n', even
   3920                         though it will be set during matching by its
   3921                         attendant `set_number_at' (inserted next),
   3922                         because `re_compile_fastmap' needs to know.
   3923                         Jump to the `jump_n' we might insert below.  */
   3924                      INSERT_JUMP2 (succeed_n, laststart,
   3925                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
   3926 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
   3927 				   , lower_bound);
   3928                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3929 
   3930                      /* Code to initialize the lower bound.  Insert
   3931                         before the `succeed_n'.  The `5' is the last two
   3932                         bytes of this `set_number_at', plus 3 bytes of
   3933                         the following `succeed_n'.  */
   3934 		     /* ifdef MBS_SUPPORT, The '1+2*OFFSET_ADDRESS_SIZE'
   3935 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
   3936 			of the following `succeed_n'.  */
   3937                      insert_op2 (set_number_at, laststart, 1
   3938 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
   3939                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3940 
   3941                      if (upper_bound > 1)
   3942                        { /* More than one repetition is allowed, so
   3943                             append a backward jump to the `succeed_n'
   3944                             that starts this interval.
   3945 
   3946                             When we've reached this during matching,
   3947                             we'll have matched the interval once, so
   3948                             jump back only `upper_bound - 1' times.  */
   3949                          STORE_JUMP2 (jump_n, b, laststart
   3950 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
   3951                                       upper_bound - 1);
   3952                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3953 
   3954                          /* The location we want to set is the second
   3955                             parameter of the `jump_n'; that is `b-2' as
   3956                             an absolute address.  `laststart' will be
   3957                             the `set_number_at' we're about to insert;
   3958                             `laststart+3' the number to set, the source
   3959                             for the relative address.  But we are
   3960                             inserting into the middle of the pattern --
   3961                             so everything is getting moved up by 5.
   3962                             Conclusion: (b - 2) - (laststart + 3) + 5,
   3963                             i.e., b - laststart.
   3964 
   3965                             We insert this at the beginning of the loop
   3966                             so that if we fail during matching, we'll
   3967                             reinitialize the bounds.  */
   3968                          insert_op2 (set_number_at, laststart, b - laststart,
   3969                                      upper_bound - 1, b);
   3970                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3971                        }
   3972                    }
   3973                 pending_exact = 0;
   3974 		break;
   3975 
   3976 	      invalid_interval:
   3977 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
   3978 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
   3979 	      unfetch_interval:
   3980 		/* Match the characters as literals.  */
   3981 		p = beg_interval;
   3982 		c = '{';
   3983 		if (syntax & RE_NO_BK_BRACES)
   3984 		  goto normal_char;
   3985 		else
   3986 		  goto normal_backslash;
   3987 	      }
   3988 
   3989 #ifdef emacs
   3990             /* There is no way to specify the before_dot and after_dot
   3991                operators.  rms says this is ok.  --karl  */
   3992             case '=':
   3993               BUF_PUSH (at_dot);
   3994               break;
   3995 
   3996             case 's':
   3997               laststart = b;
   3998               PATFETCH (c);
   3999               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
   4000               break;
   4001 
   4002             case 'S':
   4003               laststart = b;
   4004               PATFETCH (c);
   4005               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
   4006               break;
   4007 #endif /* emacs */
   4008 
   4009 
   4010             case 'w':
   4011 	      if (syntax & RE_NO_GNU_OPS)
   4012 		goto normal_char;
   4013               laststart = b;
   4014               BUF_PUSH (wordchar);
   4015               break;
   4016 
   4017 
   4018             case 'W':
   4019 	      if (syntax & RE_NO_GNU_OPS)
   4020 		goto normal_char;
   4021               laststart = b;
   4022               BUF_PUSH (notwordchar);
   4023               break;
   4024 
   4025 
   4026             case '<':
   4027 	      if (syntax & RE_NO_GNU_OPS)
   4028 		goto normal_char;
   4029               BUF_PUSH (wordbeg);
   4030               break;
   4031 
   4032             case '>':
   4033 	      if (syntax & RE_NO_GNU_OPS)
   4034 		goto normal_char;
   4035               BUF_PUSH (wordend);
   4036               break;
   4037 
   4038             case 'b':
   4039 	      if (syntax & RE_NO_GNU_OPS)
   4040 		goto normal_char;
   4041               BUF_PUSH (wordbound);
   4042               break;
   4043 
   4044             case 'B':
   4045 	      if (syntax & RE_NO_GNU_OPS)
   4046 		goto normal_char;
   4047               BUF_PUSH (notwordbound);
   4048               break;
   4049 
   4050             case '`':
   4051 	      if (syntax & RE_NO_GNU_OPS)
   4052 		goto normal_char;
   4053               BUF_PUSH (begbuf);
   4054               break;
   4055 
   4056             case '\'':
   4057 	      if (syntax & RE_NO_GNU_OPS)
   4058 		goto normal_char;
   4059               BUF_PUSH (endbuf);
   4060               break;
   4061 
   4062             case '1': case '2': case '3': case '4': case '5':
   4063             case '6': case '7': case '8': case '9':
   4064               if (syntax & RE_NO_BK_REFS)
   4065                 goto normal_char;
   4066 
   4067               c1 = c - '0';
   4068 
   4069               if (c1 > regnum)
   4070                 FREE_STACK_RETURN (REG_ESUBREG);
   4071 
   4072               /* Can't back reference to a subexpression if inside of it.  */
   4073               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
   4074                 goto normal_char;
   4075 
   4076               laststart = b;
   4077               BUF_PUSH_2 (duplicate, c1);
   4078               break;
   4079 
   4080 
   4081             case '+':
   4082             case '?':
   4083               if (syntax & RE_BK_PLUS_QM)
   4084                 goto handle_plus;
   4085               else
   4086                 goto normal_backslash;
   4087 
   4088             default:
   4089             normal_backslash:
   4090               /* You might think it would be useful for \ to mean
   4091                  not to translate; but if we don't translate it
   4092                  it will never match anything.  */
   4093               c = TRANSLATE (c);
   4094               goto normal_char;
   4095             }
   4096           break;
   4097 
   4098 
   4099 	default:
   4100         /* Expects the character in `c'.  */
   4101 	normal_char:
   4102 	      /* If no exactn currently being built.  */
   4103           if (!pending_exact
   4104 #ifdef MBS_SUPPORT
   4105 	      /* If last exactn handle binary(or character) and
   4106 		 new exactn handle character(or binary).  */
   4107 	      || is_exactn_bin != is_binary[p - 1 - pattern]
   4108 #endif /* MBS_SUPPORT */
   4109 
   4110               /* If last exactn not at current position.  */
   4111               || pending_exact + *pending_exact + 1 != b
   4112 
   4113               /* We have only one byte following the exactn for the count.  */
   4114 	      || *pending_exact == (1 << BYTEWIDTH) - 1
   4115 
   4116               /* If followed by a repetition operator.  */
   4117               || *p == '*' || *p == '^'
   4118 	      || ((syntax & RE_BK_PLUS_QM)
   4119 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
   4120 		  : (*p == '+' || *p == '?'))
   4121 	      || ((syntax & RE_INTERVALS)
   4122                   && ((syntax & RE_NO_BK_BRACES)
   4123 		      ? *p == '{'
   4124                       : (p[0] == '\\' && p[1] == '{'))))
   4125 	    {
   4126 	      /* Start building a new exactn.  */
   4127 
   4128               laststart = b;
   4129 
   4130 #ifdef MBS_SUPPORT
   4131 	      /* Is this exactn binary data or character? */
   4132 	      is_exactn_bin = is_binary[p - 1 - pattern];
   4133 	      if (is_exactn_bin)
   4134 		  BUF_PUSH_2 (exactn_bin, 0);
   4135 	      else
   4136 		  BUF_PUSH_2 (exactn, 0);
   4137 #else
   4138 	      BUF_PUSH_2 (exactn, 0);
   4139 #endif /* MBS_SUPPORT */
   4140 	      pending_exact = b - 1;
   4141             }
   4142 
   4143 	  BUF_PUSH (c);
   4144           (*pending_exact)++;
   4145 	  break;
   4146         } /* switch (c) */
   4147     } /* while p != pend */
   4148 
   4149 
   4150   /* Through the pattern now.  */
   4151 
   4152   if (fixup_alt_jump)
   4153     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   4154 
   4155   if (!COMPILE_STACK_EMPTY)
   4156     FREE_STACK_RETURN (REG_EPAREN);
   4157 
   4158   /* If we don't want backtracking, force success
   4159      the first time we reach the end of the compiled pattern.  */
   4160   if (syntax & RE_NO_POSIX_BACKTRACKING)
   4161     BUF_PUSH (succeed);
   4162 
   4163 #ifdef MBS_SUPPORT
   4164   free (pattern);
   4165   free (mbs_offset);
   4166   free (is_binary);
   4167 #endif
   4168   free (compile_stack.stack);
   4169 
   4170   /* We have succeeded; set the length of the buffer.  */
   4171 #ifdef MBS_SUPPORT
   4172   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
   4173 #else
   4174   bufp->used = b - bufp->buffer;
   4175 #endif
   4176 
   4177 #ifdef DEBUG
   4178   if (debug)
   4179     {
   4180       DEBUG_PRINT1 ("\nCompiled pattern: \n");
   4181       print_compiled_pattern (bufp);
   4182     }
   4183 #endif /* DEBUG */
   4184 
   4185 #ifndef MATCH_MAY_ALLOCATE
   4186   /* Initialize the failure stack to the largest possible stack.  This
   4187      isn't necessary unless we're trying to avoid calling alloca in
   4188      the search and match routines.  */
   4189   {
   4190     int num_regs = bufp->re_nsub + 1;
   4191 
   4192     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
   4193        is strictly greater than re_max_failures, the largest possible stack
   4194        is 2 * re_max_failures failure points.  */
   4195     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
   4196       {
   4197 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
   4198 
   4199 # ifdef emacs
   4200 	if (! fail_stack.stack)
   4201 	  fail_stack.stack
   4202 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
   4203 					    * sizeof (fail_stack_elt_t));
   4204 	else
   4205 	  fail_stack.stack
   4206 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
   4207 					     (fail_stack.size
   4208 					      * sizeof (fail_stack_elt_t)));
   4209 # else /* not emacs */
   4210 	if (! fail_stack.stack)
   4211 	  fail_stack.stack
   4212 	    = (fail_stack_elt_t *) malloc (fail_stack.size
   4213 					   * sizeof (fail_stack_elt_t));
   4214 	else
   4215 	  fail_stack.stack
   4216 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
   4217 					    (fail_stack.size
   4218 					     * sizeof (fail_stack_elt_t)));
   4219 # endif /* not emacs */
   4220       }
   4221 
   4222     regex_grow_registers (num_regs);
   4223   }
   4224 #endif /* not MATCH_MAY_ALLOCATE */
   4225 
   4226   return REG_NOERROR;
   4227 } /* regex_compile */
   4228 
   4229 /* Subroutines for `regex_compile'.  */
   4231 
   4232 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
   4233 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
   4234 
   4235 static void
   4236 store_op1 (op, loc, arg)
   4237     re_opcode_t op;
   4238     US_CHAR_TYPE *loc;
   4239     int arg;
   4240 {
   4241   *loc = (US_CHAR_TYPE) op;
   4242   STORE_NUMBER (loc + 1, arg);
   4243 }
   4244 
   4245 
   4246 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4247 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
   4248 
   4249 static void
   4250 store_op2 (op, loc, arg1, arg2)
   4251     re_opcode_t op;
   4252     US_CHAR_TYPE *loc;
   4253     int arg1, arg2;
   4254 {
   4255   *loc = (US_CHAR_TYPE) op;
   4256   STORE_NUMBER (loc + 1, arg1);
   4257   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
   4258 }
   4259 
   4260 
   4261 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
   4262    for OP followed by two-byte integer parameter ARG.  */
   4263 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
   4264 
   4265 static void
   4266 insert_op1 (op, loc, arg, end)
   4267     re_opcode_t op;
   4268     US_CHAR_TYPE *loc;
   4269     int arg;
   4270     US_CHAR_TYPE *end;
   4271 {
   4272   register US_CHAR_TYPE *pfrom = end;
   4273   register US_CHAR_TYPE *pto = end + 1 + OFFSET_ADDRESS_SIZE;
   4274 
   4275   while (pfrom != loc)
   4276     *--pto = *--pfrom;
   4277 
   4278   store_op1 (op, loc, arg);
   4279 }
   4280 
   4281 
   4282 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4283 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
   4284 
   4285 static void
   4286 insert_op2 (op, loc, arg1, arg2, end)
   4287     re_opcode_t op;
   4288     US_CHAR_TYPE *loc;
   4289     int arg1, arg2;
   4290     US_CHAR_TYPE *end;
   4291 {
   4292   register US_CHAR_TYPE *pfrom = end;
   4293   register US_CHAR_TYPE *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
   4294 
   4295   while (pfrom != loc)
   4296     *--pto = *--pfrom;
   4297 
   4298   store_op2 (op, loc, arg1, arg2);
   4299 }
   4300 
   4301 
   4302 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
   4303    after an alternative or a begin-subexpression.  We assume there is at
   4304    least one character before the ^.  */
   4305 
   4306 static boolean
   4307 at_begline_loc_p (pattern, p, syntax)
   4308     const CHAR_TYPE *pattern, *p;
   4309     reg_syntax_t syntax;
   4310 {
   4311   const CHAR_TYPE *prev = p - 2;
   4312   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
   4313 
   4314   return
   4315        /* After a subexpression?  */
   4316        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
   4317        /* After an alternative?  */
   4318     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
   4319 }
   4320 
   4321 
   4322 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
   4323    at least one character after the $, i.e., `P < PEND'.  */
   4324 
   4325 static boolean
   4326 at_endline_loc_p (p, pend, syntax)
   4327     const CHAR_TYPE *p, *pend;
   4328     reg_syntax_t syntax;
   4329 {
   4330   const CHAR_TYPE *next = p;
   4331   boolean next_backslash = *next == '\\';
   4332   const CHAR_TYPE *next_next = p + 1 < pend ? p + 1 : 0;
   4333 
   4334   return
   4335        /* Before a subexpression?  */
   4336        (syntax & RE_NO_BK_PARENS ? *next == ')'
   4337         : next_backslash && next_next && *next_next == ')')
   4338        /* Before an alternative?  */
   4339     || (syntax & RE_NO_BK_VBAR ? *next == '|'
   4340         : next_backslash && next_next && *next_next == '|');
   4341 }
   4342 
   4343 
   4344 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
   4345    false if it's not.  */
   4346 
   4347 static boolean
   4348 group_in_compile_stack (compile_stack, regnum)
   4349     compile_stack_type compile_stack;
   4350     regnum_t regnum;
   4351 {
   4352   int this_element;
   4353 
   4354   for (this_element = compile_stack.avail - 1;
   4355        this_element >= 0;
   4356        this_element--)
   4357     if (compile_stack.stack[this_element].regnum == regnum)
   4358       return true;
   4359 
   4360   return false;
   4361 }
   4362 
   4363 #ifdef MBS_SUPPORT
   4364 /* This insert space, which size is "num", into the pattern at "loc".
   4365    "end" must point the end of the allocated buffer.  */
   4366 static void
   4367 insert_space (num, loc, end)
   4368      int num;
   4369      CHAR_TYPE *loc;
   4370      CHAR_TYPE *end;
   4371 {
   4372   register CHAR_TYPE *pto = end;
   4373   register CHAR_TYPE *pfrom = end - num;
   4374 
   4375   while (pfrom >= loc)
   4376     *pto-- = *pfrom--;
   4377 }
   4378 #endif /* MBS_SUPPORT */
   4379 
   4380 #ifdef MBS_SUPPORT
   4381 static reg_errcode_t
   4382 compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
   4383 	       char_set)
   4384      CHAR_TYPE range_start_char;
   4385      const CHAR_TYPE **p_ptr, *pend;
   4386      CHAR_TYPE *char_set, *b;
   4387      RE_TRANSLATE_TYPE translate;
   4388      reg_syntax_t syntax;
   4389 {
   4390   const CHAR_TYPE *p = *p_ptr;
   4391   CHAR_TYPE range_start, range_end;
   4392   reg_errcode_t ret;
   4393 # ifdef _LIBC
   4394   uint32_t nrules;
   4395   uint32_t start_val, end_val;
   4396 # endif
   4397   if (p == pend)
   4398     return REG_ERANGE;
   4399 
   4400 # ifdef _LIBC
   4401   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   4402   if (nrules != 0)
   4403     {
   4404       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
   4405 						       _NL_COLLATE_COLLSEQWC);
   4406       const unsigned char *extra = (const unsigned char *)
   4407 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   4408 
   4409       if (range_start_char < -1)
   4410 	{
   4411 	  /* range_start is a collating symbol.  */
   4412 	  int32_t *wextra;
   4413 	  /* Retreive the index and get collation sequence value.  */
   4414 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
   4415 	  start_val = wextra[1 + *wextra];
   4416 	}
   4417       else
   4418 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
   4419 
   4420       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
   4421 
   4422       /* Report an error if the range is empty and the syntax prohibits
   4423 	 this.  */
   4424       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4425 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
   4426 
   4427       /* Insert space to the end of the char_ranges.  */
   4428       insert_space(2, b - char_set[5] - 2, b - 1);
   4429       *(b - char_set[5] - 2) = (wchar_t)start_val;
   4430       *(b - char_set[5] - 1) = (wchar_t)end_val;
   4431       char_set[4]++; /* ranges_index */
   4432     }
   4433   else
   4434 # endif
   4435     {
   4436       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
   4437 	range_start_char;
   4438       range_end = TRANSLATE (p[0]);
   4439       /* Report an error if the range is empty and the syntax prohibits
   4440 	 this.  */
   4441       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4442 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
   4443 
   4444       /* Insert space to the end of the char_ranges.  */
   4445       insert_space(2, b - char_set[5] - 2, b - 1);
   4446       *(b - char_set[5] - 2) = range_start;
   4447       *(b - char_set[5] - 1) = range_end;
   4448       char_set[4]++; /* ranges_index */
   4449     }
   4450   /* Have to increment the pointer into the pattern string, so the
   4451      caller isn't still at the ending character.  */
   4452   (*p_ptr)++;
   4453 
   4454   return ret;
   4455 }
   4456 #else
   4457 /* Read the ending character of a range (in a bracket expression) from the
   4458    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
   4459    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
   4460    Then we set the translation of all bits between the starting and
   4461    ending characters (inclusive) in the compiled pattern B.
   4462 
   4463    Return an error code.
   4464 
   4465    We use these short variable names so we can use the same macros as
   4466    `regex_compile' itself.  */
   4467 
   4468 static reg_errcode_t
   4469 compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
   4470      unsigned int range_start_char;
   4471      const char **p_ptr, *pend;
   4472      RE_TRANSLATE_TYPE translate;
   4473      reg_syntax_t syntax;
   4474      unsigned char *b;
   4475 {
   4476   unsigned this_char;
   4477   const char *p = *p_ptr;
   4478   reg_errcode_t ret;
   4479 # if _LIBC
   4480   const unsigned char *collseq;
   4481   unsigned int start_colseq;
   4482   unsigned int end_colseq;
   4483 # else
   4484   unsigned end_char;
   4485 # endif
   4486 
   4487   if (p == pend)
   4488     return REG_ERANGE;
   4489 
   4490   /* Have to increment the pointer into the pattern string, so the
   4491      caller isn't still at the ending character.  */
   4492   (*p_ptr)++;
   4493 
   4494   /* Report an error if the range is empty and the syntax prohibits this.  */
   4495   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
   4496 
   4497 # if _LIBC
   4498   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
   4499 						 _NL_COLLATE_COLLSEQMB);
   4500 
   4501   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
   4502   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
   4503   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
   4504     {
   4505       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
   4506 
   4507       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
   4508 	{
   4509 	  SET_LIST_BIT (TRANSLATE (this_char));
   4510 	  ret = REG_NOERROR;
   4511 	}
   4512     }
   4513 # else
   4514   /* Here we see why `this_char' has to be larger than an `unsigned
   4515      char' -- we would otherwise go into an infinite loop, since all
   4516      characters <= 0xff.  */
   4517   range_start_char = TRANSLATE (range_start_char);
   4518   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
   4519      and some compilers cast it to int implicitly, so following for_loop
   4520      may fall to (almost) infinite loop.
   4521      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
   4522      To avoid this, we cast p[0] to unsigned int and truncate it.  */
   4523   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
   4524 
   4525   for (this_char = range_start_char; this_char <= end_char; ++this_char)
   4526     {
   4527       SET_LIST_BIT (TRANSLATE (this_char));
   4528       ret = REG_NOERROR;
   4529     }
   4530 # endif
   4531 
   4532   return ret;
   4533 }
   4534 #endif /* MBS_SUPPORT */
   4535 
   4536 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   4538    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   4539    characters can start a string that matches the pattern.  This fastmap
   4540    is used by re_search to skip quickly over impossible starting points.
   4541 
   4542    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   4543    area as BUFP->fastmap.
   4544 
   4545    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   4546    the pattern buffer.
   4547 
   4548    Returns 0 if we succeed, -2 if an internal error.   */
   4549 
   4550 #ifdef MBS_SUPPORT
   4551 /* local function for re_compile_fastmap.
   4552    truncate wchar_t character to char.  */
   4553 static unsigned char truncate_wchar (CHAR_TYPE c);
   4554 
   4555 static unsigned char
   4556 truncate_wchar (c)
   4557      CHAR_TYPE c;
   4558 {
   4559   unsigned char buf[MB_LEN_MAX];
   4560   int retval = wctomb(buf, c);
   4561   return retval > 0 ? buf[0] : (unsigned char)c;
   4562 }
   4563 #endif /* MBS_SUPPORT */
   4564 
   /* Walk the compiled pattern in BUFP->buffer and mark in BUFP->fastmap
      every byte that can begin a match.  Alternative paths are remembered
      on a failure stack and explored one after another.  Also sets
      BUFP->fastmap_accurate and BUFP->can_be_null.  Returns 0 when done,
      or -2 if a pattern position cannot be pushed on the failure stack.  */
   4565 int
   4566 re_compile_fastmap (bufp)
   4567      struct re_pattern_buffer *bufp;
   4568 {
   4569   int j, k;
   4570 #ifdef MATCH_MAY_ALLOCATE
   4571   fail_stack_type fail_stack;
   4572 #endif
   4573 #ifndef REGEX_MALLOC
   4574   char *destination;
   4575 #endif
   4576 
   4577   register char *fastmap = bufp->fastmap;
   4578 
   4579 #ifdef MBS_SUPPORT
   4580   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   4581      pattern to (char*) in regex_compile.  */
   4582   US_CHAR_TYPE *pattern = (US_CHAR_TYPE*)bufp->buffer;
   4583   register US_CHAR_TYPE *pend = (US_CHAR_TYPE*) (bufp->buffer + bufp->used);
   4584 #else
   4585   US_CHAR_TYPE *pattern = bufp->buffer;
   4586   register US_CHAR_TYPE *pend = pattern + bufp->used;
   4587 #endif /* MBS_SUPPORT */
   4588   US_CHAR_TYPE *p = pattern;
   4589 
   4590 #ifdef REL_ALLOC
   4591   /* This holds the pointer to the failure stack, when
   4592      it is allocated relocatably.  */
   4593   fail_stack_elt_t *failure_stack_ptr;
   4594 #endif
   4595 
   4596   /* Assume that each path through the pattern can be null until
   4597      proven otherwise.  We set this false at the bottom of switch
   4598      statement, to which we get only if a particular path doesn't
   4599      match the empty string.  */
   4600   boolean path_can_be_null = true;
   4601 
   4602   /* We aren't doing a `succeed_n' to begin with.  */
   4603   boolean succeed_n_p = false;
   4604 
   4605   assert (fastmap != NULL && p != NULL);
   4606 
   4607   INIT_FAIL_STACK ();
   4608   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
   4609   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
   4610   bufp->can_be_null = 0;
   4611 
   4612   while (1)
   4613     {
   4614       if (p == pend || *p == succeed)
   4615 	{
   4616 	  /* We have reached the (effective) end of pattern.  */
   4617 	  if (!FAIL_STACK_EMPTY ())
   4618 	    {
   4619 	      bufp->can_be_null |= path_can_be_null;
   4620 
   4621 	      /* Reset for next path.  */
   4622 	      path_can_be_null = true;
   4623 
                      /* Resume scanning at the most recently saved
                         alternative.  */
   4624 	      p = fail_stack.stack[--fail_stack.avail].pointer;
   4625 
   4626 	      continue;
   4627 	    }
   4628 	  else
   4629 	    break;
   4630 	}
   4631 
   4632       /* We should never be about to go beyond the end of the pattern.  */
   4633       assert (p < pend);
   4634 
   4635       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   4636 	{
   4637 
   4638         /* I guess the idea here is to simply not bother with a fastmap
   4639            if a backreference is used, since it's too hard to figure out
   4640            the fastmap for the corresponding group.  Setting
   4641            `can_be_null' stops `re_search_2' from using the fastmap, so
   4642            that is all we do.  */
   4643 	case duplicate:
   4644 	  bufp->can_be_null = 1;
   4645           goto done;
   4646 
   4647 
   4648       /* Following are the cases which match a character.  These end
   4649          with `break'.  */
   4650 
                /* For exactn, `p' now points at the count that follows the
                   opcode, so p[1] is the first literal character.  */
   4651 #ifdef MBS_SUPPORT
   4652 	case exactn:
   4653           fastmap[truncate_wchar(p[1])] = 1;
   4654 	  break;
   4655 	case exactn_bin:
   4656 	  fastmap[p[1]] = 1;
   4657 	  break;
   4658 #else
   4659 	case exactn:
   4660           fastmap[p[1]] = 1;
   4661 	  break;
   4662 #endif /* MBS_SUPPORT */
   4663 
   4664 
   4665 #ifdef MBS_SUPPORT
   4666         /* It is hard to distinguish fastmap from (multi byte) characters
   4667            which depends on current locale.  */
   4668         case charset:
   4669 	case charset_not:
   4670 	case wordchar:
   4671 	case notwordchar:
   4672           bufp->can_be_null = 1;
   4673           goto done;
   4674 #else
                /* *p is the number of bitmap bytes that follow; mark every
                   character whose bit is set in that bitmap.  */
   4675         case charset:
   4676           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4677 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
   4678               fastmap[j] = 1;
   4679 	  break;
   4680 
   4681 
   4682 	case charset_not:
   4683 	  /* Chars beyond end of map must be allowed.  */
   4684 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
   4685             fastmap[j] = 1;
   4686 
                  /* Within the bitmap, mark every character whose bit is
                     clear.  */
   4687 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4688 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
   4689               fastmap[j] = 1;
   4690           break;
   4691 
   4692 
   4693 	case wordchar:
   4694 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4695 	    if (SYNTAX (j) == Sword)
   4696 	      fastmap[j] = 1;
   4697 	  break;
   4698 
   4699 
   4700 	case notwordchar:
   4701 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4702 	    if (SYNTAX (j) != Sword)
   4703 	      fastmap[j] = 1;
   4704 	  break;
   4705 #endif
   4706 
   4707         case anychar:
   4708 	  {
                    /* Remember the current newline entry so it can be
                       restored after the blanket fill below.  */
   4709 	    int fastmap_newline = fastmap['\n'];
   4710 
   4711 	    /* `.' matches anything ...  */
   4712 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
   4713 	      fastmap[j] = 1;
   4714 
   4715 	    /* ... except perhaps newline.  */
   4716 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
   4717 	      fastmap['\n'] = fastmap_newline;
   4718 
   4719 	    /* Return if we have already set `can_be_null'; if we have,
   4720 	       then the fastmap is irrelevant.  Something's wrong here.  */
   4721 	    else if (bufp->can_be_null)
   4722 	      goto done;
   4723 
   4724 	    /* Otherwise, have to check alternative paths.  */
   4725 	    break;
   4726 	  }
   4727 
   4728 #ifdef emacs
   4729         case syntaxspec:
   4730 	  k = *p++;
   4731 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4732 	    if (SYNTAX (j) == (enum syntaxcode) k)
   4733 	      fastmap[j] = 1;
   4734 	  break;
   4735 
   4736 
   4737 	case notsyntaxspec:
   4738 	  k = *p++;
   4739 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4740 	    if (SYNTAX (j) != (enum syntaxcode) k)
   4741 	      fastmap[j] = 1;
   4742 	  break;
   4743 
   4744 
   4745       /* All cases after this match the empty string.  These end with
   4746          `continue'.  */
   4747 
   4748 
   4749 	case before_dot:
   4750 	case at_dot:
   4751 	case after_dot:
   4752           continue;
   4753 #endif /* emacs */
   4754 
   4755 
   4756         case no_op:
   4757         case begline:
   4758         case endline:
   4759 	case begbuf:
   4760 	case endbuf:
   4761 	case wordbound:
   4762 	case notwordbound:
   4763 	case wordbeg:
   4764 	case wordend:
   4765         case push_dummy_failure:
   4766           continue;
   4767 
   4768 
   4769 	case jump_n:
   4770         case pop_failure_jump:
   4771 	case maybe_pop_jump:
   4772 	case jump:
   4773         case jump_past_alt:
   4774 	case dummy_failure_jump:
                  /* Follow the jump: J is the relative offset stored right
                     after the opcode.  Forward jumps need nothing more.  */
   4775           EXTRACT_NUMBER_AND_INCR (j, p);
   4776 	  p += j;
   4777 	  if (j > 0)
   4778 	    continue;
   4779 
   4780           /* Jump backward implies we just went through the body of a
   4781              loop and matched nothing.  Opcode jumped to should be
   4782              `on_failure_jump' or `succeed_n'.  Just treat it like an
   4783              ordinary jump.  For a * loop, it has pushed its failure
   4784              point already; if so, discard that as redundant.  */
   4785           if ((re_opcode_t) *p != on_failure_jump
   4786 	      && (re_opcode_t) *p != succeed_n)
   4787 	    continue;
   4788 
                  /* Step over that opcode and follow its own offset too.  */
   4789           p++;
   4790           EXTRACT_NUMBER_AND_INCR (j, p);
   4791           p += j;
   4792 
   4793           /* If what's on the stack is where we are now, pop it.  */
   4794           if (!FAIL_STACK_EMPTY ()
   4795 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
   4796             fail_stack.avail--;
   4797 
   4798           continue;
   4799 
   4800 
   4801         case on_failure_jump:
   4802         case on_failure_keep_string_jump:
   4803 	handle_on_failure_jump:
   4804           EXTRACT_NUMBER_AND_INCR (j, p);
   4805 
   4806           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
   4807              end of the pattern.  We don't want to push such a point,
   4808              since when we restore it above, entering the switch will
   4809              increment `p' past the end of the pattern.  We don't need
   4810              to push such a point since we obviously won't find any more
   4811              fastmap entries beyond `pend'.  Such a pattern can match
   4812              the null string, though.  */
   4813           if (p + j < pend)
   4814             {
   4815               if (!PUSH_PATTERN_OP (p + j, fail_stack))
   4816 		{
   4817 		  RESET_FAIL_STACK ();
   4818 		  return -2;
   4819 		}
   4820             }
   4821           else
   4822             bufp->can_be_null = 1;
   4823 
                  /* When entered from `succeed_n' below, the count operand
                     still follows the offset and has to be stepped over.  */
   4824           if (succeed_n_p)
   4825             {
   4826               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
   4827               succeed_n_p = false;
   4828 	    }
   4829 
   4830           continue;
   4831 
   4832 
   4833 	case succeed_n:
   4834           /* Get to the number of times to succeed.  */
   4835           p += OFFSET_ADDRESS_SIZE;
   4836 
   4837           /* Increment p past the n for when k != 0.  */
   4838           EXTRACT_NUMBER_AND_INCR (k, p);
   4839           if (k == 0)
   4840 	    {
                      /* A zero count is handled like `on_failure_jump': back
                         up to the jump offset (presumably
                         EXTRACT_NUMBER_AND_INCR advanced `p' by
                         OFFSET_ADDRESS_SIZE -- confirm against the macro)
                         and let that handler read it.  */
   4841               p -= 2 * OFFSET_ADDRESS_SIZE;
   4842   	      succeed_n_p = true;  /* Spaghetti code alert.  */
   4843               goto handle_on_failure_jump;
   4844             }
   4845           continue;
   4846 
   4847 
   4848 	case set_number_at:
                  /* Step over both operands; nothing to record in the
                     fastmap.  */
   4849           p += 2 * OFFSET_ADDRESS_SIZE;
   4850           continue;
   4851 
   4852 
   4853 	case start_memory:
   4854         case stop_memory:
                  /* Step over the two operand elements that follow the
                     opcode.  */
   4855 	  p += 2;
   4856 	  continue;
   4857 
   4858 
   4859 	default:
   4860           abort (); /* We have listed all the cases.  */
   4861         } /* switch *p++ */
   4862 
   4863       /* Getting here means we have found the possible starting
   4864          characters for one path of the pattern -- and that the empty
   4865          string does not match.  We need not follow this path further.
   4866          Instead, look at the next alternative (remembered on the
   4867          stack), or quit if no more.  The test at the top of the loop
   4868          does these things.  */
   4869       path_can_be_null = false;
   4870       p = pend;
   4871     } /* while p */
   4872 
   4873   /* Set `can_be_null' for the last path (also the first path, if the
   4874      pattern is empty).  */
   4875   bufp->can_be_null |= path_can_be_null;
   4876 
          /* Common exit for the normal case and the early `goto done' cases;
             RESET_FAIL_STACK presumably releases what INIT_FAIL_STACK set up
             -- confirm against the macro definitions.  */
   4877  done:
   4878   RESET_FAIL_STACK ();
   4879   return 0;
   4880 } /* re_compile_fastmap */
   4881 #ifdef _LIBC
   4882 weak_alias (__re_compile_fastmap, re_compile_fastmap)
   4883 #endif
   4884 
   4885 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   4887    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   4888    this memory for recording register information.  STARTS and ENDS
   4889    must be allocated using the malloc library routine, and must each
   4890    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   4891 
   4892    If NUM_REGS == 0, then subsequent matches should allocate their own
   4893    register data.
   4894 
   4895    Unless this function is called, the first search or match using
   4896    PATTERN_BUFFER will allocate its own register data, without
   4897    freeing the old data.  */
   4898 
   4899 void
   4900 re_set_registers (bufp, regs, num_regs, starts, ends)
   4901     struct re_pattern_buffer *bufp;
   4902     struct re_registers *regs;
   4903     unsigned num_regs;
   4904     regoff_t *starts, *ends;
   4905 {
   4906   if (num_regs)
   4907     {
   4908       bufp->regs_allocated = REGS_REALLOCATE;
   4909       regs->num_regs = num_regs;
   4910       regs->start = starts;
   4911       regs->end = ends;
   4912     }
   4913   else
   4914     {
   4915       bufp->regs_allocated = REGS_UNALLOCATED;
   4916       regs->num_regs = 0;
   4917       regs->start = regs->end = (regoff_t *) 0;
   4918     }
   4919 }
   4920 #ifdef _LIBC
   4921 weak_alias (__re_set_registers, re_set_registers)
   4922 #endif
   4923 
   4924 /* Searching routines.  */
   4926 
   4927 /* Like re_search_2, below, but only one string is specified, and
   4928    doesn't let you say where to stop matching.  */
   4929 
   4930 int
   4931 re_search (bufp, string, size, startpos, range, regs)
   4932      struct re_pattern_buffer *bufp;
   4933      const char *string;
   4934      int size, startpos, range;
   4935      struct re_registers *regs;
   4936 {
   4937   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
   4938 		      regs, size);
   4939 }
   4940 #ifdef _LIBC
   4941 weak_alias (__re_search, re_search)
   4942 #endif
   4943 
   4944 
   4945 /* Using the compiled pattern in BUFP->buffer, first tries to match the
   4946    virtual concatenation of STRING1 and STRING2, starting first at index
   4947    STARTPOS, then at STARTPOS + 1, and so on.
   4948 
   4949    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
   4950 
   4951    RANGE is how far to scan while trying to match.  RANGE = 0 means try
   4952    only at STARTPOS; in general, the last start tried is STARTPOS +
   4953    RANGE.
   4954 
   4955    In REGS, return the indices of the virtual concatenation of STRING1
   4956    and STRING2 that matched the entire BUFP->buffer and its contained
   4957    subexpressions.
   4958 
   4959    Do not consider matching one past the index STOP in the virtual
   4960    concatenation of STRING1 and STRING2.
   4961 
   4962    We return either the position in the strings at which the match was
   4963    found, -1 if no match, or -2 if error (such as failure
   4964    stack overflow).  */
   4965 
   4966 int
   4967 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
   4968      struct re_pattern_buffer *bufp;
   4969      const char *string1, *string2;
   4970      int size1, size2;
   4971      int startpos;
   4972      int range;
   4973      struct re_registers *regs;
   4974      int stop;
   4975 {
   4976   int val;
   4977   register char *fastmap = bufp->fastmap;
   4978   register RE_TRANSLATE_TYPE translate = bufp->translate;
   4979   int total_size = size1 + size2;
   4980   int endpos = startpos + range;
   4981 
   4982   /* Check for out-of-range STARTPOS.  */
   4983   if (startpos < 0 || startpos > total_size)
   4984     return -1;
   4985 
   4986   /* Fix up RANGE if it might eventually take us outside
   4987      the virtual concatenation of STRING1 and STRING2.
   4988      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
   4989   if (endpos < 0)
   4990     range = 0 - startpos;
   4991   else if (endpos > total_size)
   4992     range = total_size - startpos;
   4993 
   4994   /* If the search isn't to be a backwards one, don't waste time in a
   4995      search for a pattern that must be anchored.  */
   4996   if (bufp->used > 0 && range > 0
   4997       && ((re_opcode_t) bufp->buffer[0] == begbuf
   4998 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
   4999 	  || ((re_opcode_t) bufp->buffer[0] == begline
   5000 	      && !bufp->newline_anchor)))
   5001     {
   5002       if (startpos > 0)
   5003 	return -1;
   5004       else
   5005 	range = 1;
   5006     }
   5007 
   5008 #ifdef emacs
   5009   /* In a forward search for something that starts with \=.
   5010      don't keep searching past point.  */
   5011   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
   5012     {
   5013       range = PT - startpos;
   5014       if (range <= 0)
   5015 	return -1;
   5016     }
   5017 #endif /* emacs */
   5018 
   5019   /* Update the fastmap now if not correct already.  */
   5020   if (fastmap && !bufp->fastmap_accurate)
   5021     if (re_compile_fastmap (bufp) == -2)
   5022       return -2;
   5023 
   5024   /* Loop through the string, looking for a place to start matching.  */
   5025   for (;;)
   5026     {
   5027       /* If a fastmap is supplied, skip quickly over characters that
   5028          cannot be the start of a match.  If the pattern can match the
   5029          null string, however, we don't need to skip characters; we want
   5030          the first null string.  */
   5031       if (fastmap && startpos < total_size && !bufp->can_be_null)
   5032 	{
   5033 	  if (range > 0)	/* Searching forwards.  */
   5034 	    {
   5035 	      register const char *d;
   5036 	      register int lim = 0;
   5037 	      int irange = range;
   5038 
   5039               if (startpos < size1 && startpos + range >= size1)
   5040                 lim = range - (size1 - startpos);
   5041 
   5042 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
   5043 
   5044               /* Written out as an if-else to avoid testing `translate'
   5045                  inside the loop.  */
   5046 	      if (translate)
   5047                 while (range > lim
   5048                        && !fastmap[(unsigned char)
   5049 				   translate[(unsigned char) *d++]])
   5050                   range--;
   5051 	      else
   5052                 while (range > lim && !fastmap[(unsigned char) *d++])
   5053                   range--;
   5054 
   5055 	      startpos += irange - range;
   5056 	    }
   5057 	  else				/* Searching backwards.  */
   5058 	    {
   5059 	      register CHAR_TYPE c = (size1 == 0 || startpos >= size1
   5060 				      ? string2[startpos - size1]
   5061 				      : string1[startpos]);
   5062 
   5063 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
   5064 		goto advance;
   5065 	    }
   5066 	}
   5067 
   5068       /* If can't match the null string, and that's all we have left, fail.  */
   5069       if (range >= 0 && startpos == total_size && fastmap
   5070           && !bufp->can_be_null)
   5071 	return -1;
   5072 
   5073       val = re_match_2_internal (bufp, string1, size1, string2, size2,
   5074 				 startpos, regs, stop);
   5075 #ifndef REGEX_MALLOC
   5076 # ifdef C_ALLOCA
   5077       alloca (0);
   5078 # endif
   5079 #endif
   5080 
   5081       if (val >= 0)
   5082 	return startpos;
   5083 
   5084       if (val == -2)
   5085 	return -2;
   5086 
   5087     advance:
   5088       if (!range)
   5089         break;
   5090       else if (range > 0)
   5091         {
   5092           range--;
   5093           startpos++;
   5094         }
   5095       else
   5096         {
   5097           range++;
   5098           startpos--;
   5099         }
   5100     }
   5101   return -1;
   5102 } /* re_search_2 */
   5103 #ifdef _LIBC
   5104 weak_alias (__re_search_2, re_search_2)
   5105 #endif
   5106 
   5107 #ifdef MBS_SUPPORT
   5109 /* This converts PTR, a pointer into one of the search wchar_t strings
   5110    `string1' and `string2', into a multibyte string offset.  We use the
   5111    mbs_offset1/mbs_offset2 tables to optimize: the wide-character index
   5112    of PTR is looked up in the table.  See convert_mbs_to_wcs.
           For a pointer into `string2' the byte size of the first string
           (`csize1') is added; if the relevant table is NULL the table
           contribution is taken as 0.  The macro refers to `string1',
           `string2', `mbs_offset1', `mbs_offset2' and `csize1' from the
           scope where it is expanded.  */
   5113 # define POINTER_TO_OFFSET(ptr)						\
   5114   (FIRST_STRING_P (ptr)							\
   5115    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
   5116    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
   5117 		 + csize1)))
   5118 #else
   5119 /* This converts PTR, a pointer into one of the search strings `string1'
   5120    and `string2', into an offset: the distance from the start of
           `string1' for a first-string pointer, or the distance from the start
           of `string2' plus `size1' otherwise.  Uses `string1', `string2' and
           `size1' from the scope where it is expanded.  */
   5121 # define POINTER_TO_OFFSET(ptr)			\
   5122   (FIRST_STRING_P (ptr)				\
   5123    ? ((regoff_t) ((ptr) - string1))		\
   5124    : ((regoff_t) ((ptr) - string2 + size1)))
   5125 #endif /* MBS_SUPPORT */
   5126 
   5127 /* Macros for dealing with the split strings in re_match_2.  They refer
           directly to variables of the expansion site (`d', `dend',
           `string1', `string2', `size1', `size2', `end2', `end_match_1',
           `end_match_2') and so can only be used where those are in scope.  */
   5128 
        /* True when `dend' equals `end_match_1'.  PREFETCH below sets `dend'
           to `end_match_2' when it moves on to string2.  */
   5129 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
   5130 
   5131 /* Call before fetching a character with *d.  This switches over to
   5132    string2 if necessary.  While `d' sits at `dend' it either jumps to
           the label `fail' (when `dend' is already `end_match_2') or restarts
           `d' at `string2' with `dend' set to `end_match_2'.  The expansion is
           a bare `while' statement, not an expression, and needs a `fail'
           label in the enclosing function.  */
   5133 #define PREFETCH()							\
   5134   while (d == dend)						    	\
   5135     {									\
   5136       /* End of string2 => fail.  */					\
   5137       if (dend == end_match_2) 						\
   5138         goto fail;							\
   5139       /* End of string1 => advance to string2.  */ 			\
   5140       d = string2;						        \
   5141       dend = end_match_2;						\
   5142     }
   5143 
   5144 
   5145 /* Test if at very beginning or at very end of the virtual concatenation
   5146    of `string1' and `string2'.  If only one string, it's `string2'.
           AT_STRINGS_BEG compares against `string1' when `size1' is nonzero
           and against `string2' otherwise, and is also true whenever `size2'
           is zero.  AT_STRINGS_END compares against `end2' only.  */
   5147 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
   5148 #define AT_STRINGS_END(d) ((d) == end2)
   5149 
   5150 
   5151 /* Test if D points to a character which is word-constituent.  We have
   5152    two special cases to check for: if past the end of string1, look at
   5153    the first character in string2; and if before the beginning of
   5154    string2, look at the last character in string1.
           D is evaluated more than once, so it must not have side effects.
           Uses `end1' and `string2' from the scope where it is expanded.  */
   5155 #ifdef MBS_SUPPORT
   5156 /* Use internationalized API instead of SYNTAX: the character counts as
           word-constituent when iswalnum returns nonzero for it.  */
   5157 # define WORDCHAR_P(d)							\
   5158   (iswalnum ((wint_t)((d) == end1 ? *string2				\
   5159            : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0)
   5160 #else
        /* Here the character counts as word-constituent when its SYNTAX
           class is Sword.  */
   5161 # define WORDCHAR_P(d)							\
   5162   (SYNTAX ((d) == end1 ? *string2					\
   5163            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   5164    == Sword)
   5165 #endif /* MBS_SUPPORT */
   5166 
   5167 /* Disabled due to a compiler bug -- see comment at case wordbound */
   5168 #if 0
   5169 /* Test if the character before D and the one at D differ with respect
   5170    to being word-constituent.  The very beginning and very end of the
           strings also count as boundaries.  Note that D appears
           unparenthesized in `d - 1'.  This definition is compiled out.  */
   5171 #define AT_WORD_BOUNDARY(d)						\
   5172   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   5173    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
   5174 #endif
   5175 
   5176 /* Free everything we malloc.  */
   5177 #ifdef MATCH_MAY_ALLOCATE
   5178 # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
   5179 # ifdef MBS_SUPPORT
   5180 #  define FREE_VARIABLES()						\
   5181   do {									\
   5182     REGEX_FREE_STACK (fail_stack.stack);				\
   5183     FREE_VAR (regstart);						\
   5184     FREE_VAR (regend);							\
   5185     FREE_VAR (old_regstart);						\
   5186     FREE_VAR (old_regend);						\
   5187     FREE_VAR (best_regstart);						\
   5188     FREE_VAR (best_regend);						\
   5189     FREE_VAR (reg_info);						\
   5190     FREE_VAR (reg_dummy);						\
   5191     FREE_VAR (reg_info_dummy);						\
   5192     FREE_VAR (string1);							\
   5193     FREE_VAR (string2);							\
   5194     FREE_VAR (mbs_offset1);						\
   5195     FREE_VAR (mbs_offset2);						\
   5196   } while (0)
   5197 # else /* not MBS_SUPPORT */
   5198 #  define FREE_VARIABLES()						\
   5199   do {									\
   5200     REGEX_FREE_STACK (fail_stack.stack);				\
   5201     FREE_VAR (regstart);						\
   5202     FREE_VAR (regend);							\
   5203     FREE_VAR (old_regstart);						\
   5204     FREE_VAR (old_regend);						\
   5205     FREE_VAR (best_regstart);						\
   5206     FREE_VAR (best_regend);						\
   5207     FREE_VAR (reg_info);						\
   5208     FREE_VAR (reg_dummy);						\
   5209     FREE_VAR (reg_info_dummy);						\
   5210   } while (0)
   5211 # endif /* MBS_SUPPORT */
   5212 #else
   5213 # define FREE_VAR(var) if (var) free (var); var = NULL
   5214 # ifdef MBS_SUPPORT
   5215 #  define FREE_VARIABLES()						\
   5216   do {									\
   5217     FREE_VAR (string1);							\
   5218     FREE_VAR (string2);							\
   5219     FREE_VAR (mbs_offset1);						\
   5220     FREE_VAR (mbs_offset2);						\
   5221   } while (0)
   5222 # else
   5223 #  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
   5224 # endif /* MBS_SUPPORT */
   5225 #endif /* not MATCH_MAY_ALLOCATE */
   5226 
   5227 /* These values must meet several constraints.  They must not be valid
   5228    register values; since we have a limit of 255 registers (because
   5229    we use only one byte in the pattern for the register number), we can
   5230    use numbers larger than 255.  They must differ by 1, because of
   5231    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   5232    be larger than the value for the highest register, so we do not try
   5233    to actually save any registers when none are active.
           Concretely: the "no highest" marker is 1 << BYTEWIDTH (BYTEWIDTH is
           defined elsewhere; presumably 8, giving 256) and the "no lowest"
           marker is that value plus one.  */
   5234 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
   5235 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
   5236 
   5237 /* Matching routines.  */
   5239 
   5240 #ifndef emacs   /* Emacs never uses this.  */
   5241 /* re_match is like re_match_2 except it takes only a single string.  */
   5242 
   5243 int
   5244 re_match (bufp, string, size, pos, regs)
   5245      struct re_pattern_buffer *bufp;
   5246      const char *string;
   5247      int size, pos;
   5248      struct re_registers *regs;
   5249 {
   5250   int result = re_match_2_internal (bufp, NULL, 0, string, size,
   5251 				    pos, regs, size);
   5252 # ifndef REGEX_MALLOC
   5253 #  ifdef C_ALLOCA
   5254   alloca (0);
   5255 #  endif
   5256 # endif
   5257   return result;
   5258 }
   5259 # ifdef _LIBC
   5260 weak_alias (__re_match, re_match)
   5261 # endif
   5262 #endif /* not emacs */
   5263 
   /* Forward declarations of file-local (static) helpers; their
      definitions are not in this part of the file.  Judging by the names
      only (not verified here), the first three test whether part of a
      compiled pattern can match the null string, and bcmp_translate
      compares two buffers of LEN characters through a translate table.  */
   5264 static boolean group_match_null_string_p _RE_ARGS ((US_CHAR_TYPE **p,
   5265 						    US_CHAR_TYPE *end,
   5266 						register_info_type *reg_info));
   5267 static boolean alt_match_null_string_p _RE_ARGS ((US_CHAR_TYPE *p,
   5268 						  US_CHAR_TYPE *end,
   5269 						register_info_type *reg_info));
   5270 static boolean common_op_match_null_string_p _RE_ARGS ((US_CHAR_TYPE **p,
   5271 							US_CHAR_TYPE *end,
   5272 						register_info_type *reg_info));
   5273 static int bcmp_translate _RE_ARGS ((const CHAR_TYPE *s1, const CHAR_TYPE *s2,
   5274 				     int len, char *translate));
   5275 
   5276 /* re_match_2 matches the compiled pattern in BUFP against the
   5277    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   5278    and SIZE2, respectively).  We start matching at POS, and stop
   5279    matching at STOP.
   5280 
   5281    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
   5282    store offsets for the substring each group matched in REGS.  See the
   5283    documentation for exactly how many groups we fill.
   5284 
   5285    We return -1 if no match, -2 if an internal error (such as the
   5286    failure stack overflowing).  Otherwise, we return the length of the
   5287    matched substring.  */
   5288 
   5289 int
   5290 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   5291      struct re_pattern_buffer *bufp;
   5292      const char *string1, *string2;
   5293      int size1, size2;
   5294      int pos;
   5295      struct re_registers *regs;
   5296      int stop;
   5297 {
   5298   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
   5299 				    pos, regs, stop);
   5300 #ifndef REGEX_MALLOC
   5301 # ifdef C_ALLOCA
   5302   alloca (0);
   5303 # endif
   5304 #endif
   5305   return result;
   5306 }
   5307 #ifdef _LIBC
   5308 weak_alias (__re_match_2, re_match_2)
   5309 #endif
   5310 
   5311 #ifdef MBS_SUPPORT
   5312 
   5313 static int count_mbs_length PARAMS ((int *, int));
   5314 
   5315 /* This check the substring (from 0, to length) of the multibyte string,
   5316    to which offset_buffer correspond. And count how many wchar_t_characters
   5317    the substring occupy. We use offset_buffer to optimization.
   5318    See convert_mbs_to_wcs.  */
   5319 
   5320 static int
   5321 count_mbs_length(offset_buffer, length)
   5322      int *offset_buffer;
   5323      int length;
   5324 {
   5325   int wcs_size;
   5326 
   5327   /* Check whether the size is valid.  */
   5328   if (length < 0)
   5329     return -1;
   5330 
   5331   if (offset_buffer == NULL)
   5332     return 0;
   5333 
   5334   for (wcs_size = 0 ; offset_buffer[wcs_size] != -1 ; wcs_size++)
   5335     {
   5336       if (offset_buffer[wcs_size] == length)
   5337 	return wcs_size;
   5338       if (offset_buffer[wcs_size] > length)
   5339 	/* It is a fragment of a wide character.  */
   5340 	return -1;
   5341     }
   5342 
   5343   /* We reached at the sentinel.  */
   5344   return -1;
   5345 }
   5346 #endif /* MBS_SUPPORT */
   5347 
   5348 /* This is a separate function so that we can force an alloca cleanup
   5349    afterwards.  */
   5350 static int
   5351 #ifdef MBS_SUPPORT
   5352 re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos, regs, stop)
   5353      struct re_pattern_buffer *bufp;
   5354      const char *cstring1, *cstring2;
   5355      int csize1, csize2;
   5356 #else
   5357 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   5358      struct re_pattern_buffer *bufp;
   5359      const char *string1, *string2;
   5360      int size1, size2;
   5361 #endif
   5362      int pos;
   5363      struct re_registers *regs;
   5364      int stop;
   5365 {
   5366   /* General temporaries.  */
   5367   int mcnt;
   5368   US_CHAR_TYPE *p1;
   5369 #ifdef MBS_SUPPORT
   5370   /* We need wchar_t* buffers correspond to string1, string2.  */
   5371   CHAR_TYPE *string1 = NULL, *string2 = NULL;
   5372   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
   5373   int size1 = 0, size2 = 0;
   5374   /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
   5375   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
   5376   /* They hold whether each wchar_t is binary data or not.  */
   5377   char *is_binary = NULL;
   5378 #endif /* MBS_SUPPORT */
   5379 
   5380   /* Just past the end of the corresponding string.  */
   5381   const CHAR_TYPE *end1, *end2;
   5382 
   5383   /* Pointers into string1 and string2, just past the last characters in
   5384      each to consider matching.  */
   5385   const CHAR_TYPE *end_match_1, *end_match_2;
   5386 
   5387   /* Where we are in the data, and the end of the current string.  */
   5388   const CHAR_TYPE *d, *dend;
   5389 
   5390   /* Where we are in the pattern, and the end of the pattern.  */
   5391 #ifdef MBS_SUPPORT
   5392   US_CHAR_TYPE *pattern, *p;
   5393   register US_CHAR_TYPE *pend;
   5394 #else
   5395   US_CHAR_TYPE *p = bufp->buffer;
   5396   register US_CHAR_TYPE *pend = p + bufp->used;
   5397 #endif /* MBS_SUPPORT */
   5398 
   5399   /* Mark the opcode just after a start_memory, so we can test for an
   5400      empty subpattern when we get to the stop_memory.  */
   5401   US_CHAR_TYPE *just_past_start_mem = 0;
   5402 
   5403   /* We use this to map every character in the string.  */
   5404   RE_TRANSLATE_TYPE translate = bufp->translate;
   5405 
   5406   /* Failure point stack.  Each place that can handle a failure further
   5407      down the line pushes a failure point on this stack.  It consists of
   5408      regstart, regend, and reg_info for all registers corresponding to
   5409      the subexpressions we're currently inside, plus the number of such
   5410      registers, and, finally, two char *'s.  The first char * is where
   5411      to resume scanning the pattern; the second one is where to resume
   5412      scanning the strings.  If the latter is zero, the failure point is
   5413      a ``dummy''; if a failure happens and the failure point is a dummy,
   5414      it gets discarded and the next one is tried.  */
   5415 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5416   fail_stack_type fail_stack;
   5417 #endif
   5418 #ifdef DEBUG
   5419   static unsigned failure_id;
   5420   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
   5421 #endif
   5422 
   5423 #ifdef REL_ALLOC
   5424   /* This holds the pointer to the failure stack, when
   5425      it is allocated relocatably.  */
   5426   fail_stack_elt_t *failure_stack_ptr;
   5427 #endif
   5428 
   5429   /* We fill all the registers internally, independent of what we
   5430      return, for use in backreferences.  The number here includes
   5431      an element for register zero.  */
   5432   size_t num_regs = bufp->re_nsub + 1;
   5433 
   5434   /* The currently active registers.  */
   5435   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   5436   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   5437 
   5438   /* Information on the contents of registers. These are pointers into
   5439      the input strings; they record just what was matched (on this
   5440      attempt) by a subexpression part of the pattern, that is, the
   5441      regnum-th regstart pointer points to where in the pattern we began
   5442      matching and the regnum-th regend points to right after where we
   5443      stopped matching the regnum-th subexpression.  (The zeroth register
   5444      keeps track of what the whole pattern matches.)  */
   5445 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5446   const CHAR_TYPE **regstart, **regend;
   5447 #endif
   5448 
   5449   /* If a group that's operated upon by a repetition operator fails to
   5450      match anything, then the register for its start will need to be
   5451      restored because it will have been set to wherever in the string we
   5452      are when we last see its open-group operator.  Similarly for a
   5453      register's end.  */
   5454 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5455   const CHAR_TYPE **old_regstart, **old_regend;
   5456 #endif
   5457 
   5458   /* The is_active field of reg_info helps us keep track of which (possibly
   5459      nested) subexpressions we are currently in. The matched_something
   5460      field of reg_info[reg_num] helps us tell whether or not we have
   5461      matched any of the pattern so far this time through the reg_num-th
   5462      subexpression.  These two fields get reset each time through any
   5463      loop their register is in.  */
   5464 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5465   register_info_type *reg_info;
   5466 #endif
   5467 
   5468   /* The following record the register info as found in the above
   5469      variables when we find a match better than any we've seen before.
   5470      This happens as we backtrack through the failure points, which in
   5471      turn happens only if we have not yet matched the entire string. */
   5472   unsigned best_regs_set = false;
   5473 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5474   const CHAR_TYPE **best_regstart, **best_regend;
   5475 #endif
   5476 
   5477   /* Logically, this is `best_regend[0]'.  But we don't want to have to
   5478      allocate space for that if we're not allocating space for anything
   5479      else (see below).  Also, we never need info about register 0 for
   5480      any of the other register vectors, and it seems rather a kludge to
   5481      treat `best_regend' differently than the rest.  So we keep track of
   5482      the end of the best match so far in a separate variable.  We
   5483      initialize this to NULL so that when we backtrack the first time
   5484      and need to test it, it's not garbage.  */
   5485   const CHAR_TYPE *match_end = NULL;
   5486 
   5487   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   5488   int set_regs_matched_done = 0;
   5489 
   5490   /* Used when we pop values we don't care about.  */
   5491 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5492   const CHAR_TYPE **reg_dummy;
   5493   register_info_type *reg_info_dummy;
   5494 #endif
   5495 
   5496 #ifdef DEBUG
   5497   /* Counts the total number of registers pushed.  */
   5498   unsigned num_regs_pushed = 0;
   5499 #endif
   5500 
   5501   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
   5502 
   5503   INIT_FAIL_STACK ();
   5504 
   5505 #ifdef MATCH_MAY_ALLOCATE
   5506   /* Do not bother to initialize all the register variables if there are
   5507      no groups in the pattern, as it takes a fair amount of time.  If
   5508      there are groups, we include space for register 0 (the whole
   5509      pattern), even though we never use it, since it simplifies the
   5510      array indexing.  We should fix this.  */
   5511   if (bufp->re_nsub)
   5512     {
   5513       regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5514       regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5515       old_regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5516       old_regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5517       best_regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5518       best_regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5519       reg_info = REGEX_TALLOC (num_regs, register_info_type);
   5520       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
   5521       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
   5522 
   5523       if (!(regstart && regend && old_regstart && old_regend && reg_info
   5524             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
   5525         {
   5526           FREE_VARIABLES ();
   5527           return -2;
   5528         }
   5529     }
   5530   else
   5531     {
   5532       /* We must initialize all our variables to NULL, so that
   5533          `FREE_VARIABLES' doesn't try to free them.  */
   5534       regstart = regend = old_regstart = old_regend = best_regstart
   5535         = best_regend = reg_dummy = NULL;
   5536       reg_info = reg_info_dummy = (register_info_type *) NULL;
   5537     }
   5538 #endif /* MATCH_MAY_ALLOCATE */
   5539 
   5540   /* The starting position is bogus.  */
   5541 #ifdef MBS_SUPPORT
   5542   if (pos < 0 || pos > csize1 + csize2)
   5543 #else
   5544   if (pos < 0 || pos > size1 + size2)
   5545 #endif
   5546     {
   5547       FREE_VARIABLES ();
   5548       return -1;
   5549     }
   5550 
   5551 #ifdef MBS_SUPPORT
   5552   /* Allocate wchar_t array for string1 and string2 and
   5553      fill them with converted string.  */
   5554   if (csize1 != 0)
   5555     {
   5556       string1 = REGEX_TALLOC (csize1 + 1, CHAR_TYPE);
   5557       mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
   5558       is_binary = REGEX_TALLOC (csize1 + 1, char);
   5559       if (!string1 || !mbs_offset1 || !is_binary)
   5560 	{
   5561 	  FREE_VAR (string1);
   5562 	  FREE_VAR (mbs_offset1);
   5563 	  FREE_VAR (is_binary);
   5564 	  return -2;
   5565 	}
   5566       size1 = convert_mbs_to_wcs(string1, cstring1, csize1,
   5567 				 mbs_offset1, is_binary);
   5568       string1[size1] = L'\0'; /* for a sentinel  */
   5569       FREE_VAR (is_binary);
   5570     }
   5571   if (csize2 != 0)
   5572     {
   5573       string2 = REGEX_TALLOC (csize2 + 1, CHAR_TYPE);
   5574       mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
   5575       is_binary = REGEX_TALLOC (csize2 + 1, char);
   5576       if (!string2 || !mbs_offset2 || !is_binary)
   5577 	{
   5578 	  FREE_VAR (string1);
   5579 	  FREE_VAR (mbs_offset1);
   5580 	  FREE_VAR (string2);
   5581 	  FREE_VAR (mbs_offset2);
   5582 	  FREE_VAR (is_binary);
   5583 	  return -2;
   5584 	}
   5585       size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
   5586 				 mbs_offset2, is_binary);
   5587       string2[size2] = L'\0'; /* for a sentinel  */
   5588       FREE_VAR (is_binary);
   5589     }
   5590 
   5591   /* We need to cast the pattern to (wchar_t*), because we cast this
   5592      compiled pattern to (char*) in regex_compile.  */
   5593   p = pattern = (CHAR_TYPE*)bufp->buffer;
   5594   pend = (CHAR_TYPE*)(bufp->buffer + bufp->used);
   5595 
   5596 #endif /* MBS_SUPPORT */
   5597 
   5598   /* Initialize subexpression text positions to -1 to mark ones that no
   5599      start_memory/stop_memory has been seen for. Also initialize the
   5600      register information struct.  */
   5601   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5602     {
   5603       regstart[mcnt] = regend[mcnt]
   5604         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
   5605 
   5606       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
   5607       IS_ACTIVE (reg_info[mcnt]) = 0;
   5608       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   5609       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   5610     }
   5611 
   5612   /* We move `string1' into `string2' if the latter's empty -- but not if
   5613      `string1' is null.  */
   5614   if (size2 == 0 && string1 != NULL)
   5615     {
   5616       string2 = string1;
   5617       size2 = size1;
   5618       string1 = 0;
   5619       size1 = 0;
   5620     }
   5621   end1 = string1 + size1;
   5622   end2 = string2 + size2;
   5623 
   5624   /* Compute where to stop matching, within the two strings.  */
   5625 #ifdef MBS_SUPPORT
   5626   if (stop <= csize1)
   5627     {
   5628       mcnt = count_mbs_length(mbs_offset1, stop);
   5629       end_match_1 = string1 + mcnt;
   5630       end_match_2 = string2;
   5631     }
   5632   else
   5633     {
   5634       end_match_1 = end1;
   5635       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
   5636       end_match_2 = string2 + mcnt;
   5637     }
   5638   if (mcnt < 0)
   5639     { /* count_mbs_length return error.  */
   5640       FREE_VARIABLES ();
   5641       return -1;
   5642     }
   5643 #else
   5644   if (stop <= size1)
   5645     {
   5646       end_match_1 = string1 + stop;
   5647       end_match_2 = string2;
   5648     }
   5649   else
   5650     {
   5651       end_match_1 = end1;
   5652       end_match_2 = string2 + stop - size1;
   5653     }
   5654 #endif /* MBS_SUPPORT */
   5655 
   5656   /* `p' scans through the pattern as `d' scans through the data.
   5657      `dend' is the end of the input string that `d' points within.  `d'
   5658      is advanced into the following input string whenever necessary, but
   5659      this happens before fetching; therefore, at the beginning of the
   5660      loop, `d' can be pointing at the end of a string, but it cannot
   5661      equal `string2'.  */
   5662 #ifdef MBS_SUPPORT
   5663   if (size1 > 0 && pos <= csize1)
   5664     {
   5665       mcnt = count_mbs_length(mbs_offset1, pos);
   5666       d = string1 + mcnt;
   5667       dend = end_match_1;
   5668     }
   5669   else
   5670     {
   5671       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
   5672       d = string2 + mcnt;
   5673       dend = end_match_2;
   5674     }
   5675 
   5676   if (mcnt < 0)
   5677     { /* count_mbs_length return error.  */
   5678       FREE_VARIABLES ();
   5679       return -1;
   5680     }
   5681 #else
   5682   if (size1 > 0 && pos <= size1)
   5683     {
   5684       d = string1 + pos;
   5685       dend = end_match_1;
   5686     }
   5687   else
   5688     {
   5689       d = string2 + pos - size1;
   5690       dend = end_match_2;
   5691     }
   5692 #endif /* MBS_SUPPORT */
   5693 
   5694   DEBUG_PRINT1 ("The compiled pattern is:\n");
   5695   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   5696   DEBUG_PRINT1 ("The string to match is: `");
   5697   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
   5698   DEBUG_PRINT1 ("'\n");
   5699 
   5700   /* This loops over pattern commands.  It exits by returning from the
   5701      function if the match is complete, or it drops through if the match
   5702      fails at this starting point in the input data.  */
   5703   for (;;)
   5704     {
   5705 #ifdef _LIBC
   5706       DEBUG_PRINT2 ("\n%p: ", p);
   5707 #else
   5708       DEBUG_PRINT2 ("\n0x%x: ", p);
   5709 #endif
   5710 
   5711       if (p == pend)
   5712 	{ /* End of pattern means we might have succeeded.  */
   5713           DEBUG_PRINT1 ("end of pattern ... ");
   5714 
   5715 	  /* If we haven't matched the entire string, and we want the
   5716              longest match, try backtracking.  */
   5717           if (d != end_match_2)
   5718 	    {
   5719 	      /* 1 if this match ends in the same string (string1 or string2)
   5720 		 as the best previous match.  */
   5721 	      boolean same_str_p = (FIRST_STRING_P (match_end)
   5722 				    == MATCHING_IN_FIRST_STRING);
   5723 	      /* 1 if this match is the best seen so far.  */
   5724 	      boolean best_match_p;
   5725 
   5726 	      /* AIX compiler got confused when this was combined
   5727 		 with the previous declaration.  */
   5728 	      if (same_str_p)
   5729 		best_match_p = d > match_end;
   5730 	      else
   5731 		best_match_p = !MATCHING_IN_FIRST_STRING;
   5732 
   5733               DEBUG_PRINT1 ("backtracking.\n");
   5734 
   5735               if (!FAIL_STACK_EMPTY ())
   5736                 { /* More failure points to try.  */
   5737 
   5738                   /* If exceeds best match so far, save it.  */
   5739                   if (!best_regs_set || best_match_p)
   5740                     {
   5741                       best_regs_set = true;
   5742                       match_end = d;
   5743 
   5744                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
   5745 
   5746                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5747                         {
   5748                           best_regstart[mcnt] = regstart[mcnt];
   5749                           best_regend[mcnt] = regend[mcnt];
   5750                         }
   5751                     }
   5752                   goto fail;
   5753                 }
   5754 
   5755               /* If no failure points, don't restore garbage.  And if
   5756                  last match is real best match, don't restore second
   5757                  best one. */
   5758               else if (best_regs_set && !best_match_p)
   5759                 {
   5760   	        restore_best_regs:
   5761                   /* Restore best match.  It may happen that `dend ==
   5762                      end_match_1' while the restored d is in string2.
   5763                      For example, the pattern `x.*y.*z' against the
   5764                      strings `x-' and `y-z-', if the two strings are
   5765                      not consecutive in memory.  */
   5766                   DEBUG_PRINT1 ("Restoring best registers.\n");
   5767 
   5768                   d = match_end;
   5769                   dend = ((d >= string1 && d <= end1)
   5770 		           ? end_match_1 : end_match_2);
   5771 
   5772 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5773 		    {
   5774 		      regstart[mcnt] = best_regstart[mcnt];
   5775 		      regend[mcnt] = best_regend[mcnt];
   5776 		    }
   5777                 }
   5778             } /* d != end_match_2 */
   5779 
   5780 	succeed_label:
   5781           DEBUG_PRINT1 ("Accepting match.\n");
   5782           /* If caller wants register contents data back, do it.  */
   5783           if (regs && !bufp->no_sub)
   5784 	    {
   5785 	      /* Have the register data arrays been allocated?  */
   5786               if (bufp->regs_allocated == REGS_UNALLOCATED)
   5787                 { /* No.  So allocate them with malloc.  We need one
   5788                      extra element beyond `num_regs' for the `-1' marker
   5789                      GNU code uses.  */
   5790                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
   5791                   regs->start = TALLOC (regs->num_regs, regoff_t);
   5792                   regs->end = TALLOC (regs->num_regs, regoff_t);
   5793                   if (regs->start == NULL || regs->end == NULL)
   5794 		    {
   5795 		      FREE_VARIABLES ();
   5796 		      return -2;
   5797 		    }
   5798                   bufp->regs_allocated = REGS_REALLOCATE;
   5799                 }
   5800               else if (bufp->regs_allocated == REGS_REALLOCATE)
   5801                 { /* Yes.  If we need more elements than were already
   5802                      allocated, reallocate them.  If we need fewer, just
   5803                      leave it alone.  */
   5804                   if (regs->num_regs < num_regs + 1)
   5805                     {
   5806                       regs->num_regs = num_regs + 1;
   5807                       RETALLOC (regs->start, regs->num_regs, regoff_t);
   5808                       RETALLOC (regs->end, regs->num_regs, regoff_t);
   5809                       if (regs->start == NULL || regs->end == NULL)
   5810 			{
   5811 			  FREE_VARIABLES ();
   5812 			  return -2;
   5813 			}
   5814                     }
   5815                 }
   5816               else
   5817 		{
   5818 		  /* These braces fend off an "empty body in an else-statement"
   5819 		     warning under GCC when assert expands to nothing.  */
   5820 		  assert (bufp->regs_allocated == REGS_FIXED);
   5821 		}
   5822 
   5823               /* Convert the pointer data in `regstart' and `regend' to
   5824                  indices.  Register zero has to be set differently,
   5825                  since we haven't kept track of any info for it.  */
   5826               if (regs->num_regs > 0)
   5827                 {
   5828                   regs->start[0] = pos;
   5829 #ifdef MBS_SUPPORT
   5830 		  if (MATCHING_IN_FIRST_STRING)
   5831 		    regs->end[0] = mbs_offset1 != NULL ?
   5832 					mbs_offset1[d-string1] : 0;
   5833 		  else
   5834 		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
   5835 					     mbs_offset2[d-string2] : 0);
   5836 #else
   5837                   regs->end[0] = (MATCHING_IN_FIRST_STRING
   5838 				  ? ((regoff_t) (d - string1))
   5839 			          : ((regoff_t) (d - string2 + size1)));
   5840 #endif /* MBS_SUPPORT */
   5841                 }
   5842 
   5843               /* Go through the first `min (num_regs, regs->num_regs)'
   5844                  registers, since that is all we initialized.  */
   5845 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
   5846 		   mcnt++)
   5847 		{
   5848                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
   5849                     regs->start[mcnt] = regs->end[mcnt] = -1;
   5850                   else
   5851                     {
   5852 		      regs->start[mcnt]
   5853 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
   5854                       regs->end[mcnt]
   5855 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
   5856                     }
   5857 		}
   5858 
   5859               /* If the regs structure we return has more elements than
   5860                  were in the pattern, set the extra elements to -1.  If
   5861                  we (re)allocated the registers, this is the case,
   5862                  because we always allocate enough to have at least one
   5863                  -1 at the end.  */
   5864               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
   5865                 regs->start[mcnt] = regs->end[mcnt] = -1;
   5866 	    } /* regs && !bufp->no_sub */
   5867 
   5868           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
   5869                         nfailure_points_pushed, nfailure_points_popped,
   5870                         nfailure_points_pushed - nfailure_points_popped);
   5871           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
   5872 
   5873 #ifdef MBS_SUPPORT
   5874 	  if (MATCHING_IN_FIRST_STRING)
   5875 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
   5876 	  else
   5877 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
   5878 			csize1;
   5879           mcnt -= pos;
   5880 #else
   5881           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
   5882 			    ? string1
   5883 			    : string2 - size1);
   5884 #endif /* MBS_SUPPORT */
   5885 
   5886           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
   5887 
   5888           FREE_VARIABLES ();
   5889           return mcnt;
   5890         }
   5891 
   5892       /* Otherwise match next pattern command.  */
   5893       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   5894 	{
   5895         /* Ignore these.  Used to ignore the n of succeed_n's which
   5896            currently have n == 0.  */
   5897         case no_op:
   5898           DEBUG_PRINT1 ("EXECUTING no_op.\n");
   5899           break;
   5900 
   5901 	case succeed:
   5902           DEBUG_PRINT1 ("EXECUTING succeed.\n");
   5903 	  goto succeed_label;
   5904 
   5905         /* Match the next n pattern characters exactly.  The following
   5906            byte in the pattern defines n, and the n bytes after that
   5907            are the characters to match.  */
   5908 	case exactn:
   5909 #ifdef MBS_SUPPORT
   5910 	case exactn_bin:
   5911 #endif
   5912 	  mcnt = *p++;
   5913           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
   5914 
   5915           /* This is written out as an if-else so we don't waste time
   5916              testing `translate' inside the loop.  */
   5917           if (translate)
   5918 	    {
   5919 	      do
   5920 		{
   5921 		  PREFETCH ();
   5922 #ifdef MBS_SUPPORT
   5923 		  if (*d <= 0xff)
   5924 		    {
   5925 		      if ((US_CHAR_TYPE) translate[(unsigned char) *d++]
   5926 			  != (US_CHAR_TYPE) *p++)
   5927 			goto fail;
   5928 		    }
   5929 		  else
   5930 		    {
   5931 		      if (*d++ != (CHAR_TYPE) *p++)
   5932 			goto fail;
   5933 		    }
   5934 #else
   5935 		  if ((US_CHAR_TYPE) translate[(unsigned char) *d++]
   5936 		      != (US_CHAR_TYPE) *p++)
   5937                     goto fail;
   5938 #endif /* MBS_SUPPORT */
   5939 		}
   5940 	      while (--mcnt);
   5941 	    }
   5942 	  else
   5943 	    {
   5944 	      do
   5945 		{
   5946 		  PREFETCH ();
   5947 		  if (*d++ != (CHAR_TYPE) *p++) goto fail;
   5948 		}
   5949 	      while (--mcnt);
   5950 	    }
   5951 	  SET_REGS_MATCHED ();
   5952           break;
   5953 
   5954 
   5955         /* Match any character except possibly a newline or a null.  */
   5956 	case anychar:
   5957           DEBUG_PRINT1 ("EXECUTING anychar.\n");
   5958 
   5959           PREFETCH ();
   5960 
   5961           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
   5962               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
   5963 	    goto fail;
   5964 
   5965           SET_REGS_MATCHED ();
   5966           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
   5967           d++;
   5968 	  break;
   5969 
   5970 
   5971 	case charset:
   5972 	case charset_not:
   5973 	  {
   5974 	    register US_CHAR_TYPE c;
   5975 #ifdef MBS_SUPPORT
   5976 	    unsigned int i, char_class_length, coll_symbol_length,
   5977               equiv_class_length, ranges_length, chars_length, length;
   5978 	    CHAR_TYPE *workp, *workp2, *charset_top;
   5979 #define WORK_BUFFER_SIZE 128
   5980             CHAR_TYPE str_buf[WORK_BUFFER_SIZE];
   5981 # ifdef _LIBC
   5982 	    uint32_t nrules;
   5983 # endif /* _LIBC */
   5984 #endif /* MBS_SUPPORT */
   5985 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
   5986 
   5987             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
   5988 	    PREFETCH ();
   5989 	    c = TRANSLATE (*d); /* The character to match.  */
   5990 #ifdef MBS_SUPPORT
   5991 # ifdef _LIBC
   5992 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   5993 # endif /* _LIBC */
   5994 	    charset_top = p - 1;
   5995 	    char_class_length = *p++;
   5996 	    coll_symbol_length = *p++;
   5997 	    equiv_class_length = *p++;
   5998 	    ranges_length = *p++;
   5999 	    chars_length = *p++;
   6000 	    /* p now points to charset[6], so the address of the next instruction
   6001 	       (charset[6+l+m+n+2*o+p']) equals &p[l+m+n+2*o+p'],
   6002 	       where l=length of char_classes, m=length of collating_symbol,
   6003 	       n=length of equivalence_class, o=length of char_range,
   6004 	       p'=length of character.  */
   6005 	    workp = p;
   6006 	    /* Update p to indicate the next instruction.  */
   6007 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
   6008               2*ranges_length + chars_length;
   6009 
   6010             /* match with char_class?  */
   6011 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
   6012 	      {
   6013 		wctype_t wctype;
   6014 		uintptr_t alignedp = ((uintptr_t)workp
   6015 				      + __alignof__(wctype_t) - 1)
   6016 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   6017 		wctype = *((wctype_t*)alignedp);
   6018 		workp += CHAR_CLASS_SIZE;
   6019 		if (iswctype((wint_t)c, wctype))
   6020 		  goto char_set_matched;
   6021 	      }
   6022 
   6023             /* match with collating_symbol?  */
   6024 # ifdef _LIBC
   6025 	    if (nrules != 0)
   6026 	      {
   6027 		const unsigned char *extra = (const unsigned char *)
   6028 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   6029 
   6030 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
   6031 		     workp++)
   6032 		  {
   6033 		    int32_t *wextra;
   6034 		    wextra = (int32_t*)(extra + *workp++);
   6035 		    for (i = 0; i < *wextra; ++i)
   6036 		      if (TRANSLATE(d[i]) != wextra[1 + i])
   6037 			break;
   6038 
   6039 		    if (i == *wextra)
   6040 		      {
   6041 			/* Advance d past the matched collating symbol, less one:
   6042 			   d will be incremented once more after char_set_matched:.  */
   6043 			d += i - 1;
   6044 			goto char_set_matched;
   6045 		      }
   6046 		  }
   6047 	      }
   6048 	    else /* (nrules == 0) */
   6049 # endif
   6050 	      /* If we can't look up collation data, we use wcscoll
   6051 		 instead.  */
   6052 	      {
   6053 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
   6054 		  {
   6055 		    const CHAR_TYPE *backup_d = d, *backup_dend = dend;
   6056 		    length = wcslen(workp);
   6057 
   6058 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6059 		       no substring of the string can ever match the
   6060 		       collating symbol.  */
   6061 		    if (wcscoll(workp, d) > 0)
   6062 		      {
   6063 			workp += length + 1;
   6064 			continue;
   6065 		      }
   6066 
   6067 		    /* First, we compare the collating symbol with
   6068 		       the first character of the string.
   6069 		       If it doesn't match, we add the next character to
   6070 		       the compare buffer in turn.  */
   6071 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
   6072 		      {
   6073 			int match;
   6074 			if (d == dend)
   6075 			  {
   6076 			    if (dend == end_match_2)
   6077 			      break;
   6078 			    d = string2;
   6079 			    dend = end_match_2;
   6080 			  }
   6081 
   6082 			/* add next character to the compare buffer.  */
   6083 			str_buf[i] = TRANSLATE(*d);
   6084 			str_buf[i+1] = '\0';
   6085 
   6086 			match = wcscoll(workp, str_buf);
   6087 			if (match == 0)
   6088 			  goto char_set_matched;
   6089 
   6090 			if (match < 0)
   6091 			  /* (str_buf > workp) implies (str_buf + X > workp),
   6092 			     because for all X (str_buf + X > str_buf).
   6093 			     So we need not continue this loop.  */
   6094 			  break;
   6095 
   6096 			/* Otherwise (str_buf < workp),
   6097 			   (str_buf+next_character) may still equal (workp).
   6098 			   So we continue this loop.  */
   6099 		      }
   6100 		    /* not matched */
   6101 		    d = backup_d;
   6102 		    dend = backup_dend;
   6103 		    workp += length + 1;
   6104 		  }
   6105               }
   6106             /* match with equivalence_class?  */
   6107 # ifdef _LIBC
   6108 	    if (nrules != 0)
   6109 	      {
   6110                 const CHAR_TYPE *backup_d = d, *backup_dend = dend;
   6111 		/* Try to match the equivalence class against
   6112 		   those known to the collate implementation.  */
   6113 		const int32_t *table;
   6114 		const int32_t *weights;
   6115 		const int32_t *extra;
   6116 		const int32_t *indirect;
   6117 		int32_t idx, idx2;
   6118 		wint_t *cp;
   6119 		size_t len;
   6120 
   6121 		/* This #include defines a local function!  */
   6122 #  include <locale/weightwc.h>
   6123 
   6124 		table = (const int32_t *)
   6125 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
   6126 		weights = (const wint_t *)
   6127 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
   6128 		extra = (const wint_t *)
   6129 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
   6130 		indirect = (const int32_t *)
   6131 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
   6132 
   6133 		/* Write 1 collating element to str_buf, and
   6134 		   get its index.  */
   6135 		idx2 = 0;
   6136 
   6137 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
   6138 		  {
   6139 		    cp = (wint_t*)str_buf;
   6140 		    if (d == dend)
   6141 		      {
   6142 			if (dend == end_match_2)
   6143 			  break;
   6144 			d = string2;
   6145 			dend = end_match_2;
   6146 		      }
   6147 		    str_buf[i] = TRANSLATE(*(d+i));
   6148 		    str_buf[i+1] = '\0'; /* sentinel */
   6149 		    idx2 = findidx ((const wint_t**)&cp);
   6150 		  }
   6151 
   6152 		/* Advance d by the distance cp moved within str_buf, less one:
   6153 		   d will be incremented once more after char_set_matched:.  */
   6154 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
   6155 		if (d >= dend)
   6156 		  {
   6157 		    if (dend == end_match_2)
   6158 			d = dend;
   6159 		    else
   6160 		      {
   6161 			d = string2;
   6162 			dend = end_match_2;
   6163 		      }
   6164 		  }
   6165 
   6166 		len = weights[idx2];
   6167 
   6168 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
   6169 		     workp++)
   6170 		  {
   6171 		    idx = (int32_t)*workp;
   6172 		    /* We already checked idx != 0 in regex_compile. */
   6173 
   6174 		    if (idx2 != 0 && len == weights[idx])
   6175 		      {
   6176 			int cnt = 0;
   6177 			while (cnt < len && (weights[idx + 1 + cnt]
   6178 					     == weights[idx2 + 1 + cnt]))
   6179 			  ++cnt;
   6180 
   6181 			if (cnt == len)
   6182 			  goto char_set_matched;
   6183 		      }
   6184 		  }
   6185 		/* not matched */
   6186                 d = backup_d;
   6187                 dend = backup_dend;
   6188 	      }
   6189 	    else /* (nrules == 0) */
   6190 # endif
   6191 	      /* If we can't look up collation data, we use wcscoll
   6192 		 instead.  */
   6193 	      {
   6194 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
   6195 		  {
   6196 		    const CHAR_TYPE *backup_d = d, *backup_dend = dend;
   6197 		    length = wcslen(workp);
   6198 
   6199 		    /* If wcscoll(the equivalence class, whole string) > 0,
   6200 		       no substring of the string can ever match the
   6201 		       equivalence class.  */
   6202 		    if (wcscoll(workp, d) > 0)
   6203 		      {
   6204 			workp += length + 1;
   6205 			break;
   6206 		      }
   6207 
   6208 		    /* First, we compare the equivalence class with
   6209 		       the first character of the string.
   6210 		       If it doesn't match, we add the next character to
   6211 		       the compare buffer in turn.  */
   6212 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
   6213 		      {
   6214 			int match;
   6215 			if (d == dend)
   6216 			  {
   6217 			    if (dend == end_match_2)
   6218 			      break;
   6219 			    d = string2;
   6220 			    dend = end_match_2;
   6221 			  }
   6222 
   6223 			/* add next character to the compare buffer.  */
   6224 			str_buf[i] = TRANSLATE(*d);
   6225 			str_buf[i+1] = '\0';
   6226 
   6227 			match = wcscoll(workp, str_buf);
   6228 
   6229 			if (match == 0)
   6230 			  goto char_set_matched;
   6231 
   6232 			if (match < 0)
   6233 			/* (str_buf > workp) implies (str_buf + X > workp),
   6234 			   because for all X (str_buf + X > str_buf).
   6235 			   So we need not continue this loop.  */
   6236 			  break;
   6237 
   6238 			/* Otherwise (str_buf < workp),
   6239 			   (str_buf+next_character) may still equal (workp).
   6240 			   So we continue this loop.  */
   6241 		      }
   6242 		    /* not matched */
   6243 		    d = backup_d;
   6244 		    dend = backup_dend;
   6245 		    workp += length + 1;
   6246 		  }
   6247 	      }
   6248 
   6249             /* match with char_range?  */
   6250 #ifdef _LIBC
   6251 	    if (nrules != 0)
   6252 	      {
   6253 		uint32_t collseqval;
   6254 		const char *collseq = (const char *)
   6255 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   6256 
   6257 		collseqval = collseq_table_lookup (collseq, c);
   6258 
   6259 		for (; workp < p - chars_length ;)
   6260 		  {
   6261 		    uint32_t start_val, end_val;
   6262 
   6263 		    /* We already compute the collation sequence value
   6264 		       of the characters (or collating symbols).  */
   6265 		    start_val = (uint32_t) *workp++; /* range_start */
   6266 		    end_val = (uint32_t) *workp++; /* range_end */
   6267 
   6268 		    if (start_val <= collseqval && collseqval <= end_val)
   6269 		      goto char_set_matched;
   6270 		  }
   6271 	      }
   6272 	    else
   6273 #endif
   6274 	      {
   6275 		/* We set range_start_char at str_buf[0], range_end_char
   6276 		   at str_buf[4], and compared char at str_buf[2].  */
   6277 		str_buf[1] = 0;
   6278 		str_buf[2] = c;
   6279 		str_buf[3] = 0;
   6280 		str_buf[5] = 0;
   6281 		for (; workp < p - chars_length ;)
   6282 		  {
   6283 		    wchar_t *range_start_char, *range_end_char;
   6284 
   6285 		    /* match if (range_start_char <= c <= range_end_char).  */
   6286 
   6287 		    /* If range_start(or end) < 0, we assume -range_start(end)
   6288 		       is the offset of the collating symbol which is specified
   6289 		       as the character of the range start(end).  */
   6290 
   6291 		    /* range_start */
   6292 		    if (*workp < 0)
   6293 		      range_start_char = charset_top - (*workp++);
   6294 		    else
   6295 		      {
   6296 			str_buf[0] = *workp++;
   6297 			range_start_char = str_buf;
   6298 		      }
   6299 
   6300 		    /* range_end */
   6301 		    if (*workp < 0)
   6302 		      range_end_char = charset_top - (*workp++);
   6303 		    else
   6304 		      {
   6305 			str_buf[4] = *workp++;
   6306 			range_end_char = str_buf + 4;
   6307 		      }
   6308 
   6309 		    if (wcscoll(range_start_char, str_buf+2) <= 0 &&
   6310 			wcscoll(str_buf+2, range_end_char) <= 0)
   6311 
   6312 		      goto char_set_matched;
   6313 		  }
   6314 	      }
   6315 
   6316             /* match with char?  */
   6317 	    for (; workp < p ; workp++)
   6318 	      if (c == *workp)
   6319 		goto char_set_matched;
   6320 
   6321 	    not = !not;
   6322 
   6323 	  char_set_matched:
   6324 	    if (not) goto fail;
   6325 #else
   6326             /* Cast to `unsigned' instead of `unsigned char' in case the
   6327                bit list is a full 32 bytes long.  */
   6328 	    if (c < (unsigned) (*p * BYTEWIDTH)
   6329 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   6330 	      not = !not;
   6331 
   6332 	    p += 1 + *p;
   6333 
   6334 	    if (!not) goto fail;
   6335 #undef WORK_BUFFER_SIZE
   6336 #endif /* MBS_SUPPORT */
   6337 	    SET_REGS_MATCHED ();
   6338             d++;
   6339 	    break;
   6340 	  }
   6341 
   6342 
   6343         /* The beginning of a group is represented by start_memory.
   6344            The arguments are the register number in the next byte, and the
   6345            number of groups inner to this one in the next.  The text
   6346            matched within the group is recorded (in the internal
   6347            registers data structure) under the register number.  */
   6348         case start_memory:
   6349 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
   6350 			(long int) *p, (long int) p[1]);
   6351 
   6352           /* Find out if this group can match the empty string.  */
   6353 	  p1 = p;		/* To send to group_match_null_string_p.  */
   6354 
   6355           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
   6356             REG_MATCH_NULL_STRING_P (reg_info[*p])
   6357               = group_match_null_string_p (&p1, pend, reg_info);
   6358 
   6359           /* Save the position in the string where we were the last time
   6360              we were at this open-group operator in case the group is
   6361              operated upon by a repetition operator, e.g., with `(a*)*b'
   6362              against `ab'; then we want to ignore where we are now in
   6363              the string in case this attempt to match fails.  */
   6364           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6365                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
   6366                              : regstart[*p];
   6367 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
   6368 			 POINTER_TO_OFFSET (old_regstart[*p]));
   6369 
   6370           regstart[*p] = d;
   6371 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
   6372 
   6373           IS_ACTIVE (reg_info[*p]) = 1;
   6374           MATCHED_SOMETHING (reg_info[*p]) = 0;
   6375 
   6376 	  /* Clear this whenever we change the register activity status.  */
   6377 	  set_regs_matched_done = 0;
   6378 
   6379           /* This is the new highest active register.  */
   6380           highest_active_reg = *p;
   6381 
   6382           /* If nothing was active before, this is the new lowest active
   6383              register.  */
   6384           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6385             lowest_active_reg = *p;
   6386 
   6387           /* Move past the register number and inner group count.  */
   6388           p += 2;
   6389 	  just_past_start_mem = p;
   6390 
   6391           break;
   6392 
   6393 
   6394         /* The stop_memory opcode represents the end of a group.  Its
   6395            arguments are the same as start_memory's: the register
   6396            number, and the number of inner groups.  */
   6397 	case stop_memory:
   6398 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
   6399 			(long int) *p, (long int) p[1]);
   6400 
   6401           /* We need to save the string position the last time we were at
   6402              this close-group operator in case the group is operated
   6403              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
   6404              against `aba'; then we want to ignore where we are now in
   6405              the string in case this attempt to match fails.  */
   6406           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6407                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
   6408 			   : regend[*p];
   6409 	  DEBUG_PRINT2 ("      old_regend: %d\n",
   6410 			 POINTER_TO_OFFSET (old_regend[*p]));
   6411 
   6412           regend[*p] = d;
   6413 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
   6414 
   6415           /* This register isn't active anymore.  */
   6416           IS_ACTIVE (reg_info[*p]) = 0;
   6417 
   6418 	  /* Clear this whenever we change the register activity status.  */
   6419 	  set_regs_matched_done = 0;
   6420 
   6421           /* If this was the only register active, nothing is active
   6422              anymore.  */
   6423           if (lowest_active_reg == highest_active_reg)
   6424             {
   6425               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6426               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6427             }
   6428           else
   6429             { /* We must scan for the new highest active register, since
   6430                  it isn't necessarily one less than now: consider
   6431                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
   6432                  new highest active register is 1.  */
   6433               US_CHAR_TYPE r = *p - 1;
   6434               while (r > 0 && !IS_ACTIVE (reg_info[r]))
   6435                 r--;
   6436 
   6437               /* If we end up at register zero, that means that we saved
   6438                  the registers as the result of an `on_failure_jump', not
   6439                  a `start_memory', and we jumped past the innermost
   6440                  `stop_memory'.  For example, in ((.)*) we save
   6441                  registers 1 and 2 as a result of the *, but when we pop
   6442                  back to the second ), we are at the stop_memory 1.
   6443                  Thus, nothing is active.  */
   6444 	      if (r == 0)
   6445                 {
   6446                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6447                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6448                 }
   6449               else
   6450                 highest_active_reg = r;
   6451             }
   6452 
   6453           /* If we just failed to match something this time around with a
   6454              group that's operated on by a repetition operator, try to
   6455              force exit from the ``loop'', and restore the register
   6456              information for this group that we had before trying this
   6457              last match.  */
   6458           if ((!MATCHED_SOMETHING (reg_info[*p])
   6459                || just_past_start_mem == p - 1)
   6460 	      && (p + 2) < pend)
   6461             {
   6462               boolean is_a_jump_n = false;
   6463 
   6464               p1 = p + 2;
   6465               mcnt = 0;
   6466               switch ((re_opcode_t) *p1++)
   6467                 {
   6468                   case jump_n:
   6469 		    is_a_jump_n = true;
   6470                   case pop_failure_jump:
   6471 		  case maybe_pop_jump:
   6472 		  case jump:
   6473 		  case dummy_failure_jump:
   6474                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6475 		    if (is_a_jump_n)
   6476 		      p1 += OFFSET_ADDRESS_SIZE;
   6477                     break;
   6478 
   6479                   default:
   6480                     /* do nothing */ ;
   6481                 }
   6482 	      p1 += mcnt;
   6483 
   6484               /* If the next operation is a jump backwards in the pattern
   6485 	         to an on_failure_jump right before the start_memory
   6486                  corresponding to this stop_memory, exit from the loop
   6487                  by forcing a failure after pushing on the stack the
   6488                  on_failure_jump's jump in the pattern, and d.  */
   6489               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
   6490                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
   6491 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
   6492 		{
   6493                   /* If this group ever matched anything, then restore
   6494                      what its registers were before trying this last
   6495                      failed match, e.g., with `(a*)*b' against `ab' for
   6496                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
   6497                      against `aba' for regend[3].
   6498 
   6499                      Also restore the registers for inner groups for,
   6500                      e.g., `((a*)(b*))*' against `aba' (register 3 would
   6501                      otherwise get trashed).  */
   6502 
   6503                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
   6504 		    {
   6505 		      unsigned r;
   6506 
   6507                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
   6508 
   6509 		      /* Restore this and inner groups' (if any) registers.  */
   6510                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
   6511 			   r++)
   6512                         {
   6513                           regstart[r] = old_regstart[r];
   6514 
   6515                           /* xx why this test?  */
   6516                           if (old_regend[r] >= regstart[r])
   6517                             regend[r] = old_regend[r];
   6518                         }
   6519                     }
   6520 		  p1++;
   6521                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6522                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
   6523 
   6524                   goto fail;
   6525                 }
   6526             }
   6527 
   6528           /* Move past the register number and the inner group count.  */
   6529           p += 2;
   6530           break;
   6531 
   6532 
   6533 	/* \<digit> has been turned into a `duplicate' command which is
   6534            followed by the numeric value of <digit> as the register number.  */
   6535         case duplicate:
   6536 	  {
   6537 	    register const CHAR_TYPE *d2, *dend2;
   6538 	    int regno = *p++;   /* Get which register to match against.  */
   6539 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
   6540 
   6541 	    /* Can't back reference a group which we've never matched.  */
   6542             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
   6543               goto fail;
   6544 
   6545             /* Where in input to try to start matching.  */
   6546             d2 = regstart[regno];
   6547 
   6548             /* Where to stop matching; if both the place to start and
   6549                the place to stop matching are in the same string, then
   6550                set to the place to stop, otherwise, for now have to use
   6551                the end of the first string.  */
   6552 
   6553             dend2 = ((FIRST_STRING_P (regstart[regno])
   6554 		      == FIRST_STRING_P (regend[regno]))
   6555 		     ? regend[regno] : end_match_1);
   6556 	    for (;;)
   6557 	      {
   6558 		/* If necessary, advance to next segment in register
   6559                    contents.  */
   6560 		while (d2 == dend2)
   6561 		  {
   6562 		    if (dend2 == end_match_2) break;
   6563 		    if (dend2 == regend[regno]) break;
   6564 
   6565                     /* End of string1 => advance to string2. */
   6566                     d2 = string2;
   6567                     dend2 = regend[regno];
   6568 		  }
   6569 		/* At end of register contents => success */
   6570 		if (d2 == dend2) break;
   6571 
   6572 		/* If necessary, advance to next segment in data.  */
   6573 		PREFETCH ();
   6574 
   6575 		/* How many characters left in this segment to match.  */
   6576 		mcnt = dend - d;
   6577 
   6578 		/* Want how many consecutive characters we can match in
   6579                    one shot, so, if necessary, adjust the count.  */
   6580                 if (mcnt > dend2 - d2)
   6581 		  mcnt = dend2 - d2;
   6582 
   6583 		/* Compare that many; failure if mismatch, else move
   6584                    past them.  */
   6585 		if (translate
   6586                     ? bcmp_translate (d, d2, mcnt, translate)
   6587                     : memcmp (d, d2, mcnt*sizeof(US_CHAR_TYPE)))
   6588 		  goto fail;
   6589 		d += mcnt, d2 += mcnt;
   6590 
   6591 		/* Do this because we've matched some characters.  */
   6592 		SET_REGS_MATCHED ();
   6593 	      }
   6594 	  }
   6595 	  break;
   6596 
   6597 
   6598         /* begline matches the empty string at the beginning of the string
   6599            (unless `not_bol' is set in `bufp'), and, if
   6600            `newline_anchor' is set, after newlines.  */
   6601 	case begline:
   6602           DEBUG_PRINT1 ("EXECUTING begline.\n");
   6603 
   6604           if (AT_STRINGS_BEG (d))
   6605             {
   6606               if (!bufp->not_bol) break;
   6607             }
   6608           else if (d[-1] == '\n' && bufp->newline_anchor)
   6609             {
   6610               break;
   6611             }
   6612           /* In all other cases, we fail.  */
   6613           goto fail;
   6614 
   6615 
   6616         /* endline is the dual of begline.  */
   6617 	case endline:
   6618           DEBUG_PRINT1 ("EXECUTING endline.\n");
   6619 
   6620           if (AT_STRINGS_END (d))
   6621             {
   6622               if (!bufp->not_eol) break;
   6623             }
   6624 
   6625           /* We have to ``prefetch'' the next character.  */
   6626           else if ((d == end1 ? *string2 : *d) == '\n'
   6627                    && bufp->newline_anchor)
   6628             {
   6629               break;
   6630             }
   6631           goto fail;
   6632 
   6633 
   6634 	/* Match at the very beginning of the data.  */
   6635         case begbuf:
   6636           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
   6637           if (AT_STRINGS_BEG (d))
   6638             break;
   6639           goto fail;
   6640 
   6641 
   6642 	/* Match at the very end of the data.  */
   6643         case endbuf:
   6644           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
   6645 	  if (AT_STRINGS_END (d))
   6646 	    break;
   6647           goto fail;
   6648 
   6649 
   6650         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
   6651            pushes NULL as the value for the string on the stack.  Then
   6652            `pop_failure_point' will keep the current value for the
   6653            string, instead of restoring it.  To see why, consider
   6654            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
   6655            then the . fails against the \n.  But the next thing we want
   6656            to do is match the \n against the \n; if we restored the
   6657            string value, we would be back at the foo.
   6658 
   6659            Because this is used only in specific cases, we don't need to
   6660            check all the things that `on_failure_jump' does, to make
   6661            sure the right things get saved on the stack.  Hence we don't
   6662            share its code.  The only reason to push anything on the
   6663            stack at all is that otherwise we would have to change
   6664            `anychar's code to do something besides goto fail in this
   6665            case; that seems worse than this.  */
   6666         case on_failure_keep_string_jump:
   6667           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
   6668 
   6669           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6670 #ifdef _LIBC
   6671           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
   6672 #else
   6673           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
   6674 #endif
   6675 
   6676           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
   6677           break;
   6678 
   6679 
   6680 	/* Uses of on_failure_jump:
   6681 
   6682            Each alternative starts with an on_failure_jump that points
   6683            to the beginning of the next alternative.  Each alternative
   6684            except the last ends with a jump that in effect jumps past
   6685            the rest of the alternatives.  (They really jump to the
   6686            ending jump of the following alternative, because tensioning
   6687            these jumps is a hassle.)
   6688 
   6689            Repeats start with an on_failure_jump that points past both
   6690            the repetition text and either the following jump or
   6691            pop_failure_jump back to this on_failure_jump.  */
   6692 	case on_failure_jump:
   6693         on_failure:
   6694           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
   6695 
   6696           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6697 #ifdef _LIBC
   6698           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
   6699 #else
   6700           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
   6701 #endif
   6702 
   6703           /* If this on_failure_jump comes right before a group (i.e.,
   6704              the original * applied to a group), save the information
   6705              for that group and all inner ones, so that if we fail back
   6706              to this point, the group's information will be correct.
   6707              For example, in \(a*\)*\1, we need the preceding group,
   6708              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
   6709 
   6710           /* We can't use `p' to check ahead because we push
   6711              a failure point to `p + mcnt' after we do this.  */
   6712           p1 = p;
   6713 
   6714           /* We need to skip no_op's before we look for the
   6715              start_memory in case this on_failure_jump is happening as
   6716              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
   6717              against aba.  */
   6718           while (p1 < pend && (re_opcode_t) *p1 == no_op)
   6719             p1++;
   6720 
   6721           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
   6722             {
   6723               /* We have a new highest active register now.  This will
   6724                  get reset at the start_memory we are about to get to,
   6725                  but we will have saved all the registers relevant to
   6726                  this repetition op, as described above.  */
   6727               highest_active_reg = *(p1 + 1) + *(p1 + 2);
   6728               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6729                 lowest_active_reg = *(p1 + 1);
   6730             }
   6731 
   6732           DEBUG_PRINT1 (":\n");
   6733           PUSH_FAILURE_POINT (p + mcnt, d, -2);
   6734           break;
   6735 
   6736 
   6737         /* A smart repeat ends with `maybe_pop_jump'.
   6738 	   We change it to either `pop_failure_jump' or `jump'.  */
   6739         case maybe_pop_jump:
   6740           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6741           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
   6742           {
   6743 	    register US_CHAR_TYPE *p2 = p;
   6744 
   6745             /* Compare the beginning of the repeat with what in the
   6746                pattern follows its end. If we can establish that there
   6747                is nothing that they would both match, i.e., that we
   6748                would have to backtrack because of (as in, e.g., `a*a')
   6749                then we can change to pop_failure_jump, because we'll
   6750                never have to backtrack.
   6751 
   6752                This is not true in the case of alternatives: in
   6753                `(a|ab)*' we do need to backtrack to the `ab' alternative
   6754                (e.g., if the string was `ab').  But instead of trying to
   6755                detect that here, the alternative has put on a dummy
   6756                failure point which is what we will end up popping.  */
   6757 
   6758 	    /* Skip over open/close-group commands.
   6759 	       If what follows this loop is a ...+ construct,
   6760 	       look at what begins its body, since we will have to
   6761 	       match at least one of that.  */
   6762 	    while (1)
   6763 	      {
   6764 		if (p2 + 2 < pend
   6765 		    && ((re_opcode_t) *p2 == stop_memory
   6766 			|| (re_opcode_t) *p2 == start_memory))
   6767 		  p2 += 3;
   6768 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
   6769 			 && (re_opcode_t) *p2 == dummy_failure_jump)
   6770 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
   6771 		else
   6772 		  break;
   6773 	      }
   6774 
   6775 	    p1 = p + mcnt;
   6776 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
   6777 	       to the `maybe_pop_jump' of this case.  Examine what
   6778 	       follows.  */
   6779 
   6780             /* If we're at the end of the pattern, we can change.  */
   6781             if (p2 == pend)
   6782 	      {
   6783 		/* Consider what happens when matching ":\(.*\)"
   6784 		   against ":/".  I don't really understand this code
   6785 		   yet.  */
   6786   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (US_CHAR_TYPE)
   6787 		  pop_failure_jump;
   6788                 DEBUG_PRINT1
   6789                   ("  End of pattern: change to `pop_failure_jump'.\n");
   6790               }
   6791 
   6792             else if ((re_opcode_t) *p2 == exactn
   6793 #ifdef MBS_SUPPORT
   6794 		     || (re_opcode_t) *p2 == exactn_bin
   6795 #endif
   6796 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
   6797 	      {
   6798 		register US_CHAR_TYPE c
   6799                   = *p2 == (US_CHAR_TYPE) endline ? '\n' : p2[2];
   6800 
   6801                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
   6802 #ifdef MBS_SUPPORT
   6803 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
   6804 #endif
   6805 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
   6806                   {
   6807   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (US_CHAR_TYPE)
   6808 		      pop_failure_jump;
   6809 #ifdef MBS_SUPPORT
   6810 		    if (MB_CUR_MAX != 1)
   6811 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
   6812 				    (wint_t) c,
   6813 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
   6814 		    else
   6815 #endif
   6816 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   6817 				    (char) c,
   6818 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
   6819                   }
   6820 
   6821 #ifndef MBS_SUPPORT
   6822 		else if ((re_opcode_t) p1[3] == charset
   6823 			 || (re_opcode_t) p1[3] == charset_not)
   6824 		  {
   6825 		    int not = (re_opcode_t) p1[3] == charset_not;
   6826 
   6827 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
   6828 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   6829 		      not = !not;
   6830 
   6831                     /* `not' is equal to 1 if c would match, which means
   6832                         that we can't change to pop_failure_jump.  */
   6833 		    if (!not)
   6834                       {
   6835   		        p[-3] = (unsigned char) pop_failure_jump;
   6836                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   6837                       }
   6838 		  }
   6839 #endif /* not MBS_SUPPORT */
   6840 	      }
   6841 #ifndef MBS_SUPPORT
   6842             else if ((re_opcode_t) *p2 == charset)
   6843 	      {
   6844 		/* We win if the first character of the loop is not part
   6845                    of the charset.  */
   6846                 if ((re_opcode_t) p1[3] == exactn
   6847  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
   6848  			  && (p2[2 + p1[5] / BYTEWIDTH]
   6849  			      & (1 << (p1[5] % BYTEWIDTH)))))
   6850 		  {
   6851 		    p[-3] = (unsigned char) pop_failure_jump;
   6852 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   6853                   }
   6854 
   6855 		else if ((re_opcode_t) p1[3] == charset_not)
   6856 		  {
   6857 		    int idx;
   6858 		    /* We win if the charset_not inside the loop
   6859 		       lists every character listed in the charset after.  */
   6860 		    for (idx = 0; idx < (int) p2[1]; idx++)
   6861 		      if (! (p2[2 + idx] == 0
   6862 			     || (idx < (int) p1[4]
   6863 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
   6864 			break;
   6865 
   6866 		    if (idx == p2[1])
   6867                       {
   6868   		        p[-3] = (unsigned char) pop_failure_jump;
   6869                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   6870                       }
   6871 		  }
   6872 		else if ((re_opcode_t) p1[3] == charset)
   6873 		  {
   6874 		    int idx;
   6875 		    /* We win if the charset inside the loop
   6876 		       has no overlap with the one after the loop.  */
   6877 		    for (idx = 0;
   6878 			 idx < (int) p2[1] && idx < (int) p1[4];
   6879 			 idx++)
   6880 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
   6881 			break;
   6882 
   6883 		    if (idx == p2[1] || idx == p1[4])
   6884                       {
   6885   		        p[-3] = (unsigned char) pop_failure_jump;
   6886                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   6887                       }
   6888 		  }
   6889 	      }
   6890 #endif /* not MBS_SUPPORT */
   6891 	  }
   6892 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
   6893 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
   6894 	    {
   6895 	      p[-1] = (US_CHAR_TYPE) jump;
   6896               DEBUG_PRINT1 ("  Match => jump.\n");
   6897 	      goto unconditional_jump;
   6898 	    }
   6899         /* Note fall through.  */
   6900 
   6901 
   6902 	/* The end of a simple repeat has a pop_failure_jump back to
   6903            its matching on_failure_jump, where the latter will push a
   6904            failure point.  The pop_failure_jump takes off failure
   6905            points put on by this pop_failure_jump's matching
   6906            on_failure_jump; we got through the pattern to here from the
   6907            matching on_failure_jump, so didn't fail.  */
   6908         case pop_failure_jump:
   6909           {
   6910             /* We need to pass separate storage for the lowest and
   6911                highest registers, even though we don't care about the
   6912                actual values.  Otherwise, we will restore only one
   6913                register from the stack, since lowest will == highest in
   6914                `pop_failure_point'.  */
   6915             active_reg_t dummy_low_reg, dummy_high_reg;
   6916             US_CHAR_TYPE *pdummy = NULL;
   6917             const CHAR_TYPE *sdummy = NULL;
   6918 
   6919             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
   6920             POP_FAILURE_POINT (sdummy, pdummy,
   6921                                dummy_low_reg, dummy_high_reg,
   6922                                reg_dummy, reg_dummy, reg_info_dummy);
   6923           }
   6924 	  /* Note fall through.  */
   6925 
   6926 	unconditional_jump:
   6927 #ifdef _LIBC
   6928 	  DEBUG_PRINT2 ("\n%p: ", p);
   6929 #else
   6930 	  DEBUG_PRINT2 ("\n0x%x: ", p);
   6931 #endif
   6932           /* Note fall through.  */
   6933 
   6934         /* Unconditionally jump (without popping any failure points).  */
   6935         case jump:
   6936 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
   6937           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
   6938 	  p += mcnt;				/* Do the jump.  */
   6939 #ifdef _LIBC
   6940           DEBUG_PRINT2 ("(to %p).\n", p);
   6941 #else
   6942           DEBUG_PRINT2 ("(to 0x%x).\n", p);
   6943 #endif
   6944 	  break;
   6945 
   6946 
   6947         /* We need this opcode so we can detect where alternatives end
   6948            in `group_match_null_string_p' et al.  */
   6949         case jump_past_alt:
   6950           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
   6951           goto unconditional_jump;
   6952 
   6953 
   6954         /* Normally, the on_failure_jump pushes a failure point, which
   6955            then gets popped at pop_failure_jump.  We will end up at
   6956            pop_failure_jump, also, and with a pattern of, say, `a+', we
   6957            are skipping over the on_failure_jump, so we have to push
   6958            something meaningless for pop_failure_jump to pop.  */
   6959         case dummy_failure_jump:
   6960           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
   6961           /* It doesn't matter what we push for the string here.  What
   6962              the code at `fail' tests is the value for the pattern.  */
   6963           PUSH_FAILURE_POINT (NULL, NULL, -2);
   6964           goto unconditional_jump;
   6965 
   6966 
   6967         /* At the end of an alternative, we need to push a dummy failure
   6968            point in case we are followed by a `pop_failure_jump', because
   6969            we don't want the failure point for the alternative to be
   6970            popped.  For example, matching `(a|ab)*' against `aab'
   6971            requires that we match the `ab' alternative.  */
   6972         case push_dummy_failure:
   6973           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
   6974           /* See comments just above at `dummy_failure_jump' about the
   6975              two NULLs.  */
   6976           PUSH_FAILURE_POINT (NULL, NULL, -2);
   6977           break;
   6978 
   6979         /* Have to succeed matching what follows at least n times.
   6980            After that, handle like `on_failure_jump'.  */
   6981         case succeed_n:
   6982           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   6983           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
   6984 
   6985           assert (mcnt >= 0);
   6986           /* Originally, this is how many times we HAVE to succeed.  */
   6987           if (mcnt > 0)
   6988             {
   6989                mcnt--;
   6990 	       p += OFFSET_ADDRESS_SIZE;
   6991                STORE_NUMBER_AND_INCR (p, mcnt);
   6992 #ifdef _LIBC
   6993                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
   6994 			     , mcnt);
   6995 #else
   6996                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
   6997 			     , mcnt);
   6998 #endif
   6999             }
   7000 	  else if (mcnt == 0)
   7001             {
   7002 #ifdef _LIBC
   7003               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
   7004 			    p + OFFSET_ADDRESS_SIZE);
   7005 #else
   7006               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
   7007 			    p + OFFSET_ADDRESS_SIZE);
   7008 #endif /* _LIBC */
   7009 
   7010 #ifdef MBS_SUPPORT
   7011 	      p[1] = (US_CHAR_TYPE) no_op;
   7012 #else
   7013 	      p[2] = (US_CHAR_TYPE) no_op;
   7014               p[3] = (US_CHAR_TYPE) no_op;
   7015 #endif /* MBS_SUPPORT */
   7016               goto on_failure;
   7017             }
   7018           break;
   7019 
   7020         case jump_n:
   7021           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7022           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
   7023 
   7024           /* Originally, this is how many times we CAN jump.  */
   7025           if (mcnt)
   7026             {
   7027                mcnt--;
   7028                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
   7029 
   7030 #ifdef _LIBC
   7031                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7032 			     mcnt);
   7033 #else
   7034                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7035 			     mcnt);
   7036 #endif /* _LIBC */
   7037 	       goto unconditional_jump;
   7038             }
   7039           /* If we don't have to jump any more, skip over the rest of the command.  */
   7040 	  else
   7041 	    p += 2 * OFFSET_ADDRESS_SIZE;
   7042           break;
   7043 
   7044 	case set_number_at:
   7045 	  {
   7046             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
   7047 
   7048             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7049             p1 = p + mcnt;
   7050             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7051 #ifdef _LIBC
   7052             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
   7053 #else
   7054             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
   7055 #endif
   7056 	    STORE_NUMBER (p1, mcnt);
   7057             break;
   7058           }
   7059 
   7060 #if 0
   7061 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
   7062 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   7063 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
   7064 	   macro and introducing temporary variables works around the bug.  */
   7065 
   7066 	case wordbound:
   7067 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7068 	  if (AT_WORD_BOUNDARY (d))
   7069 	    break;
   7070 	  goto fail;
   7071 
   7072 	case notwordbound:
   7073 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7074 	  if (AT_WORD_BOUNDARY (d))
   7075 	    goto fail;
   7076 	  break;
   7077 #else
   7078 	case wordbound:
   7079 	{
   7080 	  boolean prevchar, thischar;
   7081 
   7082 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7083 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7084 	    break;
   7085 
   7086 	  prevchar = WORDCHAR_P (d - 1);
   7087 	  thischar = WORDCHAR_P (d);
   7088 	  if (prevchar != thischar)
   7089 	    break;
   7090 	  goto fail;
   7091 	}
   7092 
   7093       case notwordbound:
   7094 	{
   7095 	  boolean prevchar, thischar;
   7096 
   7097 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7098 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7099 	    goto fail;
   7100 
   7101 	  prevchar = WORDCHAR_P (d - 1);
   7102 	  thischar = WORDCHAR_P (d);
   7103 	  if (prevchar != thischar)
   7104 	    goto fail;
   7105 	  break;
   7106 	}
   7107 #endif
   7108 
   7109 	case wordbeg:
   7110           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
   7111 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
   7112 	    break;
   7113           goto fail;
   7114 
   7115 	case wordend:
   7116           DEBUG_PRINT1 ("EXECUTING wordend.\n");
   7117 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
   7118               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
   7119 	    break;
   7120           goto fail;
   7121 
   7122 #ifdef emacs
   7123   	case before_dot:
   7124           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
   7125  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
   7126   	    goto fail;
   7127   	  break;
   7128 
   7129   	case at_dot:
   7130           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
   7131  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
   7132   	    goto fail;
   7133   	  break;
   7134 
   7135   	case after_dot:
   7136           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
   7137           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
   7138   	    goto fail;
   7139   	  break;
   7140 
   7141 	case syntaxspec:
   7142           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
   7143 	  mcnt = *p++;
   7144 	  goto matchsyntax;
   7145 
   7146         case wordchar:
   7147           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
   7148 	  mcnt = (int) Sword;
   7149         matchsyntax:
   7150 	  PREFETCH ();
   7151 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7152 	  d++;
   7153 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
   7154 	    goto fail;
   7155           SET_REGS_MATCHED ();
   7156 	  break;
   7157 
   7158 	case notsyntaxspec:
   7159           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
   7160 	  mcnt = *p++;
   7161 	  goto matchnotsyntax;
   7162 
   7163         case notwordchar:
   7164           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
   7165 	  mcnt = (int) Sword;
   7166         matchnotsyntax:
   7167 	  PREFETCH ();
   7168 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7169 	  d++;
   7170 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
   7171 	    goto fail;
   7172 	  SET_REGS_MATCHED ();
   7173           break;
   7174 
   7175 #else /* not emacs */
   7176 	case wordchar:
   7177           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
   7178 	  PREFETCH ();
   7179           if (!WORDCHAR_P (d))
   7180             goto fail;
   7181 	  SET_REGS_MATCHED ();
   7182           d++;
   7183 	  break;
   7184 
   7185 	case notwordchar:
   7186           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
   7187 	  PREFETCH ();
   7188 	  if (WORDCHAR_P (d))
   7189             goto fail;
   7190           SET_REGS_MATCHED ();
   7191           d++;
   7192 	  break;
   7193 #endif /* not emacs */
   7194 
   7195         default:
   7196           abort ();
   7197 	}
   7198       continue;  /* Successfully executed one pattern command; keep going.  */
   7199 
   7200 
   7201     /* We goto here if a matching operation fails. */
   7202     fail:
   7203       if (!FAIL_STACK_EMPTY ())
   7204 	{ /* A restart point is known.  Restore to that state.  */
   7205           DEBUG_PRINT1 ("\nFAIL:\n");
   7206           POP_FAILURE_POINT (d, p,
   7207                              lowest_active_reg, highest_active_reg,
   7208                              regstart, regend, reg_info);
   7209 
   7210           /* If this failure point is a dummy, try the next one.  */
   7211           if (!p)
   7212 	    goto fail;
   7213 
   7214           /* If we failed to the end of the pattern, don't examine *p.  */
   7215 	  assert (p <= pend);
   7216           if (p < pend)
   7217             {
   7218               boolean is_a_jump_n = false;
   7219 
   7220               /* If failed to a backwards jump that's part of a repetition
   7221                  loop, need to pop this failure point and use the next one.  */
   7222               switch ((re_opcode_t) *p)
   7223                 {
   7224                 case jump_n:
   7225                   is_a_jump_n = true;
   7226                 case maybe_pop_jump:
   7227                 case pop_failure_jump:
   7228                 case jump:
   7229                   p1 = p + 1;
   7230                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7231                   p1 += mcnt;
   7232 
   7233                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
   7234                       || (!is_a_jump_n
   7235                           && (re_opcode_t) *p1 == on_failure_jump))
   7236                     goto fail;
   7237                   break;
   7238                 default:
   7239                   /* do nothing */ ;
   7240                 }
   7241             }
   7242 
   7243           if (d >= string1 && d <= end1)
   7244 	    dend = end_match_1;
   7245         }
   7246       else
   7247         break;   /* Matching at this starting point really fails.  */
   7248     } /* for (;;) */
   7249 
   7250   if (best_regs_set)
   7251     goto restore_best_regs;
   7252 
   7253   FREE_VARIABLES ();
   7254 
   7255   return -1;         			/* Failure to match.  */
   7256 } /* re_match_2 */
   7257 
   7258 /* Subroutine definitions for re_match_2.  */
   7260 
   7261 
   7262 /* We are passed P pointing to a register number after a start_memory.
   7263 
   7264    Return true if the pattern up to the corresponding stop_memory can
   7265    match the empty string, and false otherwise.
   7266 
   7267    If we find the matching stop_memory, sets P to point to one past its number.
   7268    Otherwise, sets P to an undefined byte less than or equal to END.
   7269 
   7270    We don't handle duplicates properly (yet).  */
   7271 
   7272 static boolean
   7273 group_match_null_string_p (p, end, reg_info)
   7274     US_CHAR_TYPE **p, *end;
   7275     register_info_type *reg_info;
   7276 {
   7277   int mcnt;
   7278   /* Point to after the args to the start_memory.  */
   7279   US_CHAR_TYPE *p1 = *p + 2;
   7280 
   7281   while (p1 < end)
   7282     {
   7283       /* Skip over opcodes that can match nothing, and return true or
   7284 	 false, as appropriate, when we get to one that can't, or to the
   7285          matching stop_memory.  */
   7286 
   7287       switch ((re_opcode_t) *p1)
   7288         {
   7289         /* Could be either a loop or a series of alternatives.  */
   7290         case on_failure_jump:
   7291           p1++;
   7292           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7293 
   7294           /* If the next operation is not a jump backwards in the
   7295 	     pattern.  */
   7296 
   7297 	  if (mcnt >= 0)
   7298 	    {
   7299               /* Go through the on_failure_jumps of the alternatives,
   7300                  seeing if any of the alternatives cannot match nothing.
   7301                  The last alternative starts with only a jump,
   7302                  whereas the rest start with on_failure_jump and end
   7303                  with a jump, e.g., here is the pattern for `a|b|c':
   7304 
   7305                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
   7306                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
   7307                  /exactn/1/c
   7308 
   7309                  So, we have to first go through the first (n-1)
   7310                  alternatives and then deal with the last one separately.  */
   7311 
   7312 
   7313               /* Deal with the first (n-1) alternatives, which start
   7314                  with an on_failure_jump (see above) that jumps to right
   7315                  past a jump_past_alt.  */
   7316 
   7317               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
   7318 		     jump_past_alt)
   7319                 {
   7320                   /* `mcnt' holds how many bytes long the alternative
   7321                      is, including the ending `jump_past_alt' and
   7322                      its number.  */
   7323 
   7324                   if (!alt_match_null_string_p (p1, p1 + mcnt -
   7325 						(1 + OFFSET_ADDRESS_SIZE),
   7326 						reg_info))
   7327                     return false;
   7328 
   7329                   /* Move to right after this alternative, including the
   7330 		     jump_past_alt.  */
   7331                   p1 += mcnt;
   7332 
   7333                   /* Break if it's the beginning of an n-th alternative
   7334                      that doesn't begin with an on_failure_jump.  */
   7335                   if ((re_opcode_t) *p1 != on_failure_jump)
   7336                     break;
   7337 
   7338 		  /* Still have to check that it's not an n-th
   7339 		     alternative that starts with an on_failure_jump.  */
   7340 		  p1++;
   7341                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7342                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
   7343 		      jump_past_alt)
   7344                     {
   7345 		      /* Get to the beginning of the n-th alternative.  */
   7346                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
   7347                       break;
   7348                     }
   7349                 }
   7350 
   7351               /* Deal with the last alternative: go back and get number
   7352                  of the `jump_past_alt' just before it.  `mcnt' contains
   7353                  the length of the alternative.  */
   7354               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
   7355 
   7356               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
   7357                 return false;
   7358 
   7359               p1 += mcnt;	/* Get past the n-th alternative.  */
   7360             } /* if mcnt > 0 */
   7361           break;
   7362 
   7363 
   7364         case stop_memory:
   7365 	  assert (p1[1] == **p);
   7366           *p = p1 + 2;
   7367           return true;
   7368 
   7369 
   7370         default:
   7371           if (!common_op_match_null_string_p (&p1, end, reg_info))
   7372             return false;
   7373         }
   7374     } /* while p1 < end */
   7375 
   7376   return false;
   7377 } /* group_match_null_string_p */
   7378 
   7379 
   7380 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   7381    It expects P to be the first byte of a single alternative and END one
   7382    byte past the last. The alternative can contain groups.  */
   7383 
   7384 static boolean
   7385 alt_match_null_string_p (p, end, reg_info)
   7386     US_CHAR_TYPE *p, *end;
   7387     register_info_type *reg_info;
   7388 {
   7389   int mcnt;
   7390   US_CHAR_TYPE *p1 = p;
   7391 
   7392   while (p1 < end)
   7393     {
   7394       /* Skip over opcodes that can match nothing, and break when we get
   7395          to one that can't.  */
   7396 
   7397       switch ((re_opcode_t) *p1)
   7398         {
   7399 	/* It's a loop.  */
   7400         case on_failure_jump:
   7401           p1++;
   7402           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7403           p1 += mcnt;
   7404           break;
   7405 
   7406 	default:
   7407           if (!common_op_match_null_string_p (&p1, end, reg_info))
   7408             return false;
   7409         }
   7410     }  /* while p1 < end */
   7411 
   7412   return true;
   7413 } /* alt_match_null_string_p */
   7414 
   7415 
   7416 /* Deals with the ops common to group_match_null_string_p and
   7417    alt_match_null_string_p.
   7418 
   7419    Sets P to one after the op and its arguments, if any.  */
   7420 
   7421 static boolean
   7422 common_op_match_null_string_p (p, end, reg_info)
   7423     US_CHAR_TYPE **p, *end;
   7424     register_info_type *reg_info;
   7425 {
   7426   int mcnt;
   7427   boolean ret;
   7428   int reg_no;
   7429   US_CHAR_TYPE *p1 = *p;
   7430 
   7431   switch ((re_opcode_t) *p1++)
   7432     {
   7433     case no_op:
   7434     case begline:
   7435     case endline:
   7436     case begbuf:
   7437     case endbuf:
   7438     case wordbeg:
   7439     case wordend:
   7440     case wordbound:
   7441     case notwordbound:
   7442 #ifdef emacs
   7443     case before_dot:
   7444     case at_dot:
   7445     case after_dot:
   7446 #endif
   7447       break;
   7448 
   7449     case start_memory:
   7450       reg_no = *p1;
   7451       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
   7452       ret = group_match_null_string_p (&p1, end, reg_info);
   7453 
   7454       /* Have to set this here in case we're checking a group which
   7455          contains a group and a back reference to it.  */
   7456 
   7457       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
   7458         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
   7459 
   7460       if (!ret)
   7461         return false;
   7462       break;
   7463 
   7464     /* If this is an optimized succeed_n for zero times, make the jump.  */
   7465     case jump:
   7466       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7467       if (mcnt >= 0)
   7468         p1 += mcnt;
   7469       else
   7470         return false;
   7471       break;
   7472 
   7473     case succeed_n:
   7474       /* Get to the number of times to succeed.  */
   7475       p1 += OFFSET_ADDRESS_SIZE;
   7476       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7477 
   7478       if (mcnt == 0)
   7479         {
   7480           p1 -= 2 * OFFSET_ADDRESS_SIZE;
   7481           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7482           p1 += mcnt;
   7483         }
   7484       else
   7485         return false;
   7486       break;
   7487 
   7488     case duplicate:
   7489       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
   7490         return false;
   7491       break;
   7492 
   7493     case set_number_at:
   7494       p1 += 2 * OFFSET_ADDRESS_SIZE;
   7495 
   7496     default:
   7497       /* All other opcodes mean we cannot match the empty string.  */
   7498       return false;
   7499   }
   7500 
   7501   *p = p1;
   7502   return true;
   7503 } /* common_op_match_null_string_p */
   7504 
   7505 
   7506 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   7507    bytes; nonzero otherwise.  */
   7508 
   7509 static int
   7510 bcmp_translate (s1, s2, len, translate)
   7511      const CHAR_TYPE *s1, *s2;
   7512      register int len;
   7513      RE_TRANSLATE_TYPE translate;
   7514 {
   7515   register const US_CHAR_TYPE *p1 = (const US_CHAR_TYPE *) s1;
   7516   register const US_CHAR_TYPE *p2 = (const US_CHAR_TYPE *) s2;
   7517   while (len)
   7518     {
   7519 #ifdef MBS_SUPPORT
   7520       if (((*p1<=0xff)?translate[*p1++]:*p1++)
   7521 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
   7522 	return 1;
   7523 #else
   7524       if (translate[*p1++] != translate[*p2++]) return 1;
   7525 #endif /* MBS_SUPPORT */
   7526       len--;
   7527     }
   7528   return 0;
   7529 }
   7530 
   7531 /* Entry points for GNU code.  */
   7533 
   7534 /* re_compile_pattern is the GNU regular expression compiler: it
   7535    compiles PATTERN (of length SIZE) and puts the result in BUFP.
   7536    Returns 0 if the pattern was valid, otherwise an error string.
   7537 
   7538    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
   7539    are set in BUFP on entry.
   7540 
   7541    We call regex_compile to do the actual compilation.  */
   7542 
   7543 const char *
   7544 re_compile_pattern (pattern, length, bufp)
   7545      const char *pattern;
   7546      size_t length;
   7547      struct re_pattern_buffer *bufp;
   7548 {
   7549   reg_errcode_t ret;
   7550 
   7551   /* GNU code is written to assume at least RE_NREGS registers will be set
   7552      (and at least one extra will be -1).  */
   7553   bufp->regs_allocated = REGS_UNALLOCATED;
   7554 
   7555   /* And GNU code determines whether or not to get register information
   7556      by passing null for the REGS argument to re_match, etc., not by
   7557      setting no_sub.  */
   7558   bufp->no_sub = 0;
   7559 
   7560   /* Match anchors at newline.  */
   7561   bufp->newline_anchor = 1;
   7562 
   7563   ret = regex_compile (pattern, length, re_syntax_options, bufp);
   7564 
   7565   if (!ret)
   7566     return NULL;
   7567   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
   7568 }
   7569 #ifdef _LIBC
   7570 weak_alias (__re_compile_pattern, re_compile_pattern)
   7571 #endif
   7572 
   7573 /* Entry points compatible with 4.2 BSD regex library.  We don't define
   7575    them unless specifically requested.  */
   7576 
   7577 #if defined _REGEX_RE_COMP || defined _LIBC
   7578 
   7579 /* BSD has one and only one pattern buffer.  */
   7580 static struct re_pattern_buffer re_comp_buf;
   7581 
   7582 char *
   7583 #ifdef _LIBC
   7584 /* Make these definitions weak in libc, so POSIX programs can redefine
   7585    these names if they don't use our functions, and still use
   7586    regcomp/regexec below without link errors.  */
   7587 weak_function
   7588 #endif
   7589 re_comp (s)
   7590     const char *s;
   7591 {
   7592   reg_errcode_t ret;
   7593 
   7594   if (!s)
   7595     {
   7596       if (!re_comp_buf.buffer)
   7597 	return gettext ("No previous regular expression");
   7598       return 0;
   7599     }
   7600 
   7601   if (!re_comp_buf.buffer)
   7602     {
   7603       re_comp_buf.buffer = (unsigned char *) malloc (200);
   7604       if (re_comp_buf.buffer == NULL)
   7605         return (char *) gettext (re_error_msgid
   7606 				 + re_error_msgid_idx[(int) REG_ESPACE]);
   7607       re_comp_buf.allocated = 200;
   7608 
   7609       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
   7610       if (re_comp_buf.fastmap == NULL)
   7611 	return (char *) gettext (re_error_msgid
   7612 				 + re_error_msgid_idx[(int) REG_ESPACE]);
   7613     }
   7614 
   7615   /* Since `re_exec' always passes NULL for the `regs' argument, we
   7616      don't need to initialize the pattern buffer fields which affect it.  */
   7617 
   7618   /* Match anchors at newlines.  */
   7619   re_comp_buf.newline_anchor = 1;
   7620 
   7621   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   7622 
   7623   if (!ret)
   7624     return NULL;
   7625 
   7626   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
   7627   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
   7628 }
   7629 
   7630 
   7631 int
   7632 #ifdef _LIBC
   7633 weak_function
   7634 #endif
   7635 re_exec (s)
   7636     const char *s;
   7637 {
   7638   const int len = strlen (s);
   7639   return
   7640     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
   7641 }
   7642 
   7643 #endif /* _REGEX_RE_COMP */
   7644 
   7645 /* POSIX.2 functions.  Don't define these for Emacs.  */
   7647 
   7648 #ifndef emacs
   7649 
   7650 /* regcomp takes a regular expression as a string and compiles it.
   7651 
   7652    PREG is a regex_t *.  We do not expect any fields to be initialized,
   7653    since POSIX says we shouldn't.  Thus, we set
   7654 
   7655      `buffer' to the compiled pattern;
   7656      `used' to the length of the compiled pattern;
   7657      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
   7658        REG_EXTENDED bit in CFLAGS is set; otherwise, to
   7659        RE_SYNTAX_POSIX_BASIC;
   7660      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
   7661      `fastmap' to an allocated space for the fastmap;
   7662      `fastmap_accurate' to zero;
   7663      `re_nsub' to the number of subexpressions in PATTERN.
   7664 
   7665    PATTERN is the address of the pattern string.
   7666 
   7667    CFLAGS is a series of bits which affect compilation.
   7668 
   7669      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
   7670      use POSIX basic syntax.
   7671 
   7672      If REG_NEWLINE is set, then . and [^...] don't match newline.
   7673      Also, regexec will try a match beginning after every newline.
   7674 
   7675      If REG_ICASE is set, then we considers upper- and lowercase
   7676      versions of letters to be equivalent when matching.
   7677 
   7678      If REG_NOSUB is set, then when PREG is passed to regexec, that
   7679      routine will report only success or failure, and nothing about the
   7680      registers.
   7681 
   7682    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
   7683    the return codes and their meanings.)  */
   7684 
   7685 int
   7686 regcomp (preg, pattern, cflags)
   7687     regex_t *preg;
   7688     const char *pattern;
   7689     int cflags;
   7690 {
   7691   reg_errcode_t ret;
   7692   reg_syntax_t syntax
   7693     = (cflags & REG_EXTENDED) ?
   7694       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
   7695 
   7696   /* regex_compile will allocate the space for the compiled pattern.  */
   7697   preg->buffer = 0;
   7698   preg->allocated = 0;
   7699   preg->used = 0;
   7700 
   7701   /* Try to allocate space for the fastmap.  */
   7702   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
   7703 
   7704   if (cflags & REG_ICASE)
   7705     {
   7706       unsigned i;
   7707 
   7708       preg->translate
   7709 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
   7710 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
   7711       if (preg->translate == NULL)
   7712         return (int) REG_ESPACE;
   7713 
   7714       /* Map uppercase characters to corresponding lowercase ones.  */
   7715       for (i = 0; i < CHAR_SET_SIZE; i++)
   7716         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
   7717     }
   7718   else
   7719     preg->translate = NULL;
   7720 
   7721   /* If REG_NEWLINE is set, newlines are treated differently.  */
   7722   if (cflags & REG_NEWLINE)
   7723     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
   7724       syntax &= ~RE_DOT_NEWLINE;
   7725       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
   7726       /* It also changes the matching behavior.  */
   7727       preg->newline_anchor = 1;
   7728     }
   7729   else
   7730     preg->newline_anchor = 0;
   7731 
   7732   preg->no_sub = !!(cflags & REG_NOSUB);
   7733 
   7734   /* POSIX says a null character in the pattern terminates it, so we
   7735      can use strlen here in compiling the pattern.  */
   7736   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
   7737 
   7738   /* POSIX doesn't distinguish between an unmatched open-group and an
   7739      unmatched close-group: both are REG_EPAREN.  */
   7740   if (ret == REG_ERPAREN) ret = REG_EPAREN;
   7741 
   7742   if (ret == REG_NOERROR && preg->fastmap)
   7743     {
   7744       /* Compute the fastmap now, since regexec cannot modify the pattern
   7745 	 buffer.  */
   7746       if (re_compile_fastmap (preg) == -2)
   7747 	{
   7748 	  /* Some error occurred while computing the fastmap, just forget
   7749 	     about it.  */
   7750 	  free (preg->fastmap);
   7751 	  preg->fastmap = NULL;
   7752 	}
   7753     }
   7754 
   7755   return (int) ret;
   7756 }
   7757 #ifdef _LIBC
   7758 weak_alias (__regcomp, regcomp)
   7759 #endif
   7760 
   7761 
   7762 /* regexec searches for a given pattern, specified by PREG, in the
   7763    string STRING.
   7764 
   7765    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   7766    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   7767    least NMATCH elements, and we set them to the offsets of the
   7768    corresponding matched substrings.
   7769 
   7770    EFLAGS specifies `execution flags' which affect matching: if
   7771    REG_NOTBOL is set, then ^ does not match at the beginning of the
   7772    string; if REG_NOTEOL is set, then $ does not match at the end.
   7773 
   7774    We return 0 if we find a match and REG_NOMATCH if not.  */
   7775 
   7776 int
   7777 regexec (preg, string, nmatch, pmatch, eflags)
   7778     const regex_t *preg;
   7779     const char *string;
   7780     size_t nmatch;
   7781     regmatch_t pmatch[];
   7782     int eflags;
   7783 {
   7784   int ret;
   7785   struct re_registers regs;
   7786   regex_t private_preg;
   7787   int len = strlen (string);
   7788   boolean want_reg_info = !preg->no_sub && nmatch > 0;
   7789 
   7790   private_preg = *preg;
   7791 
   7792   private_preg.not_bol = !!(eflags & REG_NOTBOL);
   7793   private_preg.not_eol = !!(eflags & REG_NOTEOL);
   7794 
   7795   /* The user has told us exactly how many registers to return
   7796      information about, via `nmatch'.  We have to pass that on to the
   7797      matching routines.  */
   7798   private_preg.regs_allocated = REGS_FIXED;
   7799 
   7800   if (want_reg_info)
   7801     {
   7802       regs.num_regs = nmatch;
   7803       regs.start = TALLOC (nmatch * 2, regoff_t);
   7804       if (regs.start == NULL)
   7805         return (int) REG_NOMATCH;
   7806       regs.end = regs.start + nmatch;
   7807     }
   7808 
   7809   /* Perform the searching operation.  */
   7810   ret = re_search (&private_preg, string, len,
   7811                    /* start: */ 0, /* range: */ len,
   7812                    want_reg_info ? &regs : (struct re_registers *) 0);
   7813 
   7814   /* Copy the register information to the POSIX structure.  */
   7815   if (want_reg_info)
   7816     {
   7817       if (ret >= 0)
   7818         {
   7819           unsigned r;
   7820 
   7821           for (r = 0; r < nmatch; r++)
   7822             {
   7823               pmatch[r].rm_so = regs.start[r];
   7824               pmatch[r].rm_eo = regs.end[r];
   7825             }
   7826         }
   7827 
   7828       /* If we needed the temporary register info, free the space now.  */
   7829       free (regs.start);
   7830     }
   7831 
   7832   /* We want zero return to mean success, unlike `re_search'.  */
   7833   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
   7834 }
   7835 #ifdef _LIBC
   7836 weak_alias (__regexec, regexec)
   7837 #endif
   7838 
   7839 
   7840 /* Returns a message corresponding to an error code, ERRCODE, returned
   7841    from either regcomp or regexec.   We don't use PREG here.  */
   7842 
   7843 size_t
   7844 regerror (errcode, preg, errbuf, errbuf_size)
   7845     int errcode;
   7846     const regex_t *preg;
   7847     char *errbuf;
   7848     size_t errbuf_size;
   7849 {
   7850   const char *msg;
   7851   size_t msg_size;
   7852 
   7853   if (errcode < 0
   7854       || errcode >= (int) (sizeof (re_error_msgid_idx)
   7855 			   / sizeof (re_error_msgid_idx[0])))
   7856     /* Only error codes returned by the rest of the code should be passed
   7857        to this routine.  If we are given anything else, or if other regex
   7858        code generates an invalid error code, then the program has a bug.
   7859        Dump core so we can fix it.  */
   7860     abort ();
   7861 
   7862   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
   7863 
   7864   msg_size = strlen (msg) + 1; /* Includes the null.  */
   7865 
   7866   if (errbuf_size != 0)
   7867     {
   7868       if (msg_size > errbuf_size)
   7869         {
   7870 #if defined HAVE_MEMPCPY || defined _LIBC
   7871 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
   7872 #else
   7873           memcpy (errbuf, msg, errbuf_size - 1);
   7874           errbuf[errbuf_size - 1] = 0;
   7875 #endif
   7876         }
   7877       else
   7878         memcpy (errbuf, msg, msg_size);
   7879     }
   7880 
   7881   return msg_size;
   7882 }
   7883 #ifdef _LIBC
   7884 weak_alias (__regerror, regerror)
   7885 #endif
   7886 
   7887 
   7888 /* Free dynamically allocated space used by PREG.  */
   7889 
   7890 void
   7891 regfree (preg)
   7892     regex_t *preg;
   7893 {
   7894   if (preg->buffer != NULL)
   7895     free (preg->buffer);
   7896   preg->buffer = NULL;
   7897 
   7898   preg->allocated = 0;
   7899   preg->used = 0;
   7900 
   7901   if (preg->fastmap != NULL)
   7902     free (preg->fastmap);
   7903   preg->fastmap = NULL;
   7904   preg->fastmap_accurate = 0;
   7905 
   7906   if (preg->translate != NULL)
   7907     free (preg->translate);
   7908   preg->translate = NULL;
   7909 }
   7910 #ifdef _LIBC
   7911 weak_alias (__regfree, regfree)
   7912 #endif
   7913 
   7914 #endif /* not emacs  */
   7915