Home | History | Annotate | Line # | Download | only in libiberty
      1       1.1  christos /* Extended regular expression matching and search library,
      2       1.1  christos    version 0.12.
      3       1.1  christos    (Implements POSIX draft P1003.2/D11.2, except for some of the
      4       1.1  christos    internationalization features.)
      5       1.1  christos 
      6  1.1.1.10  christos    Copyright (C) 1993-2024 Free Software Foundation, Inc.
      7       1.1  christos    This file is part of the GNU C Library.
      8       1.1  christos 
      9       1.1  christos    The GNU C Library is free software; you can redistribute it and/or
     10       1.1  christos    modify it under the terms of the GNU Lesser General Public
     11       1.1  christos    License as published by the Free Software Foundation; either
     12       1.1  christos    version 2.1 of the License, or (at your option) any later version.
     13       1.1  christos 
     14       1.1  christos    The GNU C Library is distributed in the hope that it will be useful,
     15       1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16       1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17       1.1  christos    Lesser General Public License for more details.
     18       1.1  christos 
     19       1.1  christos    You should have received a copy of the GNU Lesser General Public
     20       1.1  christos    License along with the GNU C Library; if not, write to the Free
     21       1.1  christos    Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     22       1.1  christos    02110-1301 USA.  */
     23       1.1  christos 
     24       1.1  christos /* This file has been modified for usage in libiberty.  It includes "xregex.h"
     25       1.1  christos    instead of <regex.h>.  The "xregex.h" header file renames all external
     26       1.1  christos    routines with an "x" prefix so they do not collide with the native regex
     27       1.1  christos    routines or with other components' regex routines. */
     28       1.1  christos /* AIX requires this to be the first thing in the file. */
     29       1.1  christos #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
     30       1.1  christos   #pragma alloca
     31       1.1  christos #endif
     32       1.1  christos 
     33   1.1.1.9  christos #if __GNUC__ >= 12
     34   1.1.1.9  christos #  pragma GCC diagnostic ignored "-Wuse-after-free"
     35   1.1.1.9  christos #endif
     36   1.1.1.9  christos 
     37       1.1  christos #undef	_GNU_SOURCE
     38       1.1  christos #define _GNU_SOURCE
     39       1.1  christos 
     40       1.1  christos #ifndef INSIDE_RECURSION
     41       1.1  christos # ifdef HAVE_CONFIG_H
     42       1.1  christos #  include <config.h>
     43       1.1  christos # endif
     44       1.1  christos #endif
     45       1.1  christos 
     46       1.1  christos #include <ansidecl.h>
     47       1.1  christos 
     48       1.1  christos #ifndef INSIDE_RECURSION
     49       1.1  christos 
     50       1.1  christos # if defined STDC_HEADERS && !defined emacs
     51       1.1  christos #  include <stddef.h>
     52   1.1.1.2  christos #  define PTR_INT_TYPE ptrdiff_t
     53       1.1  christos # else
     54       1.1  christos /* We need this for `regex.h', and perhaps for the Emacs include files.  */
     55       1.1  christos #  include <sys/types.h>
     56   1.1.1.2  christos #  define PTR_INT_TYPE long
     57       1.1  christos # endif
     58       1.1  christos 
     59       1.1  christos # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
     60       1.1  christos 
     61       1.1  christos /* For platforms which support the ISO C Amendment 1 functionality we
     62       1.1  christos    support user defined character classes.  */
     63       1.1  christos # if defined _LIBC || WIDE_CHAR_SUPPORT
     64       1.1  christos /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
     65       1.1  christos #  include <wchar.h>
     66       1.1  christos #  include <wctype.h>
     67       1.1  christos # endif
     68       1.1  christos 
     69       1.1  christos # ifdef _LIBC
     70       1.1  christos /* We have to keep the namespace clean.  */
     71       1.1  christos #  define regfree(preg) __regfree (preg)
     72       1.1  christos #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
     73       1.1  christos #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
     74       1.1  christos #  define regerror(errcode, preg, errbuf, errbuf_size) \
     75       1.1  christos 	__regerror(errcode, preg, errbuf, errbuf_size)
     76       1.1  christos #  define re_set_registers(bu, re, nu, st, en) \
     77       1.1  christos 	__re_set_registers (bu, re, nu, st, en)
     78       1.1  christos #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
     79       1.1  christos 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     80       1.1  christos #  define re_match(bufp, string, size, pos, regs) \
     81       1.1  christos 	__re_match (bufp, string, size, pos, regs)
     82       1.1  christos #  define re_search(bufp, string, size, startpos, range, regs) \
     83       1.1  christos 	__re_search (bufp, string, size, startpos, range, regs)
     84       1.1  christos #  define re_compile_pattern(pattern, length, bufp) \
     85       1.1  christos 	__re_compile_pattern (pattern, length, bufp)
     86       1.1  christos #  define re_set_syntax(syntax) __re_set_syntax (syntax)
     87       1.1  christos #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
     88       1.1  christos 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
     89       1.1  christos #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
     90       1.1  christos 
     91       1.1  christos #  define btowc __btowc
     92       1.1  christos 
     93       1.1  christos /* We are also using some library internals.  */
     94       1.1  christos #  include <locale/localeinfo.h>
     95       1.1  christos #  include <locale/elem-hash.h>
     96       1.1  christos #  include <langinfo.h>
     97       1.1  christos #  include <locale/coll-lookup.h>
     98       1.1  christos # endif
     99       1.1  christos 
    100       1.1  christos /* This is for other GNU distributions with internationalized messages.  */
    101       1.1  christos # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
    102       1.1  christos #  include <libintl.h>
    103       1.1  christos #  ifdef _LIBC
    104       1.1  christos #   undef gettext
    105       1.1  christos #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
    106       1.1  christos #  endif
    107       1.1  christos # else
    108       1.1  christos #  define gettext(msgid) (msgid)
    109       1.1  christos # endif
    110       1.1  christos 
    111       1.1  christos # ifndef gettext_noop
    112       1.1  christos /* This define is so xgettext can find the internationalizable
    113       1.1  christos    strings.  */
    114       1.1  christos #  define gettext_noop(String) String
    115       1.1  christos # endif
    116       1.1  christos 
    117       1.1  christos /* The `emacs' switch turns on certain matching commands
    118       1.1  christos    that make sense only in Emacs. */
    119       1.1  christos # ifdef emacs
    120       1.1  christos 
    121       1.1  christos #  include "lisp.h"
    122       1.1  christos #  include "buffer.h"
    123       1.1  christos #  include "syntax.h"
    124       1.1  christos 
    125       1.1  christos # else  /* not emacs */
    126       1.1  christos 
    127       1.1  christos /* If we are not linking with Emacs proper,
    128       1.1  christos    we can't use the relocating allocator
    129       1.1  christos    even if config.h says that we can.  */
    130       1.1  christos #  undef REL_ALLOC
    131       1.1  christos 
    132       1.1  christos #  if defined STDC_HEADERS || defined _LIBC
    133       1.1  christos #   include <stdlib.h>
    134       1.1  christos #  else
    135       1.1  christos char *malloc ();
    136       1.1  christos char *realloc ();
    137       1.1  christos #  endif
    138       1.1  christos 
    139       1.1  christos /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    140       1.1  christos    If nothing else has been done, use the method below.  */
    141       1.1  christos #  ifdef INHIBIT_STRING_HEADER
    142       1.1  christos #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
    143       1.1  christos #    if !defined bzero && !defined bcopy
    144       1.1  christos #     undef INHIBIT_STRING_HEADER
    145       1.1  christos #    endif
    146       1.1  christos #   endif
    147       1.1  christos #  endif
    148       1.1  christos 
    149       1.1  christos /* This is the normal way of making sure we have a bcopy and a bzero.
    150       1.1  christos    This is used in most programs--a few other programs avoid this
    151       1.1  christos    by defining INHIBIT_STRING_HEADER.  */
    152       1.1  christos #  ifndef INHIBIT_STRING_HEADER
    153       1.1  christos #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
    154       1.1  christos #    include <string.h>
    155       1.1  christos #    ifndef bzero
    156       1.1  christos #     ifndef _LIBC
    157   1.1.1.4  christos #      define bzero(s, n)	((void) memset (s, '\0', n))
    158       1.1  christos #     else
    159       1.1  christos #      define bzero(s, n)	__bzero (s, n)
    160       1.1  christos #     endif
    161       1.1  christos #    endif
    162       1.1  christos #   else
    163       1.1  christos #    include <strings.h>
    164       1.1  christos #    ifndef memcmp
    165       1.1  christos #     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
    166       1.1  christos #    endif
    167       1.1  christos #    ifndef memcpy
    168       1.1  christos #     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
    169       1.1  christos #    endif
    170       1.1  christos #   endif
    171       1.1  christos #  endif
    172       1.1  christos 
    173       1.1  christos /* Define the syntax stuff for \<, \>, etc.  */
    174       1.1  christos 
    175       1.1  christos /* This must be nonzero for the wordchar and notwordchar pattern
    176       1.1  christos    commands in re_match_2.  */
    177       1.1  christos #  ifndef Sword
    178       1.1  christos #   define Sword 1
    179       1.1  christos #  endif
    180       1.1  christos 
    181       1.1  christos #  ifdef SWITCH_ENUM_BUG
    182       1.1  christos #   define SWITCH_ENUM_CAST(x) ((int)(x))
    183       1.1  christos #  else
    184       1.1  christos #   define SWITCH_ENUM_CAST(x) (x)
    185       1.1  christos #  endif
    186       1.1  christos 
    187       1.1  christos # endif /* not emacs */
    188       1.1  christos 
    189       1.1  christos # if defined _LIBC || HAVE_LIMITS_H
    190       1.1  christos #  include <limits.h>
    191       1.1  christos # endif
    192       1.1  christos 
    193       1.1  christos # ifndef MB_LEN_MAX
    194       1.1  christos #  define MB_LEN_MAX 1
    195       1.1  christos # endif
    196       1.1  christos 
    197       1.1  christos /* Get the interface, including the syntax bits.  */
    199       1.1  christos # include "xregex.h"  /* change for libiberty */
    200       1.1  christos 
    201       1.1  christos /* isalpha etc. are used for the character classes.  */
    202       1.1  christos # include <ctype.h>
    203       1.1  christos 
    204       1.1  christos /* Jim Meyering writes:
    205       1.1  christos 
    206       1.1  christos    "... Some ctype macros are valid only for character codes that
    207       1.1  christos    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
    208       1.1  christos    using /bin/cc or gcc but without giving an ansi option).  So, all
    209       1.1  christos    ctype uses should be through macros like ISPRINT...  If
    210       1.1  christos    STDC_HEADERS is defined, then autoconf has verified that the ctype
    211       1.1  christos    macros don't need to be guarded with references to isascii. ...
    212       1.1  christos    Defining isascii to 1 should let any compiler worth its salt
    213       1.1  christos    eliminate the && through constant folding."
    214       1.1  christos    Solaris defines some of these symbols so we must undefine them first.  */
    215       1.1  christos 
    216       1.1  christos # undef ISASCII
    217       1.1  christos # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
    218       1.1  christos #  define ISASCII(c) 1
    219       1.1  christos # else
    220       1.1  christos #  define ISASCII(c) isascii(c)
    221       1.1  christos # endif
    222       1.1  christos 
    223       1.1  christos # ifdef isblank
    224       1.1  christos #  define ISBLANK(c) (ISASCII (c) && isblank (c))
    225       1.1  christos # else
    226       1.1  christos #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
    227       1.1  christos # endif
    228       1.1  christos # ifdef isgraph
    229       1.1  christos #  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
    230       1.1  christos # else
    231       1.1  christos #  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
    232       1.1  christos # endif
    233       1.1  christos 
    234       1.1  christos # undef ISPRINT
    235       1.1  christos # define ISPRINT(c) (ISASCII (c) && isprint (c))
    236       1.1  christos # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
    237       1.1  christos # define ISALNUM(c) (ISASCII (c) && isalnum (c))
    238       1.1  christos # define ISALPHA(c) (ISASCII (c) && isalpha (c))
    239       1.1  christos # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
    240       1.1  christos # define ISLOWER(c) (ISASCII (c) && islower (c))
    241       1.1  christos # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
    242       1.1  christos # define ISSPACE(c) (ISASCII (c) && isspace (c))
    243       1.1  christos # define ISUPPER(c) (ISASCII (c) && isupper (c))
    244       1.1  christos # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
    245       1.1  christos 
    246       1.1  christos # ifdef _tolower
    247       1.1  christos #  define TOLOWER(c) _tolower(c)
    248       1.1  christos # else
    249       1.1  christos #  define TOLOWER(c) tolower(c)
    250       1.1  christos # endif
    251       1.1  christos 
    252       1.1  christos # ifndef NULL
    253       1.1  christos #  define NULL (void *)0
    254       1.1  christos # endif
    255       1.1  christos 
    256       1.1  christos /* We remove any previous definition of `SIGN_EXTEND_CHAR',
    257       1.1  christos    since ours (we hope) works properly with all combinations of
    258       1.1  christos    machines, compilers, `char' and `unsigned char' argument types.
    259       1.1  christos    (Per Bothner suggested the basic approach.)  */
    260       1.1  christos # undef SIGN_EXTEND_CHAR
    261       1.1  christos # if __STDC__
    262       1.1  christos #  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
    263       1.1  christos # else  /* not __STDC__ */
    264       1.1  christos /* As in Harbison and Steele.  */
    265       1.1  christos #  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
    266       1.1  christos # endif
    267       1.1  christos 
    268       1.1  christos # ifndef emacs
    270       1.1  christos /* How many characters in the character set.  */
    271       1.1  christos #  define CHAR_SET_SIZE 256
    272       1.1  christos 
    273       1.1  christos #  ifdef SYNTAX_TABLE
    274       1.1  christos 
    275       1.1  christos extern char *re_syntax_table;
    276       1.1  christos 
    277       1.1  christos #  else /* not SYNTAX_TABLE */
    278       1.1  christos 
    279       1.1  christos static char re_syntax_table[CHAR_SET_SIZE];
    280       1.1  christos 
    281       1.1  christos static void init_syntax_once (void);
    282       1.1  christos 
    283       1.1  christos static void
    284       1.1  christos init_syntax_once (void)
    285       1.1  christos {
    286       1.1  christos    register int c;
    287       1.1  christos    static int done = 0;
    288       1.1  christos 
    289       1.1  christos    if (done)
    290       1.1  christos      return;
    291       1.1  christos    bzero (re_syntax_table, sizeof re_syntax_table);
    292       1.1  christos 
    293       1.1  christos    for (c = 0; c < CHAR_SET_SIZE; ++c)
    294       1.1  christos      if (ISALNUM (c))
    295       1.1  christos 	re_syntax_table[c] = Sword;
    296       1.1  christos 
    297       1.1  christos    re_syntax_table['_'] = Sword;
    298       1.1  christos 
    299       1.1  christos    done = 1;
    300       1.1  christos }
    301       1.1  christos 
    302       1.1  christos #  endif /* not SYNTAX_TABLE */
    303       1.1  christos 
    304       1.1  christos #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
    305       1.1  christos 
    306       1.1  christos # endif /* emacs */
    307       1.1  christos 
    308       1.1  christos /* Integer type for pointers.  */
    310       1.1  christos # if !defined _LIBC && !defined HAVE_UINTPTR_T
    311       1.1  christos typedef unsigned long int uintptr_t;
    312       1.1  christos # endif
    313       1.1  christos 
    314       1.1  christos /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    315       1.1  christos    use `alloca' instead of `malloc'.  This is because using malloc in
    316       1.1  christos    re_search* or re_match* could cause memory leaks when C-g is used in
    317       1.1  christos    Emacs; also, malloc is slower and causes storage fragmentation.  On
    318       1.1  christos    the other hand, malloc is more portable, and easier to debug.
    319       1.1  christos 
    320       1.1  christos    Because we sometimes use alloca, some routines have to be macros,
    321       1.1  christos    not functions -- `alloca'-allocated space disappears at the end of the
    322       1.1  christos    function it is called in.  */
    323       1.1  christos 
    324       1.1  christos # ifdef REGEX_MALLOC
    325       1.1  christos 
    326       1.1  christos #  define REGEX_ALLOCATE malloc
    327       1.1  christos #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
    328       1.1  christos #  define REGEX_FREE free
    329       1.1  christos 
    330       1.1  christos # else /* not REGEX_MALLOC  */
    331       1.1  christos 
    332       1.1  christos /* Emacs already defines alloca, sometimes.  */
    333       1.1  christos #  ifndef alloca
    334       1.1  christos 
    335       1.1  christos /* Make alloca work the best possible way.  */
    336       1.1  christos #   ifdef __GNUC__
    337       1.1  christos #    define alloca __builtin_alloca
    338       1.1  christos #   else /* not __GNUC__ */
    339       1.1  christos #    if HAVE_ALLOCA_H
    340       1.1  christos #     include <alloca.h>
    341       1.1  christos #    endif /* HAVE_ALLOCA_H */
    342       1.1  christos #   endif /* not __GNUC__ */
    343       1.1  christos 
    344       1.1  christos #  endif /* not alloca */
    345       1.1  christos 
    346       1.1  christos #  define REGEX_ALLOCATE alloca
    347       1.1  christos 
     348       1.1  christos /* Assumes a `char *destination' variable.
        Allocates NSIZE bytes with alloca, stores the new pointer in
        destination, and copies OSIZE bytes from SOURCE into it.  SOURCE
        itself is not released.  The value of the whole expression is
        whatever memcpy returns.  */
     349       1.1  christos #  define REGEX_REALLOCATE(source, osize, nsize)			\
     350       1.1  christos   (destination = (char *) alloca (nsize),				\
     351       1.1  christos    memcpy (destination, source, osize))
    352       1.1  christos 
    353       1.1  christos /* No need to do anything to free, after alloca.  */
    354       1.1  christos #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
    355       1.1  christos 
    356       1.1  christos # endif /* not REGEX_MALLOC */
    357       1.1  christos 
    358       1.1  christos /* Define how to allocate the failure stack.  */
    359       1.1  christos 
    360       1.1  christos # if defined REL_ALLOC && defined REGEX_MALLOC
    361       1.1  christos 
    362       1.1  christos #  define REGEX_ALLOCATE_STACK(size)				\
    363       1.1  christos   r_alloc (&failure_stack_ptr, (size))
    364       1.1  christos #  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
    365       1.1  christos   r_re_alloc (&failure_stack_ptr, (nsize))
    366       1.1  christos #  define REGEX_FREE_STACK(ptr)					\
    367       1.1  christos   r_alloc_free (&failure_stack_ptr)
    368       1.1  christos 
    369       1.1  christos # else /* not using relocating allocator */
    370       1.1  christos 
    371       1.1  christos #  ifdef REGEX_MALLOC
    372       1.1  christos 
    373       1.1  christos #   define REGEX_ALLOCATE_STACK malloc
    374       1.1  christos #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
    375       1.1  christos #   define REGEX_FREE_STACK free
    376       1.1  christos 
    377       1.1  christos #  else /* not REGEX_MALLOC */
    378       1.1  christos 
    379       1.1  christos #   define REGEX_ALLOCATE_STACK alloca
    380       1.1  christos 
    381       1.1  christos #   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
    382       1.1  christos    REGEX_REALLOCATE (source, osize, nsize)
    383       1.1  christos /* No need to explicitly free anything.  */
    384       1.1  christos #   define REGEX_FREE_STACK(arg)
    385       1.1  christos 
    386       1.1  christos #  endif /* not REGEX_MALLOC */
    387       1.1  christos # endif /* not using relocating allocator */
    388       1.1  christos 
    389       1.1  christos 
     390       1.1  christos /* True if `size1' is nonzero and PTR is pointing anywhere inside
     391       1.1  christos    `string1' or just past its end.  This works if PTR is NULL, which is
     392       1.1  christos    a good thing.  Note that `size1' and `string1' are not parameters:
        they are picked up from the scope in which the macro is expanded.
        PTR may be evaluated twice.  */
     393       1.1  christos # define FIRST_STRING_P(ptr) 					\
     394       1.1  christos   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
    395       1.1  christos 
    396       1.1  christos /* (Re)Allocate N items of type T using malloc, or fail.  */
    397       1.1  christos # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
    398       1.1  christos # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
    399       1.1  christos # define RETALLOC_IF(addr, n, t) \
    400       1.1  christos   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
    401       1.1  christos # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
    402       1.1  christos 
    403       1.1  christos # define BYTEWIDTH 8 /* In bits.  */
    404       1.1  christos 
    405       1.1  christos # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
    406       1.1  christos 
    407       1.1  christos # undef MAX
    408       1.1  christos # undef MIN
    409       1.1  christos # define MAX(a, b) ((a) > (b) ? (a) : (b))
    410       1.1  christos # define MIN(a, b) ((a) < (b) ? (a) : (b))
    411       1.1  christos 
    412       1.1  christos typedef char boolean;
    413       1.1  christos # define false 0
    414       1.1  christos # define true 1
    415       1.1  christos 
    416       1.1  christos static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
    417       1.1  christos                                          reg_syntax_t syntax,
    418       1.1  christos                                          struct re_pattern_buffer *bufp);
    419       1.1  christos 
    420       1.1  christos static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
    421       1.1  christos                                      const char *string1, int size1,
    422       1.1  christos                                      const char *string2, int size2,
    423       1.1  christos                                      int pos,
    424       1.1  christos                                      struct re_registers *regs,
    425       1.1  christos                                      int stop);
    426       1.1  christos static int byte_re_search_2 (struct re_pattern_buffer *bufp,
    427       1.1  christos                              const char *string1, int size1,
    428       1.1  christos                              const char *string2, int size2,
    429       1.1  christos                              int startpos, int range,
    430       1.1  christos                              struct re_registers *regs, int stop);
    431       1.1  christos static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
    432       1.1  christos 
    433       1.1  christos #ifdef MBS_SUPPORT
    434       1.1  christos static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
    435       1.1  christos                                         reg_syntax_t syntax,
    436       1.1  christos                                         struct re_pattern_buffer *bufp);
    437       1.1  christos 
    438       1.1  christos 
    439       1.1  christos static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
    440       1.1  christos                                     const char *cstring1, int csize1,
    441       1.1  christos                                     const char *cstring2, int csize2,
    442       1.1  christos                                     int pos,
    443       1.1  christos                                     struct re_registers *regs,
    444       1.1  christos                                     int stop,
    445       1.1  christos                                     wchar_t *string1, int size1,
    446       1.1  christos                                     wchar_t *string2, int size2,
    447       1.1  christos                                     int *mbs_offset1, int *mbs_offset2);
    448       1.1  christos static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
    449       1.1  christos                             const char *string1, int size1,
    450       1.1  christos                             const char *string2, int size2,
    451       1.1  christos                             int startpos, int range,
    452       1.1  christos                             struct re_registers *regs, int stop);
    453       1.1  christos static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
    454       1.1  christos #endif
    455       1.1  christos 
    456       1.1  christos /* These are the command codes that appear in compiled regular
    458       1.1  christos    expressions.  Some opcodes are followed by argument bytes.  A
    459       1.1  christos    command code can specify any interpretation whatsoever for its
    460       1.1  christos    arguments.  Zero bytes may appear in the compiled regular expression.  */
    461       1.1  christos 
    462       1.1  christos typedef enum
    463       1.1  christos {
    464       1.1  christos   no_op = 0,
    465       1.1  christos 
    466       1.1  christos   /* Succeed right away--no more backtracking.  */
    467       1.1  christos   succeed,
    468       1.1  christos 
    469       1.1  christos         /* Followed by one byte giving n, then by n literal bytes.  */
    470       1.1  christos   exactn,
    471       1.1  christos 
    472       1.1  christos # ifdef MBS_SUPPORT
    473       1.1  christos 	/* Same as exactn, but contains binary data.  */
    474       1.1  christos   exactn_bin,
    475       1.1  christos # endif
    476       1.1  christos 
    477       1.1  christos         /* Matches any (more or less) character.  */
    478       1.1  christos   anychar,
    479       1.1  christos 
    480       1.1  christos         /* Matches any one char belonging to specified set.  First
    481       1.1  christos            following byte is number of bitmap bytes.  Then come bytes
    482       1.1  christos            for a bitmap saying which chars are in.  Bits in each byte
    483       1.1  christos            are ordered low-bit-first.  A character is in the set if its
    484       1.1  christos            bit is 1.  A character too large to have a bit in the map is
    485       1.1  christos            automatically not in the set.  */
    486       1.1  christos         /* ifdef MBS_SUPPORT, following element is length of character
    487       1.1  christos 	   classes, length of collating symbols, length of equivalence
    488       1.1  christos 	   classes, length of character ranges, and length of characters.
    489       1.1  christos 	   Next, character class element, collating symbols elements,
    490       1.1  christos 	   equivalence class elements, range elements, and character
    491       1.1  christos 	   elements follow.
    492       1.1  christos 	   See regex_compile function.  */
    493       1.1  christos   charset,
    494       1.1  christos 
    495       1.1  christos         /* Same parameters as charset, but match any character that is
    496       1.1  christos            not one of those specified.  */
    497       1.1  christos   charset_not,
    498       1.1  christos 
    499       1.1  christos         /* Start remembering the text that is matched, for storing in a
    500       1.1  christos            register.  Followed by one byte with the register number, in
    501       1.1  christos            the range 0 to one less than the pattern buffer's re_nsub
    502       1.1  christos            field.  Then followed by one byte with the number of groups
    503       1.1  christos            inner to this one.  (This last has to be part of the
    504       1.1  christos            start_memory only because we need it in the on_failure_jump
    505       1.1  christos            of re_match_2.)  */
    506       1.1  christos   start_memory,
    507       1.1  christos 
    508       1.1  christos         /* Stop remembering the text that is matched and store it in a
    509       1.1  christos            memory register.  Followed by one byte with the register
    510       1.1  christos            number, in the range 0 to one less than `re_nsub' in the
    511       1.1  christos            pattern buffer, and one byte with the number of inner groups,
    512       1.1  christos            just like `start_memory'.  (We need the number of inner
    513       1.1  christos            groups here because we don't have any easy way of finding the
    514       1.1  christos            corresponding start_memory when we're at a stop_memory.)  */
    515       1.1  christos   stop_memory,
    516       1.1  christos 
    517       1.1  christos         /* Match a duplicate of something remembered. Followed by one
    518       1.1  christos            byte containing the register number.  */
    519       1.1  christos   duplicate,
    520       1.1  christos 
    521       1.1  christos         /* Fail unless at beginning of line.  */
    522       1.1  christos   begline,
    523       1.1  christos 
    524       1.1  christos         /* Fail unless at end of line.  */
    525       1.1  christos   endline,
    526       1.1  christos 
    527       1.1  christos         /* Succeeds if at beginning of buffer (if emacs) or at beginning
    528       1.1  christos            of string to be matched (if not).  */
    529       1.1  christos   begbuf,
    530       1.1  christos 
    531       1.1  christos         /* Analogously, for end of buffer/string.  */
    532       1.1  christos   endbuf,
    533       1.1  christos 
    534       1.1  christos         /* Followed by two byte relative address to which to jump.  */
    535       1.1  christos   jump,
    536       1.1  christos 
    537       1.1  christos 	/* Same as jump, but marks the end of an alternative.  */
    538       1.1  christos   jump_past_alt,
    539       1.1  christos 
    540       1.1  christos         /* Followed by two-byte relative address of place to resume at
    541       1.1  christos            in case of failure.  */
    542       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    543       1.1  christos   on_failure_jump,
    544       1.1  christos 
    545       1.1  christos         /* Like on_failure_jump, but pushes a placeholder instead of the
    546       1.1  christos            current string position when executed.  */
    547       1.1  christos   on_failure_keep_string_jump,
    548       1.1  christos 
    549       1.1  christos         /* Throw away latest failure point and then jump to following
    550       1.1  christos            two-byte relative address.  */
    551       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    552       1.1  christos   pop_failure_jump,
    553       1.1  christos 
    554       1.1  christos         /* Change to pop_failure_jump if know won't have to backtrack to
    555       1.1  christos            match; otherwise change to jump.  This is used to jump
    556       1.1  christos            back to the beginning of a repeat.  If what follows this jump
    557       1.1  christos            clearly won't match what the repeat does, such that we can be
    558       1.1  christos            sure that there is no use backtracking out of repetitions
    559       1.1  christos            already matched, then we change it to a pop_failure_jump.
    560       1.1  christos            Followed by two-byte address.  */
    561       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    562       1.1  christos   maybe_pop_jump,
    563       1.1  christos 
    564       1.1  christos         /* Jump to following two-byte address, and push a dummy failure
    565       1.1  christos            point. This failure point will be thrown away if an attempt
    566       1.1  christos            is made to use it for a failure.  A `+' construct makes this
    567       1.1  christos            before the first repeat.  Also used as an intermediary kind
    568       1.1  christos            of jump when compiling an alternative.  */
    569       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    570       1.1  christos   dummy_failure_jump,
    571       1.1  christos 
    572       1.1  christos 	/* Push a dummy failure point and continue.  Used at the end of
    573       1.1  christos 	   alternatives.  */
    574       1.1  christos   push_dummy_failure,
    575       1.1  christos 
    576       1.1  christos         /* Followed by two-byte relative address and two-byte number n.
    577       1.1  christos            After matching N times, jump to the address upon failure.  */
    578       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    579       1.1  christos   succeed_n,
    580       1.1  christos 
    581       1.1  christos         /* Followed by two-byte relative address, and two-byte number n.
    582       1.1  christos            Jump to the address N times, then fail.  */
    583       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    584       1.1  christos   jump_n,
    585       1.1  christos 
    586       1.1  christos         /* Set the following two-byte relative address to the
    587       1.1  christos            subsequent two-byte number.  The address *includes* the two
    588       1.1  christos            bytes of number.  */
    589       1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    590       1.1  christos   set_number_at,
    591       1.1  christos 
    592       1.1  christos   wordchar,	/* Matches any word-constituent character.  */
    593       1.1  christos   notwordchar,	/* Matches any char that is not a word-constituent.  */
    594       1.1  christos 
    595       1.1  christos   wordbeg,	/* Succeeds if at word beginning.  */
    596       1.1  christos   wordend,	/* Succeeds if at word end.  */
    597       1.1  christos 
    598       1.1  christos   wordbound,	/* Succeeds if at a word boundary.  */
    599       1.1  christos   notwordbound	/* Succeeds if not at a word boundary.  */
    600       1.1  christos 
    601       1.1  christos # ifdef emacs
    602       1.1  christos   ,before_dot,	/* Succeeds if before point.  */
    603       1.1  christos   at_dot,	/* Succeeds if at point.  */
    604       1.1  christos   after_dot,	/* Succeeds if after point.  */
    605       1.1  christos 
    606       1.1  christos 	/* Matches any character whose syntax is specified.  Followed by
    607       1.1  christos            a byte which contains a syntax code, e.g., Sword.  */
    608       1.1  christos   syntaxspec,
    609       1.1  christos 
    610       1.1  christos 	/* Matches any character whose syntax is not that specified.  */
    611       1.1  christos   notsyntaxspec
    612       1.1  christos # endif /* emacs */
    613       1.1  christos } re_opcode_t;
    614       1.1  christos #endif /* not INSIDE_RECURSION */
    615       1.1  christos 
    616       1.1  christos 
    618       1.1  christos #ifdef BYTE
    619       1.1  christos # define CHAR_T char
    620       1.1  christos # define UCHAR_T unsigned char
    621       1.1  christos # define COMPILED_BUFFER_VAR bufp->buffer
    622       1.1  christos # define OFFSET_ADDRESS_SIZE 2
    623       1.1  christos # define PREFIX(name) byte_##name
    624       1.1  christos # define ARG_PREFIX(name) name
    625       1.1  christos # define PUT_CHAR(c) putchar (c)
    626       1.1  christos #else
    627       1.1  christos # ifdef WCHAR
    628       1.1  christos #  define CHAR_T wchar_t
    629       1.1  christos #  define UCHAR_T wchar_t
    630       1.1  christos #  define COMPILED_BUFFER_VAR wc_buffer
    631       1.1  christos #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
    632       1.1  christos #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
    633       1.1  christos #  define PREFIX(name) wcs_##name
    634       1.1  christos #  define ARG_PREFIX(name) c##name
    635       1.1  christos /* Should we use wide stream??  */
    636       1.1  christos #  define PUT_CHAR(c) printf ("%C", c);
    637       1.1  christos #  define TRUE 1
    638       1.1  christos #  define FALSE 0
    639       1.1  christos # else
    640       1.1  christos #  ifdef MBS_SUPPORT
    641       1.1  christos #   define WCHAR
    642       1.1  christos #   define INSIDE_RECURSION
    643       1.1  christos #   include "regex.c"
    644       1.1  christos #   undef INSIDE_RECURSION
    645       1.1  christos #  endif
    646       1.1  christos #  define BYTE
    647       1.1  christos #  define INSIDE_RECURSION
    648       1.1  christos #  include "regex.c"
    649       1.1  christos #  undef INSIDE_RECURSION
    650       1.1  christos # endif
    651       1.1  christos #endif
    652       1.1  christos 
    653       1.1  christos #ifdef INSIDE_RECURSION
    654       1.1  christos /* Common operations on the compiled pattern.  */
    655       1.1  christos 
/* STORE_NUMBER (DESTINATION, NUMBER): write NUMBER into the compiled
   pattern at DESTINATION.  The BYTE variant fills two consecutive bytes,
   low-order byte first; the WCHAR variant fills a single element.  */

# ifdef WCHAR
#  define STORE_NUMBER(dst, num) \
  do { \
    *(dst) = (UCHAR_T) (num); \
  } while (0)
# else /* BYTE */
#  define STORE_NUMBER(dst, num) \
  do { \
    (dst)[0] = (num) & 0xff; \
    (dst)[1] = (num) >> 8; \
  } while (0)
# endif /* WCHAR */
    671       1.1  christos 
/* STORE_NUMBER_AND_INCR (DESTINATION, NUMBER): store NUMBER exactly as
   STORE_NUMBER does, then advance DESTINATION by OFFSET_ADDRESS_SIZE
   elements so it points just past the stored number.  DESTINATION is
   assigned to, so it must be an lvalue.  */

# define STORE_NUMBER_AND_INCR(dst, num) \
  do { \
    STORE_NUMBER (dst, num); \
    (dst) += OFFSET_ADDRESS_SIZE; \
  } while (0)
    682       1.1  christos 
/* EXTRACT_NUMBER (DESTINATION, SOURCE): load into DESTINATION the number
   stored in the compiled pattern at SOURCE.  The BYTE variant combines
   two consecutive bytes, low-order byte first, passing the high byte
   through SIGN_EXTEND_CHAR; the WCHAR variant reads a single element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(dst, src) \
  do { \
    (dst) = *(src); \
  } while (0)
# else /* BYTE */
#  define EXTRACT_NUMBER(dst, src) \
  do { \
    (dst) = *(src) & 0xff; \
    (dst) += ((unsigned) SIGN_EXTEND_CHAR ((src)[1])) << 8; \
  } while (0)
# endif
    699       1.1  christos 
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER, available in DEBUG builds.  Stores in
   *DEST the number encoded in the compiled pattern at SOURCE: a single
   element when WCHAR is defined, otherwise two bytes (low-order byte
   first, high byte passed through SIGN_EXTEND_CHAR).  */
static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
static void
PREFIX(extract_number) (int *dest, UCHAR_T *source)
{
#  ifdef WCHAR
  *dest = *source;
#  else /* BYTE */
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  /* Shift as unsigned, exactly as the EXTRACT_NUMBER macro does: TEMP may
     be negative after sign extension, and left-shifting a negative int
     is undefined behavior.  */
  *dest += ((unsigned) temp) << 8;
#  endif
}

/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
   function above so it can be stepped through in a debugger.  */
#  ifndef EXTRACT_MACROS /* To debug the macros.  */
#   undef EXTRACT_NUMBER
#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
    720       1.1  christos 
/* EXTRACT_NUMBER_AND_INCR (DESTINATION, SOURCE): read a number exactly as
   EXTRACT_NUMBER does, then advance SOURCE by OFFSET_ADDRESS_SIZE
   elements so it points just past the number.  SOURCE is assigned to,
   so it must be an lvalue.  */

# define EXTRACT_NUMBER_AND_INCR(dst, src) \
  do { \
    EXTRACT_NUMBER (dst, src); \
    (src) += OFFSET_ADDRESS_SIZE; \
  } while (0)
    729       1.1  christos 
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR, available in DEBUG builds.
   Decodes the number at *SOURCE into *DESTINATION by calling
   PREFIX(extract_number), then moves *SOURCE forward by
   OFFSET_ADDRESS_SIZE elements.  */
static void PREFIX(extract_number_and_incr) (int *destination,
                                             UCHAR_T **source);
static void
PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
{
  UCHAR_T *encoded = *source;

  PREFIX(extract_number) (destination, encoded);
  *source = encoded + OFFSET_ADDRESS_SIZE;
}

/* Unless EXTRACT_MACROS is defined, the macro is replaced by a call to
   the function above.  */
#  ifndef EXTRACT_MACROS
#   undef EXTRACT_NUMBER_AND_INCR
#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
  PREFIX(extract_number_and_incr) (&dest, &src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
    747       1.1  christos 
    748       1.1  christos 
    749       1.1  christos 
    751       1.1  christos /* If DEBUG is defined, Regex prints many voluminous messages about what
    752       1.1  christos    it is doing (if the variable `debug' is nonzero).  If linked with the
    753       1.1  christos    main program in `iregex.c', you can enter patterns and strings
    754       1.1  christos    interactively.  And if linked with the main program in `main.c' and
    755       1.1  christos    the other test files, you can run the already-written tests.  */
    756       1.1  christos 
    757       1.1  christos # ifdef DEBUG
    758       1.1  christos 
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* The DEBUG_PRINT* macros below produce output only while this is
   nonzero.  */
static int debug;

/* Expands to its argument unchanged.  */
#   define DEBUG_STATEMENT(e) e

/* printf wrappers gated on `debug'.  Each one is wrapped in
   do { } while (0) so that it behaves as a single statement: a bare
   `if (debug) ...' expansion would capture the `else' of an enclosing
   if/else written around the macro call.  */
#   define DEBUG_PRINT1(x) \
  do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) \
  do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */

/* Dump the compiled pattern between S and E when `debug' is nonzero.
   The P argument is accepted but not used by the expansion.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
  do { \
    if (debug) PREFIX(print_partial_compiled_pattern) (s, e); \
  } while (0)

/* Call PREFIX(print_double_string) with the given arguments when `debug'
   is nonzero.  */
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
  do { \
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2); \
  } while (0)
    780       1.1  christos 
    781       1.1  christos 
    782       1.1  christos /* Print the fastmap in human-readable form.  */
    783       1.1  christos 
    784       1.1  christos #  ifndef DEFINED_ONCE
    785       1.1  christos void
    786       1.1  christos print_fastmap (char *fastmap)
    787       1.1  christos {
    788       1.1  christos   unsigned was_a_range = 0;
    789       1.1  christos   unsigned i = 0;
    790       1.1  christos 
    791       1.1  christos   while (i < (1 << BYTEWIDTH))
    792       1.1  christos     {
    793       1.1  christos       if (fastmap[i++])
    794       1.1  christos 	{
    795       1.1  christos 	  was_a_range = 0;
    796       1.1  christos           putchar (i - 1);
    797       1.1  christos           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
    798       1.1  christos             {
    799       1.1  christos               was_a_range = 1;
    800       1.1  christos               i++;
    801       1.1  christos             }
    802       1.1  christos 	  if (was_a_range)
    803       1.1  christos             {
    804       1.1  christos               printf ("-");
    805       1.1  christos               putchar (i - 1);
    806       1.1  christos             }
    807       1.1  christos         }
    808       1.1  christos     }
    809       1.1  christos   putchar ('\n');
    810       1.1  christos }
    811       1.1  christos #  endif /* not DEFINED_ONCE */
    812       1.1  christos 
    813       1.1  christos 
    814       1.1  christos /* Print a compiled pattern string in human-readable form, starting at
    815       1.1  christos    the START pointer into it and ending just before the pointer END.  */
    816       1.1  christos 
    817       1.1  christos void
    818       1.1  christos PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
    819       1.1  christos {
    820       1.1  christos   int mcnt, mcnt2;
    821       1.1  christos   UCHAR_T *p1;
    822       1.1  christos   UCHAR_T *p = start;
    823       1.1  christos   UCHAR_T *pend = end;
    824       1.1  christos 
    825       1.1  christos   if (start == NULL)
    826       1.1  christos     {
    827       1.1  christos       printf ("(null)\n");
    828       1.1  christos       return;
    829       1.1  christos     }
    830       1.1  christos 
    831       1.1  christos   /* Loop over pattern commands.  */
    832       1.1  christos   while (p < pend)
    833       1.1  christos     {
    834       1.1  christos #  ifdef _LIBC
    835       1.1  christos       printf ("%td:\t", p - start);
    836       1.1  christos #  else
    837       1.1  christos       printf ("%ld:\t", (long int) (p - start));
    838       1.1  christos #  endif
    839       1.1  christos 
    840       1.1  christos       switch ((re_opcode_t) *p++)
    841       1.1  christos 	{
    842       1.1  christos         case no_op:
    843       1.1  christos           printf ("/no_op");
    844       1.1  christos           break;
    845       1.1  christos 
    846       1.1  christos 	case exactn:
    847       1.1  christos 	  mcnt = *p++;
    848       1.1  christos           printf ("/exactn/%d", mcnt);
    849       1.1  christos           do
    850       1.1  christos 	    {
    851       1.1  christos               putchar ('/');
    852       1.1  christos 	      PUT_CHAR (*p++);
    853       1.1  christos             }
    854       1.1  christos           while (--mcnt);
    855       1.1  christos           break;
    856       1.1  christos 
    857       1.1  christos #  ifdef MBS_SUPPORT
    858       1.1  christos 	case exactn_bin:
    859       1.1  christos 	  mcnt = *p++;
    860       1.1  christos 	  printf ("/exactn_bin/%d", mcnt);
    861       1.1  christos           do
    862       1.1  christos 	    {
    863       1.1  christos 	      printf("/%lx", (long int) *p++);
    864       1.1  christos             }
    865       1.1  christos           while (--mcnt);
    866       1.1  christos           break;
    867       1.1  christos #  endif /* MBS_SUPPORT */
    868       1.1  christos 
    869       1.1  christos 	case start_memory:
    870       1.1  christos           mcnt = *p++;
    871       1.1  christos           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
    872       1.1  christos           break;
    873       1.1  christos 
    874       1.1  christos 	case stop_memory:
    875       1.1  christos           mcnt = *p++;
    876       1.1  christos 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
    877       1.1  christos           break;
    878       1.1  christos 
    879       1.1  christos 	case duplicate:
    880       1.1  christos 	  printf ("/duplicate/%ld", (long int) *p++);
    881       1.1  christos 	  break;
    882       1.1  christos 
    883       1.1  christos 	case anychar:
    884       1.1  christos 	  printf ("/anychar");
    885       1.1  christos 	  break;
    886       1.1  christos 
    887       1.1  christos 	case charset:
    888       1.1  christos         case charset_not:
    889       1.1  christos           {
    890       1.1  christos #  ifdef WCHAR
    891       1.1  christos 	    int i, length;
    892       1.1  christos 	    wchar_t *workp = p;
    893       1.1  christos 	    printf ("/charset [%s",
    894       1.1  christos 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
    895       1.1  christos 	    p += 5;
    896       1.1  christos 	    length = *workp++; /* the length of char_classes */
    897       1.1  christos 	    for (i=0 ; i<length ; i++)
    898       1.1  christos 	      printf("[:%lx:]", (long int) *p++);
    899       1.1  christos 	    length = *workp++; /* the length of collating_symbol */
    900       1.1  christos 	    for (i=0 ; i<length ;)
    901       1.1  christos 	      {
    902       1.1  christos 		printf("[.");
    903       1.1  christos 		while(*p != 0)
    904       1.1  christos 		  PUT_CHAR((i++,*p++));
    905       1.1  christos 		i++,p++;
    906       1.1  christos 		printf(".]");
    907       1.1  christos 	      }
    908       1.1  christos 	    length = *workp++; /* the length of equivalence_class */
    909       1.1  christos 	    for (i=0 ; i<length ;)
    910       1.1  christos 	      {
    911       1.1  christos 		printf("[=");
    912       1.1  christos 		while(*p != 0)
    913       1.1  christos 		  PUT_CHAR((i++,*p++));
    914       1.1  christos 		i++,p++;
    915       1.1  christos 		printf("=]");
    916       1.1  christos 	      }
    917       1.1  christos 	    length = *workp++; /* the length of char_range */
    918       1.1  christos 	    for (i=0 ; i<length ; i++)
    919       1.1  christos 	      {
    920       1.1  christos 		wchar_t range_start = *p++;
    921       1.1  christos 		wchar_t range_end = *p++;
    922       1.1  christos 		printf("%C-%C", range_start, range_end);
    923       1.1  christos 	      }
    924       1.1  christos 	    length = *workp++; /* the length of char */
    925       1.1  christos 	    for (i=0 ; i<length ; i++)
    926       1.1  christos 	      printf("%C", *p++);
    927       1.1  christos 	    putchar (']');
    928       1.1  christos #  else
    929       1.1  christos             register int c, last = -100;
    930       1.1  christos 	    register int in_range = 0;
    931       1.1  christos 
    932       1.1  christos 	    printf ("/charset [%s",
    933       1.1  christos 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
    934       1.1  christos 
    935       1.1  christos             assert (p + *p < pend);
    936       1.1  christos 
    937       1.1  christos             for (c = 0; c < 256; c++)
    938       1.1  christos 	      if (c / 8 < *p
    939       1.1  christos 		  && (p[1 + (c/8)] & (1 << (c % 8))))
    940       1.1  christos 		{
    941       1.1  christos 		  /* Are we starting a range?  */
    942       1.1  christos 		  if (last + 1 == c && ! in_range)
    943       1.1  christos 		    {
    944       1.1  christos 		      putchar ('-');
    945       1.1  christos 		      in_range = 1;
    946       1.1  christos 		    }
    947       1.1  christos 		  /* Have we broken a range?  */
    948       1.1  christos 		  else if (last + 1 != c && in_range)
    949       1.1  christos               {
    950       1.1  christos 		      putchar (last);
    951       1.1  christos 		      in_range = 0;
    952       1.1  christos 		    }
    953       1.1  christos 
    954       1.1  christos 		  if (! in_range)
    955       1.1  christos 		    putchar (c);
    956       1.1  christos 
    957       1.1  christos 		  last = c;
    958       1.1  christos               }
    959       1.1  christos 
    960       1.1  christos 	    if (in_range)
    961       1.1  christos 	      putchar (last);
    962       1.1  christos 
    963       1.1  christos 	    putchar (']');
    964       1.1  christos 
    965       1.1  christos 	    p += 1 + *p;
    966       1.1  christos #  endif /* WCHAR */
    967       1.1  christos 	  }
    968       1.1  christos 	  break;
    969       1.1  christos 
    970       1.1  christos 	case begline:
    971       1.1  christos 	  printf ("/begline");
    972       1.1  christos           break;
    973       1.1  christos 
    974       1.1  christos 	case endline:
    975       1.1  christos           printf ("/endline");
    976       1.1  christos           break;
    977       1.1  christos 
    978       1.1  christos 	case on_failure_jump:
    979       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
    980       1.1  christos #  ifdef _LIBC
    981       1.1  christos   	  printf ("/on_failure_jump to %td", p + mcnt - start);
    982       1.1  christos #  else
    983       1.1  christos   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
    984       1.1  christos #  endif
    985       1.1  christos           break;
    986       1.1  christos 
    987       1.1  christos 	case on_failure_keep_string_jump:
    988       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
    989       1.1  christos #  ifdef _LIBC
    990       1.1  christos   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
    991       1.1  christos #  else
    992       1.1  christos   	  printf ("/on_failure_keep_string_jump to %ld",
    993       1.1  christos 		  (long int) (p + mcnt - start));
    994       1.1  christos #  endif
    995       1.1  christos           break;
    996       1.1  christos 
    997       1.1  christos 	case dummy_failure_jump:
    998       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
    999       1.1  christos #  ifdef _LIBC
   1000       1.1  christos   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
   1001       1.1  christos #  else
   1002       1.1  christos   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
   1003       1.1  christos #  endif
   1004       1.1  christos           break;
   1005       1.1  christos 
   1006       1.1  christos 	case push_dummy_failure:
   1007       1.1  christos           printf ("/push_dummy_failure");
   1008       1.1  christos           break;
   1009       1.1  christos 
   1010       1.1  christos         case maybe_pop_jump:
   1011       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1012       1.1  christos #  ifdef _LIBC
   1013       1.1  christos   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
   1014       1.1  christos #  else
   1015       1.1  christos   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
   1016       1.1  christos #  endif
   1017       1.1  christos 	  break;
   1018       1.1  christos 
   1019       1.1  christos         case pop_failure_jump:
   1020       1.1  christos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1021       1.1  christos #  ifdef _LIBC
   1022       1.1  christos   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
   1023       1.1  christos #  else
   1024       1.1  christos   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
   1025       1.1  christos #  endif
   1026       1.1  christos 	  break;
   1027       1.1  christos 
   1028       1.1  christos         case jump_past_alt:
   1029       1.1  christos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1030       1.1  christos #  ifdef _LIBC
   1031       1.1  christos   	  printf ("/jump_past_alt to %td", p + mcnt - start);
   1032       1.1  christos #  else
   1033       1.1  christos   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
   1034       1.1  christos #  endif
   1035       1.1  christos 	  break;
   1036       1.1  christos 
   1037       1.1  christos         case jump:
   1038       1.1  christos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1039       1.1  christos #  ifdef _LIBC
   1040       1.1  christos   	  printf ("/jump to %td", p + mcnt - start);
   1041       1.1  christos #  else
   1042       1.1  christos   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
   1043       1.1  christos #  endif
   1044       1.1  christos 	  break;
   1045       1.1  christos 
   1046       1.1  christos         case succeed_n:
   1047       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1048       1.1  christos 	  p1 = p + mcnt;
   1049       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1050       1.1  christos #  ifdef _LIBC
   1051       1.1  christos 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
   1052       1.1  christos #  else
   1053       1.1  christos 	  printf ("/succeed_n to %ld, %d times",
   1054       1.1  christos 		  (long int) (p1 - start), mcnt2);
   1055       1.1  christos #  endif
   1056       1.1  christos           break;
   1057       1.1  christos 
   1058       1.1  christos         case jump_n:
   1059       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1060       1.1  christos 	  p1 = p + mcnt;
   1061       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1062       1.1  christos 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
   1063       1.1  christos           break;
   1064       1.1  christos 
   1065       1.1  christos         case set_number_at:
   1066       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1067       1.1  christos 	  p1 = p + mcnt;
   1068       1.1  christos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1069       1.1  christos #  ifdef _LIBC
   1070       1.1  christos 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
   1071       1.1  christos #  else
   1072       1.1  christos 	  printf ("/set_number_at location %ld to %d",
   1073       1.1  christos 		  (long int) (p1 - start), mcnt2);
   1074       1.1  christos #  endif
   1075       1.1  christos           break;
   1076       1.1  christos 
   1077       1.1  christos         case wordbound:
   1078       1.1  christos 	  printf ("/wordbound");
   1079       1.1  christos 	  break;
   1080       1.1  christos 
   1081       1.1  christos 	case notwordbound:
   1082       1.1  christos 	  printf ("/notwordbound");
   1083       1.1  christos           break;
   1084       1.1  christos 
   1085       1.1  christos 	case wordbeg:
   1086       1.1  christos 	  printf ("/wordbeg");
   1087       1.1  christos 	  break;
   1088       1.1  christos 
   1089       1.1  christos 	case wordend:
   1090       1.1  christos 	  printf ("/wordend");
   1091       1.1  christos 	  break;
   1092       1.1  christos 
   1093       1.1  christos #  ifdef emacs
   1094       1.1  christos 	case before_dot:
   1095       1.1  christos 	  printf ("/before_dot");
   1096       1.1  christos           break;
   1097       1.1  christos 
   1098       1.1  christos 	case at_dot:
   1099       1.1  christos 	  printf ("/at_dot");
   1100       1.1  christos           break;
   1101       1.1  christos 
   1102       1.1  christos 	case after_dot:
   1103       1.1  christos 	  printf ("/after_dot");
   1104       1.1  christos           break;
   1105       1.1  christos 
   1106       1.1  christos 	case syntaxspec:
   1107       1.1  christos           printf ("/syntaxspec");
   1108       1.1  christos 	  mcnt = *p++;
   1109       1.1  christos 	  printf ("/%d", mcnt);
   1110       1.1  christos           break;
   1111       1.1  christos 
   1112       1.1  christos 	case notsyntaxspec:
   1113       1.1  christos           printf ("/notsyntaxspec");
   1114       1.1  christos 	  mcnt = *p++;
   1115       1.1  christos 	  printf ("/%d", mcnt);
   1116       1.1  christos 	  break;
   1117       1.1  christos #  endif /* emacs */
   1118       1.1  christos 
   1119       1.1  christos 	case wordchar:
   1120       1.1  christos 	  printf ("/wordchar");
   1121       1.1  christos           break;
   1122       1.1  christos 
   1123       1.1  christos 	case notwordchar:
   1124       1.1  christos 	  printf ("/notwordchar");
   1125       1.1  christos           break;
   1126       1.1  christos 
   1127       1.1  christos 	case begbuf:
   1128       1.1  christos 	  printf ("/begbuf");
   1129       1.1  christos           break;
   1130       1.1  christos 
   1131       1.1  christos 	case endbuf:
   1132       1.1  christos 	  printf ("/endbuf");
   1133       1.1  christos           break;
   1134       1.1  christos 
   1135       1.1  christos         default:
   1136       1.1  christos           printf ("?%ld", (long int) *(p-1));
   1137       1.1  christos 	}
   1138       1.1  christos 
   1139       1.1  christos       putchar ('\n');
   1140       1.1  christos     }
   1141       1.1  christos 
   1142       1.1  christos #  ifdef _LIBC
   1143       1.1  christos   printf ("%td:\tend of pattern.\n", p - start);
   1144       1.1  christos #  else
   1145       1.1  christos   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
   1146       1.1  christos #  endif
   1147       1.1  christos }
   1148       1.1  christos 
   1149       1.1  christos 
   1150       1.1  christos void
   1151       1.1  christos PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
   1152       1.1  christos {
   1153       1.1  christos   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
   1154       1.1  christos 
   1155       1.1  christos   PREFIX(print_partial_compiled_pattern) (buffer, buffer
   1156       1.1  christos 				  + bufp->used / sizeof(UCHAR_T));
   1157       1.1  christos   printf ("%ld bytes used/%ld bytes allocated.\n",
   1158       1.1  christos 	  bufp->used, bufp->allocated);
   1159       1.1  christos 
   1160       1.1  christos   if (bufp->fastmap_accurate && bufp->fastmap)
   1161       1.1  christos     {
   1162       1.1  christos       printf ("fastmap: ");
   1163       1.1  christos       print_fastmap (bufp->fastmap);
   1164       1.1  christos     }
   1165       1.1  christos 
   1166       1.1  christos #  ifdef _LIBC
   1167       1.1  christos   printf ("re_nsub: %Zd\t", bufp->re_nsub);
   1168       1.1  christos #  else
   1169       1.1  christos   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
   1170       1.1  christos #  endif
   1171       1.1  christos   printf ("regs_alloc: %d\t", bufp->regs_allocated);
   1172       1.1  christos   printf ("can_be_null: %d\t", bufp->can_be_null);
   1173       1.1  christos   printf ("newline_anchor: %d\n", bufp->newline_anchor);
   1174       1.1  christos   printf ("no_sub: %d\t", bufp->no_sub);
   1175       1.1  christos   printf ("not_bol: %d\t", bufp->not_bol);
   1176       1.1  christos   printf ("not_eol: %d\t", bufp->not_eol);
   1177       1.1  christos   printf ("syntax: %lx\n", bufp->syntax);
   1178       1.1  christos   /* Perhaps we should print the translate table?  */
   1179       1.1  christos }
   1180       1.1  christos 
   1181       1.1  christos 
   1182       1.1  christos void
   1183       1.1  christos PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
   1184       1.1  christos                              int size1, const CHAR_T *string2, int size2)
   1185       1.1  christos {
   1186       1.1  christos   int this_char;
   1187       1.1  christos 
   1188       1.1  christos   if (where == NULL)
   1189       1.1  christos     printf ("(null)");
   1190       1.1  christos   else
   1191       1.1  christos     {
   1192       1.1  christos       int cnt;
   1193       1.1  christos 
   1194       1.1  christos       if (FIRST_STRING_P (where))
   1195       1.1  christos         {
   1196       1.1  christos           for (this_char = where - string1; this_char < size1; this_char++)
   1197       1.1  christos 	    PUT_CHAR (string1[this_char]);
   1198       1.1  christos 
   1199       1.1  christos           where = string2;
   1200       1.1  christos         }
   1201       1.1  christos 
   1202       1.1  christos       cnt = 0;
   1203       1.1  christos       for (this_char = where - string2; this_char < size2; this_char++)
   1204       1.1  christos 	{
   1205       1.1  christos 	  PUT_CHAR (string2[this_char]);
   1206       1.1  christos 	  if (++cnt > 100)
   1207       1.1  christos 	    {
   1208       1.1  christos 	      fputs ("...", stdout);
   1209       1.1  christos 	      break;
   1210       1.1  christos 	    }
   1211       1.1  christos 	}
   1212       1.1  christos     }
   1213       1.1  christos }
   1214       1.1  christos 
   1215       1.1  christos #  ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (int c)
{
  (void) fputc (c, stderr);
}
   1221       1.1  christos #  endif
   1222       1.1  christos 
   1223       1.1  christos # else /* not DEBUG */
   1224       1.1  christos 
#  ifndef DEFINED_ONCE
/* Not DEBUG: assert and the DEBUG_* statement/print helpers are defined
   with empty replacement lists, so every use of them expands to
   nothing.  */
#   undef assert
#   define assert(e)

#   define DEBUG_STATEMENT(e)
#   define DEBUG_PRINT1(x)
#   define DEBUG_PRINT2(x1, x2)
#   define DEBUG_PRINT3(x1, x2, x3)
#   define DEBUG_PRINT4(x1, x2, x3, x4)
#  endif /* not DEFINED_ONCE */
/* These two sit outside the DEFINED_ONCE guard, so they are (re)defined,
   again as empty, whenever this section is processed.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   1237       1.1  christos 
   1238       1.1  christos # endif /* not DEBUG */
   1239       1.1  christos 
   1240       1.1  christos 
   1241       1.1  christos 
   1243       1.1  christos # ifdef WCHAR
   1244       1.1  christos /* This  convert a multibyte string to a wide character string.
   1245       1.1  christos    And write their correspondances to offset_buffer(see below)
   1246       1.1  christos    and write whether each wchar_t is binary data to is_binary.
   1247       1.1  christos    This assume invalid multibyte sequences as binary data.
   1248       1.1  christos    We assume offset_buffer and is_binary is already allocated
   1249       1.1  christos    enough space.  */
   1250       1.1  christos 
   1251       1.1  christos static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
   1252       1.1  christos 				  size_t len, int *offset_buffer,
   1253       1.1  christos 				  char *is_binary);
   1254       1.1  christos static size_t
   1255       1.1  christos convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
   1256       1.1  christos                     int *offset_buffer, char *is_binary)
   1257       1.1  christos      /* It hold correspondances between src(char string) and
   1258       1.1  christos 	dest(wchar_t string) for optimization.
   1259       1.1  christos 	e.g. src  = "xxxyzz"
   1260       1.1  christos              dest = {'X', 'Y', 'Z'}
   1261       1.1  christos 	      (each "xxx", "y" and "zz" represent one multibyte character
   1262       1.1  christos 	       corresponding to 'X', 'Y' and 'Z'.)
   1263       1.1  christos 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
   1264       1.1  christos 	  	        = {0, 3, 4, 6}
   1265       1.1  christos      */
   1266       1.1  christos {
   1267       1.1  christos   wchar_t *pdest = dest;
   1268       1.1  christos   const unsigned char *psrc = src;
   1269       1.1  christos   size_t wc_count = 0;
   1270       1.1  christos 
   1271       1.1  christos   mbstate_t mbs;
   1272       1.1  christos   int i, consumed;
   1273       1.1  christos   size_t mb_remain = len;
   1274       1.1  christos   size_t mb_count = 0;
   1275       1.1  christos 
   1276       1.1  christos   /* Initialize the conversion state.  */
   1277       1.1  christos   memset (&mbs, 0, sizeof (mbstate_t));
   1278       1.1  christos 
   1279       1.1  christos   offset_buffer[0] = 0;
   1280       1.1  christos   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
   1281       1.1  christos 	 psrc += consumed)
   1282       1.1  christos     {
   1283       1.1  christos #ifdef _LIBC
   1284       1.1  christos       consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
   1285       1.1  christos #else
   1286       1.1  christos       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
   1287       1.1  christos #endif
   1288       1.1  christos 
   1289       1.1  christos       if (consumed <= 0)
   1290       1.1  christos 	/* failed to convert. maybe src contains binary data.
   1291       1.1  christos 	   So we consume 1 byte manualy.  */
   1292       1.1  christos 	{
   1293       1.1  christos 	  *pdest = *psrc;
   1294       1.1  christos 	  consumed = 1;
   1295       1.1  christos 	  is_binary[wc_count] = TRUE;
   1296       1.1  christos 	}
   1297       1.1  christos       else
   1298       1.1  christos 	is_binary[wc_count] = FALSE;
   1299       1.1  christos       /* In sjis encoding, we use yen sign as escape character in
   1300       1.1  christos 	 place of reverse solidus. So we convert 0x5c(yen sign in
   1301       1.1  christos 	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
   1302       1.1  christos 	 solidus in UCS2).  */
   1303       1.1  christos       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
   1304       1.1  christos 	*pdest = (wchar_t) *psrc;
   1305       1.1  christos 
   1306       1.1  christos       offset_buffer[wc_count + 1] = mb_count += consumed;
   1307       1.1  christos     }
   1308       1.1  christos 
   1309       1.1  christos   /* Fill remain of the buffer with sentinel.  */
   1310       1.1  christos   for (i = wc_count + 1 ; i <= len ; i++)
   1311       1.1  christos     offset_buffer[i] = mb_count + 1;
   1312       1.1  christos 
   1313       1.1  christos   return wc_count;
   1314       1.1  christos }
   1315       1.1  christos 
   1316       1.1  christos # endif /* WCHAR */
   1317       1.1  christos 
   1318       1.1  christos #else /* not INSIDE_RECURSION */
   1319       1.1  christos 
   1320       1.1  christos /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
   1321       1.1  christos    also be assigned to arbitrarily: each pattern buffer stores its own
   1322       1.1  christos    syntax, so it can be changed between regex compilations.  */
   1323       1.1  christos /* This has no initializer because initialized variables in Emacs
   1324       1.1  christos    become read-only after dumping.  */
   1325       1.1  christos reg_syntax_t re_syntax_options;
   1326       1.1  christos 
   1327       1.1  christos 
   1328       1.1  christos /* Specify the precise syntax of regexps for compilation.  This provides
   1329       1.1  christos    for compatibility for various utilities which historically have
   1330       1.1  christos    different, incompatible syntaxes.
   1331       1.1  christos 
   1332       1.1  christos    The argument SYNTAX is a bit mask comprised of the various bits
   1333       1.1  christos    defined in regex.h.  We return the old syntax.  */
   1334       1.1  christos 
   1335       1.1  christos reg_syntax_t
   1336       1.1  christos re_set_syntax (reg_syntax_t syntax)
   1337       1.1  christos {
   1338       1.1  christos   reg_syntax_t ret = re_syntax_options;
   1339       1.1  christos 
   1340       1.1  christos   re_syntax_options = syntax;
   1341       1.1  christos # ifdef DEBUG
   1342       1.1  christos   if (syntax & RE_DEBUG)
   1343       1.1  christos     debug = 1;
   1344       1.1  christos   else if (debug) /* was on but now is not */
   1345       1.1  christos     debug = 0;
   1346       1.1  christos # endif /* DEBUG */
   1347       1.1  christos   return ret;
   1348       1.1  christos }
   1349       1.1  christos # ifdef _LIBC
   1350       1.1  christos weak_alias (__re_set_syntax, re_set_syntax)
   1351       1.1  christos # endif
   1352       1.1  christos 
   1353       1.1  christos /* This table gives an error message for each of the error codes listed
   1355       1.1  christos    in regex.h.  Obviously the order here has to be same as there.
   1356       1.1  christos    POSIX doesn't require that we do anything for REG_NOERROR,
   1357       1.1  christos    but why not be nice?  */
   1358       1.1  christos 
   1359       1.1  christos static const char *re_error_msgid[] =
   1360       1.1  christos   {
   1361       1.1  christos     gettext_noop ("Success"),	/* REG_NOERROR */
   1362       1.1  christos     gettext_noop ("No match"),	/* REG_NOMATCH */
   1363       1.1  christos     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
   1364       1.1  christos     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
   1365       1.1  christos     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
   1366       1.1  christos     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
   1367       1.1  christos     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
   1368       1.1  christos     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
   1369       1.1  christos     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
   1370       1.1  christos     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
   1371       1.1  christos     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
   1372       1.1  christos     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
   1373       1.1  christos     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
   1374       1.1  christos     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
   1375       1.1  christos     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
   1376       1.1  christos     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
   1377       1.1  christos     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
   1378       1.1  christos   };
   1379       1.1  christos 
   1380       1.1  christos #endif /* INSIDE_RECURSION */
   1382       1.1  christos 
#ifndef DEFINED_ONCE
/* Avoiding alloca during matching, to placate r_alloc.  */

/* MATCH_MAY_ALLOCATE stays defined unless the searching and matching
   functions must be kept from calling alloca.  On some systems alloca
   is built on top of malloc; when the relocating allocator routines are
   in use, such a malloc could trigger a relocation which (if the
   strings being searched live in the ralloc heap) would move the data
   out from underneath the regexp routines.

   A second reason to avoid allocation: Emacs handles input from X in a
   signal handler, and handling X input may call malloc.  Should input
   arrive while a matching routine is itself inside malloc, the
   allocator state would be corrupted.  Emacs cannot simply block input
   around the matching routines, because interrupts would then go
   unnoticed.  So Emacs blocks input around every regexp call except the
   matching calls, which are left unprotected on the assumption that
   they never malloc.  */

/* The normal case: allocation during matching is allowed.  */
# define MATCH_MAY_ALLOCATE

/* With GNU C the C implementation of alloca is never really used,
   whatever config.h may claim, so no precautions are needed for it.  */
# if defined __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate when (1) they would do so with
   malloc and (2) malloc is not safe for them to use.
   Note that with REL_ALLOC defined, matching would not use malloc for
   the failure stack but would still use it for the register vectors,
   so REL_ALLOC should not affect this.  */
# if defined emacs && (defined C_ALLOCA || defined REGEX_MALLOC)
#  undef MATCH_MAY_ALLOCATE
# endif
#endif /* not DEFINED_ONCE */
   1421       1.1  christos 
   1422       1.1  christos #ifdef INSIDE_RECURSION
   1424       1.1  christos /* Failure stack declarations and macros; both re_compile_fastmap and
   1425       1.1  christos    re_match_2 use a failure stack.  These have to be macros because of
   1426       1.1  christos    REGEX_ALLOCATE_STACK.  */
   1427       1.1  christos 
   1428       1.1  christos 
/* How many failure points to make room for initially when matching.
   More space is allocated if this number is exceeded, so it is not a
   hard limit.  A definition supplied earlier takes precedence.  */
# if !defined INIT_FAILURE_ALLOC
#  define INIT_FAILURE_ALLOC 5
# endif
   1435       1.1  christos 
/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.  */

/* Two parallel sets of definitions follow.  With INT_IS_16BIT defined,
   the limit, the integer member of a stack element and the size/avail
   counters are declared with (unsigned) long int; otherwise they use
   plain (unsigned) int.  In both variants the default limit is 4000
   when MATCH_MAY_ALLOCATE is defined and 2000 when it is not.  */
# ifdef INT_IS_16BIT

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
long int re_max_failures = 4000;
#   else
long int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: either a pointer or an integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  long int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself: the element array, its capacity in
   elements and the index of the next free element.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned long int size;
  unsigned long int avail;		/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# else /* not INT_IS_16BIT */

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
int re_max_failures = 4000;
#   else
int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: either a pointer or an integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself: the element array, its capacity in
   elements and the index of the next free element.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# endif /* INT_IS_16BIT */
   1496       1.1  christos 
# ifndef DEFINED_ONCE
/* Predicates on the failure stack.  FAIL_STACK_EMPTY and FAIL_STACK_FULL
   refer to a variable named `fail_stack'; FAIL_STACK_PTR_EMPTY refers to
   a pointer named `fail_stack_ptr'.  The stack is empty when `avail' is
   zero and full when `avail' has reached `size'.  */
#  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
# endif
   1502       1.1  christos 
   1503       1.1  christos 
/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both macros operate on a variable named `fail_stack'.  When
   MATCH_MAY_ALLOCATE is defined, INIT_FAIL_STACK obtains room for
   INIT_FAILURE_ALLOC elements through REGEX_ALLOCATE_STACK and
   RESET_FAIL_STACK releases it through REGEX_FREE_STACK.  Otherwise
   INIT_FAIL_STACK only resets `avail' to zero (the stack and its size
   are left untouched) and RESET_FAIL_STACK expands to nothing.  */

# ifdef MATCH_MAY_ALLOCATE
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
									\
    if (fail_stack.stack == NULL)				\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;			\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
# else
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()
# endif
   1529       1.1  christos 
   1530       1.1  christos 
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   "Too large" means the current size already exceeds
   re_max_failures * MAX_FAILURE_ITEMS; because the test is `>', a stack
   whose size is exactly at that limit is still doubled once more.

   The value returned by REGEX_REALLOCATE_STACK is stored directly into
   (fail_stack).stack, so after a failed reallocation that member is
   NULL and the size is left unchanged.
   NOTE(review): whether the previous block is lost in that case depends
   on how REGEX_REALLOCATE_STACK is implemented -- confirm.

   The argument is evaluated several times, so it must be free of side
   effects.  */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
   1550       1.1  christos 
   1551       1.1  christos 
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The fullness test is made on FAIL_STACK itself (avail == size), so the
   macro works for whichever stack is passed in and not only for a
   variable that happens to be called `fail_stack'.  When the stack is
   full it is first grown with DOUBLE_FAIL_STACK.  FAIL_STACK is
   evaluated more than once and must have no side effects.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((((FAIL_STACK).avail == (FAIL_STACK).size)				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
   1561       1.1  christos 
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   Like the other PUSH_FAILURE_* macros it stores at index `avail' and
   then increments `avail'; it performs no check that room is left.  */
# define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_ELT(item)					\
  fail_stack.stack[fail_stack.avail++] =  (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.
   Each one decrements `avail' first and then yields the element (or the
   selected member of it) found at that index.  */
# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
   1585       1.1  christos 
/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP
   stores the popped integer through ITEM_ADDR; without DEBUG both
   expand to nothing, so the id takes up no stack slot.  */
# ifdef DEBUG
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# endif
   1594       1.1  christos 
   1595       1.1  christos 
/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   be declared.

   Does `return FAILURE_CODE' if runs out of memory.

   The stack is first doubled until REMAINING_AVAIL_SLOTS is at least
   NUM_FAILURE_ITEMS.  Items are then pushed in this order:
     - for every register from lowest_active_reg to highest_active_reg:
       regstart[reg], regend[reg] and reg_info[reg].word;
     - lowest_active_reg and highest_active_reg;
     - PATTERN_PLACE;
     - STRING_PLACE;
     - the failure id, through DEBUG_PUSH.
   The macro also reads lowest_active_reg and highest_active_reg from
   the enclosing scope, and its debugging statements refer to
   failure_id, nfailure_points_pushed, bufp, pend, string1, size1,
   string2 and size2.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    /* Can't be int, since there is not a shred of a guarantee that int	\
       is wide enough to hold a value of something to which pointer can	\
       be assigned */							\
    active_reg_t this_reg;						\
    									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
									\
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
          return failure_code;						\
									\
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
		       (fail_stack).size);				\
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    if (1)								\
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
			reg_info[this_reg].word.pointer);		\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_POINTER (pattern_place);				\
									\
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
				 size2);				\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_POINTER (string_place);				\
									\
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
   1683       1.1  christos 
# ifndef DEFINED_ONCE
/* Every saved register occupies this many failure-stack entries: its
   start pointer, its end pointer and its info word.  */
#  define NUM_REG_ITEMS  3

/* Entries stored per failure point that do not belong to a register.
   A DEBUG build stores one more, the failure point id.  */
#  ifndef DEBUG
#   define NUM_NONREG_ITEMS 4
#  else
#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#  endif

/* Upper bound used when sizing the stack.  This was once computed from
   (num_regs - 1), the number of registers the regexp saves, but is now
   fixed at 5 registers so that a regexp with lots of parens does not
   overflow the stack.  */
#  define MAX_FAILURE_ITEMS (NUM_NONREG_ITEMS + 5 * NUM_REG_ITEMS)

/* Number of entries one failure point really takes, given the current
   `lowest_active_reg' and `highest_active_reg'.  */
#  define NUM_FAILURE_ITEMS					\
  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS	\
   + NUM_NONREG_ITEMS)

/* Free entries left on `fail_stack' before it has to grow.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
   1712       1.1  christos 
   1713       1.1  christos 
   1714       1.1  christos /* Pops what the failure-point push macro above pushes, in reverse order.
   1715       1.1  christos 
   1716       1.1  christos    We restore into the parameters, all of which should be lvalues:
   1717       1.1  christos      STR -- the saved data position.
   1718       1.1  christos      PAT -- the saved pattern position.
   1719       1.1  christos      LOW_REG, HIGH_REG -- the lowest and highest active registers.
   1720       1.1  christos      REGSTART, REGEND -- arrays of string positions.
   1721       1.1  christos      REG_INFO -- array of information about each subexpression.
   1722       1.1  christos 
   1723       1.1  christos    Also assumes the variables `fail_stack' and (if debugging), `bufp',
   1724       1.1  christos    `pend', `string1', `size1', `string2', and `size2'.  It also clears
                            `set_regs_matched_done'.  Because the register loop is guarded by
                            `if (1)', the `else' arm that follows it is never executed.  The
                            expansion is a bare brace block, not a `do ... while (0)'.  */
   1725       1.1  christos # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
   1726       1.1  christos {									\
   1727       1.1  christos   DEBUG_STATEMENT (unsigned failure_id;)				\
   1728       1.1  christos   active_reg_t this_reg;						\
   1729       1.1  christos   const UCHAR_T *string_temp;						\
   1730       1.1  christos 									\
   1731       1.1  christos   assert (!FAIL_STACK_EMPTY ());					\
   1732       1.1  christos 									\
   1733       1.1  christos   /* Remove failure points and point to how many regs pushed.  */	\
   1734       1.1  christos   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
   1735       1.1  christos   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
   1736       1.1  christos   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
   1737       1.1  christos 									\
   1738       1.1  christos   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
   1739       1.1  christos 									\
   1740       1.1  christos   DEBUG_POP (&failure_id);						\
   1741       1.1  christos   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
   1742       1.1  christos 									\
   1743       1.1  christos   /* If the saved string location is NULL, it came from an		\
   1744       1.1  christos      on_failure_keep_string_jump opcode, and we want to throw away the	\
   1745       1.1  christos      saved NULL, thus retaining our current position in the string.  */	\
   1746       1.1  christos   string_temp = POP_FAILURE_POINTER ();					\
   1747       1.1  christos   if (string_temp != NULL)						\
   1748       1.1  christos     str = (const CHAR_T *) string_temp;					\
   1749       1.1  christos 									\
   1750       1.1  christos   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   1751       1.1  christos   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   1752       1.1  christos   DEBUG_PRINT1 ("'\n");							\
   1753       1.1  christos 									\
   1754       1.1  christos   pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
   1755       1.1  christos   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
   1756       1.1  christos   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
   1757       1.1  christos 									\
   1758       1.1  christos   /* Restore register info: high bound first, then low bound.  */	\
   1759       1.1  christos   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1760       1.1  christos   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
   1761       1.1  christos 									\
   1762       1.1  christos   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1763       1.1  christos   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
   1764       1.1  christos 									\
   1765       1.1  christos   if (1)								\
   1766       1.1  christos     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
   1767       1.1  christos       {									\
   1768       1.1  christos 	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
   1769       1.1  christos 									\
   1770       1.1  christos 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
   1771       1.1  christos 	DEBUG_PRINT2 ("      info: %p\n",				\
   1772       1.1  christos 		      reg_info[this_reg].word.pointer);			\
   1773       1.1  christos 									\
   1774       1.1  christos 	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1775       1.1  christos 	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
   1776       1.1  christos 									\
   1777       1.1  christos 	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1778       1.1  christos 	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
   1779       1.1  christos       }									\
   1780       1.1  christos   else									\
   1781       1.1  christos     {									\
   1782       1.1  christos       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
   1783       1.1  christos 	{								\
   1784       1.1  christos 	  reg_info[this_reg].word.integer = 0;				\
   1785       1.1  christos 	  regend[this_reg] = 0;						\
   1786       1.1  christos 	  regstart[this_reg] = 0;					\
   1787       1.1  christos 	}								\
   1788       1.1  christos       highest_active_reg = high_reg;					\
   1789       1.1  christos     }									\
   1790       1.1  christos 									\
   1791       1.1  christos   set_regs_matched_done = 0;						\
   1792       1.1  christos   DEBUG_STATEMENT (nfailure_points_popped++);				\
   1793       1.1  christos } /* POP_FAILURE_POINT */
   1794       1.1  christos 
   1795       1.1  christos /* Structure for per-register (a.k.a. per-group) information.
   1797       1.1  christos    Other register information, such as the
   1798       1.1  christos    starting and ending positions (which are addresses), and the list of
   1799       1.1  christos    inner groups (which is a bits list) are maintained in separate
   1800       1.1  christos    variables.
   1801       1.1  christos 
   1802       1.1  christos    We are making a (strictly speaking) nonportable assumption here: that
   1803       1.1  christos    the compiler will pack our bit fields into something that fits into
   1804       1.1  christos    the type of `word', i.e., is something that fits into one item on the
   1805       1.1  christos    failure stack.  */
   1806       1.1  christos 
   1807       1.1  christos 
   1808       1.1  christos /* Declarations and macros for re_match_2.  */
   1809       1.1  christos 
   1810       1.1  christos typedef union
   1811       1.1  christos {
   1812       1.1  christos   PREFIX(fail_stack_elt_t) word;
   1813       1.1  christos   struct
   1814       1.1  christos   {
   1815       1.1  christos       /* This field is one if this group can match the empty string,
   1816       1.1  christos          zero if not.  If not yet determined, it holds
                                  `MATCH_NULL_UNSET_VALUE' (3, the largest value a 2-bit field
                                  can store).  */
   1817       1.1  christos # define MATCH_NULL_UNSET_VALUE 3
   1818       1.1  christos     unsigned match_null_string_p : 2;
   1819       1.1  christos     unsigned is_active : 1;
   1820       1.1  christos     unsigned matched_something : 1; /* Set by SET_REGS_MATCHED.  */
   1821       1.1  christos     unsigned ever_matched_something : 1; /* Set by SET_REGS_MATCHED.  */
   1822       1.1  christos   } bits;
   1823       1.1  christos } PREFIX(register_info_type);
   1824       1.1  christos 
# ifndef DEFINED_ONCE
/* Lvalue accessors for the bit fields of a register info value R.  */
#  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#  define IS_ACTIVE(R)  ((R).bits.is_active)
#  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)


/* To be called once a real character has been matched.  For every
   register from `lowest_active_reg' through `highest_active_reg' it
   raises both the `matched_something' and `ever_matched_something'
   flags in `reg_info'.  The work is done only while
   `set_regs_matched_done' is zero; the macro then sets that variable
   so later calls are no-ops until something clears it again.  */
#  define SET_REGS_MATCHED()						\
  do									\
    {									\
      active_reg_t reg_no;						\
      if (set_regs_matched_done)					\
	break;								\
      set_regs_matched_done = 1;					\
      reg_no = lowest_active_reg;					\
      while (reg_no <= highest_active_reg)				\
	{								\
	  EVER_MATCHED_SOMETHING (reg_info[reg_no]) = 1;		\
	  MATCHED_SOMETHING (reg_info[reg_no]) = 1;			\
	  reg_no++;							\
	}								\
    }									\
  while (0)
# endif /* not DEFINED_ONCE */
   1852       1.1  christos 
   1853       1.1  christos /* Registers are set to a sentinel when they haven't yet matched.  The
                            sentinel is the address of this otherwise unused object, so
                            REG_UNSET is a plain pointer comparison.  */
   1854       1.1  christos static CHAR_T PREFIX(reg_unset_dummy);
   1855       1.1  christos # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
   1856       1.1  christos # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
   1857       1.1  christos 
   1858       1.1  christos /* Subroutine declarations and macros for regex_compile.  These are
                            prototypes only; the range-compiling helper has a separate
                            signature for the WCHAR and BYTE builds.  */
   1859       1.1  christos static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
   1860       1.1  christos static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
   1861       1.1  christos                                int arg1, int arg2);
   1862       1.1  christos static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
   1863       1.1  christos                                 int arg, UCHAR_T *end);
   1864       1.1  christos static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
   1865       1.1  christos                                 int arg1, int arg2, UCHAR_T *end);
   1866       1.1  christos static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
   1867       1.1  christos                                          const CHAR_T *p,
   1868       1.1  christos                                          reg_syntax_t syntax);
   1869       1.1  christos static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
   1870       1.1  christos                                          const CHAR_T *pend,
   1871       1.1  christos                                          reg_syntax_t syntax);
   1872       1.1  christos # ifdef WCHAR
   1873       1.1  christos static reg_errcode_t wcs_compile_range (CHAR_T range_start,
   1874       1.1  christos                                         const CHAR_T **p_ptr,
   1875       1.1  christos                                         const CHAR_T *pend,
   1876       1.1  christos                                         char *translate,
   1877       1.1  christos                                         reg_syntax_t syntax,
   1878       1.1  christos                                         UCHAR_T *b,
   1879       1.1  christos                                         CHAR_T *char_set);
   1880       1.1  christos static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
   1881       1.1  christos # else /* BYTE */
   1882       1.1  christos static reg_errcode_t byte_compile_range (unsigned int range_start,
   1883       1.1  christos                                          const char **p_ptr,
   1884       1.1  christos                                          const char *pend,
   1885       1.1  christos                                          char *translate,
   1886       1.1  christos                                          reg_syntax_t syntax,
   1887       1.1  christos                                          unsigned char *b);
   1888       1.1  christos # endif /* WCHAR */
   1889       1.1  christos 
   1890       1.1  christos /* Fetch the next character in the uncompiled pattern---translating it
   1891       1.1  christos    if necessary.  Also cast from a signed character in the constant
   1892       1.1  christos    string passed to us by the user to an unsigned char that we can use
   1893       1.1  christos    as an array index (in, e.g., `translate').  The macro reads and
                            advances `p', and makes the enclosing function return REG_EEND
                            when `p' has already reached `pend'.  */
   1894       1.1  christos /* In the WCHAR variant we translate only if character <= 0xff,
   1895       1.1  christos    because it is impossible to allocate 4GB array for some encodings
   1896       1.1  christos    which have 4 byte character_set like UCS4.  */
   1897       1.1  christos # ifndef PATFETCH
   1898       1.1  christos #  ifdef WCHAR
   1899       1.1  christos #   define PATFETCH(c)							\
   1900       1.1  christos   do {if (p == pend) return REG_EEND;					\
   1901       1.1  christos     c = (UCHAR_T) *p++;							\
   1902       1.1  christos     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
   1903       1.1  christos   } while (0)
   1904       1.1  christos #  else /* BYTE */
   1905       1.1  christos #   define PATFETCH(c)							\
   1906       1.1  christos   do {if (p == pend) return REG_EEND;					\
   1907       1.1  christos     c = (unsigned char) *p++;						\
   1908       1.1  christos     if (translate) c = (unsigned char) translate[c];			\
   1909       1.1  christos   } while (0)
   1910       1.1  christos #  endif /* WCHAR */
   1911       1.1  christos # endif
   1912       1.1  christos 
   1913       1.1  christos /* Fetch the next character in the uncompiled pattern, with no
   1914       1.1  christos    translation.  Like PATFETCH, returns REG_EEND from the enclosing
                            function at the end of the pattern.  */
   1915       1.1  christos # define PATFETCH_RAW(c)						\
   1916       1.1  christos   do {if (p == pend) return REG_EEND;					\
   1917       1.1  christos     c = (UCHAR_T) *p++; 	       					\
   1918       1.1  christos   } while (0)
   1919       1.1  christos 
   1920       1.1  christos /* Go backwards one character in the pattern (no bounds check).  */
   1921       1.1  christos # define PATUNFETCH p--
   1922       1.1  christos 
   1923       1.1  christos 
   1924       1.1  christos /* If `translate' is non-null, return translate[D], else just D.  We
   1925       1.1  christos    cast the subscript to translate because some data is declared as
   1926       1.1  christos    `char *', to avoid warnings when a string constant is passed.  But
   1927       1.1  christos    when we use a character as a subscript we must make it unsigned.
                            `translate' is taken from the enclosing scope, and D appears more
                            than once in the expansion, so it should have no side effects.  */
   1928       1.1  christos /* In the WCHAR variant we translate only if character <= 0xff,
   1929       1.1  christos    because it is impossible to allocate 4GB array for some encodings
   1930       1.1  christos    which have 4 byte character_set like UCS4.  */
   1931       1.1  christos 
   1932       1.1  christos # ifndef TRANSLATE
   1933       1.1  christos #  ifdef WCHAR
   1934       1.1  christos #   define TRANSLATE(d) \
   1935       1.1  christos   ((translate && ((UCHAR_T) (d)) <= 0xff) \
   1936       1.1  christos    ? (char) translate[(unsigned char) (d)] : (d))
   1937       1.1  christos # else /* BYTE */
   1938       1.1  christos #   define TRANSLATE(d) \
   1939       1.1  christos   (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
   1940       1.1  christos #  endif /* WCHAR */
   1941       1.1  christos # endif
   1942       1.1  christos 
   1943       1.1  christos 
   1944       1.1  christos /* Macros for outputting the compiled pattern into `buffer'.  They use
                            the output pointer `b' and the pattern buffer `bufp' from the
                            enclosing scope.  */
   1945       1.1  christos 
   1946       1.1  christos /* If the buffer isn't allocated when it comes in, use this.  */
   1947       1.1  christos # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
   1948       1.1  christos 
   1949       1.1  christos /* Make sure we have room for at least N more elements in the buffer:
                            N * sizeof (CHAR_T) bytes in the WCHAR variant, N bytes in the
                            BYTE variant.  Expands to a bare `while' statement that keeps
                            calling EXTEND_BUFFER until the space is available.  */
   1950       1.1  christos # ifdef WCHAR
   1951       1.1  christos #  define GET_BUFFER_SPACE(n)						\
   1952       1.1  christos     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
   1953       1.1  christos             + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
   1954       1.1  christos       EXTEND_BUFFER ()
   1955       1.1  christos # else /* BYTE */
   1956       1.1  christos #  define GET_BUFFER_SPACE(n)						\
   1957       1.1  christos     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
   1958       1.1  christos       EXTEND_BUFFER ()
   1959       1.1  christos # endif /* WCHAR */
   1960       1.1  christos 
   1961       1.1  christos /* Make sure we have room for one more element and then store C, cast
                            to UCHAR_T, at `b', advancing `b'.  */
   1962       1.1  christos # define BUF_PUSH(c)							\
   1963       1.1  christos   do {									\
   1964       1.1  christos     GET_BUFFER_SPACE (1);						\
   1965       1.1  christos     *b++ = (UCHAR_T) (c);						\
   1966       1.1  christos   } while (0)
   1967       1.1  christos 
   1968       1.1  christos 
   1969       1.1  christos /* Ensure we have room for two more elements and then append C1 and C2.  */
   1970       1.1  christos # define BUF_PUSH_2(c1, c2)						\
   1971       1.1  christos   do {									\
   1972       1.1  christos     GET_BUFFER_SPACE (2);						\
   1973       1.1  christos     *b++ = (UCHAR_T) (c1);						\
   1974       1.1  christos     *b++ = (UCHAR_T) (c2);						\
   1975       1.1  christos   } while (0)
   1976       1.1  christos 
   1977       1.1  christos 
   1978       1.1  christos /* As with BUF_PUSH_2, except for three elements.  */
   1979       1.1  christos # define BUF_PUSH_3(c1, c2, c3)						\
   1980       1.1  christos   do {									\
   1981       1.1  christos     GET_BUFFER_SPACE (3);						\
   1982       1.1  christos     *b++ = (UCHAR_T) (c1);						\
   1983       1.1  christos     *b++ = (UCHAR_T) (c2);						\
   1984       1.1  christos     *b++ = (UCHAR_T) (c3);						\
   1985       1.1  christos   } while (0)
   1986       1.1  christos 
   1987       1.1  christos /* Store a jump with opcode OP at LOC to location TO.  We store a
   1988       1.1  christos    relative address: TO minus LOC, less the (1 + OFFSET_ADDRESS_SIZE)
                            elements that are subtracted for the jump itself (presumably the
                            opcode plus its address operand), truncated to int.  */
   1989       1.1  christos # define STORE_JUMP(op, loc, to) \
   1990       1.1  christos  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
   1991       1.1  christos 
   1992       1.1  christos /* Likewise, for a two-argument jump; ARG is passed through unchanged.  */
   1993       1.1  christos # define STORE_JUMP2(op, loc, to, arg) \
   1994       1.1  christos   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
   1995       1.1  christos 
   1996       1.1  christos /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
   1997       1.1  christos # define INSERT_JUMP(op, loc, to) \
   1998       1.1  christos   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
   1999       1.1  christos 
   2000       1.1  christos /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
   2001       1.1  christos # define INSERT_JUMP2(op, loc, to, arg) \
   2002       1.1  christos   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
   2003       1.1  christos 	      arg, b)
   2004       1.1  christos 
   2005       1.1  christos /* This is not an arbitrary limit: the arguments which represent offsets
   2006       1.1  christos    into the pattern are two bytes long.  So if 2^16 bytes turns out to
   2007       1.1  christos    be too small, many things would have to change.  */
   2008       1.1  christos /* Any other compiler which, like MSC, has allocation limit below 2^16
   2009       1.1  christos    bytes will have to use approach similar to what was done below for
   2010       1.1  christos    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   2011       1.1  christos    reallocating to 0 bytes.  Such thing is not going to work too well.
   2012       1.1  christos    You have been warned!!  */
   2013       1.1  christos # ifndef DEFINED_ONCE
   2014       1.1  christos #  if defined _MSC_VER  && !defined WIN32
   2015       1.1  christos /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   2016       1.1  christos    The REALLOC define eliminates a flurry of conversion warnings,
   2017       1.1  christos    but is not required. */
   2018       1.1  christos #   define MAX_BUF_SIZE  65500L
   2019       1.1  christos #   define REALLOC(p,s) realloc ((p), (size_t) (s))
   2020       1.1  christos #  else
   2021       1.1  christos #   define MAX_BUF_SIZE (1L << 16)
   2022       1.1  christos #   define REALLOC(p,s) realloc ((p), (s))
   2023       1.1  christos #  endif
   2024       1.1  christos 
   2025       1.1  christos /* EXTEND_BUFFER doubles the buffer size via realloc and
   2026       1.1  christos    resets the pointers that pointed into the old block to point to the
   2027       1.1  christos    correct places in the new one.  The doubled size is clamped to
   2028       1.1  christos    MAX_BUF_SIZE; once the buffer is already at that limit, a further
                            extension makes the enclosing function return REG_ESIZE.  The
                            helper macros below expect a local `incr' (the distance the
                            buffer moved) in the scope where they are expanded.  */
   2029       1.1  christos #  if __BOUNDED_POINTERS__
   2030       1.1  christos #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
   2031       1.1  christos #   define MOVE_BUFFER_POINTER(P) \
   2032       1.1  christos   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
   2033       1.1  christos #   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
   2034       1.1  christos   else						\
   2035       1.1  christos     {						\
   2036       1.1  christos       SET_HIGH_BOUND (b);			\
   2037       1.1  christos       SET_HIGH_BOUND (begalt);			\
   2038       1.1  christos       if (fixup_alt_jump)			\
   2039       1.1  christos 	SET_HIGH_BOUND (fixup_alt_jump);	\
   2040       1.1  christos       if (laststart)				\
   2041       1.1  christos 	SET_HIGH_BOUND (laststart);		\
   2042       1.1  christos       if (pending_exact)			\
   2043       1.1  christos 	SET_HIGH_BOUND (pending_exact);		\
   2044       1.1  christos     }
   2045       1.1  christos #  else
   2046       1.1  christos #   define MOVE_BUFFER_POINTER(P) (P) += incr
   2047       1.1  christos #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
   2048       1.1  christos #  endif
   2049       1.1  christos # endif /* not DEFINED_ONCE */
   2050       1.1  christos 
/* EXTEND_BUFFER () -- grow the compiled-pattern buffer.

   The size is doubled and clamped to MAX_BUF_SIZE.  If the buffer is
   already at the limit the enclosing function returns REG_ESIZE; if
   the reallocation fails it returns REG_ESPACE.  When the block moves,
   `b', `begalt' and (if non-null) `fixup_alt_jump', `laststart' and
   `pending_exact' are shifted by the same distance.

   The new size and the new pointer are staged in locals and stored
   into `bufp' only after REALLOC has succeeded.  On failure `bufp'
   therefore still describes the old, valid block instead of holding a
   null buffer with a doubled `allocated' and an unreachable (leaked)
   old block.

   Uses `bufp', COMPILED_BUFFER_VAR and the pointers named above from
   the enclosing scope.  The WCHAR variant assumes RETALLOC is a thin
   wrapper around realloc, which is why it can call REALLOC directly.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    UCHAR_T *new_buffer;						\
    unsigned long new_allocated;					\
    int wchar_count;							\
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE;					\
    /* How many characters can the new buffer hold?  */		\
    wchar_count = new_allocated / sizeof(UCHAR_T);			\
    if (wchar_count == 0) wchar_count = 1;				\
    /* Truncate the size to a whole number of characters.  */		\
    new_allocated = wchar_count * sizeof(UCHAR_T);			\
    new_buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
				      new_allocated);			\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    /* Success: only now publish the new block and its size.  */	\
    COMPILED_BUFFER_VAR = new_buffer;					\
    bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
    bufp->allocated = new_allocated;					\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    UCHAR_T *new_buffer;						\
    unsigned long new_allocated;					\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE;					\
    new_buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
				      new_allocated);			\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    /* Success: only now publish the new block and its size.  */	\
    bufp->buffer = new_buffer;						\
    bufp->allocated = new_allocated;					\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
   2114       1.1  christos 
   2115       1.1  christos # ifndef DEFINED_ONCE
   2116       1.1  christos /* Since we have one byte reserved for the register number argument to
   2117       1.1  christos    {start,stop}_memory, the maximum number of groups we can report
   2118       1.1  christos    things about is what fits in that byte.  */
   2119       1.1  christos #  define MAX_REGNUM 255
   2120       1.1  christos 
   2121       1.1  christos /* But patterns can have more than `MAX_REGNUM' registers.  We just
   2122       1.1  christos    ignore the excess.  */
   2123       1.1  christos typedef unsigned regnum_t;
   2124       1.1  christos 
   2125       1.1  christos 
   2126       1.1  christos /* Macros for the compile stack.  */
   2127       1.1  christos 
   2128       1.1  christos /* Since offsets can go either forwards or backwards, this type needs to
   2129       1.1  christos    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
   2130       1.1  christos /* int may be not enough when sizeof(int) == 2.  */
   2131       1.1  christos typedef long pattern_offset_t;
   2132       1.1  christos 
   2133       1.1  christos typedef struct
   2134       1.1  christos {
   2135       1.1  christos   pattern_offset_t begalt_offset;
   2136       1.1  christos   pattern_offset_t fixup_alt_jump;
   2137       1.1  christos   pattern_offset_t inner_group_offset;
   2138       1.1  christos   pattern_offset_t laststart_offset;
   2139       1.1  christos   regnum_t regnum;
   2140       1.1  christos } compile_stack_elt_t;
   2141       1.1  christos 
   2142       1.1  christos 
   2143       1.1  christos typedef struct
   2144       1.1  christos {
   2145       1.1  christos   compile_stack_elt_t *stack;		/* Array of entries; allocated and freed by regex_compile.  */
   2146       1.1  christos   unsigned size;			/* Number of entries allocated in `stack'.  */
   2147       1.1  christos   unsigned avail;			/* Offset of next open position.  */
   2148       1.1  christos } compile_stack_type;
   2149       1.1  christos 
   2150       1.1  christos 
   2151       1.1  christos #  define INIT_COMPILE_STACK_SIZE 32
   2152       1.1  christos 
   2153       1.1  christos #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
   2154       1.1  christos #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
   2155       1.1  christos 
   2156       1.1  christos /* The next available element, i.e. the slot at index `avail'.
                               Like the two macros above, this refers to a variable named
                               `compile_stack' in the scope where it is expanded.  It performs no
                               bounds check, so COMPILE_STACK_FULL has to be tested first.  */
   2157       1.1  christos #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
   2158       1.1  christos 
   2159       1.1  christos # endif /* not DEFINED_ONCE */
   2160       1.1  christos 
   2161       1.1  christos /* Set the bit for character C in a list.  */
   2162       1.1  christos # ifndef DEFINED_ONCE
   2163       1.1  christos #  define SET_LIST_BIT(c)                               \
   2164       1.1  christos   (b[((unsigned char) (c)) / BYTEWIDTH]               \
   2165       1.1  christos    |= 1 << (((unsigned char) c) % BYTEWIDTH))
   2166       1.1  christos # endif /* DEFINED_ONCE */
   2167       1.1  christos 
   2168       1.1  christos /* Get the next unsigned number in the uncompiled pattern.
                               Uses `p', `pend' and `c' from the enclosing scope.  Characters are
                               fetched with PATFETCH until the pattern is exhausted or a non-digit
                               has been fetched; that terminating character is consumed and is
                               left in `c'.  A negative NUM is reset to 0 before the first digit
                               is added, so NUM keeps its initial value when no digit is seen.
                               Once NUM exceeds RE_DUP_MAX, further digits are still consumed but
                               no longer accumulated (presumably to avoid overflow).  */
   2169       1.1  christos # define GET_UNSIGNED_NUMBER(num) \
   2170       1.1  christos   {									\
   2171       1.1  christos     while (p != pend)							\
   2172       1.1  christos       {									\
   2173       1.1  christos 	PATFETCH (c);							\
   2174       1.1  christos 	if (c < '0' || c > '9')						\
   2175       1.1  christos 	  break;							\
   2176       1.1  christos 	if (num <= RE_DUP_MAX)						\
   2177       1.1  christos 	  {								\
   2178       1.1  christos 	    if (num < 0)						\
   2179       1.1  christos 	      num = 0;							\
   2180       1.1  christos 	    num = num * 10 + c - '0';					\
   2181       1.1  christos 	  }								\
   2182       1.1  christos       }									\
   2183       1.1  christos   }
   2184       1.1  christos 
   2185       1.1  christos # ifndef DEFINED_ONCE
   2186       1.1  christos #  if defined _LIBC || WIDE_CHAR_SUPPORT
   2187       1.1  christos /* The GNU C library provides support for user-defined character classes
   2188       1.1  christos    and the functions from ISO C Amendment 1.  */
   2189       1.1  christos #   ifdef CHARCLASS_NAME_MAX
   2190       1.1  christos #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
   2191       1.1  christos #   else
   2192       1.1  christos /* This shouldn't happen but some implementation might still have this
   2193       1.1  christos    problem.  Use a reasonable default value.  */
   2194       1.1  christos #    define CHAR_CLASS_MAX_LENGTH 256
   2195       1.1  christos #   endif
   2196       1.1  christos 
   2197       1.1  christos #   ifdef _LIBC
   2198       1.1  christos #    define IS_CHAR_CLASS(string) __wctype (string)
   2199       1.1  christos #   else
   2200       1.1  christos #    define IS_CHAR_CLASS(string) wctype (string)
   2201       1.1  christos #   endif
   2202       1.1  christos #  else
   2203       1.1  christos #   define CHAR_CLASS_MAX_LENGTH  6 /* Length of the longest name below, namely `xdigit'.  */
   2204       1.1  christos 
                            /* Nonzero if STRING equals (per STREQ) one of the twelve character
                               class names listed here.  This variant is used when neither _LIBC
                               nor WIDE_CHAR_SUPPORT is in effect.  */
   2205       1.1  christos #   define IS_CHAR_CLASS(string)					\
   2206       1.1  christos    (STREQ (string, "alpha") || STREQ (string, "upper")			\
   2207       1.1  christos     || STREQ (string, "lower") || STREQ (string, "digit")		\
   2208       1.1  christos     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   2209       1.1  christos     || STREQ (string, "space") || STREQ (string, "print")		\
   2210       1.1  christos     || STREQ (string, "punct") || STREQ (string, "graph")		\
   2211       1.1  christos     || STREQ (string, "cntrl") || STREQ (string, "blank"))
   2212       1.1  christos #  endif
   2213       1.1  christos # endif /* DEFINED_ONCE */
   2214       1.1  christos 
   2215       1.1  christos # ifndef MATCH_MAY_ALLOCATE
   2217       1.1  christos 
   2218       1.1  christos /* If we cannot allocate large objects within re_match_2_internal,
   2219       1.1  christos    we make the fail stack and register vectors global.
   2220       1.1  christos    The fail stack, we grow to the maximum size when a regexp
   2221       1.1  christos    is compiled.
   2222       1.1  christos    The register vectors, we adjust in size each time we
   2223       1.1  christos    compile a regexp, according to the number of registers it needs.  */
   2224       1.1  christos 
   2225       1.1  christos static PREFIX(fail_stack_type) fail_stack;
   2226       1.1  christos 
   2227       1.1  christos /* Size with which the following vectors are currently allocated.
   2228       1.1  christos    That is so we can make them bigger as needed,
   2229       1.1  christos    but never make them smaller.  */
   2230       1.1  christos #  ifdef DEFINED_ONCE
   2231       1.1  christos static int regs_allocated_size;
   2232       1.1  christos 
   2233       1.1  christos static const char **     regstart, **     regend;
   2234       1.1  christos static const char ** old_regstart, ** old_regend;
   2235       1.1  christos static const char **best_regstart, **best_regend;
   2236       1.1  christos static const char **reg_dummy;
   2237       1.1  christos #  endif /* DEFINED_ONCE */
   2238       1.1  christos 
   2239       1.1  christos static PREFIX(register_info_type) *PREFIX(reg_info);	/* Grown by PREFIX(regex_grow_registers).  */
   2240       1.1  christos static PREFIX(register_info_type) *PREFIX(reg_info_dummy);	/* Likewise.  */
   2241       1.1  christos 
   2242       1.1  christos /* Make the register vectors big enough for NUM_REGS registers,
   2243       1.1  christos    but don't make them smaller.
                               When NUM_REGS exceeds regs_allocated_size, each of the nine
                               vectors is resized through RETALLOC_IF and regs_allocated_size is
                               then set to NUM_REGS; otherwise nothing is done.
                               NOTE(review): the function returns void and does not inspect the
                               pointers afterwards; how RETALLOC_IF behaves on allocation failure
                               should be confirmed at its definition.  */
   2244       1.1  christos 
   2245       1.1  christos static void
   2246       1.1  christos PREFIX(regex_grow_registers) (int num_regs)
   2247       1.1  christos {
   2248       1.1  christos   if (num_regs > regs_allocated_size)
   2249       1.1  christos     {
   2250       1.1  christos       RETALLOC_IF (regstart,	 num_regs, const char *);
   2251       1.1  christos       RETALLOC_IF (regend,	 num_regs, const char *);
   2252       1.1  christos       RETALLOC_IF (old_regstart, num_regs, const char *);
   2253       1.1  christos       RETALLOC_IF (old_regend,	 num_regs, const char *);
   2254       1.1  christos       RETALLOC_IF (best_regstart, num_regs, const char *);
   2255       1.1  christos       RETALLOC_IF (best_regend,	 num_regs, const char *);
   2256       1.1  christos       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
   2257       1.1  christos       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
   2258       1.1  christos       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
   2259       1.1  christos 
   2260       1.1  christos       regs_allocated_size = num_regs;
   2261       1.1  christos     }
   2262       1.1  christos }
   2263       1.1  christos 
   2264       1.1  christos # endif /* not MATCH_MAY_ALLOCATE */
   2265       1.1  christos 
   2266       1.1  christos # ifndef DEFINED_ONCE
   2268       1.1  christos static boolean group_in_compile_stack (compile_stack_type compile_stack,
   2269       1.1  christos                                        regnum_t regnum);
   2270       1.1  christos # endif /* not DEFINED_ONCE */
   2271       1.1  christos 
   2272       1.1  christos /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   2273       1.1  christos    Returns one of error codes defined in `regex.h', or zero for success.
   2274       1.1  christos 
   2275       1.1  christos    Assumes the `allocated' (and perhaps `buffer') and `translate'
   2276       1.1  christos    fields are set in BUFP on entry.
   2277       1.1  christos 
   2278       1.1  christos    If it succeeds, results are put in BUFP (if it returns an error, the
   2279       1.1  christos    contents of BUFP are undefined):
   2280       1.1  christos      `buffer' is the compiled pattern;
   2281       1.1  christos      `syntax' is set to SYNTAX;
   2282       1.1  christos      `used' is set to the length of the compiled pattern;
   2283       1.1  christos      `fastmap_accurate' is zero;
   2284       1.1  christos      `re_nsub' is the number of subexpressions in PATTERN;
   2285       1.1  christos      `not_bol' and `not_eol' are zero;
   2286       1.1  christos 
   2287       1.1  christos    The `fastmap' and `newline_anchor' fields are neither
   2288       1.1  christos    examined nor set.  */
   2289       1.1  christos 
   2290       1.1  christos /* Return, freeing storage we allocated.
                               Expands to a `return' statement whose comma expression frees
                               compile_stack.stack (and, ifdef WCHAR, first pattern, mbs_offset
                               and is_binary) and then yields VALUE.  It can therefore only be
                               used inside a function where those names are in scope.  */
   2291       1.1  christos # ifdef WCHAR
   2292       1.1  christos #  define FREE_STACK_RETURN(value)		\
   2293       1.1  christos   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
   2294       1.1  christos # else
   2295       1.1  christos #  define FREE_STACK_RETURN(value)		\
   2296       1.1  christos   return (free (compile_stack.stack), value)
   2297       1.1  christos # endif /* WCHAR */
   2298       1.1  christos 
   2299       1.1  christos static reg_errcode_t
   2300       1.1  christos PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
   2301       1.1  christos                        size_t ARG_PREFIX(size), reg_syntax_t syntax,
   2302       1.1  christos                        struct re_pattern_buffer *bufp)
   2303       1.1  christos {
   2304       1.1  christos   /* We fetch characters from PATTERN here.  Even though PATTERN is
   2305       1.1  christos      `char *' (i.e., signed), we declare these variables as unsigned, so
   2306       1.1  christos      they can be reliably used as array indices.  */
   2307       1.1  christos   register UCHAR_T c, c1;
   2308       1.1  christos 
   2309       1.1  christos #ifdef WCHAR
   2310       1.1  christos   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
   2311       1.1  christos   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
   2312       1.1  christos   size_t size;
   2313       1.1  christos   /* Offset buffer for optimization.  See convert_mbs_to_wcs.  */
   2314       1.1  christos   int *mbs_offset = NULL;
   2315       1.1  christos   /* Holds whether each wchar_t is binary data or not.  */
   2316       1.1  christos   char *is_binary = NULL;
   2317       1.1  christos   /* A flag whether exactn is handling binary data or not.  */
   2318       1.1  christos   char is_exactn_bin = FALSE;
   2319       1.1  christos #endif /* WCHAR */
   2320       1.1  christos 
   2321       1.1  christos   /* A random temporary spot in PATTERN.  */
   2322       1.1  christos   const CHAR_T *p1;
   2323       1.1  christos 
   2324       1.1  christos   /* Points to the end of the buffer, where we should append.  */
   2325       1.1  christos   register UCHAR_T *b;
   2326       1.1  christos 
   2327       1.1  christos   /* Keeps track of unclosed groups.  */
   2328       1.1  christos   compile_stack_type compile_stack;
   2329       1.1  christos 
   2330       1.1  christos   /* Points to the current (ending) position in the pattern.  */
   2331       1.1  christos #ifdef WCHAR
   2332       1.1  christos   const CHAR_T *p;
   2333       1.1  christos   const CHAR_T *pend;
   2334       1.1  christos #else /* BYTE */
   2335       1.1  christos   const CHAR_T *p = pattern;
   2336       1.1  christos   const CHAR_T *pend = pattern + size;
   2337       1.1  christos #endif /* WCHAR */
   2338       1.1  christos 
   2339       1.1  christos   /* How to translate the characters in the pattern.  */
   2340       1.1  christos   RE_TRANSLATE_TYPE translate = bufp->translate;
   2341       1.1  christos 
   2342       1.1  christos   /* Address of the count-byte of the most recently inserted `exactn'
   2343       1.1  christos      command.  This makes it possible to tell if a new exact-match
   2344       1.1  christos      character can be added to that command or if the character requires
   2345       1.1  christos      a new `exactn' command.  */
   2346       1.1  christos   UCHAR_T *pending_exact = 0;
   2347       1.1  christos 
   2348       1.1  christos   /* Address of start of the most recently finished expression.
   2349       1.1  christos      This tells, e.g., postfix * where to find the start of its
   2350       1.1  christos      operand.  Reset at the beginning of groups and alternatives.  */
   2351       1.1  christos   UCHAR_T *laststart = 0;
   2352       1.1  christos 
   2353       1.1  christos   /* Address of beginning of regexp, or inside of last group.  */
   2354       1.1  christos   UCHAR_T *begalt;
   2355       1.1  christos 
   2356       1.1  christos   /* Address of the place where a forward jump should go to the end of
   2357       1.1  christos      the containing expression.  Each alternative of an `or' -- except the
   2358       1.1  christos      last -- ends with a forward jump of this sort.  */
   2359       1.1  christos   UCHAR_T *fixup_alt_jump = 0;
   2360       1.1  christos 
   2361       1.1  christos   /* Counts open-groups as they are encountered.  Remembered for the
   2362       1.1  christos      matching close-group on the compile stack, so the same register
   2363       1.1  christos      number is put in the stop_memory as the start_memory.  */
   2364       1.1  christos   regnum_t regnum = 0;
   2365       1.1  christos 
   2366       1.1  christos #ifdef WCHAR
   2367       1.1  christos   /* Initialize the wchar_t PATTERN and offset_buffer.  */
   2368       1.1  christos   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
   2369       1.1  christos   mbs_offset = TALLOC(csize + 1, int);
   2370       1.1  christos   is_binary = TALLOC(csize + 1, char);
   2371       1.1  christos   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
   2372       1.1  christos     {
   2373       1.1  christos       free(pattern);
   2374       1.1  christos       free(mbs_offset);
   2375       1.1  christos       free(is_binary);
   2376       1.1  christos       return REG_ESPACE;
   2377       1.1  christos     }
   2378       1.1  christos   pattern[csize] = L'\0';	/* sentinel */
   2379       1.1  christos   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
   2380       1.1  christos   pend = p + size;
   2381       1.1  christos   if (size < 0)
   2382       1.1  christos     {
   2383       1.1  christos       free(pattern);
   2384       1.1  christos       free(mbs_offset);
   2385       1.1  christos       free(is_binary);
   2386       1.1  christos       return REG_BADPAT;
   2387       1.1  christos     }
   2388       1.1  christos #endif
   2389       1.1  christos 
   2390       1.1  christos #ifdef DEBUG
   2391       1.1  christos   DEBUG_PRINT1 ("\nCompiling pattern: ");
   2392       1.1  christos   if (debug)
   2393       1.1  christos     {
   2394       1.1  christos       unsigned debug_count;
   2395       1.1  christos 
   2396       1.1  christos       for (debug_count = 0; debug_count < size; debug_count++)
   2397       1.1  christos         PUT_CHAR (pattern[debug_count]);
   2398       1.1  christos       putchar ('\n');
   2399       1.1  christos     }
   2400       1.1  christos #endif /* DEBUG */
   2401       1.1  christos 
   2402       1.1  christos   /* Initialize the compile stack.  */
   2403       1.1  christos   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
   2404       1.1  christos   if (compile_stack.stack == NULL)
   2405       1.1  christos     {
   2406       1.1  christos #ifdef WCHAR
   2407       1.1  christos       free(pattern);
   2408       1.1  christos       free(mbs_offset);
   2409       1.1  christos       free(is_binary);
   2410       1.1  christos #endif
   2411       1.1  christos       return REG_ESPACE;
   2412       1.1  christos     }
   2413       1.1  christos 
   2414       1.1  christos   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   2415       1.1  christos   compile_stack.avail = 0;
   2416       1.1  christos 
   2417       1.1  christos   /* Initialize the pattern buffer.  */
   2418       1.1  christos   bufp->syntax = syntax;
   2419       1.1  christos   bufp->fastmap_accurate = 0;
   2420       1.1  christos   bufp->not_bol = bufp->not_eol = 0;
   2421       1.1  christos 
   2422       1.1  christos   /* Set `used' to zero, so that if we return an error, the pattern
   2423       1.1  christos      printer (for debugging) will think there's no pattern.  We reset it
   2424       1.1  christos      at the end.  */
   2425       1.1  christos   bufp->used = 0;
   2426       1.1  christos 
   2427       1.1  christos   /* Always count groups, whether or not bufp->no_sub is set.  */
   2428       1.1  christos   bufp->re_nsub = 0;
   2429       1.1  christos 
   2430       1.1  christos #if !defined emacs && !defined SYNTAX_TABLE
   2431       1.1  christos   /* Initialize the syntax table.  */
   2432       1.1  christos    init_syntax_once ();
   2433       1.1  christos #endif
   2434       1.1  christos 
   2435       1.1  christos   if (bufp->allocated == 0)
   2436       1.1  christos     {
   2437       1.1  christos       if (bufp->buffer)
   2438       1.1  christos 	{ /* If zero allocated, but buffer is non-null, try to realloc
   2439       1.1  christos              enough space.  This loses if buffer's address is bogus, but
   2440       1.1  christos              that is the user's responsibility.  */
   2441       1.1  christos #ifdef WCHAR
   2442       1.1  christos 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
   2443       1.1  christos 	     buffer.  */
   2444       1.1  christos           free(bufp->buffer);
   2445       1.1  christos           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
   2446       1.1  christos 					UCHAR_T);
   2447       1.1  christos #else
   2448       1.1  christos           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
   2449       1.1  christos #endif /* WCHAR */
   2450       1.1  christos         }
   2451       1.1  christos       else
   2452       1.1  christos         { /* Caller did not allocate a buffer.  Do it for them.  */
   2453       1.1  christos           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
   2454       1.1  christos 					UCHAR_T);
   2455       1.1  christos         }
   2456       1.1  christos 
   2457       1.1  christos       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
   2458       1.1  christos #ifdef WCHAR
   2459       1.1  christos       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
   2460       1.1  christos #endif /* WCHAR */
   2461       1.1  christos       bufp->allocated = INIT_BUF_SIZE;
   2462       1.1  christos     }
   2463       1.1  christos #ifdef WCHAR
   2464       1.1  christos   else
   2465       1.1  christos     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
   2466       1.1  christos #endif
   2467       1.1  christos 
   2468       1.1  christos   begalt = b = COMPILED_BUFFER_VAR;
   2469       1.1  christos 
   2470       1.1  christos   /* Loop through the uncompiled pattern until we're at the end.  */
   2471       1.1  christos   while (p != pend)
   2472       1.1  christos     {
   2473       1.1  christos       PATFETCH (c);
   2474       1.1  christos 
   2475       1.1  christos       switch (c)
   2476       1.1  christos         {
   2477       1.1  christos         case '^':
   2478       1.1  christos           {
   2479       1.1  christos             if (   /* If at start of pattern, it's an operator.  */
   2480       1.1  christos                    p == pattern + 1
   2481       1.1  christos                    /* If context independent, it's an operator.  */
   2482       1.1  christos                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2483       1.1  christos                    /* Otherwise, depends on what's come before.  */
   2484       1.1  christos                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
   2485       1.1  christos               BUF_PUSH (begline);
   2486       1.1  christos             else
   2487       1.1  christos               goto normal_char;
   2488       1.1  christos           }
   2489       1.1  christos           break;
   2490       1.1  christos 
   2491       1.1  christos 
   2492       1.1  christos         case '$':
   2493       1.1  christos           {
   2494       1.1  christos             if (   /* If at end of pattern, it's an operator.  */
   2495       1.1  christos                    p == pend
   2496       1.1  christos                    /* If context independent, it's an operator.  */
   2497       1.1  christos                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2498       1.1  christos                    /* Otherwise, depends on what's next.  */
   2499   1.1.1.6  christos                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
   2500       1.1  christos                BUF_PUSH (endline);
   2501       1.1  christos              else
   2502       1.1  christos                goto normal_char;
   2503       1.1  christos            }
   2504       1.1  christos            break;
   2505       1.1  christos 
   2506       1.1  christos 
   2507       1.1  christos 	case '+':
   2508       1.1  christos         case '?':
   2509       1.1  christos           if ((syntax & RE_BK_PLUS_QM)
   2510       1.1  christos               || (syntax & RE_LIMITED_OPS))
   2511       1.1  christos             goto normal_char;
   2512       1.1  christos 	  /* Fall through.  */
   2513       1.1  christos         handle_plus:
   2514       1.1  christos         case '*':
   2515       1.1  christos           /* If there is no previous pattern... */
   2516       1.1  christos           if (!laststart)
   2517       1.1  christos             {
   2518       1.1  christos               if (syntax & RE_CONTEXT_INVALID_OPS)
   2519       1.1  christos                 FREE_STACK_RETURN (REG_BADRPT);
   2520       1.1  christos               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
   2521       1.1  christos                 goto normal_char;
   2522       1.1  christos             }
   2523       1.1  christos 
   2524       1.1  christos           {
   2525       1.1  christos             /* Are we optimizing this jump?  */
   2526       1.1  christos             boolean keep_string_p = false;
   2527       1.1  christos 
   2528       1.1  christos             /* 1 means zero (many) matches is allowed.  */
   2529       1.1  christos             char zero_times_ok = 0, many_times_ok = 0;
   2530       1.1  christos 
   2531       1.1  christos             /* If there is a sequence of repetition chars, collapse it
   2532       1.1  christos                down to just one (the right one).  We can't combine
   2533       1.1  christos                interval operators with these because of, e.g., `a{2}*',
   2534       1.1  christos                which should only match an even number of `a's.  */
   2535       1.1  christos 
   2536       1.1  christos             for (;;)
   2537       1.1  christos               {
   2538       1.1  christos                 zero_times_ok |= c != '+';
   2539       1.1  christos                 many_times_ok |= c != '?';
   2540       1.1  christos 
   2541       1.1  christos                 if (p == pend)
   2542       1.1  christos                   break;
   2543       1.1  christos 
   2544       1.1  christos                 PATFETCH (c);
   2545       1.1  christos 
   2546       1.1  christos                 if (c == '*'
   2547       1.1  christos                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
   2548       1.1  christos                   ;
   2549       1.1  christos 
   2550       1.1  christos                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
   2551       1.1  christos                   {
   2552       1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2553       1.1  christos 
   2554       1.1  christos                     PATFETCH (c1);
   2555       1.1  christos                     if (!(c1 == '+' || c1 == '?'))
   2556       1.1  christos                       {
   2557       1.1  christos                         PATUNFETCH;
   2558       1.1  christos                         PATUNFETCH;
   2559       1.1  christos                         break;
   2560       1.1  christos                       }
   2561       1.1  christos 
   2562       1.1  christos                     c = c1;
   2563       1.1  christos                   }
   2564       1.1  christos                 else
   2565       1.1  christos                   {
   2566       1.1  christos                     PATUNFETCH;
   2567       1.1  christos                     break;
   2568       1.1  christos                   }
   2569       1.1  christos 
   2570       1.1  christos                 /* If we get here, we found another repeat character.  */
   2571       1.1  christos                }
   2572       1.1  christos 
   2573       1.1  christos             /* Star, etc. applied to an empty pattern is equivalent
   2574       1.1  christos                to an empty pattern.  */
   2575       1.1  christos             if (!laststart)
   2576       1.1  christos               break;
   2577       1.1  christos 
   2578       1.1  christos             /* Now we know whether or not zero matches is allowed
   2579       1.1  christos                and also whether or not two or more matches is allowed.  */
   2580       1.1  christos             if (many_times_ok)
   2581       1.1  christos               { /* More than one repetition is allowed, so put in at the
   2582       1.1  christos                    end a backward relative jump from `b' to before the next
   2583       1.1  christos                    jump we're going to put in below (which jumps from
   2584       1.1  christos                    laststart to after this jump).
   2585       1.1  christos 
   2586       1.1  christos                    But if we are at the `*' in the exact sequence `.*\n',
   2587       1.1  christos                    insert an unconditional jump backwards to the .,
   2588       1.1  christos                    instead of the beginning of the loop.  This way we only
   2589       1.1  christos                    push a failure point once, instead of every time
   2590       1.1  christos                    through the loop.  */
   2591       1.1  christos                 assert (p - 1 > pattern);
   2592       1.1  christos 
   2593       1.1  christos                 /* Allocate the space for the jump.  */
   2594       1.1  christos                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2595       1.1  christos 
   2596       1.1  christos                 /* We know we are not at the first character of the pattern,
   2597       1.1  christos                    because laststart was nonzero.  And we've already
   2598       1.1  christos                    incremented `p', by the way, to be the character after
   2599       1.1  christos                    the `*'.  Do we have to do something analogous here
   2600       1.1  christos                    for null bytes, because of RE_DOT_NOT_NULL?  */
   2601       1.1  christos                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
   2602       1.1  christos 		    && zero_times_ok
   2603       1.1  christos                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
   2604       1.1  christos                     && !(syntax & RE_DOT_NEWLINE))
   2605       1.1  christos                   { /* We have .*\n.  */
   2606       1.1  christos                     STORE_JUMP (jump, b, laststart);
   2607       1.1  christos                     keep_string_p = true;
   2608       1.1  christos                   }
   2609       1.1  christos                 else
   2610       1.1  christos                   /* Anything else.  */
   2611       1.1  christos                   STORE_JUMP (maybe_pop_jump, b, laststart -
   2612       1.1  christos 			      (1 + OFFSET_ADDRESS_SIZE));
   2613       1.1  christos 
   2614       1.1  christos                 /* We've added more stuff to the buffer.  */
   2615       1.1  christos                 b += 1 + OFFSET_ADDRESS_SIZE;
   2616       1.1  christos               }
   2617       1.1  christos 
   2618       1.1  christos             /* On failure, jump from laststart to b + 3, which will be the
   2619       1.1  christos                end of the buffer after this jump is inserted.  */
   2620       1.1  christos 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
   2621       1.1  christos 	       'b + 3'.  */
   2622       1.1  christos             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2623       1.1  christos             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
   2624       1.1  christos                                        : on_failure_jump,
   2625       1.1  christos                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
   2626       1.1  christos             pending_exact = 0;
   2627       1.1  christos             b += 1 + OFFSET_ADDRESS_SIZE;
   2628       1.1  christos 
   2629       1.1  christos             if (!zero_times_ok)
   2630       1.1  christos               {
   2631       1.1  christos                 /* At least one repetition is required, so insert a
   2632       1.1  christos                    `dummy_failure_jump' before the initial
   2633       1.1  christos                    `on_failure_jump' instruction of the loop. This
   2634       1.1  christos                    effects a skip over that instruction the first time
   2635       1.1  christos                    we hit that loop.  */
   2636       1.1  christos                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2637       1.1  christos                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
   2638       1.1  christos 			     2 + 2 * OFFSET_ADDRESS_SIZE);
   2639       1.1  christos                 b += 1 + OFFSET_ADDRESS_SIZE;
   2640       1.1  christos               }
   2641       1.1  christos             }
   2642       1.1  christos 	  break;
   2643       1.1  christos 
   2644       1.1  christos 
   2645       1.1  christos 	case '.':
   2646       1.1  christos           laststart = b;
   2647       1.1  christos           BUF_PUSH (anychar);
   2648       1.1  christos           break;
   2649       1.1  christos 
   2650       1.1  christos 
   2651       1.1  christos         case '[':
   2652       1.1  christos           {
   2653       1.1  christos             boolean had_char_class = false;
   2654       1.1  christos #ifdef WCHAR
   2655       1.1  christos 	    CHAR_T range_start = 0xffffffff;
   2656       1.1  christos #else
   2657       1.1  christos 	    unsigned int range_start = 0xffffffff;
   2658       1.1  christos #endif
   2659       1.1  christos             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2660       1.1  christos 
   2661       1.1  christos #ifdef WCHAR
   2662       1.1  christos 	    /* We assume a charset(_not) structure as a wchar_t array.
   2663       1.1  christos 	       charset[0] = (re_opcode_t) charset(_not)
   2664       1.1  christos                charset[1] = l (= length of char_classes)
   2665       1.1  christos                charset[2] = m (= length of collating_symbols)
   2666       1.1  christos                charset[3] = n (= length of equivalence_classes)
   2667       1.1  christos 	       charset[4] = o (= length of char_ranges)
   2668       1.1  christos 	       charset[5] = p (= length of chars)
   2669       1.1  christos 
   2670       1.1  christos                charset[6] = char_class (wctype_t)
   2671       1.1  christos                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
   2672       1.1  christos                          ...
   2673       1.1  christos                charset[l+5]  = char_class (wctype_t)
   2674       1.1  christos 
   2675       1.1  christos                charset[l+6]  = collating_symbol (wchar_t)
   2676       1.1  christos                             ...
   2677       1.1  christos                charset[l+m+5]  = collating_symbol (wchar_t)
   2678       1.1  christos 					ifdef _LIBC we use the index in
   2679       1.1  christos 					_NL_COLLATE_SYMB_EXTRAMB instead of
   2680       1.1  christos 					wchar_t string.
   2681       1.1  christos 
   2682       1.1  christos                charset[l+m+6]  = equivalence_classes (wchar_t)
   2683       1.1  christos                               ...
   2684       1.1  christos                charset[l+m+n+5]  = equivalence_classes (wchar_t)
   2685       1.1  christos 					ifdef _LIBC we use the index in
   2686       1.1  christos 					_NL_COLLATE_WEIGHT instead of
   2687       1.1  christos 					wchar_t string.
   2688       1.1  christos 
   2689       1.1  christos 	       charset[l+m+n+6] = range_start
   2690       1.1  christos 	       charset[l+m+n+7] = range_end
   2691       1.1  christos 	                       ...
   2692       1.1  christos 	       charset[l+m+n+2o+4] = range_start
   2693       1.1  christos 	       charset[l+m+n+2o+5] = range_end
   2694       1.1  christos 					ifdef _LIBC we use the value looked up
   2695       1.1  christos 					in _NL_COLLATE_COLLSEQ instead of
   2696       1.1  christos 					wchar_t character.
   2697       1.1  christos 
   2698       1.1  christos 	       charset[l+m+n+2o+6] = char
   2699       1.1  christos 	                          ...
   2700       1.1  christos 	       charset[l+m+n+2o+p+5] = char
   2701       1.1  christos 
   2702       1.1  christos 	     */
   2703       1.1  christos 
   2704       1.1  christos 	    /* We need at least 6 spaces: the opcode, the length of
   2705       1.1  christos                char_classes, the length of collating_symbols, the length of
   2706       1.1  christos                equivalence_classes, the length of char_ranges, the length of
   2707       1.1  christos                chars.  */
   2708       1.1  christos 	    GET_BUFFER_SPACE (6);
   2709       1.1  christos 
   2710       1.1  christos 	    /* Save b as laststart.  We also use laststart as the pointer
   2711       1.1  christos 	       to the first element of the charset here.
   2712       1.1  christos 	       In other words, laststart[i] indicates charset[i].  */
   2713       1.1  christos             laststart = b;
   2714       1.1  christos 
   2715       1.1  christos             /* We test `*p == '^' twice, instead of using an if
   2716       1.1  christos                statement, so we only need one BUF_PUSH.  */
   2717       1.1  christos             BUF_PUSH (*p == '^' ? charset_not : charset);
   2718       1.1  christos             if (*p == '^')
   2719       1.1  christos               p++;
   2720       1.1  christos 
   2721       1.1  christos             /* Push the length of char_classes, the length of
   2722       1.1  christos                collating_symbols, the length of equivalence_classes, the
   2723       1.1  christos                length of char_ranges and the length of chars.  */
   2724       1.1  christos             BUF_PUSH_3 (0, 0, 0);
   2725       1.1  christos             BUF_PUSH_2 (0, 0);
   2726       1.1  christos 
   2727       1.1  christos             /* Remember the first position in the bracket expression.  */
   2728       1.1  christos             p1 = p;
   2729       1.1  christos 
   2730       1.1  christos             /* charset_not matches newline according to a syntax bit.  */
   2731       1.1  christos             if ((re_opcode_t) b[-6] == charset_not
   2732       1.1  christos                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   2733       1.1  christos 	      {
   2734       1.1  christos 		BUF_PUSH('\n');
   2735       1.1  christos 		laststart[5]++; /* Update the length of characters  */
   2736       1.1  christos 	      }
   2737       1.1  christos 
   2738       1.1  christos             /* Read in characters and ranges, setting map bits.  */
   2739       1.1  christos             for (;;)
   2740       1.1  christos               {
   2741       1.1  christos                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2742       1.1  christos 
   2743       1.1  christos                 PATFETCH (c);
   2744       1.1  christos 
   2745       1.1  christos                 /* \ might escape characters inside [...] and [^...].  */
   2746       1.1  christos                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   2747       1.1  christos                   {
   2748       1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2749       1.1  christos 
   2750       1.1  christos                     PATFETCH (c1);
   2751       1.1  christos 		    BUF_PUSH(c1);
   2752       1.1  christos 		    laststart[5]++; /* Update the length of chars  */
   2753       1.1  christos 		    range_start = c1;
   2754       1.1  christos                     continue;
   2755       1.1  christos                   }
   2756       1.1  christos 
   2757       1.1  christos                 /* Could be the end of the bracket expression.  If it's
   2758       1.1  christos                    not (i.e., when the bracket expression is `[]' so
   2759       1.1  christos                    far), the ']' character bit gets set way below.  */
   2760       1.1  christos                 if (c == ']' && p != p1 + 1)
   2761       1.1  christos                   break;
   2762       1.1  christos 
   2763       1.1  christos                 /* Look ahead to see if it's a range when the last thing
   2764       1.1  christos                    was a character class.  */
   2765       1.1  christos                 if (had_char_class && c == '-' && *p != ']')
   2766       1.1  christos                   FREE_STACK_RETURN (REG_ERANGE);
   2767       1.1  christos 
   2768       1.1  christos                 /* Look ahead to see if it's a range when the last thing
   2769       1.1  christos                    was a character: if this is a hyphen not at the
   2770       1.1  christos                    beginning or the end of a list, then it's the range
   2771       1.1  christos                    operator.  */
   2772       1.1  christos                 if (c == '-'
   2773       1.1  christos                     && !(p - 2 >= pattern && p[-2] == '[')
   2774       1.1  christos                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   2775       1.1  christos                     && *p != ']')
   2776       1.1  christos                   {
   2777       1.1  christos                     reg_errcode_t ret;
   2778       1.1  christos 		    /* Allocate the space for range_start and range_end.  */
   2779       1.1  christos 		    GET_BUFFER_SPACE (2);
   2780       1.1  christos 		    /* Update the pointer to indicate end of buffer.  */
   2781       1.1  christos                     b += 2;
   2782       1.1  christos                     ret = wcs_compile_range (range_start, &p, pend, translate,
   2783       1.1  christos                                          syntax, b, laststart);
   2784       1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2785       1.1  christos                     range_start = 0xffffffff;
   2786       1.1  christos                   }
   2787       1.1  christos                 else if (p[0] == '-' && p[1] != ']')
   2788       1.1  christos                   { /* This handles ranges made up of characters only.  */
   2789       1.1  christos                     reg_errcode_t ret;
   2790       1.1  christos 
   2791       1.1  christos 		    /* Move past the `-'.  */
   2792       1.1  christos                     PATFETCH (c1);
   2793       1.1  christos 		    /* Allocate the space for range_start and range_end.  */
   2794       1.1  christos 		    GET_BUFFER_SPACE (2);
   2795       1.1  christos 		    /* Update the pointer to indicate end of buffer.  */
   2796       1.1  christos                     b += 2;
   2797       1.1  christos                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
   2798       1.1  christos                                          laststart);
   2799       1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2800       1.1  christos 		    range_start = 0xffffffff;
   2801       1.1  christos                   }
   2802       1.1  christos 
   2803       1.1  christos                 /* See if we're at the beginning of a possible character
   2804       1.1  christos                    class.  */
   2805       1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   2806       1.1  christos                   { /* Leave room for the null.  */
   2807       1.1  christos                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   2808       1.1  christos 
   2809       1.1  christos                     PATFETCH (c);
   2810       1.1  christos                     c1 = 0;
   2811       1.1  christos 
   2812       1.1  christos                     /* If pattern is `[[:'.  */
   2813       1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2814       1.1  christos 
   2815       1.1  christos                     for (;;)
   2816       1.1  christos                       {
   2817       1.1  christos                         PATFETCH (c);
   2818       1.1  christos                         if ((c == ':' && *p == ']') || p == pend)
   2819       1.1  christos                           break;
   2820       1.1  christos 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   2821       1.1  christos 			  str[c1++] = c;
   2822       1.1  christos 			else
   2823       1.1  christos 			  /* This is in any case an invalid class name.  */
   2824       1.1  christos 			  str[0] = '\0';
   2825       1.1  christos                       }
   2826       1.1  christos                     str[c1] = '\0';
   2827       1.1  christos 
   2828       1.1  christos                     /* If it isn't a word bracketed by `[:' and `:]':
   2829       1.1  christos                        undo the ending character, the letters, and leave
   2830       1.1  christos                        the leading `:' and `[' (but store them as character).  */
   2831       1.1  christos                     if (c == ':' && *p == ']')
   2832       1.1  christos                       {
   2833       1.1  christos 			wctype_t wt;
   2834       1.1  christos 			uintptr_t alignedp;
   2835       1.1  christos 
   2836       1.1  christos 			/* Query the character class as wctype_t.  */
   2837       1.1  christos 			wt = IS_CHAR_CLASS (str);
   2838       1.1  christos 			if (wt == 0)
   2839       1.1  christos 			  FREE_STACK_RETURN (REG_ECTYPE);
   2840       1.1  christos 
   2841       1.1  christos                         /* Throw away the ] at the end of the character
   2842       1.1  christos                            class.  */
   2843       1.1  christos                         PATFETCH (c);
   2844       1.1  christos 
   2845       1.1  christos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2846       1.1  christos 
   2847       1.1  christos 			/* Allocate the space for character class.  */
   2848       1.1  christos                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
   2849       1.1  christos 			/* Update the pointer to indicate end of buffer.  */
   2850       1.1  christos                         b += CHAR_CLASS_SIZE;
   2851       1.1  christos 			/* Move the data that follows the character classes
   2852       1.1  christos 			    so that it is not overwritten.  */
   2853       1.1  christos                         insert_space(CHAR_CLASS_SIZE,
   2854       1.1  christos 				     laststart + 6 + laststart[1],
   2855       1.1  christos 				     b - 1);
   2856       1.1  christos 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
   2857       1.1  christos 				    + __alignof__(wctype_t) - 1)
   2858       1.1  christos 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   2859       1.1  christos 			/* Store the character class.  */
   2860       1.1  christos                         *((wctype_t*)alignedp) = wt;
   2861       1.1  christos                         /* Update length of char_classes */
   2862       1.1  christos                         laststart[1] += CHAR_CLASS_SIZE;
   2863       1.1  christos 
   2864       1.1  christos                         had_char_class = true;
   2865       1.1  christos                       }
   2866       1.1  christos                     else
   2867       1.1  christos                       {
   2868       1.1  christos                         c1++;
   2869       1.1  christos                         while (c1--)
   2870       1.1  christos                           PATUNFETCH;
   2871       1.1  christos                         BUF_PUSH ('[');
   2872       1.1  christos                         BUF_PUSH (':');
   2873       1.1  christos                         laststart[5] += 2; /* Update the length of characters  */
   2874       1.1  christos 			range_start = ':';
   2875       1.1  christos                         had_char_class = false;
   2876       1.1  christos                       }
   2877       1.1  christos                   }
   2878       1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
   2879       1.1  christos 							  || *p == '.'))
   2880       1.1  christos 		  {
   2881       1.1  christos 		    CHAR_T str[128];	/* Should be large enough.  */
   2882       1.1  christos 		    CHAR_T delim = *p; /* '=' or '.'  */
   2883       1.1  christos # ifdef _LIBC
   2884       1.1  christos 		    uint32_t nrules =
   2885       1.1  christos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   2886       1.1  christos # endif
   2887       1.1  christos 		    PATFETCH (c);
   2888       1.1  christos 		    c1 = 0;
   2889       1.1  christos 
   2890       1.1  christos 		    /* If pattern is `[[=' or '[[.'.  */
   2891       1.1  christos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2892       1.1  christos 
   2893       1.1  christos 		    for (;;)
   2894       1.1  christos 		      {
   2895       1.1  christos 			PATFETCH (c);
   2896       1.1  christos 			if ((c == delim && *p == ']') || p == pend)
   2897       1.1  christos 			  break;
   2898       1.1  christos 			if (c1 < sizeof (str) - 1)
   2899       1.1  christos 			  str[c1++] = c;
   2900       1.1  christos 			else
   2901       1.1  christos 			  /* This is in any case an invalid class name.  */
   2902       1.1  christos 			  str[0] = '\0';
   2903       1.1  christos                       }
   2904       1.1  christos 		    str[c1] = '\0';
   2905       1.1  christos 
   2906       1.1  christos 		    if (c == delim && *p == ']' && str[0] != '\0')
   2907       1.1  christos 		      {
   2908       1.1  christos                         unsigned int i, offset;
   2909       1.1  christos 			/* If we have no collation data we use the default
   2910       1.1  christos 			   collation in which each character is in a class
   2911       1.1  christos 			   by itself.  It also means that ASCII is the
   2912       1.1  christos 			   character set and therefore we cannot have character
   2913       1.1  christos 			   with more than one byte in the multibyte
   2914       1.1  christos 			   representation.  */
   2915       1.1  christos 
   2916       1.1  christos                         /* If not defined _LIBC, we push the name and
   2917       1.1  christos 			   `\0' for the sake of matching performance.  */
   2918       1.1  christos 			int datasize = c1 + 1;
   2919       1.1  christos 
   2920       1.1  christos # ifdef _LIBC
   2921       1.1  christos 			int32_t idx = 0;
   2922       1.1  christos 			if (nrules == 0)
   2923       1.1  christos # endif
   2924       1.1  christos 			  {
   2925       1.1  christos 			    if (c1 != 1)
   2926       1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   2927       1.1  christos 			  }
   2928       1.1  christos # ifdef _LIBC
   2929       1.1  christos 			else
   2930       1.1  christos 			  {
   2931       1.1  christos 			    const int32_t *table;
   2932       1.1  christos 			    const int32_t *weights;
   2933       1.1  christos 			    const int32_t *extra;
   2934       1.1  christos 			    const int32_t *indirect;
   2935       1.1  christos 			    wint_t *cp;
   2936       1.1  christos 
   2937       1.1  christos 			    /* This #include defines a local function!  */
   2938       1.1  christos #  include <locale/weightwc.h>
   2939       1.1  christos 
   2940       1.1  christos 			    if(delim == '=')
   2941       1.1  christos 			      {
   2942       1.1  christos 				/* We push the index for equivalence class.  */
   2943       1.1  christos 				cp = (wint_t*)str;
   2944       1.1  christos 
   2945       1.1  christos 				table = (const int32_t *)
   2946       1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   2947       1.1  christos 					       _NL_COLLATE_TABLEWC);
   2948       1.1  christos 				weights = (const int32_t *)
   2949       1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   2950       1.1  christos 					       _NL_COLLATE_WEIGHTWC);
   2951       1.1  christos 				extra = (const int32_t *)
   2952       1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   2953       1.1  christos 					       _NL_COLLATE_EXTRAWC);
   2954       1.1  christos 				indirect = (const int32_t *)
   2955       1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   2956       1.1  christos 					       _NL_COLLATE_INDIRECTWC);
   2957       1.1  christos 
   2958       1.1  christos 				idx = findidx ((const wint_t**)&cp);
   2959       1.1  christos 				if (idx == 0 || cp < (wint_t*) str + c1)
   2960       1.1  christos 				  /* This is no valid character.  */
   2961       1.1  christos 				  FREE_STACK_RETURN (REG_ECOLLATE);
   2962       1.1  christos 
   2963       1.1  christos 				str[0] = (wchar_t)idx;
   2964       1.1  christos 			      }
   2965       1.1  christos 			    else /* delim == '.' */
   2966       1.1  christos 			      {
   2967       1.1  christos 				/* We push collation sequence value
   2968       1.1  christos 				   for collating symbol.  */
   2969       1.1  christos 				int32_t table_size;
   2970       1.1  christos 				const int32_t *symb_table;
   2971       1.1  christos 				const unsigned char *extra;
   2972       1.1  christos 				int32_t idx;
   2973       1.1  christos 				int32_t elem;
   2974       1.1  christos 				int32_t second;
   2975       1.1  christos 				int32_t hash;
   2976       1.1  christos 				char char_str[c1];
   2977       1.1  christos 
   2978       1.1  christos 				/* We have to convert the name to a single-byte
   2979       1.1  christos 				   string.  This is possible since the names
   2980       1.1  christos 				   consist of ASCII characters and the internal
   2981       1.1  christos 				   representation is UCS4.  */
   2982       1.1  christos 				for (i = 0; i < c1; ++i)
   2983       1.1  christos 				  char_str[i] = str[i];
   2984       1.1  christos 
   2985       1.1  christos 				table_size =
   2986       1.1  christos 				  _NL_CURRENT_WORD (LC_COLLATE,
   2987       1.1  christos 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
   2988       1.1  christos 				symb_table = (const int32_t *)
   2989       1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   2990       1.1  christos 					       _NL_COLLATE_SYMB_TABLEMB);
   2991       1.1  christos 				extra = (const unsigned char *)
   2992       1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   2993       1.1  christos 					       _NL_COLLATE_SYMB_EXTRAMB);
   2994       1.1  christos 
   2995       1.1  christos 				/* Locate the character in the hashing table.  */
   2996       1.1  christos 				hash = elem_hash (char_str, c1);
   2997       1.1  christos 
   2998       1.1  christos 				idx = 0;
   2999       1.1  christos 				elem = hash % table_size;
   3000       1.1  christos 				second = hash % (table_size - 2);
   3001       1.1  christos 				while (symb_table[2 * elem] != 0)
   3002       1.1  christos 				  {
   3003       1.1  christos 				    /* First compare the hashing value.  */
   3004       1.1  christos 				    if (symb_table[2 * elem] == hash
   3005       1.1  christos 					&& c1 == extra[symb_table[2 * elem + 1]]
   3006       1.1  christos 					&& memcmp (char_str,
   3007       1.1  christos 						   &extra[symb_table[2 * elem + 1]
   3008       1.1  christos 							 + 1], c1) == 0)
   3009       1.1  christos 				      {
   3010       1.1  christos 					/* Yep, this is the entry.  */
   3011       1.1  christos 					idx = symb_table[2 * elem + 1];
   3012       1.1  christos 					idx += 1 + extra[idx];
   3013       1.1  christos 					break;
   3014       1.1  christos 				      }
   3015       1.1  christos 
   3016       1.1  christos 				    /* Next entry.  */
   3017       1.1  christos 				    elem += second;
   3018       1.1  christos 				  }
   3019       1.1  christos 
   3020       1.1  christos 				if (symb_table[2 * elem] != 0)
   3021       1.1  christos 				  {
   3022       1.1  christos 				    /* Compute the index of the byte sequence
   3023       1.1  christos 				       in the table.  */
   3024       1.1  christos 				    idx += 1 + extra[idx];
   3025       1.1  christos 				    /* Adjust for the alignment.  */
   3026       1.1  christos 				    idx = (idx + 3) & ~3;
   3027       1.1  christos 
   3028       1.1  christos 				    str[0] = (wchar_t) idx + 4;
   3029       1.1  christos 				  }
   3030       1.1  christos 				else if (symb_table[2 * elem] == 0 && c1 == 1)
   3031       1.1  christos 				  {
   3032       1.1  christos 				    /* No valid character.  Match it as a
   3033       1.1  christos 				       single byte character.  */
   3034       1.1  christos 				    had_char_class = false;
   3035       1.1  christos 				    BUF_PUSH(str[0]);
   3036       1.1  christos 				    /* Update the length of characters  */
   3037       1.1  christos 				    laststart[5]++;
   3038       1.1  christos 				    range_start = str[0];
   3039       1.1  christos 
   3040       1.1  christos 				    /* Throw away the ] at the end of the
   3041       1.1  christos 				       collating symbol.  */
   3042       1.1  christos 				    PATFETCH (c);
   3043       1.1  christos 				    /* Go on to the next element of the list.  */
   3044       1.1  christos 				    continue;
   3045       1.1  christos 				  }
   3046       1.1  christos 				else
   3047       1.1  christos 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3048       1.1  christos 			      }
   3049       1.1  christos 			    datasize = 1;
   3050       1.1  christos 			  }
   3051       1.1  christos # endif
   3052       1.1  christos                         /* Throw away the ] at the end of the equivalence
   3053       1.1  christos                            class (or collating symbol).  */
   3054       1.1  christos                         PATFETCH (c);
   3055       1.1  christos 
   3056       1.1  christos 			/* Allocate the space for the equivalence class
   3057       1.1  christos 			   (or collating symbol) (and '\0' if needed).  */
   3058       1.1  christos                         GET_BUFFER_SPACE(datasize);
   3059       1.1  christos 			/* Update the pointer to indicate end of buffer.  */
   3060       1.1  christos                         b += datasize;
   3061       1.1  christos 
   3062       1.1  christos 			if (delim == '=')
   3063       1.1  christos 			  { /* equivalence class  */
   3064       1.1  christos 			    /* Calculate the offset of char_ranges,
   3065       1.1  christos 			       which is next to equivalence_classes.  */
   3066       1.1  christos 			    offset = laststart[1] + laststart[2]
   3067       1.1  christos 			      + laststart[3] +6;
   3068       1.1  christos 			    /* Insert space.  */
   3069       1.1  christos 			    insert_space(datasize, laststart + offset, b - 1);
   3070       1.1  christos 
   3071       1.1  christos 			    /* Write the equivalence_class and \0.  */
   3072       1.1  christos 			    for (i = 0 ; i < datasize ; i++)
   3073       1.1  christos 			      laststart[offset + i] = str[i];
   3074       1.1  christos 
   3075       1.1  christos 			    /* Update the length of equivalence_classes.  */
   3076       1.1  christos 			    laststart[3] += datasize;
   3077       1.1  christos 			    had_char_class = true;
   3078       1.1  christos 			  }
   3079       1.1  christos 			else /* delim == '.' */
   3080       1.1  christos 			  { /* collating symbol  */
   3081       1.1  christos 			    /* Calculate the offset of the equivalence_classes,
   3082       1.1  christos 			       which is next to collating_symbols.  */
   3083       1.1  christos 			    offset = laststart[1] + laststart[2] + 6;
   3084       1.1  christos 			    /* Insert space and write the collating_symbol
   3085       1.1  christos 			       and \0.  */
   3086       1.1  christos 			    insert_space(datasize, laststart + offset, b-1);
   3087       1.1  christos 			    for (i = 0 ; i < datasize ; i++)
   3088       1.1  christos 			      laststart[offset + i] = str[i];
   3089       1.1  christos 
   3090       1.1  christos 			    /* In re_match_2_internal if range_start < -1, we
   3091       1.1  christos 			       assume -range_start is the offset of the
   3092       1.1  christos 			       collating symbol which is specified as
   3093       1.1  christos 			       the character of the range start.  So we assign
   3094       1.1  christos 			       -(laststart[1] + laststart[2] + 6) to
   3095       1.1  christos 			       range_start.  */
   3096       1.1  christos 			    range_start = -(laststart[1] + laststart[2] + 6);
   3097       1.1  christos 			    /* Update the length of collating_symbol.  */
   3098       1.1  christos 			    laststart[2] += datasize;
   3099       1.1  christos 			    had_char_class = false;
   3100       1.1  christos 			  }
   3101       1.1  christos 		      }
   3102       1.1  christos                     else
   3103       1.1  christos                       {
   3104       1.1  christos                         c1++;
   3105       1.1  christos                         while (c1--)
   3106       1.1  christos                           PATUNFETCH;
   3107       1.1  christos                         BUF_PUSH ('[');
   3108       1.1  christos                         BUF_PUSH (delim);
   3109       1.1  christos                         laststart[5] += 2; /* Update the length of characters  */
   3110       1.1  christos 			range_start = delim;
   3111       1.1  christos                         had_char_class = false;
   3112       1.1  christos                       }
   3113       1.1  christos 		  }
   3114       1.1  christos                 else
   3115       1.1  christos                   {
   3116       1.1  christos                     had_char_class = false;
   3117       1.1  christos 		    BUF_PUSH(c);
   3118       1.1  christos 		    laststart[5]++;  /* Update the length of characters  */
   3119       1.1  christos 		    range_start = c;
   3120       1.1  christos                   }
   3121       1.1  christos 	      }
   3122       1.1  christos 
   3123       1.1  christos #else /* BYTE */
   3124       1.1  christos             /* Ensure that we have enough space to push a charset: the
   3125       1.1  christos                opcode, the length count, and the bitset; 34 bytes in all.  */
   3126       1.1  christos 	    GET_BUFFER_SPACE (34);
   3127       1.1  christos 
   3128       1.1  christos             laststart = b;
   3129       1.1  christos 
   3130       1.1  christos             /* We test `*p == '^' twice, instead of using an if
   3131       1.1  christos                statement, so we only need one BUF_PUSH.  */
   3132       1.1  christos             BUF_PUSH (*p == '^' ? charset_not : charset);
   3133       1.1  christos             if (*p == '^')
   3134       1.1  christos               p++;
   3135       1.1  christos 
   3136       1.1  christos             /* Remember the first position in the bracket expression.  */
   3137       1.1  christos             p1 = p;
   3138       1.1  christos 
   3139       1.1  christos             /* Push the number of bytes in the bitmap.  */
   3140       1.1  christos             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
   3141       1.1  christos 
   3142       1.1  christos             /* Clear the whole map.  */
   3143       1.1  christos             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
   3144       1.1  christos 
   3145       1.1  christos             /* charset_not matches newline according to a syntax bit.  */
   3146       1.1  christos             if ((re_opcode_t) b[-2] == charset_not
   3147       1.1  christos                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   3148       1.1  christos               SET_LIST_BIT ('\n');
   3149       1.1  christos 
   3150       1.1  christos             /* Read in characters and ranges, setting map bits.  */
   3151       1.1  christos             for (;;)
   3152       1.1  christos               {
   3153       1.1  christos                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3154       1.1  christos 
   3155       1.1  christos                 PATFETCH (c);
   3156       1.1  christos 
   3157       1.1  christos                 /* \ might escape characters inside [...] and [^...].  */
   3158       1.1  christos                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   3159       1.1  christos                   {
   3160       1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3161       1.1  christos 
   3162       1.1  christos                     PATFETCH (c1);
   3163       1.1  christos                     SET_LIST_BIT (c1);
   3164       1.1  christos 		    range_start = c1;
   3165       1.1  christos                     continue;
   3166       1.1  christos                   }
   3167       1.1  christos 
   3168       1.1  christos                 /* Could be the end of the bracket expression.  If it's
   3169       1.1  christos                    not (i.e., when the bracket expression is `[]' so
   3170       1.1  christos                    far), the ']' character bit gets set way below.  */
   3171       1.1  christos                 if (c == ']' && p != p1 + 1)
   3172       1.1  christos                   break;
   3173       1.1  christos 
   3174       1.1  christos                 /* Look ahead to see if it's a range when the last thing
   3175       1.1  christos                    was a character class.  */
   3176       1.1  christos                 if (had_char_class && c == '-' && *p != ']')
   3177       1.1  christos                   FREE_STACK_RETURN (REG_ERANGE);
   3178       1.1  christos 
   3179       1.1  christos                 /* Look ahead to see if it's a range when the last thing
   3180       1.1  christos                    was a character: if this is a hyphen not at the
   3181       1.1  christos                    beginning or the end of a list, then it's the range
   3182       1.1  christos                    operator.  */
   3183       1.1  christos                 if (c == '-'
   3184       1.1  christos                     && !(p - 2 >= pattern && p[-2] == '[')
   3185       1.1  christos                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   3186       1.1  christos                     && *p != ']')
   3187       1.1  christos                   {
   3188       1.1  christos                     reg_errcode_t ret
   3189       1.1  christos                       = byte_compile_range (range_start, &p, pend, translate,
   3190       1.1  christos 					    syntax, b);
   3191       1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3192       1.1  christos 		    range_start = 0xffffffff;
   3193       1.1  christos                   }
   3194       1.1  christos 
   3195       1.1  christos                 else if (p[0] == '-' && p[1] != ']')
   3196       1.1  christos                   { /* This handles ranges made up of characters only.  */
   3197       1.1  christos                     reg_errcode_t ret;
   3198       1.1  christos 
   3199       1.1  christos 		    /* Move past the `-'.  */
   3200       1.1  christos                     PATFETCH (c1);
   3201       1.1  christos 
   3202       1.1  christos                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
   3203       1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3204       1.1  christos 		    range_start = 0xffffffff;
   3205       1.1  christos                   }
   3206       1.1  christos 
   3207       1.1  christos                 /* See if we're at the beginning of a possible character
   3208       1.1  christos                    class.  */
   3209       1.1  christos 
   3210       1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   3211       1.1  christos                   { /* Leave room for the null.  */
   3212       1.1  christos                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   3213       1.1  christos 
   3214       1.1  christos                     PATFETCH (c);
   3215       1.1  christos                     c1 = 0;
   3216       1.1  christos 
   3217       1.1  christos                     /* If pattern is `[[:'.  */
   3218       1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3219       1.1  christos 
   3220       1.1  christos                     for (;;)
   3221       1.1  christos                       {
   3222       1.1  christos                         PATFETCH (c);
   3223       1.1  christos                         if ((c == ':' && *p == ']') || p == pend)
   3224       1.1  christos                           break;
   3225       1.1  christos 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   3226       1.1  christos 			  str[c1++] = c;
   3227       1.1  christos 			else
   3228       1.1  christos 			  /* This is in any case an invalid class name.  */
   3229       1.1  christos 			  str[0] = '\0';
   3230       1.1  christos                       }
   3231       1.1  christos                     str[c1] = '\0';
   3232       1.1  christos 
   3233       1.1  christos                     /* If it isn't a word bracketed by `[:' and `:]':
   3234       1.1  christos                        undo the ending character, the letters, and leave
   3235       1.1  christos                        the leading `:' and `[' (but set bits for them).  */
   3236       1.1  christos                     if (c == ':' && *p == ']')
   3237       1.1  christos                       {
   3238       1.1  christos # if defined _LIBC || WIDE_CHAR_SUPPORT
   3239       1.1  christos                         boolean is_lower = STREQ (str, "lower");
   3240       1.1  christos                         boolean is_upper = STREQ (str, "upper");
   3241       1.1  christos 			wctype_t wt;
   3242       1.1  christos                         int ch;
   3243       1.1  christos 
   3244       1.1  christos 			wt = IS_CHAR_CLASS (str);
   3245       1.1  christos 			if (wt == 0)
   3246       1.1  christos 			  FREE_STACK_RETURN (REG_ECTYPE);
   3247       1.1  christos 
   3248       1.1  christos                         /* Throw away the ] at the end of the character
   3249       1.1  christos                            class.  */
   3250       1.1  christos                         PATFETCH (c);
   3251       1.1  christos 
   3252       1.1  christos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3253       1.1  christos 
   3254       1.1  christos                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
   3255       1.1  christos 			  {
   3256       1.1  christos #  ifdef _LIBC
   3257       1.1  christos 			    if (__iswctype (__btowc (ch), wt))
   3258       1.1  christos 			      SET_LIST_BIT (ch);
   3259       1.1  christos #  else
   3260       1.1  christos 			    if (iswctype (btowc (ch), wt))
   3261       1.1  christos 			      SET_LIST_BIT (ch);
   3262       1.1  christos #  endif
   3263       1.1  christos 
   3264       1.1  christos 			    if (translate && (is_upper || is_lower)
   3265       1.1  christos 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3266       1.1  christos 			      SET_LIST_BIT (ch);
   3267       1.1  christos 			  }
   3268       1.1  christos 
   3269       1.1  christos                         had_char_class = true;
   3270       1.1  christos # else
   3271       1.1  christos                         int ch;
   3272       1.1  christos                         boolean is_alnum = STREQ (str, "alnum");
   3273       1.1  christos                         boolean is_alpha = STREQ (str, "alpha");
   3274       1.1  christos                         boolean is_blank = STREQ (str, "blank");
   3275       1.1  christos                         boolean is_cntrl = STREQ (str, "cntrl");
   3276       1.1  christos                         boolean is_digit = STREQ (str, "digit");
   3277       1.1  christos                         boolean is_graph = STREQ (str, "graph");
   3278       1.1  christos                         boolean is_lower = STREQ (str, "lower");
   3279       1.1  christos                         boolean is_print = STREQ (str, "print");
   3280       1.1  christos                         boolean is_punct = STREQ (str, "punct");
   3281       1.1  christos                         boolean is_space = STREQ (str, "space");
   3282       1.1  christos                         boolean is_upper = STREQ (str, "upper");
   3283       1.1  christos                         boolean is_xdigit = STREQ (str, "xdigit");
   3284       1.1  christos 
   3285       1.1  christos                         if (!IS_CHAR_CLASS (str))
   3286       1.1  christos 			  FREE_STACK_RETURN (REG_ECTYPE);
   3287       1.1  christos 
   3288       1.1  christos                         /* Throw away the ] at the end of the character
   3289       1.1  christos                            class.  */
   3290       1.1  christos                         PATFETCH (c);
   3291       1.1  christos 
   3292       1.1  christos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3293       1.1  christos 
   3294       1.1  christos                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
   3295       1.1  christos                           {
   3296       1.1  christos 			    /* This was split into 3 if's to
   3297       1.1  christos 			       avoid an arbitrary limit in some compiler.  */
   3298       1.1  christos                             if (   (is_alnum  && ISALNUM (ch))
   3299       1.1  christos                                 || (is_alpha  && ISALPHA (ch))
   3300       1.1  christos                                 || (is_blank  && ISBLANK (ch))
   3301       1.1  christos                                 || (is_cntrl  && ISCNTRL (ch)))
   3302       1.1  christos 			      SET_LIST_BIT (ch);
   3303       1.1  christos 			    if (   (is_digit  && ISDIGIT (ch))
   3304       1.1  christos                                 || (is_graph  && ISGRAPH (ch))
   3305       1.1  christos                                 || (is_lower  && ISLOWER (ch))
   3306       1.1  christos                                 || (is_print  && ISPRINT (ch)))
   3307       1.1  christos 			      SET_LIST_BIT (ch);
   3308       1.1  christos 			    if (   (is_punct  && ISPUNCT (ch))
   3309       1.1  christos                                 || (is_space  && ISSPACE (ch))
   3310       1.1  christos                                 || (is_upper  && ISUPPER (ch))
   3311       1.1  christos                                 || (is_xdigit && ISXDIGIT (ch)))
   3312       1.1  christos 			      SET_LIST_BIT (ch);
   3313       1.1  christos 			    if (   translate && (is_upper || is_lower)
   3314       1.1  christos 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3315       1.1  christos 			      SET_LIST_BIT (ch);
   3316       1.1  christos                           }
   3317       1.1  christos                         had_char_class = true;
   3318       1.1  christos # endif	/* libc || wctype.h */
   3319       1.1  christos                       }
   3320       1.1  christos                     else
   3321       1.1  christos                       {
   3322       1.1  christos                         c1++;
   3323       1.1  christos                         while (c1--)
   3324       1.1  christos                           PATUNFETCH;
   3325       1.1  christos                         SET_LIST_BIT ('[');
   3326       1.1  christos                         SET_LIST_BIT (':');
   3327       1.1  christos 			range_start = ':';
   3328       1.1  christos                         had_char_class = false;
   3329       1.1  christos                       }
   3330       1.1  christos                   }
   3331       1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
   3332       1.1  christos 		  {
   3333       1.1  christos 		    unsigned char str[MB_LEN_MAX + 1];
   3334       1.1  christos # ifdef _LIBC
   3335       1.1  christos 		    uint32_t nrules =
   3336       1.1  christos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3337       1.1  christos # endif
   3338       1.1  christos 
   3339       1.1  christos 		    PATFETCH (c);
   3340       1.1  christos 		    c1 = 0;
   3341       1.1  christos 
   3342       1.1  christos 		    /* If pattern is `[[='.  */
   3343       1.1  christos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3344       1.1  christos 
   3345       1.1  christos 		    for (;;)
   3346       1.1  christos 		      {
   3347       1.1  christos 			PATFETCH (c);
   3348       1.1  christos 			if ((c == '=' && *p == ']') || p == pend)
   3349       1.1  christos 			  break;
   3350       1.1  christos 			if (c1 < MB_LEN_MAX)
   3351       1.1  christos 			  str[c1++] = c;
   3352       1.1  christos 			else
   3353       1.1  christos 			  /* This is in any case an invalid class name.  */
   3354       1.1  christos 			  str[0] = '\0';
   3355       1.1  christos                       }
   3356       1.1  christos 		    str[c1] = '\0';
   3357       1.1  christos 
   3358       1.1  christos 		    if (c == '=' && *p == ']' && str[0] != '\0')
   3359       1.1  christos 		      {
   3360       1.1  christos 			/* If we have no collation data we use the default
   3361       1.1  christos 			   collation in which each character is in a class
   3362       1.1  christos 			   by itself.  It also means that ASCII is the
   3363       1.1  christos 			   character set and therefore we cannot have character
   3364       1.1  christos 			   with more than one byte in the multibyte
   3365       1.1  christos 			   representation.  */
   3366       1.1  christos # ifdef _LIBC
   3367       1.1  christos 			if (nrules == 0)
   3368       1.1  christos # endif
   3369       1.1  christos 			  {
   3370       1.1  christos 			    if (c1 != 1)
   3371       1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3372       1.1  christos 
   3373       1.1  christos 			    /* Throw away the ] at the end of the equivalence
   3374       1.1  christos 			       class.  */
   3375       1.1  christos 			    PATFETCH (c);
   3376       1.1  christos 
   3377       1.1  christos 			    /* Set the bit for the character.  */
   3378       1.1  christos 			    SET_LIST_BIT (str[0]);
   3379       1.1  christos 			  }
   3380       1.1  christos # ifdef _LIBC
   3381       1.1  christos 			else
   3382       1.1  christos 			  {
   3383       1.1  christos 			    /* Try to match the byte sequence in `str' against
   3384       1.1  christos 			       those known to the collate implementation.
   3385       1.1  christos 			       First find out whether the bytes in `str' are
   3386       1.1  christos 			       actually from exactly one character.  */
   3387       1.1  christos 			    const int32_t *table;
   3388       1.1  christos 			    const unsigned char *weights;
   3389       1.1  christos 			    const unsigned char *extra;
   3390       1.1  christos 			    const int32_t *indirect;
   3391       1.1  christos 			    int32_t idx;
   3392       1.1  christos 			    const unsigned char *cp = str;
   3393       1.1  christos 			    int ch;
   3394       1.1  christos 
   3395       1.1  christos 			    /* This #include defines a local function!  */
   3396       1.1  christos #  include <locale/weight.h>
   3397       1.1  christos 
   3398       1.1  christos 			    table = (const int32_t *)
   3399       1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3400       1.1  christos 			    weights = (const unsigned char *)
   3401       1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3402       1.1  christos 			    extra = (const unsigned char *)
   3403   1.1.1.3  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3404       1.1  christos 			    indirect = (const int32_t *)
   3405       1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3406       1.1  christos 
   3407       1.1  christos 			    idx = findidx (&cp);
   3408       1.1  christos 			    if (idx == 0 || cp < str + c1)
   3409       1.1  christos 			      /* This is no valid character.  */
   3410       1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3411       1.1  christos 
   3412       1.1  christos 			    /* Throw away the ] at the end of the equivalence
   3413       1.1  christos 			       class.  */
   3414       1.1  christos 			    PATFETCH (c);
   3415       1.1  christos 
   3416       1.1  christos 			    /* Now we have to go through the whole table
   3417       1.1  christos 			       and find all characters which have the same
   3418       1.1  christos 			       first level weight.
   3419       1.1  christos 
   3420       1.1  christos 			       XXX Note that this is not entirely correct.
   3421       1.1  christos 			       we would have to match multibyte sequences
   3422       1.1  christos 			       but this is not possible with the current
   3423       1.1  christos 			       implementation.  */
   3424       1.1  christos 			    for (ch = 1; ch < 256; ++ch)
   3425       1.1  christos 			      /* XXX This test would have to be changed if we
   3426       1.1  christos 				 would allow matching multibyte sequences.  */
   3427       1.1  christos 			      if (table[ch] > 0)
   3428       1.1  christos 				{
   3429       1.1  christos 				  int32_t idx2 = table[ch];
   3430       1.1  christos 				  size_t len = weights[idx2];
   3431       1.1  christos 
   3432       1.1  christos 				  /* Test whether the lengths match.  */
   3433       1.1  christos 				  if (weights[idx] == len)
   3434       1.1  christos 				    {
   3435       1.1  christos 				      /* They do.  Now compare the bytes of
   3436       1.1  christos 					 the weight.  */
   3437       1.1  christos 				      size_t cnt = 0;
   3438       1.1  christos 
   3439       1.1  christos 				      while (cnt < len
   3440       1.1  christos 					     && (weights[idx + 1 + cnt]
   3441       1.1  christos 						 == weights[idx2 + 1 + cnt]))
   3442       1.1  christos 					++cnt;
   3443       1.1  christos 
   3444       1.1  christos 				      if (cnt == len)
   3445       1.1  christos 					/* They match.  Mark the character as
   3446       1.1  christos 					   acceptable.  */
   3447       1.1  christos 					SET_LIST_BIT (ch);
   3448       1.1  christos 				    }
   3449       1.1  christos 				}
   3450       1.1  christos 			  }
   3451       1.1  christos # endif
   3452       1.1  christos 			had_char_class = true;
   3453       1.1  christos 		      }
   3454       1.1  christos                     else
   3455       1.1  christos                       {
   3456       1.1  christos                         c1++;
   3457       1.1  christos                         while (c1--)
   3458       1.1  christos                           PATUNFETCH;
   3459       1.1  christos                         SET_LIST_BIT ('[');
   3460       1.1  christos                         SET_LIST_BIT ('=');
   3461       1.1  christos 			range_start = '=';
   3462       1.1  christos                         had_char_class = false;
   3463       1.1  christos                       }
   3464       1.1  christos 		  }
   3465       1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
   3466       1.1  christos 		  {
   3467       1.1  christos 		    unsigned char str[128];	/* Should be large enough.  */
   3468       1.1  christos # ifdef _LIBC
   3469       1.1  christos 		    uint32_t nrules =
   3470       1.1  christos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3471       1.1  christos # endif
   3472       1.1  christos 
   3473       1.1  christos 		    PATFETCH (c);
   3474       1.1  christos 		    c1 = 0;
   3475       1.1  christos 
   3476       1.1  christos 		    /* If pattern is `[[.'.  */
   3477       1.1  christos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3478       1.1  christos 
   3479       1.1  christos 		    for (;;)
   3480       1.1  christos 		      {
   3481       1.1  christos 			PATFETCH (c);
   3482       1.1  christos 			if ((c == '.' && *p == ']') || p == pend)
   3483       1.1  christos 			  break;
   3484       1.1  christos 			if (c1 < sizeof (str))
   3485       1.1  christos 			  str[c1++] = c;
   3486       1.1  christos 			else
   3487       1.1  christos 			  /* This is in any case an invalid class name.  */
   3488       1.1  christos 			  str[0] = '\0';
   3489       1.1  christos                       }
   3490       1.1  christos 		    str[c1] = '\0';
   3491       1.1  christos 
   3492       1.1  christos 		    if (c == '.' && *p == ']' && str[0] != '\0')
   3493       1.1  christos 		      {
   3494       1.1  christos 			/* If we have no collation data we use the default
   3495       1.1  christos 			   collation in which each character is the name
   3496       1.1  christos 			   for its own class which contains only the one
   3497       1.1  christos 			   character.  It also means that ASCII is the
   3498       1.1  christos 			   character set and therefore we cannot have character
   3499       1.1  christos 			   with more than one byte in the multibyte
   3500       1.1  christos 			   representation.  */
   3501       1.1  christos # ifdef _LIBC
   3502       1.1  christos 			if (nrules == 0)
   3503       1.1  christos # endif
   3504       1.1  christos 			  {
   3505       1.1  christos 			    if (c1 != 1)
   3506       1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3507       1.1  christos 
   3508       1.1  christos 			    /* Throw away the ] at the end of the collating
   3509       1.1  christos 			       symbol.  */
   3510       1.1  christos 			    PATFETCH (c);
   3511       1.1  christos 
   3512       1.1  christos 			    /* Set the bit for the character.  */
   3513       1.1  christos 			    SET_LIST_BIT (str[0]);
   3514       1.1  christos 			    range_start = ((const unsigned char *) str)[0];
   3515       1.1  christos 			  }
   3516       1.1  christos # ifdef _LIBC
   3517       1.1  christos 			else
   3518       1.1  christos 			  {
   3519       1.1  christos 			    /* Try to match the byte sequence in `str' against
   3520       1.1  christos 			       those known to the collate implementation.
   3521       1.1  christos 			       First find out whether the bytes in `str' are
   3522       1.1  christos 			       actually from exactly one character.  */
   3523       1.1  christos 			    int32_t table_size;
   3524       1.1  christos 			    const int32_t *symb_table;
   3525       1.1  christos 			    const unsigned char *extra;
   3526       1.1  christos 			    int32_t idx;
   3527       1.1  christos 			    int32_t elem;
   3528       1.1  christos 			    int32_t second;
   3529       1.1  christos 			    int32_t hash;
   3530       1.1  christos 
   3531       1.1  christos 			    table_size =
   3532       1.1  christos 			      _NL_CURRENT_WORD (LC_COLLATE,
   3533       1.1  christos 						_NL_COLLATE_SYMB_HASH_SIZEMB);
   3534       1.1  christos 			    symb_table = (const int32_t *)
   3535       1.1  christos 			      _NL_CURRENT (LC_COLLATE,
   3536       1.1  christos 					   _NL_COLLATE_SYMB_TABLEMB);
   3537       1.1  christos 			    extra = (const unsigned char *)
   3538       1.1  christos 			      _NL_CURRENT (LC_COLLATE,
   3539       1.1  christos 					   _NL_COLLATE_SYMB_EXTRAMB);
   3540       1.1  christos 
   3541       1.1  christos 			    /* Locate the character in the hashing table.  */
   3542       1.1  christos 			    hash = elem_hash (str, c1);
   3543       1.1  christos 
   3544       1.1  christos 			    idx = 0;
   3545       1.1  christos 			    elem = hash % table_size;
   3546       1.1  christos 			    second = hash % (table_size - 2);
   3547       1.1  christos 			    while (symb_table[2 * elem] != 0)
   3548       1.1  christos 			      {
   3549       1.1  christos 				/* First compare the hashing value.  */
   3550       1.1  christos 				if (symb_table[2 * elem] == hash
   3551       1.1  christos 				    && c1 == extra[symb_table[2 * elem + 1]]
   3552       1.1  christos 				    && memcmp (str,
   3553       1.1  christos 					       &extra[symb_table[2 * elem + 1]
   3554       1.1  christos 						     + 1],
   3555       1.1  christos 					       c1) == 0)
   3556       1.1  christos 				  {
   3557       1.1  christos 				    /* Yep, this is the entry.  */
   3558       1.1  christos 				    idx = symb_table[2 * elem + 1];
   3559       1.1  christos 				    idx += 1 + extra[idx];
   3560       1.1  christos 				    break;
   3561       1.1  christos 				  }
   3562       1.1  christos 
   3563       1.1  christos 				/* Next entry.  */
   3564       1.1  christos 				elem += second;
   3565       1.1  christos 			      }
   3566       1.1  christos 
   3567       1.1  christos 			    if (symb_table[2 * elem] == 0)
   3568       1.1  christos 			      /* This is no valid character.  */
   3569       1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3570       1.1  christos 
   3571       1.1  christos 			    /* Throw away the ] at the end of the collating
   3572       1.1  christos 			       symbol.  */
   3573       1.1  christos 			    PATFETCH (c);
   3574       1.1  christos 
   3575       1.1  christos 			    /* Now add the multibyte character(s) we found
   3576       1.1  christos 			       to the accept list.
   3577       1.1  christos 
   3578       1.1  christos 			       XXX Note that this is not entirely correct.
   3579       1.1  christos 			       we would have to match multibyte sequences
   3580       1.1  christos 			       but this is not possible with the current
   3581       1.1  christos 			       implementation.  Also, we have to match
   3582       1.1  christos 			       collating symbols, which expand to more than
   3583       1.1  christos 			       one file, as a whole and not allow the
   3584       1.1  christos 			       individual bytes.  */
   3585       1.1  christos 			    c1 = extra[idx++];
   3586       1.1  christos 			    if (c1 == 1)
   3587       1.1  christos 			      range_start = extra[idx];
   3588       1.1  christos 			    while (c1-- > 0)
   3589       1.1  christos 			      {
   3590       1.1  christos 				SET_LIST_BIT (extra[idx]);
   3591       1.1  christos 				++idx;
   3592       1.1  christos 			      }
   3593       1.1  christos 			  }
   3594       1.1  christos # endif
   3595       1.1  christos 			had_char_class = false;
   3596       1.1  christos 		      }
   3597       1.1  christos                     else
   3598       1.1  christos                       {
   3599       1.1  christos                         c1++;
   3600       1.1  christos                         while (c1--)
   3601       1.1  christos                           PATUNFETCH;
   3602       1.1  christos                         SET_LIST_BIT ('[');
   3603       1.1  christos                         SET_LIST_BIT ('.');
   3604       1.1  christos 			range_start = '.';
   3605       1.1  christos                         had_char_class = false;
   3606       1.1  christos                       }
   3607       1.1  christos 		  }
   3608       1.1  christos                 else
   3609       1.1  christos                   {
   3610       1.1  christos                     had_char_class = false;
   3611       1.1  christos                     SET_LIST_BIT (c);
   3612       1.1  christos 		    range_start = c;
   3613       1.1  christos                   }
   3614       1.1  christos               }
   3615       1.1  christos 
   3616       1.1  christos             /* Discard any (non)matching list bytes that are all 0 at the
   3617       1.1  christos                end of the map.  Decrease the map-length byte too.  */
   3618       1.1  christos             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
   3619       1.1  christos               b[-1]--;
   3620       1.1  christos             b += b[-1];
   3621       1.1  christos #endif /* WCHAR */
   3622       1.1  christos           }
   3623       1.1  christos           break;
   3624       1.1  christos 
   3625       1.1  christos 
   3626       1.1  christos 	case '(':
   3627       1.1  christos           if (syntax & RE_NO_BK_PARENS)
   3628       1.1  christos             goto handle_open;
   3629       1.1  christos           else
   3630       1.1  christos             goto normal_char;
   3631       1.1  christos 
   3632       1.1  christos 
   3633       1.1  christos         case ')':
   3634       1.1  christos           if (syntax & RE_NO_BK_PARENS)
   3635       1.1  christos             goto handle_close;
   3636       1.1  christos           else
   3637       1.1  christos             goto normal_char;
   3638       1.1  christos 
   3639       1.1  christos 
   3640       1.1  christos         case '\n':
   3641       1.1  christos           if (syntax & RE_NEWLINE_ALT)
   3642       1.1  christos             goto handle_alt;
   3643       1.1  christos           else
   3644       1.1  christos             goto normal_char;
   3645       1.1  christos 
   3646       1.1  christos 
   3647       1.1  christos 	case '|':
   3648       1.1  christos           if (syntax & RE_NO_BK_VBAR)
   3649       1.1  christos             goto handle_alt;
   3650       1.1  christos           else
   3651       1.1  christos             goto normal_char;
   3652       1.1  christos 
   3653       1.1  christos 
   3654       1.1  christos         case '{':
   3655       1.1  christos            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
   3656       1.1  christos              goto handle_interval;
   3657       1.1  christos            else
   3658       1.1  christos              goto normal_char;
   3659       1.1  christos 
   3660       1.1  christos 
   3661       1.1  christos         case '\\':
   3662       1.1  christos           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3663       1.1  christos 
   3664       1.1  christos           /* Do not translate the character after the \, so that we can
   3665       1.1  christos              distinguish, e.g., \B from \b, even if we normally would
   3666       1.1  christos              translate, e.g., B to b.  */
   3667       1.1  christos           PATFETCH_RAW (c);
   3668       1.1  christos 
   3669       1.1  christos           switch (c)
   3670       1.1  christos             {
   3671       1.1  christos             case '(':
   3672       1.1  christos               if (syntax & RE_NO_BK_PARENS)
   3673       1.1  christos                 goto normal_backslash;
   3674       1.1  christos 
   3675       1.1  christos             handle_open:
   3676       1.1  christos               bufp->re_nsub++;
   3677       1.1  christos               regnum++;
   3678       1.1  christos 
   3679       1.1  christos               if (COMPILE_STACK_FULL)
   3680       1.1  christos                 {
   3681       1.1  christos                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
   3682       1.1  christos                             compile_stack_elt_t);
   3683       1.1  christos                   if (compile_stack.stack == NULL) return REG_ESPACE;
   3684       1.1  christos 
   3685       1.1  christos                   compile_stack.size <<= 1;
   3686       1.1  christos                 }
   3687       1.1  christos 
   3688       1.1  christos               /* These are the values to restore when we hit end of this
   3689       1.1  christos                  group.  They are all relative offsets, so that if the
   3690       1.1  christos                  whole pattern moves because of realloc, they will still
   3691       1.1  christos                  be valid.  */
   3692       1.1  christos               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
   3693       1.1  christos               COMPILE_STACK_TOP.fixup_alt_jump
   3694       1.1  christos                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
   3695       1.1  christos               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
   3696       1.1  christos               COMPILE_STACK_TOP.regnum = regnum;
   3697       1.1  christos 
   3698       1.1  christos               /* We will eventually replace the 0 with the number of
   3699       1.1  christos                  groups inner to this one.  But do not push a
   3700       1.1  christos                  start_memory for groups beyond the last one we can
   3701       1.1  christos                  represent in the compiled pattern.  */
   3702       1.1  christos               if (regnum <= MAX_REGNUM)
   3703       1.1  christos                 {
   3704       1.1  christos                   COMPILE_STACK_TOP.inner_group_offset = b
   3705       1.1  christos 		    - COMPILED_BUFFER_VAR + 2;
   3706       1.1  christos                   BUF_PUSH_3 (start_memory, regnum, 0);
   3707       1.1  christos                 }
   3708       1.1  christos 
   3709       1.1  christos               compile_stack.avail++;
   3710       1.1  christos 
   3711       1.1  christos               fixup_alt_jump = 0;
   3712       1.1  christos               laststart = 0;
   3713       1.1  christos               begalt = b;
   3714       1.1  christos 	      /* If we've reached MAX_REGNUM groups, then this open
   3715       1.1  christos 		 won't actually generate any code, so we'll have to
   3716       1.1  christos 		 clear pending_exact explicitly.  */
   3717       1.1  christos 	      pending_exact = 0;
   3718       1.1  christos               break;
   3719       1.1  christos 
   3720       1.1  christos 
   3721       1.1  christos             case ')':
   3722       1.1  christos               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
   3723       1.1  christos 
   3724       1.1  christos               if (COMPILE_STACK_EMPTY)
   3725       1.1  christos 		{
   3726       1.1  christos 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3727       1.1  christos 		    goto normal_backslash;
   3728       1.1  christos 		  else
   3729       1.1  christos 		    FREE_STACK_RETURN (REG_ERPAREN);
   3730       1.1  christos 		}
   3731       1.1  christos 
   3732       1.1  christos             handle_close:
   3733       1.1  christos               if (fixup_alt_jump)
   3734       1.1  christos                 { /* Push a dummy failure point at the end of the
   3735       1.1  christos                      alternative for a possible future
   3736       1.1  christos                      `pop_failure_jump' to pop.  See comments at
   3737       1.1  christos                      `push_dummy_failure' in `re_match_2'.  */
   3738       1.1  christos                   BUF_PUSH (push_dummy_failure);
   3739       1.1  christos 
   3740       1.1  christos                   /* We allocated space for this jump when we assigned
   3741       1.1  christos                      to `fixup_alt_jump', in the `handle_alt' case below.  */
   3742       1.1  christos                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
   3743       1.1  christos                 }
   3744       1.1  christos 
   3745       1.1  christos               /* See similar code for backslashed left paren above.  */
   3746       1.1  christos               if (COMPILE_STACK_EMPTY)
   3747       1.1  christos 		{
   3748       1.1  christos 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3749       1.1  christos 		    goto normal_char;
   3750       1.1  christos 		  else
   3751       1.1  christos 		    FREE_STACK_RETURN (REG_ERPAREN);
   3752       1.1  christos 		}
   3753       1.1  christos 
   3754       1.1  christos               /* Since we just checked for an empty stack above, this
   3755       1.1  christos                  ``can't happen''.  */
   3756       1.1  christos               assert (compile_stack.avail != 0);
   3757       1.1  christos               {
   3758       1.1  christos                 /* We don't just want to restore into `regnum', because
   3759       1.1  christos                    later groups should continue to be numbered higher,
   3760       1.1  christos                    as in `(ab)c(de)' -- the second group is #2.  */
   3761       1.1  christos                 regnum_t this_group_regnum;
   3762       1.1  christos 
   3763       1.1  christos                 compile_stack.avail--;
   3764       1.1  christos                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
   3765       1.1  christos                 fixup_alt_jump
   3766       1.1  christos                   = COMPILE_STACK_TOP.fixup_alt_jump
   3767       1.1  christos                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
   3768       1.1  christos                     : 0;
   3769       1.1  christos                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
   3770       1.1  christos                 this_group_regnum = COMPILE_STACK_TOP.regnum;
   3771       1.1  christos 		/* If we've reached MAX_REGNUM groups, then this close
   3772       1.1  christos 		   won't actually generate any code, so we'll have to
   3773       1.1  christos 		   clear pending_exact explicitly.  */
   3774       1.1  christos 		pending_exact = 0;
   3775       1.1  christos 
   3776       1.1  christos                 /* We're at the end of the group, so now we know how many
   3777       1.1  christos                    groups were inside this one.  */
   3778       1.1  christos                 if (this_group_regnum <= MAX_REGNUM)
   3779       1.1  christos                   {
   3780       1.1  christos 		    UCHAR_T *inner_group_loc
   3781       1.1  christos                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
   3782       1.1  christos 
   3783       1.1  christos                     *inner_group_loc = regnum - this_group_regnum;
   3784       1.1  christos                     BUF_PUSH_3 (stop_memory, this_group_regnum,
   3785       1.1  christos                                 regnum - this_group_regnum);
   3786       1.1  christos                   }
   3787       1.1  christos               }
   3788       1.1  christos               break;
   3789       1.1  christos 
   3790       1.1  christos 
   3791       1.1  christos             case '|':					/* `\|'.  */
   3792       1.1  christos               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
   3793       1.1  christos                 goto normal_backslash;
   3794       1.1  christos             handle_alt:
   3795       1.1  christos               if (syntax & RE_LIMITED_OPS)
   3796       1.1  christos                 goto normal_char;
   3797       1.1  christos 
   3798       1.1  christos               /* Insert before the previous alternative a jump which
   3799       1.1  christos                  jumps to this alternative if the former fails.  */
   3800       1.1  christos               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3801       1.1  christos               INSERT_JUMP (on_failure_jump, begalt,
   3802       1.1  christos 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
   3803       1.1  christos               pending_exact = 0;
   3804       1.1  christos               b += 1 + OFFSET_ADDRESS_SIZE;
   3805       1.1  christos 
   3806       1.1  christos               /* The alternative before this one has a jump after it
   3807       1.1  christos                  which gets executed if it gets matched.  Adjust that
   3808       1.1  christos                  jump so it will jump to this alternative's analogous
   3809       1.1  christos                  jump (put in below, which in turn will jump to the next
   3810       1.1  christos                  (if any) alternative's such jump, etc.).  The last such
   3811       1.1  christos                  jump jumps to the correct final destination.  A picture:
   3812       1.1  christos                           _____ _____
   3813       1.1  christos                           |   | |   |
   3814       1.1  christos                           |   v |   v
   3815       1.1  christos                          a | b   | c
   3816       1.1  christos 
   3817       1.1  christos                  If we are at `b', then fixup_alt_jump right now points to a
   3818       1.1  christos                  three-byte space after `a'.  We'll put in the jump, set
   3819       1.1  christos                  fixup_alt_jump to right after `b', and leave behind three
   3820       1.1  christos                  bytes which we'll fill in when we get to after `c'.  */
   3821       1.1  christos 
   3822       1.1  christos               if (fixup_alt_jump)
   3823       1.1  christos                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   3824       1.1  christos 
   3825       1.1  christos               /* Mark and leave space for a jump after this alternative,
   3826       1.1  christos                  to be filled in later either by the next alternative or
   3827       1.1  christos                  when we know we're at the end of a series of alternatives.  */
   3828       1.1  christos               fixup_alt_jump = b;
   3829       1.1  christos               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3830       1.1  christos               b += 1 + OFFSET_ADDRESS_SIZE;
   3831       1.1  christos 
   3832       1.1  christos               laststart = 0;
   3833       1.1  christos               begalt = b;
   3834       1.1  christos               break;
   3835       1.1  christos 
   3836       1.1  christos 
   3837       1.1  christos             case '{':
   3838       1.1  christos               /* If \{ is a literal.  */
   3839       1.1  christos               if (!(syntax & RE_INTERVALS)
   3840       1.1  christos                      /* If we're at `\{' and it's not the open-interval
   3841       1.1  christos                         operator.  */
   3842       1.1  christos 		  || (syntax & RE_NO_BK_BRACES))
   3843       1.1  christos                 goto normal_backslash;
   3844       1.1  christos 
   3845       1.1  christos             handle_interval:
   3846       1.1  christos               {
   3847       1.1  christos                 /* If got here, then the syntax allows intervals.  */
   3848       1.1  christos 
   3849       1.1  christos                 /* At least (most) this many matches must be made.  */
   3850       1.1  christos                 int lower_bound = -1, upper_bound = -1;
   3851       1.1  christos 
   3852       1.1  christos 		/* Place in the uncompiled pattern (i.e., just after
   3853       1.1  christos 		   the '{') to go back to if the interval is invalid.  */
   3854       1.1  christos 		const CHAR_T *beg_interval = p;
   3855       1.1  christos 
   3856       1.1  christos                 if (p == pend)
   3857       1.1  christos 		  goto invalid_interval;
   3858       1.1  christos 
   3859       1.1  christos                 GET_UNSIGNED_NUMBER (lower_bound);
   3860       1.1  christos 
   3861       1.1  christos                 if (c == ',')
   3862       1.1  christos                   {
   3863       1.1  christos                     GET_UNSIGNED_NUMBER (upper_bound);
   3864       1.1  christos 		    if (upper_bound < 0)
   3865       1.1  christos 		      upper_bound = RE_DUP_MAX;
   3866       1.1  christos                   }
   3867       1.1  christos                 else
   3868       1.1  christos                   /* Interval such as `{1}' => match exactly once. */
   3869       1.1  christos                   upper_bound = lower_bound;
   3870       1.1  christos 
   3871       1.1  christos                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
   3872       1.1  christos 		  goto invalid_interval;
   3873       1.1  christos 
   3874       1.1  christos                 if (!(syntax & RE_NO_BK_BRACES))
   3875       1.1  christos                   {
   3876       1.1  christos 		    if (c != '\\' || p == pend)
   3877       1.1  christos 		      goto invalid_interval;
   3878       1.1  christos                     PATFETCH (c);
   3879       1.1  christos                   }
   3880       1.1  christos 
   3881       1.1  christos                 if (c != '}')
   3882       1.1  christos 		  goto invalid_interval;
   3883       1.1  christos 
   3884       1.1  christos                 /* If it's invalid to have no preceding re.  */
   3885       1.1  christos                 if (!laststart)
   3886       1.1  christos                   {
   3887       1.1  christos 		    if (syntax & RE_CONTEXT_INVALID_OPS
   3888       1.1  christos 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
   3889       1.1  christos                       FREE_STACK_RETURN (REG_BADRPT);
   3890       1.1  christos                     else if (syntax & RE_CONTEXT_INDEP_OPS)
   3891       1.1  christos                       laststart = b;
   3892       1.1  christos                     else
   3893       1.1  christos                       goto unfetch_interval;
   3894       1.1  christos                   }
   3895       1.1  christos 
   3896       1.1  christos                 /* We just parsed a valid interval.  */
   3897       1.1  christos 
   3898       1.1  christos                 if (RE_DUP_MAX < upper_bound)
   3899       1.1  christos 		  FREE_STACK_RETURN (REG_BADBR);
   3900       1.1  christos 
   3901       1.1  christos                 /* If the upper bound is zero, don't want to succeed at
   3902       1.1  christos                    all; jump from `laststart' to `b + 3', which will be
   3903       1.1  christos 		   the end of the buffer after we insert the jump.  */
   3904       1.1  christos 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
   3905       1.1  christos 		   instead of 'b + 3'.  */
   3906       1.1  christos                  if (upper_bound == 0)
   3907       1.1  christos                    {
   3908       1.1  christos                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3909       1.1  christos                      INSERT_JUMP (jump, laststart, b + 1
   3910       1.1  christos 				  + OFFSET_ADDRESS_SIZE);
   3911       1.1  christos                      b += 1 + OFFSET_ADDRESS_SIZE;
   3912       1.1  christos                    }
   3913       1.1  christos 
   3914       1.1  christos                  /* Otherwise, we have a nontrivial interval.  When
   3915       1.1  christos                     we're all done, the pattern will look like:
   3916       1.1  christos                       set_number_at <jump count> <upper bound>
   3917       1.1  christos                       set_number_at <succeed_n count> <lower bound>
   3918       1.1  christos                       succeed_n <after jump addr> <succeed_n count>
   3919       1.1  christos                       <body of loop>
   3920       1.1  christos                       jump_n <succeed_n addr> <jump count>
   3921       1.1  christos                     (The upper bound and `jump_n' are omitted if
   3922       1.1  christos                     `upper_bound' is 1, though.)  */
   3923       1.1  christos                  else
   3924       1.1  christos                    { /* If the upper bound is > 1, we need to insert
   3925       1.1  christos                         more at the end of the loop.  */
   3926       1.1  christos                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
   3927       1.1  christos 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
   3928       1.1  christos 
   3929       1.1  christos                      GET_BUFFER_SPACE (nbytes);
   3930       1.1  christos 
   3931       1.1  christos                      /* Initialize lower bound of the `succeed_n', even
   3932       1.1  christos                         though it will be set during matching by its
   3933       1.1  christos                         attendant `set_number_at' (inserted next),
   3934       1.1  christos                         because `re_compile_fastmap' needs to know.
   3935       1.1  christos                         Jump to the `jump_n' we might insert below.  */
   3936       1.1  christos                      INSERT_JUMP2 (succeed_n, laststart,
   3937       1.1  christos                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
   3938       1.1  christos 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
   3939       1.1  christos 				   , lower_bound);
   3940       1.1  christos                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3941       1.1  christos 
   3942       1.1  christos                      /* Code to initialize the lower bound.  Insert
   3943       1.1  christos                         before the `succeed_n'.  The `5' is the last two
   3944       1.1  christos                         bytes of this `set_number_at', plus 3 bytes of
   3945       1.1  christos                         the following `succeed_n'.  */
   3946       1.1  christos 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
   3947       1.1  christos 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
   3948       1.1  christos 			of the following `succeed_n'.  */
   3949       1.1  christos                      PREFIX(insert_op2) (set_number_at, laststart, 1
   3950       1.1  christos 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
   3951       1.1  christos                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3952       1.1  christos 
   3953       1.1  christos                      if (upper_bound > 1)
   3954       1.1  christos                        { /* More than one repetition is allowed, so
   3955       1.1  christos                             append a backward jump to the `succeed_n'
   3956       1.1  christos                             that starts this interval.
   3957       1.1  christos 
   3958       1.1  christos                             When we've reached this during matching,
   3959       1.1  christos                             we'll have matched the interval once, so
   3960       1.1  christos                             jump back only `upper_bound - 1' times.  */
   3961       1.1  christos                          STORE_JUMP2 (jump_n, b, laststart
   3962       1.1  christos 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
   3963       1.1  christos                                       upper_bound - 1);
   3964       1.1  christos                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3965       1.1  christos 
   3966       1.1  christos                          /* The location we want to set is the second
   3967       1.1  christos                             parameter of the `jump_n'; that is `b-2' as
   3968       1.1  christos                             an absolute address.  `laststart' will be
   3969       1.1  christos                             the `set_number_at' we're about to insert;
   3970       1.1  christos                             `laststart+3' the number to set, the source
   3971       1.1  christos                             for the relative address.  But we are
   3972       1.1  christos                             inserting into the middle of the pattern --
   3973       1.1  christos                             so everything is getting moved up by 5.
   3974       1.1  christos                             Conclusion: (b - 2) - (laststart + 3) + 5,
   3975       1.1  christos                             i.e., b - laststart.
   3976       1.1  christos 
   3977       1.1  christos                             We insert this at the beginning of the loop
   3978       1.1  christos                             so that if we fail during matching, we'll
   3979       1.1  christos                             reinitialize the bounds.  */
   3980       1.1  christos                          PREFIX(insert_op2) (set_number_at, laststart,
   3981       1.1  christos 					     b - laststart,
   3982       1.1  christos 					     upper_bound - 1, b);
   3983       1.1  christos                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   3984       1.1  christos                        }
   3985       1.1  christos                    }
   3986       1.1  christos                 pending_exact = 0;
   3987       1.1  christos 		break;
   3988       1.1  christos 
   3989       1.1  christos 	      invalid_interval:
   3990       1.1  christos 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
   3991       1.1  christos 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
   3992       1.1  christos 	      unfetch_interval:
   3993       1.1  christos 		/* Match the characters as literals.  */
   3994       1.1  christos 		p = beg_interval;
   3995       1.1  christos 		c = '{';
   3996       1.1  christos 		if (syntax & RE_NO_BK_BRACES)
   3997       1.1  christos 		  goto normal_char;
   3998       1.1  christos 		else
   3999       1.1  christos 		  goto normal_backslash;
   4000       1.1  christos 	      }
   4001       1.1  christos 
   4002       1.1  christos #ifdef emacs
   4003       1.1  christos             /* There is no way to specify the before_dot and after_dot
   4004       1.1  christos                operators.  rms says this is ok.  --karl  */
   4005       1.1  christos             case '=':
   4006       1.1  christos               BUF_PUSH (at_dot);
   4007       1.1  christos               break;
   4008       1.1  christos 
   4009       1.1  christos             case 's':
   4010       1.1  christos               laststart = b;
   4011       1.1  christos               PATFETCH (c);
   4012       1.1  christos               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
   4013       1.1  christos               break;
   4014       1.1  christos 
   4015       1.1  christos             case 'S':
   4016       1.1  christos               laststart = b;
   4017       1.1  christos               PATFETCH (c);
   4018       1.1  christos               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
   4019       1.1  christos               break;
   4020       1.1  christos #endif /* emacs */
   4021       1.1  christos 
   4022       1.1  christos 
   4023       1.1  christos             case 'w':
   4024       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4025       1.1  christos 		goto normal_char;
   4026       1.1  christos               laststart = b;
   4027       1.1  christos               BUF_PUSH (wordchar);
   4028       1.1  christos               break;
   4029       1.1  christos 
   4030       1.1  christos 
   4031       1.1  christos             case 'W':
   4032       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4033       1.1  christos 		goto normal_char;
   4034       1.1  christos               laststart = b;
   4035       1.1  christos               BUF_PUSH (notwordchar);
   4036       1.1  christos               break;
   4037       1.1  christos 
   4038       1.1  christos 
   4039       1.1  christos             case '<':
   4040       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4041       1.1  christos 		goto normal_char;
   4042       1.1  christos               BUF_PUSH (wordbeg);
   4043       1.1  christos               break;
   4044       1.1  christos 
   4045       1.1  christos             case '>':
   4046       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4047       1.1  christos 		goto normal_char;
   4048       1.1  christos               BUF_PUSH (wordend);
   4049       1.1  christos               break;
   4050       1.1  christos 
   4051       1.1  christos             case 'b':
   4052       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4053       1.1  christos 		goto normal_char;
   4054       1.1  christos               BUF_PUSH (wordbound);
   4055       1.1  christos               break;
   4056       1.1  christos 
   4057       1.1  christos             case 'B':
   4058       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4059       1.1  christos 		goto normal_char;
   4060       1.1  christos               BUF_PUSH (notwordbound);
   4061       1.1  christos               break;
   4062       1.1  christos 
   4063       1.1  christos             case '`':
   4064       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4065       1.1  christos 		goto normal_char;
   4066       1.1  christos               BUF_PUSH (begbuf);
   4067       1.1  christos               break;
   4068       1.1  christos 
   4069       1.1  christos             case '\'':
   4070       1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4071       1.1  christos 		goto normal_char;
   4072       1.1  christos               BUF_PUSH (endbuf);
   4073       1.1  christos               break;
   4074       1.1  christos 
   4075       1.1  christos             case '1': case '2': case '3': case '4': case '5':
   4076       1.1  christos             case '6': case '7': case '8': case '9':
   4077       1.1  christos               if (syntax & RE_NO_BK_REFS)
   4078       1.1  christos                 goto normal_char;
   4079       1.1  christos 
   4080       1.1  christos               c1 = c - '0';
   4081       1.1  christos 
   4082       1.1  christos               if (c1 > regnum)
   4083       1.1  christos                 FREE_STACK_RETURN (REG_ESUBREG);
   4084       1.1  christos 
   4085       1.1  christos               /* Can't back reference to a subexpression if inside of it.  */
   4086       1.1  christos               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
   4087       1.1  christos                 goto normal_char;
   4088       1.1  christos 
   4089       1.1  christos               laststart = b;
   4090       1.1  christos               BUF_PUSH_2 (duplicate, c1);
   4091       1.1  christos               break;
   4092       1.1  christos 
   4093       1.1  christos 
   4094       1.1  christos             case '+':
   4095       1.1  christos             case '?':
   4096       1.1  christos               if (syntax & RE_BK_PLUS_QM)
   4097       1.1  christos                 goto handle_plus;
   4098       1.1  christos               else
   4099       1.1  christos                 goto normal_backslash;
   4100       1.1  christos 
   4101       1.1  christos             default:
   4102       1.1  christos             normal_backslash:
   4103       1.1  christos               /* You might think it would be useful for \ to mean
   4104       1.1  christos                  not to translate; but if we don't translate it
   4105       1.1  christos                  it will never match anything.  */
   4106       1.1  christos               c = TRANSLATE (c);
   4107       1.1  christos               goto normal_char;
   4108       1.1  christos             }
   4109       1.1  christos           break;
   4110       1.1  christos 
   4111       1.1  christos 
   4112       1.1  christos 	default:
   4113       1.1  christos         /* Expects the character in `c'.  */
   4114       1.1  christos 	normal_char:
   4115       1.1  christos 	      /* If no exactn currently being built.  */
   4116       1.1  christos           if (!pending_exact
   4117       1.1  christos #ifdef WCHAR
   4118       1.1  christos 	      /* If last exactn handle binary(or character) and
   4119       1.1  christos 		 new exactn handle character(or binary).  */
   4120       1.1  christos 	      || is_exactn_bin != is_binary[p - 1 - pattern]
   4121       1.1  christos #endif /* WCHAR */
   4122       1.1  christos 
   4123       1.1  christos               /* If last exactn not at current position.  */
   4124       1.1  christos               || pending_exact + *pending_exact + 1 != b
   4125       1.1  christos 
   4126       1.1  christos               /* We have only one byte following the exactn for the count.  */
   4127       1.1  christos 	      || *pending_exact == (1 << BYTEWIDTH) - 1
   4128       1.1  christos 
   4129       1.1  christos               /* If followed by a repetition operator.  */
   4130       1.1  christos               || *p == '*' || *p == '^'
   4131       1.1  christos 	      || ((syntax & RE_BK_PLUS_QM)
   4132       1.1  christos 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
   4133       1.1  christos 		  : (*p == '+' || *p == '?'))
   4134       1.1  christos 	      || ((syntax & RE_INTERVALS)
   4135       1.1  christos                   && ((syntax & RE_NO_BK_BRACES)
   4136       1.1  christos 		      ? *p == '{'
   4137       1.1  christos                       : (p[0] == '\\' && p[1] == '{'))))
   4138       1.1  christos 	    {
   4139       1.1  christos 	      /* Start building a new exactn.  */
   4140       1.1  christos 
   4141       1.1  christos               laststart = b;
   4142       1.1  christos 
   4143       1.1  christos #ifdef WCHAR
   4144       1.1  christos 	      /* Is this exactn binary data or character? */
   4145       1.1  christos 	      is_exactn_bin = is_binary[p - 1 - pattern];
   4146       1.1  christos 	      if (is_exactn_bin)
   4147       1.1  christos 		  BUF_PUSH_2 (exactn_bin, 0);
   4148       1.1  christos 	      else
   4149       1.1  christos 		  BUF_PUSH_2 (exactn, 0);
   4150       1.1  christos #else
   4151       1.1  christos 	      BUF_PUSH_2 (exactn, 0);
   4152       1.1  christos #endif /* WCHAR */
   4153       1.1  christos 	      pending_exact = b - 1;
   4154       1.1  christos             }
   4155       1.1  christos 
   4156       1.1  christos 	  BUF_PUSH (c);
   4157       1.1  christos           (*pending_exact)++;
   4158       1.1  christos 	  break;
   4159       1.1  christos         } /* switch (c) */
   4160       1.1  christos     } /* while p != pend */
   4161       1.1  christos 
   4162       1.1  christos 
   4163       1.1  christos   /* Through the pattern now.  */
   4164       1.1  christos 
   4165       1.1  christos   if (fixup_alt_jump)
   4166       1.1  christos     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   4167       1.1  christos 
   4168       1.1  christos   if (!COMPILE_STACK_EMPTY)
   4169       1.1  christos     FREE_STACK_RETURN (REG_EPAREN);
   4170       1.1  christos 
   4171       1.1  christos   /* If we don't want backtracking, force success
   4172       1.1  christos      the first time we reach the end of the compiled pattern.  */
   4173       1.1  christos   if (syntax & RE_NO_POSIX_BACKTRACKING)
   4174       1.1  christos     BUF_PUSH (succeed);
   4175       1.1  christos 
   4176       1.1  christos #ifdef WCHAR
   4177       1.1  christos   free (pattern);
   4178       1.1  christos   free (mbs_offset);
   4179       1.1  christos   free (is_binary);
   4180       1.1  christos #endif
   4181       1.1  christos   free (compile_stack.stack);
   4182       1.1  christos 
   4183       1.1  christos   /* We have succeeded; set the length of the buffer.  */
   4184       1.1  christos #ifdef WCHAR
   4185       1.1  christos   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
   4186       1.1  christos #else
   4187       1.1  christos   bufp->used = b - bufp->buffer;
   4188       1.1  christos #endif
   4189       1.1  christos 
   4190       1.1  christos #ifdef DEBUG
   4191       1.1  christos   if (debug)
   4192       1.1  christos     {
   4193       1.1  christos       DEBUG_PRINT1 ("\nCompiled pattern: \n");
   4194       1.1  christos       PREFIX(print_compiled_pattern) (bufp);
   4195       1.1  christos     }
   4196       1.1  christos #endif /* DEBUG */
   4197       1.1  christos 
   4198       1.1  christos #ifndef MATCH_MAY_ALLOCATE
   4199       1.1  christos   /* Initialize the failure stack to the largest possible stack.  This
   4200       1.1  christos      isn't necessary unless we're trying to avoid calling alloca in
   4201       1.1  christos      the search and match routines.  */
   4202       1.1  christos   {
   4203       1.1  christos     int num_regs = bufp->re_nsub + 1;
   4204       1.1  christos 
   4205       1.1  christos     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
   4206       1.1  christos        is strictly greater than re_max_failures, the largest possible stack
   4207       1.1  christos        is 2 * re_max_failures failure points.  */
   4208       1.1  christos     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
   4209       1.1  christos       {
   4210       1.1  christos 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
   4211       1.1  christos 
   4212       1.1  christos # ifdef emacs
   4213       1.1  christos 	if (! fail_stack.stack)
   4214       1.1  christos 	  fail_stack.stack
   4215       1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
   4216       1.1  christos 				    * sizeof (PREFIX(fail_stack_elt_t)));
   4217       1.1  christos 	else
   4218       1.1  christos 	  fail_stack.stack
   4219       1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
   4220       1.1  christos 				     (fail_stack.size
   4221       1.1  christos 				      * sizeof (PREFIX(fail_stack_elt_t))));
   4222       1.1  christos # else /* not emacs */
   4223       1.1  christos 	if (! fail_stack.stack)
   4224       1.1  christos 	  fail_stack.stack
   4225       1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
   4226       1.1  christos 				   * sizeof (PREFIX(fail_stack_elt_t)));
   4227       1.1  christos 	else
   4228       1.1  christos 	  fail_stack.stack
   4229       1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
   4230       1.1  christos 					    (fail_stack.size
   4231       1.1  christos 				     * sizeof (PREFIX(fail_stack_elt_t))));
   4232       1.1  christos # endif /* not emacs */
   4233       1.1  christos       }
   4234       1.1  christos 
   4235       1.1  christos    PREFIX(regex_grow_registers) (num_regs);
   4236       1.1  christos   }
   4237       1.1  christos #endif /* not MATCH_MAY_ALLOCATE */
   4238       1.1  christos 
   4239       1.1  christos   return REG_NOERROR;
   4240       1.1  christos } /* regex_compile */
   4241       1.1  christos 
   4242       1.1  christos /* Subroutines for `regex_compile'.  */
   4243       1.1  christos 
   4244       1.1  christos /* Store OP at LOC followed by two-byte integer parameter ARG.  */
   4245       1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4246       1.1  christos 
   4247       1.1  christos static void
   4248       1.1  christos PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
   4249       1.1  christos {
   4250       1.1  christos   *loc = (UCHAR_T) op;
   4251       1.1  christos   STORE_NUMBER (loc + 1, arg);
   4252       1.1  christos }
   4253       1.1  christos 
   4254       1.1  christos 
   4255       1.1  christos /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4256       1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4257       1.1  christos 
   4258       1.1  christos static void
   4259       1.1  christos PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
   4260       1.1  christos {
   4261       1.1  christos   *loc = (UCHAR_T) op;
   4262       1.1  christos   STORE_NUMBER (loc + 1, arg1);
   4263       1.1  christos   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
   4264       1.1  christos }
   4265       1.1  christos 
   4266       1.1  christos 
   4267       1.1  christos /* Copy the bytes from LOC to END to open up three bytes of space at LOC
   4268       1.1  christos    for OP followed by two-byte integer parameter ARG.  */
   4269       1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4270       1.1  christos 
   4271       1.1  christos static void
   4272       1.1  christos PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
   4273       1.1  christos {
   4274       1.1  christos   register UCHAR_T *pfrom = end;
   4275       1.1  christos   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
   4276       1.1  christos 
   4277       1.1  christos   while (pfrom != loc)
   4278       1.1  christos     *--pto = *--pfrom;
   4279       1.1  christos 
   4280       1.1  christos   PREFIX(store_op1) (op, loc, arg);
   4281       1.1  christos }
   4282       1.1  christos 
   4283       1.1  christos 
   4284       1.1  christos /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4285       1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4286       1.1  christos 
   4287       1.1  christos static void
   4288       1.1  christos PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
   4289       1.1  christos                     int arg2, UCHAR_T *end)
   4290       1.1  christos {
   4291       1.1  christos   register UCHAR_T *pfrom = end;
   4292       1.1  christos   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
   4293       1.1  christos 
   4294       1.1  christos   while (pfrom != loc)
   4295       1.1  christos     *--pto = *--pfrom;
   4296       1.1  christos 
   4297       1.1  christos   PREFIX(store_op2) (op, loc, arg1, arg2);
   4298       1.1  christos }
   4299       1.1  christos 
   4300       1.1  christos 
   4301       1.1  christos /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
   4302       1.1  christos    after an alternative or a begin-subexpression.  We assume there is at
   4303       1.1  christos    least one character before the ^.  */
   4304       1.1  christos 
   4305       1.1  christos static boolean
   4306       1.1  christos PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
   4307       1.1  christos                           reg_syntax_t syntax)
   4308       1.1  christos {
   4309       1.1  christos   const CHAR_T *prev = p - 2;
   4310       1.1  christos   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
   4311       1.1  christos 
   4312       1.1  christos   return
   4313       1.1  christos        /* After a subexpression?  */
   4314       1.1  christos        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
   4315       1.1  christos        /* After an alternative?  */
   4316       1.1  christos     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
   4317       1.1  christos }
   4318       1.1  christos 
   4319       1.1  christos 
   4320       1.1  christos /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
   4321       1.1  christos    at least one character after the $, i.e., `P < PEND'.  */
   4322       1.1  christos 
   4323       1.1  christos static boolean
   4324       1.1  christos PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
   4325       1.1  christos                           reg_syntax_t syntax)
   4326       1.1  christos {
   4327       1.1  christos   const CHAR_T *next = p;
   4328       1.1  christos   boolean next_backslash = *next == '\\';
   4329       1.1  christos   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
   4330       1.1  christos 
   4331       1.1  christos   return
   4332       1.1  christos        /* Before a subexpression?  */
   4333       1.1  christos        (syntax & RE_NO_BK_PARENS ? *next == ')'
   4334       1.1  christos         : next_backslash && next_next && *next_next == ')')
   4335       1.1  christos        /* Before an alternative?  */
   4336       1.1  christos     || (syntax & RE_NO_BK_VBAR ? *next == '|'
   4337       1.1  christos         : next_backslash && next_next && *next_next == '|');
   4338       1.1  christos }
   4339       1.1  christos 
   4340       1.1  christos #else /* not INSIDE_RECURSION */
   4341       1.1  christos 
   4342       1.1  christos /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
   4343       1.1  christos    false if it's not.  */
   4344       1.1  christos 
   4345       1.1  christos static boolean
   4346       1.1  christos group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
   4347       1.1  christos {
   4348       1.1  christos   int this_element;
   4349       1.1  christos 
   4350       1.1  christos   for (this_element = compile_stack.avail - 1;
   4351       1.1  christos        this_element >= 0;
   4352       1.1  christos        this_element--)
   4353       1.1  christos     if (compile_stack.stack[this_element].regnum == regnum)
   4354       1.1  christos       return true;
   4355       1.1  christos 
   4356       1.1  christos   return false;
   4357       1.1  christos }
   4358       1.1  christos #endif /* not INSIDE_RECURSION */
   4359       1.1  christos 
   4360       1.1  christos #ifdef INSIDE_RECURSION
   4361       1.1  christos 
   4362       1.1  christos #ifdef WCHAR
   4363       1.1  christos /* This insert space, which size is "num", into the pattern at "loc".
   4364       1.1  christos    "end" must point the end of the allocated buffer.  */
   4365       1.1  christos static void
   4366       1.1  christos insert_space (int num, CHAR_T *loc, CHAR_T *end)
   4367       1.1  christos {
   4368       1.1  christos   register CHAR_T *pto = end;
   4369       1.1  christos   register CHAR_T *pfrom = end - num;
   4370       1.1  christos 
   4371       1.1  christos   while (pfrom >= loc)
   4372       1.1  christos     *pto-- = *pfrom--;
   4373       1.1  christos }
   4374       1.1  christos #endif /* WCHAR */
   4375       1.1  christos 
   4376       1.1  christos #ifdef WCHAR
   4377       1.1  christos static reg_errcode_t
   4378       1.1  christos wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
   4379       1.1  christos                    const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
   4380       1.1  christos                    reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
   4381       1.1  christos {
   4382       1.1  christos   const CHAR_T *p = *p_ptr;
   4383       1.1  christos   CHAR_T range_start, range_end;
   4384       1.1  christos   reg_errcode_t ret;
   4385       1.1  christos # ifdef _LIBC
   4386       1.1  christos   uint32_t nrules;
   4387       1.1  christos   uint32_t start_val, end_val;
   4388       1.1  christos # endif
   4389       1.1  christos   if (p == pend)
   4390       1.1  christos     return REG_ERANGE;
   4391       1.1  christos 
   4392       1.1  christos # ifdef _LIBC
   4393       1.1  christos   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   4394       1.1  christos   if (nrules != 0)
   4395       1.1  christos     {
   4396       1.1  christos       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
   4397       1.1  christos 						       _NL_COLLATE_COLLSEQWC);
   4398       1.1  christos       const unsigned char *extra = (const unsigned char *)
   4399       1.1  christos 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   4400       1.1  christos 
   4401       1.1  christos       if (range_start_char < -1)
   4402       1.1  christos 	{
   4403       1.1  christos 	  /* range_start is a collating symbol.  */
   4404       1.1  christos 	  int32_t *wextra;
   4405       1.1  christos 	  /* Retreive the index and get collation sequence value.  */
   4406       1.1  christos 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
   4407       1.1  christos 	  start_val = wextra[1 + *wextra];
   4408       1.1  christos 	}
   4409       1.1  christos       else
   4410       1.1  christos 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
   4411       1.1  christos 
   4412       1.1  christos       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
   4413       1.1  christos 
   4414       1.1  christos       /* Report an error if the range is empty and the syntax prohibits
   4415       1.1  christos 	 this.  */
   4416       1.1  christos       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4417       1.1  christos 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
   4418       1.1  christos 
   4419       1.1  christos       /* Insert space to the end of the char_ranges.  */
   4420       1.1  christos       insert_space(2, b - char_set[5] - 2, b - 1);
   4421       1.1  christos       *(b - char_set[5] - 2) = (wchar_t)start_val;
   4422       1.1  christos       *(b - char_set[5] - 1) = (wchar_t)end_val;
   4423       1.1  christos       char_set[4]++; /* ranges_index */
   4424       1.1  christos     }
   4425       1.1  christos   else
   4426       1.1  christos # endif
   4427       1.1  christos     {
   4428       1.1  christos       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
   4429       1.1  christos 	range_start_char;
   4430       1.1  christos       range_end = TRANSLATE (p[0]);
   4431       1.1  christos       /* Report an error if the range is empty and the syntax prohibits
   4432       1.1  christos 	 this.  */
   4433       1.1  christos       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4434       1.1  christos 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
   4435       1.1  christos 
   4436       1.1  christos       /* Insert space to the end of the char_ranges.  */
   4437       1.1  christos       insert_space(2, b - char_set[5] - 2, b - 1);
   4438       1.1  christos       *(b - char_set[5] - 2) = range_start;
   4439       1.1  christos       *(b - char_set[5] - 1) = range_end;
   4440       1.1  christos       char_set[4]++; /* ranges_index */
   4441       1.1  christos     }
   4442       1.1  christos   /* Have to increment the pointer into the pattern string, so the
   4443       1.1  christos      caller isn't still at the ending character.  */
   4444       1.1  christos   (*p_ptr)++;
   4445       1.1  christos 
   4446       1.1  christos   return ret;
   4447       1.1  christos }
   4448       1.1  christos #else /* BYTE */
   4449       1.1  christos /* Read the ending character of a range (in a bracket expression) from the
   4450       1.1  christos    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
   4451       1.1  christos    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
   4452       1.1  christos    Then we set the translation of all bits between the starting and
   4453       1.1  christos    ending characters (inclusive) in the compiled pattern B.
   4454       1.1  christos 
   4455       1.1  christos    Return an error code.
   4456       1.1  christos 
   4457       1.1  christos    We use these short variable names so we can use the same macros as
   4458       1.1  christos    `regex_compile' itself.  */
   4459       1.1  christos 
   4460       1.1  christos static reg_errcode_t
   4461       1.1  christos byte_compile_range (unsigned int range_start_char, const char **p_ptr,
   4462       1.1  christos                     const char *pend, RE_TRANSLATE_TYPE translate,
   4463       1.1  christos                     reg_syntax_t syntax, unsigned char *b)
   4464       1.1  christos {
   4465       1.1  christos   unsigned this_char;
   4466       1.1  christos   const char *p = *p_ptr;
   4467       1.1  christos   reg_errcode_t ret;
   4468       1.1  christos # if _LIBC
   4469       1.1  christos   const unsigned char *collseq;
   4470       1.1  christos   unsigned int start_colseq;
   4471       1.1  christos   unsigned int end_colseq;
   4472       1.1  christos # else
   4473       1.1  christos   unsigned end_char;
   4474       1.1  christos # endif
   4475       1.1  christos 
   4476       1.1  christos   if (p == pend)
   4477       1.1  christos     return REG_ERANGE;
   4478       1.1  christos 
   4479       1.1  christos   /* Have to increment the pointer into the pattern string, so the
   4480       1.1  christos      caller isn't still at the ending character.  */
   4481       1.1  christos   (*p_ptr)++;
   4482       1.1  christos 
   4483       1.1  christos   /* Report an error if the range is empty and the syntax prohibits this.  */
   4484       1.1  christos   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
   4485       1.1  christos 
   4486       1.1  christos # if _LIBC
   4487       1.1  christos   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
   4488       1.1  christos 						 _NL_COLLATE_COLLSEQMB);
   4489       1.1  christos 
   4490       1.1  christos   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
   4491       1.1  christos   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
   4492       1.1  christos   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
   4493       1.1  christos     {
   4494       1.1  christos       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
   4495       1.1  christos 
   4496       1.1  christos       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
   4497       1.1  christos 	{
   4498       1.1  christos 	  SET_LIST_BIT (TRANSLATE (this_char));
   4499       1.1  christos 	  ret = REG_NOERROR;
   4500       1.1  christos 	}
   4501       1.1  christos     }
   4502       1.1  christos # else
   4503       1.1  christos   /* Here we see why `this_char' has to be larger than an `unsigned
   4504       1.1  christos      char' -- we would otherwise go into an infinite loop, since all
   4505       1.1  christos      characters <= 0xff.  */
   4506       1.1  christos   range_start_char = TRANSLATE (range_start_char);
   4507       1.1  christos   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
   4508       1.1  christos      and some compilers cast it to int implicitly, so following for_loop
   4509       1.1  christos      may fall to (almost) infinite loop.
   4510       1.1  christos      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
   4511       1.1  christos      To avoid this, we cast p[0] to unsigned int and truncate it.  */
   4512       1.1  christos   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
   4513       1.1  christos 
   4514       1.1  christos   for (this_char = range_start_char; this_char <= end_char; ++this_char)
   4515       1.1  christos     {
   4516       1.1  christos       SET_LIST_BIT (TRANSLATE (this_char));
   4517       1.1  christos       ret = REG_NOERROR;
   4518       1.1  christos     }
   4519       1.1  christos # endif
   4520       1.1  christos 
   4521       1.1  christos   return ret;
   4522       1.1  christos }
   4523       1.1  christos #endif /* WCHAR */
   4524       1.1  christos 
   4525       1.1  christos /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   4527       1.1  christos    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   4528       1.1  christos    characters can start a string that matches the pattern.  This fastmap
   4529       1.1  christos    is used by re_search to skip quickly over impossible starting points.
   4530       1.1  christos 
   4531       1.1  christos    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   4532       1.1  christos    area as BUFP->fastmap.
   4533       1.1  christos 
   4534       1.1  christos    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   4535       1.1  christos    the pattern buffer.
   4536       1.1  christos 
   4537       1.1  christos    Returns 0 if we succeed, -2 if an internal error.   */
   4538       1.1  christos 
   4539       1.1  christos #ifdef WCHAR
   4540       1.1  christos /* local function for re_compile_fastmap.
   4541       1.1  christos    truncate wchar_t character to char.  */
   4542       1.1  christos static unsigned char truncate_wchar (CHAR_T c);
   4543       1.1  christos 
   4544       1.1  christos static unsigned char
   4545       1.1  christos truncate_wchar (CHAR_T c)
   4546       1.1  christos {
   4547       1.1  christos   unsigned char buf[MB_CUR_MAX];
   4548       1.1  christos   mbstate_t state;
   4549       1.1  christos   int retval;
   4550       1.1  christos   memset (&state, '\0', sizeof (state));
   4551       1.1  christos # ifdef _LIBC
   4552       1.1  christos   retval = __wcrtomb (buf, c, &state);
   4553       1.1  christos # else
   4554       1.1  christos   retval = wcrtomb (buf, c, &state);
   4555       1.1  christos # endif
   4556       1.1  christos   return retval > 0 ? buf[0] : (unsigned char) c;
   4557       1.1  christos }
   4558       1.1  christos #endif /* WCHAR */
   4559       1.1  christos 
   4560       1.1  christos static int
   4561       1.1  christos PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
   4562       1.1  christos {
   4563       1.1  christos   int j, k;
   4564       1.1  christos #ifdef MATCH_MAY_ALLOCATE
   4565       1.1  christos   PREFIX(fail_stack_type) fail_stack;
   4566       1.1  christos #endif
   4567       1.1  christos #ifndef REGEX_MALLOC
   4568       1.1  christos   char *destination;
   4569       1.1  christos #endif
   4570       1.1  christos 
   4571       1.1  christos   register char *fastmap = bufp->fastmap;
   4572       1.1  christos 
   4573       1.1  christos #ifdef WCHAR
   4574       1.1  christos   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   4575       1.1  christos      pattern to (char*) in regex_compile.  */
   4576       1.1  christos   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
   4577       1.1  christos   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
   4578       1.1  christos #else /* BYTE */
   4579       1.1  christos   UCHAR_T *pattern = bufp->buffer;
   4580       1.1  christos   register UCHAR_T *pend = pattern + bufp->used;
   4581       1.1  christos #endif /* WCHAR */
   4582       1.1  christos   UCHAR_T *p = pattern;
   4583       1.1  christos 
   4584       1.1  christos #ifdef REL_ALLOC
   4585       1.1  christos   /* This holds the pointer to the failure stack, when
   4586       1.1  christos      it is allocated relocatably.  */
   4587       1.1  christos   fail_stack_elt_t *failure_stack_ptr;
   4588       1.1  christos #endif
   4589       1.1  christos 
   4590       1.1  christos   /* Assume that each path through the pattern can be null until
   4591       1.1  christos      proven otherwise.  We set this false at the bottom of switch
   4592       1.1  christos      statement, to which we get only if a particular path doesn't
   4593       1.1  christos      match the empty string.  */
   4594       1.1  christos   boolean path_can_be_null = true;
   4595       1.1  christos 
   4596       1.1  christos   /* We aren't doing a `succeed_n' to begin with.  */
   4597       1.1  christos   boolean succeed_n_p = false;
   4598       1.1  christos 
   4599       1.1  christos   assert (fastmap != NULL && p != NULL);
   4600       1.1  christos 
   4601       1.1  christos   INIT_FAIL_STACK ();
   4602       1.1  christos   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
   4603       1.1  christos   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
   4604       1.1  christos   bufp->can_be_null = 0;
   4605       1.1  christos 
   4606       1.1  christos   while (1)
   4607       1.1  christos     {
   4608       1.1  christos       if (p == pend || *p == (UCHAR_T) succeed)
   4609       1.1  christos 	{
   4610       1.1  christos 	  /* We have reached the (effective) end of pattern.  */
   4611       1.1  christos 	  if (!FAIL_STACK_EMPTY ())
   4612       1.1  christos 	    {
   4613       1.1  christos 	      bufp->can_be_null |= path_can_be_null;
   4614       1.1  christos 
   4615       1.1  christos 	      /* Reset for next path.  */
   4616       1.1  christos 	      path_can_be_null = true;
   4617       1.1  christos 
   4618       1.1  christos 	      p = fail_stack.stack[--fail_stack.avail].pointer;
   4619       1.1  christos 
   4620       1.1  christos 	      continue;
   4621       1.1  christos 	    }
   4622       1.1  christos 	  else
   4623       1.1  christos 	    break;
   4624       1.1  christos 	}
   4625       1.1  christos 
   4626       1.1  christos       /* We should never be about to go beyond the end of the pattern.  */
   4627       1.1  christos       assert (p < pend);
   4628       1.1  christos 
   4629       1.1  christos       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   4630       1.1  christos 	{
   4631       1.1  christos 
   4632       1.1  christos         /* I guess the idea here is to simply not bother with a fastmap
   4633       1.1  christos            if a backreference is used, since it's too hard to figure out
   4634       1.1  christos            the fastmap for the corresponding group.  Setting
   4635       1.1  christos            `can_be_null' stops `re_search_2' from using the fastmap, so
   4636       1.1  christos            that is all we do.  */
   4637       1.1  christos 	case duplicate:
   4638       1.1  christos 	  bufp->can_be_null = 1;
   4639       1.1  christos           goto done;
   4640       1.1  christos 
   4641       1.1  christos 
   4642       1.1  christos       /* Following are the cases which match a character.  These end
   4643       1.1  christos          with `break'.  */
   4644       1.1  christos 
   4645       1.1  christos #ifdef WCHAR
   4646       1.1  christos 	case exactn:
   4647       1.1  christos           fastmap[truncate_wchar(p[1])] = 1;
   4648       1.1  christos 	  break;
   4649       1.1  christos #else /* BYTE */
   4650       1.1  christos 	case exactn:
   4651       1.1  christos           fastmap[p[1]] = 1;
   4652       1.1  christos 	  break;
   4653       1.1  christos #endif /* WCHAR */
   4654       1.1  christos #ifdef MBS_SUPPORT
   4655       1.1  christos 	case exactn_bin:
   4656       1.1  christos 	  fastmap[p[1]] = 1;
   4657       1.1  christos 	  break;
   4658       1.1  christos #endif
   4659       1.1  christos 
   4660       1.1  christos #ifdef WCHAR
   4661       1.1  christos         /* It is hard to distinguish fastmap from (multi byte) characters
   4662       1.1  christos            which depends on current locale.  */
   4663       1.1  christos         case charset:
   4664       1.1  christos 	case charset_not:
   4665       1.1  christos 	case wordchar:
   4666       1.1  christos 	case notwordchar:
   4667       1.1  christos           bufp->can_be_null = 1;
   4668       1.1  christos           goto done;
   4669       1.1  christos #else /* BYTE */
   4670       1.1  christos         case charset:
   4671       1.1  christos           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4672       1.1  christos 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
   4673       1.1  christos               fastmap[j] = 1;
   4674       1.1  christos 	  break;
   4675       1.1  christos 
   4676       1.1  christos 
   4677       1.1  christos 	case charset_not:
   4678       1.1  christos 	  /* Chars beyond end of map must be allowed.  */
   4679       1.1  christos 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
   4680       1.1  christos             fastmap[j] = 1;
   4681       1.1  christos 
   4682       1.1  christos 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4683       1.1  christos 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
   4684       1.1  christos               fastmap[j] = 1;
   4685       1.1  christos           break;
   4686       1.1  christos 
   4687       1.1  christos 
   4688       1.1  christos 	case wordchar:
   4689       1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4690       1.1  christos 	    if (SYNTAX (j) == Sword)
   4691       1.1  christos 	      fastmap[j] = 1;
   4692       1.1  christos 	  break;
   4693       1.1  christos 
   4694       1.1  christos 
   4695       1.1  christos 	case notwordchar:
   4696       1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4697       1.1  christos 	    if (SYNTAX (j) != Sword)
   4698       1.1  christos 	      fastmap[j] = 1;
   4699       1.1  christos 	  break;
   4700       1.1  christos #endif /* WCHAR */
   4701       1.1  christos 
   4702       1.1  christos         case anychar:
   4703       1.1  christos 	  {
   4704       1.1  christos 	    int fastmap_newline = fastmap['\n'];
   4705       1.1  christos 
   4706       1.1  christos 	    /* `.' matches anything ...  */
   4707       1.1  christos 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
   4708       1.1  christos 	      fastmap[j] = 1;
   4709       1.1  christos 
   4710       1.1  christos 	    /* ... except perhaps newline.  */
   4711       1.1  christos 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
   4712       1.1  christos 	      fastmap['\n'] = fastmap_newline;
   4713       1.1  christos 
   4714       1.1  christos 	    /* Return if we have already set `can_be_null'; if we have,
   4715       1.1  christos 	       then the fastmap is irrelevant.  Something's wrong here.  */
   4716       1.1  christos 	    else if (bufp->can_be_null)
   4717       1.1  christos 	      goto done;
   4718       1.1  christos 
   4719       1.1  christos 	    /* Otherwise, have to check alternative paths.  */
   4720       1.1  christos 	    break;
   4721       1.1  christos 	  }
   4722       1.1  christos 
   4723       1.1  christos #ifdef emacs
   4724       1.1  christos         case syntaxspec:
   4725       1.1  christos 	  k = *p++;
   4726       1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4727       1.1  christos 	    if (SYNTAX (j) == (enum syntaxcode) k)
   4728       1.1  christos 	      fastmap[j] = 1;
   4729       1.1  christos 	  break;
   4730       1.1  christos 
   4731       1.1  christos 
   4732       1.1  christos 	case notsyntaxspec:
   4733       1.1  christos 	  k = *p++;
   4734       1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4735       1.1  christos 	    if (SYNTAX (j) != (enum syntaxcode) k)
   4736       1.1  christos 	      fastmap[j] = 1;
   4737       1.1  christos 	  break;
   4738       1.1  christos 
   4739       1.1  christos 
   4740       1.1  christos       /* All cases after this match the empty string.  These end with
   4741       1.1  christos          `continue'.  */
   4742       1.1  christos 
   4743       1.1  christos 
   4744       1.1  christos 	case before_dot:
   4745       1.1  christos 	case at_dot:
   4746       1.1  christos 	case after_dot:
   4747       1.1  christos           continue;
   4748       1.1  christos #endif /* emacs */
   4749       1.1  christos 
   4750       1.1  christos 
   4751       1.1  christos         case no_op:
   4752       1.1  christos         case begline:
   4753       1.1  christos         case endline:
   4754       1.1  christos 	case begbuf:
   4755       1.1  christos 	case endbuf:
   4756       1.1  christos 	case wordbound:
   4757       1.1  christos 	case notwordbound:
   4758       1.1  christos 	case wordbeg:
   4759       1.1  christos 	case wordend:
   4760       1.1  christos         case push_dummy_failure:
   4761       1.1  christos           continue;
   4762       1.1  christos 
   4763       1.1  christos 
   4764       1.1  christos 	case jump_n:
   4765       1.1  christos         case pop_failure_jump:
   4766       1.1  christos 	case maybe_pop_jump:
   4767       1.1  christos 	case jump:
   4768       1.1  christos         case jump_past_alt:
   4769       1.1  christos 	case dummy_failure_jump:
   4770       1.1  christos           EXTRACT_NUMBER_AND_INCR (j, p);
   4771       1.1  christos 	  p += j;
   4772       1.1  christos 	  if (j > 0)
   4773       1.1  christos 	    continue;
   4774       1.1  christos 
   4775       1.1  christos           /* Jump backward implies we just went through the body of a
   4776       1.1  christos              loop and matched nothing.  Opcode jumped to should be
   4777       1.1  christos              `on_failure_jump' or `succeed_n'.  Just treat it like an
   4778       1.1  christos              ordinary jump.  For a * loop, it has pushed its failure
   4779       1.1  christos              point already; if so, discard that as redundant.  */
   4780       1.1  christos           if ((re_opcode_t) *p != on_failure_jump
   4781       1.1  christos 	      && (re_opcode_t) *p != succeed_n)
   4782       1.1  christos 	    continue;
   4783       1.1  christos 
   4784       1.1  christos           p++;
   4785       1.1  christos           EXTRACT_NUMBER_AND_INCR (j, p);
   4786       1.1  christos           p += j;
   4787       1.1  christos 
   4788       1.1  christos           /* If what's on the stack is where we are now, pop it.  */
   4789       1.1  christos           if (!FAIL_STACK_EMPTY ()
   4790       1.1  christos 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
   4791       1.1  christos             fail_stack.avail--;
   4792       1.1  christos 
   4793       1.1  christos           continue;
   4794       1.1  christos 
   4795       1.1  christos 
   4796       1.1  christos         case on_failure_jump:
   4797       1.1  christos         case on_failure_keep_string_jump:
   4798       1.1  christos 	handle_on_failure_jump:
   4799       1.1  christos           EXTRACT_NUMBER_AND_INCR (j, p);
   4800       1.1  christos 
   4801       1.1  christos           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
   4802       1.1  christos              end of the pattern.  We don't want to push such a point,
   4803       1.1  christos              since when we restore it above, entering the switch will
   4804       1.1  christos              increment `p' past the end of the pattern.  We don't need
   4805       1.1  christos              to push such a point since we obviously won't find any more
   4806       1.1  christos              fastmap entries beyond `pend'.  Such a pattern can match
   4807       1.1  christos              the null string, though.  */
   4808       1.1  christos           if (p + j < pend)
   4809       1.1  christos             {
   4810       1.1  christos               if (!PUSH_PATTERN_OP (p + j, fail_stack))
   4811       1.1  christos 		{
   4812       1.1  christos 		  RESET_FAIL_STACK ();
   4813       1.1  christos 		  return -2;
   4814       1.1  christos 		}
   4815       1.1  christos             }
   4816       1.1  christos           else
   4817       1.1  christos             bufp->can_be_null = 1;
   4818       1.1  christos 
   4819       1.1  christos           if (succeed_n_p)
   4820       1.1  christos             {
   4821       1.1  christos               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
   4822       1.1  christos               succeed_n_p = false;
   4823       1.1  christos 	    }
   4824       1.1  christos 
   4825       1.1  christos           continue;
   4826       1.1  christos 
   4827       1.1  christos 
   4828       1.1  christos 	case succeed_n:
   4829       1.1  christos           /* Get to the number of times to succeed.  */
   4830       1.1  christos           p += OFFSET_ADDRESS_SIZE;
   4831       1.1  christos 
   4832       1.1  christos           /* Increment p past the n for when k != 0.  */
   4833       1.1  christos           EXTRACT_NUMBER_AND_INCR (k, p);
   4834       1.1  christos           if (k == 0)
   4835       1.1  christos 	    {
   4836       1.1  christos               p -= 2 * OFFSET_ADDRESS_SIZE;
   4837       1.1  christos   	      succeed_n_p = true;  /* Spaghetti code alert.  */
   4838       1.1  christos               goto handle_on_failure_jump;
   4839       1.1  christos             }
   4840       1.1  christos           continue;
   4841       1.1  christos 
   4842       1.1  christos 
   4843       1.1  christos 	case set_number_at:
   4844       1.1  christos           p += 2 * OFFSET_ADDRESS_SIZE;
   4845       1.1  christos           continue;
   4846       1.1  christos 
   4847       1.1  christos 
   4848       1.1  christos 	case start_memory:
   4849       1.1  christos         case stop_memory:
   4850       1.1  christos 	  p += 2;
   4851       1.1  christos 	  continue;
   4852       1.1  christos 
   4853       1.1  christos 
   4854       1.1  christos 	default:
   4855       1.1  christos           abort (); /* We have listed all the cases.  */
   4856       1.1  christos         } /* switch *p++ */
   4857       1.1  christos 
   4858       1.1  christos       /* Getting here means we have found the possible starting
   4859       1.1  christos          characters for one path of the pattern -- and that the empty
   4860       1.1  christos          string does not match.  We need not follow this path further.
   4861       1.1  christos          Instead, look at the next alternative (remembered on the
   4862       1.1  christos          stack), or quit if no more.  The test at the top of the loop
   4863       1.1  christos          does these things.  */
   4864       1.1  christos       path_can_be_null = false;
   4865       1.1  christos       p = pend;
   4866       1.1  christos     } /* while p */
   4867       1.1  christos 
   4868       1.1  christos   /* Set `can_be_null' for the last path (also the first path, if the
   4869       1.1  christos      pattern is empty).  */
   4870       1.1  christos   bufp->can_be_null |= path_can_be_null;
   4871       1.1  christos 
   4872       1.1  christos  done:
   4873       1.1  christos   RESET_FAIL_STACK ();
   4874       1.1  christos   return 0;
   4875       1.1  christos }
   4876       1.1  christos 
   4877       1.1  christos #else /* not INSIDE_RECURSION */
   4878       1.1  christos 
/* Build the fastmap for the compiled pattern in BUFP.

   This is only a dispatcher: when multibyte support is compiled in and
   the current locale is not single-byte (MB_CUR_MAX != 1) the
   wide-character implementation is used, otherwise the byte
   implementation.  The selected implementation's return value is
   passed back unchanged.  */

int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  /* Multibyte locale: hand over to the wide-character engine.  */
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif

  return byte_re_compile_fastmap (bufp);
} /* re_compile_fastmap */
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
   4892       1.1  christos 
   4893       1.1  christos 
   4895       1.1  christos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   4896       1.1  christos    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   4897       1.1  christos    this memory for recording register information.  STARTS and ENDS
   4898       1.1  christos    must be allocated using the malloc library routine, and must each
   4899       1.1  christos    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   4900       1.1  christos 
   4901       1.1  christos    If NUM_REGS == 0, then subsequent matches should allocate their own
   4902       1.1  christos    register data.
   4903       1.1  christos 
   4904       1.1  christos    Unless this function is called, the first search or match using
   4905       1.1  christos    PATTERN_BUFFER will allocate its own register data, without
   4906       1.1  christos    freeing the old data.  */
   4907       1.1  christos 
   4908       1.1  christos void
   4909       1.1  christos re_set_registers (struct re_pattern_buffer *bufp,
   4910       1.1  christos                   struct re_registers *regs, unsigned num_regs,
   4911       1.1  christos                   regoff_t *starts, regoff_t *ends)
   4912       1.1  christos {
   4913       1.1  christos   if (num_regs)
   4914       1.1  christos     {
   4915       1.1  christos       bufp->regs_allocated = REGS_REALLOCATE;
   4916       1.1  christos       regs->num_regs = num_regs;
   4917       1.1  christos       regs->start = starts;
   4918       1.1  christos       regs->end = ends;
   4919       1.1  christos     }
   4920       1.1  christos   else
   4921       1.1  christos     {
   4922       1.1  christos       bufp->regs_allocated = REGS_UNALLOCATED;
   4923       1.1  christos       regs->num_regs = 0;
   4924       1.1  christos       regs->start = regs->end = (regoff_t *) 0;
   4925       1.1  christos     }
   4926       1.1  christos }
   4927       1.1  christos #ifdef _LIBC
   4928       1.1  christos weak_alias (__re_set_registers, re_set_registers)
   4929       1.1  christos #endif
   4930       1.1  christos 
   4931       1.1  christos /* Searching routines.  */
   4933       1.1  christos 
/* Search STRING, which is SIZE bytes long, for the pattern compiled in
   BUFP.  This is re_search_2 with no first string and with the stop
   index fixed at SIZE, so the caller cannot say where to stop
   matching.  STARTPOS, RANGE and REGS are passed through unchanged and
   the result is whatever re_search_2 returns.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  /* The whole of STRING is eligible for matching.  */
  const int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                      regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
   4947       1.1  christos 
   4948       1.1  christos 
/* Search the virtual concatenation of STRING1 and STRING2 for a match
   of the compiled pattern in BUFP->buffer.  The first attempt is made
   at index STARTPOS, then at STARTPOS + 1, and so on.

   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.

   RANGE is how far to scan while trying to match.  RANGE = 0 means try
   only at STARTPOS; in general, the last start tried is STARTPOS +
   RANGE.

   In REGS, return the indices of the virtual concatenation of STRING1
   and STRING2 that matched the entire BUFP->buffer and its contained
   subexpressions.

   Do not consider matching one past the index STOP in the virtual
   concatenation of STRING1 and STRING2.

   The return value is the position in the strings at which the match
   was found, -1 if there is no match, or -2 on error (such as failure
   stack overflow).

   This entry point only selects an engine: the wide-character one when
   multibyte support is compiled in and MB_CUR_MAX != 1, the byte one
   otherwise.  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  /* Multibyte locale: hand over to the wide-character engine.  */
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
                            range, regs, stop);
# endif

  return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
                           range, regs, stop);
} /* re_search_2 */
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
   4987       1.1  christos 
   4988       1.1  christos #endif /* not INSIDE_RECURSION */
   4989       1.1  christos 
   4990       1.1  christos #ifdef INSIDE_RECURSION
   4991       1.1  christos 
/* Release VAR and reset it to NULL.  When the matcher may allocate, VAR
   is handed to REGEX_FREE only if it is non-null; otherwise it is
   passed straight to free.

   The body is wrapped in do { ... } while (0) so that the macro expands
   to exactly one statement.  Without the wrapper the `var = NULL'
   assignment escapes an unbraced `if' and runs unconditionally, and
   using the macro as the then-arm of an `if'/`else' does not compile.
   VAR must be a modifiable lvalue and is evaluated more than once.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      REGEX_FREE (var);							\
    var = NULL;								\
  } while (0)
#else
# define FREE_VAR(var)							\
  do {									\
    free (var);								\
    var = NULL;								\
  } while (0)
#endif
   4997       1.1  christos 
#ifdef WCHAR
/* Strings longer than this many bytes get their wide-character
   buffers from TALLOC rather than REGEX_TALLOC in re_search_2, and are
   therefore released with plain free below.  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-character copy WCS and the offset table OFFSETS
   that belong to a string of SIZE bytes, choosing the release method
   from SIZE in the same way the allocation site chose the allocator.  */
# define FREE_WCS_PAIR(size, wcs, offsets)				      \
  do {									      \
    if ((size) > MAX_ALLOCA_SIZE)					      \
      {									      \
	free (wcs);							      \
	free (offsets);							      \
      }									      \
    else								      \
      {									      \
	FREE_VAR (wcs);							      \
	FREE_VAR (offsets);						      \
      }									      \
  } while (0)

/* Release the wide-character buffers of both search strings.  Expects
   size1, size2, wcs_string1, wcs_string2, mbs_offset1 and mbs_offset2
   to be in scope at the point of use.  */
# define FREE_WCS_BUFFERS() \
  do {									      \
    FREE_WCS_PAIR (size1, wcs_string1, mbs_offset1);			      \
    FREE_WCS_PAIR (size2, wcs_string2, mbs_offset2);			      \
  } while (0)

#endif
   5026       1.1  christos 
   5027       1.1  christos 
   5028       1.1  christos static int
   5029       1.1  christos PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
   5030       1.1  christos                      int size1, const char *string2, int size2,
   5031       1.1  christos                      int startpos, int range,
   5032       1.1  christos                      struct re_registers *regs, int stop)
   5033       1.1  christos {
   5034       1.1  christos   int val;
   5035       1.1  christos   register char *fastmap = bufp->fastmap;
   5036       1.1  christos   register RE_TRANSLATE_TYPE translate = bufp->translate;
   5037       1.1  christos   int total_size = size1 + size2;
   5038       1.1  christos   int endpos = startpos + range;
   5039       1.1  christos #ifdef WCHAR
   5040       1.1  christos   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
   5041       1.1  christos   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
   5042       1.1  christos   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
   5043       1.1  christos   int wcs_size1 = 0, wcs_size2 = 0;
   5044       1.1  christos   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
   5045       1.1  christos   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
   5046       1.1  christos   /* They hold whether each wchar_t is binary data or not.  */
   5047       1.1  christos   char *is_binary = NULL;
   5048       1.1  christos #endif /* WCHAR */
   5049       1.1  christos 
   5050       1.1  christos   /* Check for out-of-range STARTPOS.  */
   5051       1.1  christos   if (startpos < 0 || startpos > total_size)
   5052       1.1  christos     return -1;
   5053       1.1  christos 
   5054       1.1  christos   /* Fix up RANGE if it might eventually take us outside
   5055       1.1  christos      the virtual concatenation of STRING1 and STRING2.
   5056       1.1  christos      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
   5057       1.1  christos   if (endpos < 0)
   5058       1.1  christos     range = 0 - startpos;
   5059       1.1  christos   else if (endpos > total_size)
   5060       1.1  christos     range = total_size - startpos;
   5061       1.1  christos 
   5062       1.1  christos   /* If the search isn't to be a backwards one, don't waste time in a
   5063       1.1  christos      search for a pattern that must be anchored.  */
   5064       1.1  christos   if (bufp->used > 0 && range > 0
   5065       1.1  christos       && ((re_opcode_t) bufp->buffer[0] == begbuf
   5066       1.1  christos 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
   5067       1.1  christos 	  || ((re_opcode_t) bufp->buffer[0] == begline
   5068       1.1  christos 	      && !bufp->newline_anchor)))
   5069       1.1  christos     {
   5070       1.1  christos       if (startpos > 0)
   5071       1.1  christos 	return -1;
   5072       1.1  christos       else
   5073       1.1  christos 	range = 1;
   5074       1.1  christos     }
   5075       1.1  christos 
   5076       1.1  christos #ifdef emacs
   5077       1.1  christos   /* In a forward search for something that starts with \=.
   5078       1.1  christos      don't keep searching past point.  */
   5079       1.1  christos   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
   5080       1.1  christos     {
   5081       1.1  christos       range = PT - startpos;
   5082       1.1  christos       if (range <= 0)
   5083       1.1  christos 	return -1;
   5084       1.1  christos     }
   5085       1.1  christos #endif /* emacs */
   5086       1.1  christos 
   5087       1.1  christos   /* Update the fastmap now if not correct already.  */
   5088       1.1  christos   if (fastmap && !bufp->fastmap_accurate)
   5089       1.1  christos     if (re_compile_fastmap (bufp) == -2)
   5090       1.1  christos       return -2;
   5091       1.1  christos 
   5092       1.1  christos #ifdef WCHAR
   5093       1.1  christos   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
   5094       1.1  christos      fill them with converted string.  */
   5095       1.1  christos   if (size1 != 0)
   5096       1.1  christos     {
   5097       1.1  christos       if (size1 > MAX_ALLOCA_SIZE)
   5098       1.1  christos 	{
   5099       1.1  christos 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
   5100       1.1  christos 	  mbs_offset1 = TALLOC (size1 + 1, int);
   5101       1.1  christos 	  is_binary = TALLOC (size1 + 1, char);
   5102       1.1  christos 	}
   5103       1.1  christos       else
   5104       1.1  christos 	{
   5105       1.1  christos 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
   5106       1.1  christos 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
   5107       1.1  christos 	  is_binary = REGEX_TALLOC (size1 + 1, char);
   5108       1.1  christos 	}
   5109       1.1  christos       if (!wcs_string1 || !mbs_offset1 || !is_binary)
   5110       1.1  christos 	{
   5111       1.1  christos 	  if (size1 > MAX_ALLOCA_SIZE)
   5112       1.1  christos 	    {
   5113       1.1  christos 	      free (wcs_string1);
   5114       1.1  christos 	      free (mbs_offset1);
   5115       1.1  christos 	      free (is_binary);
   5116       1.1  christos 	    }
   5117       1.1  christos 	  else
   5118       1.1  christos 	    {
   5119       1.1  christos 	      FREE_VAR (wcs_string1);
   5120       1.1  christos 	      FREE_VAR (mbs_offset1);
   5121       1.1  christos 	      FREE_VAR (is_binary);
   5122       1.1  christos 	    }
   5123       1.1  christos 	  return -2;
   5124       1.1  christos 	}
   5125       1.1  christos       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
   5126       1.1  christos 				     mbs_offset1, is_binary);
   5127       1.1  christos       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
   5128       1.1  christos       if (size1 > MAX_ALLOCA_SIZE)
   5129       1.1  christos 	free (is_binary);
   5130       1.1  christos       else
   5131       1.1  christos 	FREE_VAR (is_binary);
   5132       1.1  christos     }
   5133       1.1  christos   if (size2 != 0)
   5134       1.1  christos     {
   5135       1.1  christos       if (size2 > MAX_ALLOCA_SIZE)
   5136       1.1  christos 	{
   5137       1.1  christos 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
   5138       1.1  christos 	  mbs_offset2 = TALLOC (size2 + 1, int);
   5139       1.1  christos 	  is_binary = TALLOC (size2 + 1, char);
   5140       1.1  christos 	}
   5141       1.1  christos       else
   5142       1.1  christos 	{
   5143       1.1  christos 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
   5144       1.1  christos 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
   5145       1.1  christos 	  is_binary = REGEX_TALLOC (size2 + 1, char);
   5146       1.1  christos 	}
   5147       1.1  christos       if (!wcs_string2 || !mbs_offset2 || !is_binary)
   5148       1.1  christos 	{
   5149       1.1  christos 	  FREE_WCS_BUFFERS ();
   5150       1.1  christos 	  if (size2 > MAX_ALLOCA_SIZE)
   5151       1.1  christos 	    free (is_binary);
   5152       1.1  christos 	  else
   5153       1.1  christos 	    FREE_VAR (is_binary);
   5154       1.1  christos 	  return -2;
   5155       1.1  christos 	}
   5156       1.1  christos       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
   5157       1.1  christos 				     mbs_offset2, is_binary);
   5158       1.1  christos       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
   5159       1.1  christos       if (size2 > MAX_ALLOCA_SIZE)
   5160       1.1  christos 	free (is_binary);
   5161       1.1  christos       else
   5162       1.1  christos 	FREE_VAR (is_binary);
   5163       1.1  christos     }
   5164       1.1  christos #endif /* WCHAR */
   5165       1.1  christos 
   5166       1.1  christos 
   5167       1.1  christos   /* Loop through the string, looking for a place to start matching.  */
   5168       1.1  christos   for (;;)
   5169       1.1  christos     {
   5170       1.1  christos       /* If a fastmap is supplied, skip quickly over characters that
   5171       1.1  christos          cannot be the start of a match.  If the pattern can match the
   5172       1.1  christos          null string, however, we don't need to skip characters; we want
   5173       1.1  christos          the first null string.  */
   5174       1.1  christos       if (fastmap && startpos < total_size && !bufp->can_be_null)
   5175       1.1  christos 	{
   5176       1.1  christos 	  if (range > 0)	/* Searching forwards.  */
   5177       1.1  christos 	    {
   5178       1.1  christos 	      register const char *d;
   5179       1.1  christos 	      register int lim = 0;
   5180       1.1  christos 	      int irange = range;
   5181       1.1  christos 
   5182       1.1  christos               if (startpos < size1 && startpos + range >= size1)
   5183       1.1  christos                 lim = range - (size1 - startpos);
   5184       1.1  christos 
   5185       1.1  christos 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
   5186       1.1  christos 
   5187       1.1  christos               /* Written out as an if-else to avoid testing `translate'
   5188       1.1  christos                  inside the loop.  */
   5189       1.1  christos 	      if (translate)
   5190       1.1  christos                 while (range > lim
   5191       1.1  christos                        && !fastmap[(unsigned char)
   5192       1.1  christos 				   translate[(unsigned char) *d++]])
   5193       1.1  christos                   range--;
   5194       1.1  christos 	      else
   5195       1.1  christos                 while (range > lim && !fastmap[(unsigned char) *d++])
   5196       1.1  christos                   range--;
   5197       1.1  christos 
   5198       1.1  christos 	      startpos += irange - range;
   5199       1.1  christos 	    }
   5200       1.1  christos 	  else				/* Searching backwards.  */
   5201       1.1  christos 	    {
   5202       1.1  christos 	      register CHAR_T c = (size1 == 0 || startpos >= size1
   5203       1.1  christos 				      ? string2[startpos - size1]
   5204       1.1  christos 				      : string1[startpos]);
   5205       1.1  christos 
   5206       1.1  christos 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
   5207       1.1  christos 		goto advance;
   5208       1.1  christos 	    }
   5209       1.1  christos 	}
   5210       1.1  christos 
   5211       1.1  christos       /* If can't match the null string, and that's all we have left, fail.  */
   5212       1.1  christos       if (range >= 0 && startpos == total_size && fastmap
   5213       1.1  christos           && !bufp->can_be_null)
   5214       1.1  christos        {
   5215       1.1  christos #ifdef WCHAR
   5216       1.1  christos          FREE_WCS_BUFFERS ();
   5217       1.1  christos #endif
   5218       1.1  christos          return -1;
   5219       1.1  christos        }
   5220       1.1  christos 
   5221       1.1  christos #ifdef WCHAR
   5222       1.1  christos       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
   5223       1.1  christos 				     size2, startpos, regs, stop,
   5224       1.1  christos 				     wcs_string1, wcs_size1,
   5225       1.1  christos 				     wcs_string2, wcs_size2,
   5226       1.1  christos 				     mbs_offset1, mbs_offset2);
   5227       1.1  christos #else /* BYTE */
   5228       1.1  christos       val = byte_re_match_2_internal (bufp, string1, size1, string2,
   5229       1.1  christos 				      size2, startpos, regs, stop);
   5230       1.1  christos #endif /* BYTE */
   5231       1.1  christos 
   5232       1.1  christos #ifndef REGEX_MALLOC
   5233       1.1  christos # ifdef C_ALLOCA
   5234       1.1  christos       alloca (0);
   5235       1.1  christos # endif
   5236       1.1  christos #endif
   5237       1.1  christos 
   5238       1.1  christos       if (val >= 0)
   5239       1.1  christos 	{
   5240       1.1  christos #ifdef WCHAR
   5241       1.1  christos 	  FREE_WCS_BUFFERS ();
   5242       1.1  christos #endif
   5243       1.1  christos 	  return startpos;
   5244       1.1  christos 	}
   5245       1.1  christos 
   5246       1.1  christos       if (val == -2)
   5247       1.1  christos 	{
   5248       1.1  christos #ifdef WCHAR
   5249       1.1  christos 	  FREE_WCS_BUFFERS ();
   5250       1.1  christos #endif
   5251       1.1  christos 	  return -2;
   5252       1.1  christos 	}
   5253       1.1  christos 
   5254       1.1  christos     advance:
   5255       1.1  christos       if (!range)
   5256       1.1  christos         break;
   5257       1.1  christos       else if (range > 0)
   5258       1.1  christos         {
   5259       1.1  christos           range--;
   5260       1.1  christos           startpos++;
   5261       1.1  christos         }
   5262       1.1  christos       else
   5263       1.1  christos         {
   5264       1.1  christos           range++;
   5265       1.1  christos           startpos--;
   5266       1.1  christos         }
   5267       1.1  christos     }
   5268       1.1  christos #ifdef WCHAR
   5269       1.1  christos   FREE_WCS_BUFFERS ();
   5270       1.1  christos #endif
   5271       1.1  christos   return -1;
   5272       1.1  christos }
   5273       1.1  christos 
#ifdef WCHAR
/* Convert PTR, a pointer into one of the wchar_t search strings
   `string1' and `string2', into a multibyte string offset.  The
   offset is looked up in mbs_offset1 or mbs_offset2 (see
   convert_mbs_to_wcs); a missing table yields 0.  For a pointer into
   `string2', csize1 is added to the looked-up value.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t)								\
   (FIRST_STRING_P (ptr)						\
    ? (mbs_offset1 != NULL ? mbs_offset1[(ptr) - string1] : 0)		\
    : ((mbs_offset2 != NULL ? mbs_offset2[(ptr) - string2] : 0)	\
       + csize1)))
#else /* BYTE */
/* Convert PTR, a pointer into one of the search strings `string1' and
   `string2', into an offset: the distance from the start of `string1',
   or the distance from the start of `string2' plus size1.  */
# define POINTER_TO_OFFSET(ptr)			\
  ((regoff_t)					\
   (FIRST_STRING_P (ptr)			\
    ? (ptr) - string1				\
    : (ptr) - string2 + size1))
#endif /* WCHAR */
   5292       1.1  christos 
/* Helpers for walking the two-part subject string in re_match_2.  They
   rely on the matcher's locals d, dend, end_match_1, end_match_2,
   string1, string2, size1, size2 and end2, and on its `fail' label.  */

/* True while the current scan limit is the end of the first string.  */
#define MATCHING_IN_FIRST_STRING  (end_match_1 == dend)

/* Call before fetching a character with *d.  If d has reached the
   current limit, move on to string2; if the limit already was the end
   of string2, there is nothing left to read and the match fails.  */
#define PREFETCH()							\
  while (d == dend)							\
    {									\
      if (dend != end_match_2)						\
        {								\
          /* End of string1 => advance to string2.  */			\
          d = string2;							\
          dend = end_match_2;						\
        }								\
      else								\
        /* End of string2 => fail.  */					\
        goto fail;							\
    }

/* Test whether D is at the very beginning or at the very end of the
   virtual concatenation of `string1' and `string2'.  If only one
   string, it's `string2'.  */
#define AT_STRINGS_BEG(d)						\
  ((d) == (size1 != 0 ? string1 : string2) || size2 == 0)
#define AT_STRINGS_END(d) (end2 == (d))
   5314       1.1  christos 
   5315       1.1  christos 
/* Fetch the character D stands for when testing word constituency.
   Two positions are special: if D is past the end of string1 (at
   end1), look at the first character in string2; and if D is just
   before the beginning of string2, look at the last character in
   string1.  Anywhere else, it is simply *D.  */
#define CHAR_ACROSS_SPLIT(d)						\
  ((d) == end1 ? *string2						\
   : (d) == string2 - 1 ? *(end1 - 1) : *(d))

/* Test if D points to a character which is word-constituent.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX: alphanumerics and the
   underscore count as word-constituent.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t) CHAR_ACROSS_SPLIT (d)) != 0			\
   || CHAR_ACROSS_SPLIT (d) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d)							\
  (SYNTAX (CHAR_ACROSS_SPLIT (d)) == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
   5342       1.1  christos 
/* Free everything we malloc.

   When the matcher may allocate (MATCH_MAY_ALLOCATE), this releases
   the failure stack and every register array.  In wide-character
   builds (WCHAR) it also releases the converted strings and their
   offset tables, unless cant_free_wcs_buf is set.  With neither
   option there is nothing to release.  */
#if defined MATCH_MAY_ALLOCATE && defined WCHAR
# define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
#elif defined MATCH_MAY_ALLOCATE /* BYTE */
# define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
#elif defined WCHAR /* not MATCH_MAY_ALLOCATE */
# define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
#else /* BYTE, not MATCH_MAY_ALLOCATE */
# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
#endif
   5397       1.1  christos 
   5398       1.1  christos /* These values must meet several constraints.  They must not be valid
   5399       1.1  christos    register values; since we have a limit of 255 registers (because
   5400       1.1  christos    we use only one byte in the pattern for the register number), we can
   5401       1.1  christos    use numbers larger than 255.  They must differ by 1, because of
   5402       1.1  christos    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   5403       1.1  christos    be larger than the value for the highest register, so we do not try
   5404       1.1  christos    to actually save any registers when none are active.
                               As defined here, NO_LOWEST_ACTIVE_REG is NO_HIGHEST_ACTIVE_REG + 1,
                               which covers both the differ-by-1 and the lowest-above-highest
                               requirements.  The larger-than-255 requirement depends on
                               BYTEWIDTH, which is defined elsewhere (presumably 8, giving 256
                               and 257 -- confirm at its definition).  */
   5405       1.1  christos #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
   5406       1.1  christos #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
   5407       1.1  christos 
   5408       1.1  christos #else /* not INSIDE_RECURSION */
   5410       1.1  christos /* Matching routines.  */
   5411       1.1  christos 
   5412       1.1  christos #ifndef emacs   /* Emacs never uses this.  */
   5413       1.1  christos /* re_match is like re_match_2 except it takes only a single string.
                               STRING (of length SIZE) is passed to the internal matcher as the
                               second string, together with a null first string of length 0.
                               Matching starts at POS and the stop position is SIZE; BUFP and
                               REGS are passed through as given.  When MBS_SUPPORT is defined
                               and MB_CUR_MAX is not 1 the wcs_ matcher is called, otherwise the
                               byte_ matcher.  The value the matcher returns is returned
                               unchanged; see the comment on re_match_2 for the meaning of -1
                               and -2.  */
   5414       1.1  christos 
   5415       1.1  christos int
   5416       1.1  christos re_match (struct re_pattern_buffer *bufp, const char *string,
   5417       1.1  christos           int size, int pos, struct re_registers *regs)
   5418       1.1  christos {
   5419       1.1  christos   int result;
   5420       1.1  christos # ifdef MBS_SUPPORT
   5421       1.1  christos   if (MB_CUR_MAX != 1)
   5422       1.1  christos     result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
   5423       1.1  christos 				      pos, regs, size,
   5424       1.1  christos 				      NULL, 0, NULL, 0, NULL, NULL);
   5425       1.1  christos   else
   5426       1.1  christos # endif
   5427       1.1  christos     result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
   5428       1.1  christos 				  pos, regs, size);
                              /* Only when C_ALLOCA is defined and REGEX_MALLOC is not: call
                                 alloca (0) once the matcher has returned.  Presumably this is
                                 the alloca cleanup that the internal matcher was split out
                                 for -- confirm against the C_ALLOCA implementation.  */
   5429       1.1  christos # ifndef REGEX_MALLOC
   5430       1.1  christos #  ifdef C_ALLOCA
   5431       1.1  christos   alloca (0);
   5432       1.1  christos #  endif
   5433       1.1  christos # endif
   5434       1.1  christos   return result;
   5435       1.1  christos }
   5436       1.1  christos # ifdef _LIBC
   5437       1.1  christos weak_alias (__re_match, re_match)
   5438       1.1  christos # endif
   5439       1.1  christos #endif /* not emacs */
   5440       1.1  christos 
   5441       1.1  christos #endif /* not INSIDE_RECURSION */
   5442       1.1  christos 
   5443       1.1  christos #ifdef INSIDE_RECURSION
                            /* Forward declarations of static helpers; their definitions are not
                               in this part of the file.  Each name is wrapped in PREFIX, which
                               presumably selects the byte_ or wcs_ flavour in the same way as
                               byte_re_match_2_internal and wcs_re_match_2_internal -- confirm
                               at the definition of PREFIX.  */
   5444       1.1  christos static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
   5445       1.1  christos                                                   UCHAR_T *end,
   5446       1.1  christos 					PREFIX(register_info_type) *reg_info);
   5447       1.1  christos static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
   5448       1.1  christos                                                 UCHAR_T *end,
   5449       1.1  christos 					PREFIX(register_info_type) *reg_info);
   5450       1.1  christos static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
   5451       1.1  christos                                                       UCHAR_T *end,
   5452       1.1  christos 					PREFIX(register_info_type) *reg_info);
   5453       1.1  christos static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
   5454       1.1  christos                                    int len, char *translate);
   5455       1.1  christos #else /* not INSIDE_RECURSION */
   5456       1.1  christos 
   5457       1.1  christos /* re_match_2 matches the compiled pattern in BUFP against the
   5458       1.1  christos    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   5459       1.1  christos    and SIZE2, respectively).  We start matching at POS, and stop
   5460       1.1  christos    matching at STOP.
   5461       1.1  christos 
   5462       1.1  christos    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
   5463       1.1  christos    store offsets for the substring each group matched in REGS.  See the
   5464       1.1  christos    documentation for exactly how many groups we fill.
                               NOTE(review): the field name suggests registers would be filled
                               when no_sub is zero, not nonzero; the deciding test is in the
                               internal matcher, so confirm the sense there.
   5465       1.1  christos 
   5466       1.1  christos    We return -1 if no match, -2 if an internal error (such as the
   5467       1.1  christos    failure stack overflowing).  Otherwise, we return the length of the
   5468       1.1  christos    matched substring.
                            
                               This function only dispatches.  When MBS_SUPPORT is defined and
                               MB_CUR_MAX is not 1 it calls the wcs_ internal matcher, passing
                               NULL/0 for the wide strings, their sizes and the offset buffers
                               (which, per the comment on that parameter list, asks the matcher
                               to set them up itself).  Otherwise it calls the byte_ internal
                               matcher.  The matcher result is returned unchanged.  */
   5469       1.1  christos 
   5470       1.1  christos int
   5471       1.1  christos re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
   5472       1.1  christos             const char *string2, int size2, int pos,
   5473       1.1  christos             struct re_registers *regs, int stop)
   5474       1.1  christos {
   5475       1.1  christos   int result;
   5476       1.1  christos # ifdef MBS_SUPPORT
   5477       1.1  christos   if (MB_CUR_MAX != 1)
   5478       1.1  christos     result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
   5479       1.1  christos 				      pos, regs, stop,
   5480       1.1  christos 				      NULL, 0, NULL, 0, NULL, NULL);
   5481       1.1  christos   else
   5482       1.1  christos # endif
   5483       1.1  christos     result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
   5484       1.1  christos 				  pos, regs, stop);
   5485       1.1  christos 
                              /* Only when C_ALLOCA is defined and REGEX_MALLOC is not: call
                                 alloca (0) once the matcher has returned.  Presumably this is
                                 the alloca cleanup that the internal matcher was split out
                                 for -- confirm against the C_ALLOCA implementation.  */
   5486       1.1  christos #ifndef REGEX_MALLOC
   5487       1.1  christos # ifdef C_ALLOCA
   5488       1.1  christos   alloca (0);
   5489       1.1  christos # endif
   5490       1.1  christos #endif
   5491       1.1  christos   return result;
   5492       1.1  christos }
   5493       1.1  christos #ifdef _LIBC
   5494       1.1  christos weak_alias (__re_match_2, re_match_2)
   5495       1.1  christos #endif
   5496       1.1  christos 
   5497       1.1  christos #endif /* not INSIDE_RECURSION */
   5498       1.1  christos 
   5499       1.1  christos #ifdef INSIDE_RECURSION
   5500       1.1  christos 
   5501       1.1  christos #ifdef WCHAR
   5502       1.1  christos static int count_mbs_length (int *, int);
   5503       1.1  christos 
   5504       1.1  christos /* This check the substring (from 0, to length) of the multibyte string,
   5505       1.1  christos    to which offset_buffer correspond. And count how many wchar_t_characters
   5506       1.1  christos    the substring occupy. We use offset_buffer to optimization.
   5507       1.1  christos    See convert_mbs_to_wcs.  */
   5508       1.1  christos 
   5509       1.1  christos static int
   5510       1.1  christos count_mbs_length(int *offset_buffer, int length)
   5511       1.1  christos {
   5512       1.1  christos   int upper, lower;
   5513       1.1  christos 
   5514       1.1  christos   /* Check whether the size is valid.  */
   5515       1.1  christos   if (length < 0)
   5516       1.1  christos     return -1;
   5517       1.1  christos 
   5518       1.1  christos   if (offset_buffer == NULL)
   5519       1.1  christos     return 0;
   5520       1.1  christos 
   5521       1.1  christos   /* If there are no multibyte character, offset_buffer[i] == i.
   5522       1.1  christos    Optmize for this case.  */
   5523       1.1  christos   if (offset_buffer[length] == length)
   5524       1.1  christos     return length;
   5525       1.1  christos 
   5526       1.1  christos   /* Set up upper with length. (because for all i, offset_buffer[i] >= i)  */
   5527       1.1  christos   upper = length;
   5528       1.1  christos   lower = 0;
   5529       1.1  christos 
   5530       1.1  christos   while (true)
   5531       1.1  christos     {
   5532       1.1  christos       int middle = (lower + upper) / 2;
   5533       1.1  christos       if (middle == lower || middle == upper)
   5534       1.1  christos 	break;
   5535       1.1  christos       if (offset_buffer[middle] > length)
   5536       1.1  christos 	upper = middle;
   5537       1.1  christos       else if (offset_buffer[middle] < length)
   5538       1.1  christos 	lower = middle;
   5539       1.1  christos       else
   5540       1.1  christos 	return middle;
   5541       1.1  christos     }
   5542       1.1  christos 
   5543       1.1  christos   return -1;
   5544       1.1  christos }
   5545       1.1  christos #endif /* WCHAR */
   5546       1.1  christos 
   5547       1.1  christos /* This is a separate function so that we can force an alloca cleanup
   5548       1.1  christos    afterwards.  */
   5549       1.1  christos #ifdef WCHAR
   5550       1.1  christos static int
   5551       1.1  christos wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
   5552       1.1  christos                          const char *cstring1, int csize1,
   5553       1.1  christos                          const char *cstring2, int csize2,
   5554       1.1  christos                          int pos,
   5555       1.1  christos 			 struct re_registers *regs,
   5556       1.1  christos                          int stop,
   5557       1.1  christos      /* string1 == string2 == NULL means string1/2, size1/2 and
   5558       1.1  christos 	mbs_offset1/2 need setting up in this function.  */
   5559       1.1  christos      /* We need wchar_t* buffers corresponding to cstring1, cstring2.  */
   5560       1.1  christos                          wchar_t *string1, int size1,
   5561       1.1  christos                          wchar_t *string2, int size2,
   5562       1.1  christos      /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
   5563       1.1  christos 			 int *mbs_offset1, int *mbs_offset2)
   5564       1.1  christos #else /* BYTE */
   5565       1.1  christos static int
   5566       1.1  christos byte_re_match_2_internal (struct re_pattern_buffer *bufp,
   5567       1.1  christos                           const char *string1, int size1,
   5568       1.1  christos                           const char *string2, int size2,
   5569       1.1  christos                           int pos,
   5570       1.1  christos 			  struct re_registers *regs, int stop)
   5571       1.1  christos #endif /* BYTE */
   5572       1.1  christos {
   5573       1.1  christos   /* General temporaries.  */
   5574       1.1  christos   int mcnt;
   5575       1.1  christos   UCHAR_T *p1;
   5576       1.1  christos #ifdef WCHAR
   5577       1.1  christos   /* They hold whether each wchar_t is binary data or not.  */
   5578       1.1  christos   char *is_binary = NULL;
   5579       1.1  christos   /* If true, we can't free string1/2, mbs_offset1/2.  */
   5580       1.1  christos   int cant_free_wcs_buf = 1;
   5581       1.1  christos #endif /* WCHAR */
   5582       1.1  christos 
   5583       1.1  christos   /* Just past the end of the corresponding string.  */
   5584       1.1  christos   const CHAR_T *end1, *end2;
   5585       1.1  christos 
   5586       1.1  christos   /* Pointers into string1 and string2, just past the last characters in
   5587       1.1  christos      each to consider matching.  */
   5588       1.1  christos   const CHAR_T *end_match_1, *end_match_2;
   5589       1.1  christos 
   5590       1.1  christos   /* Where we are in the data, and the end of the current string.  */
   5591       1.1  christos   const CHAR_T *d, *dend;
   5592       1.1  christos 
   5593       1.1  christos   /* Where we are in the pattern, and the end of the pattern.  */
   5594       1.1  christos #ifdef WCHAR
   5595       1.1  christos   UCHAR_T *pattern, *p;
   5596       1.1  christos   register UCHAR_T *pend;
   5597       1.1  christos #else /* BYTE */
   5598       1.1  christos   UCHAR_T *p = bufp->buffer;
   5599       1.1  christos   register UCHAR_T *pend = p + bufp->used;
   5600  1.1.1.11  christos #endif /* WCHAR */
   5601       1.1  christos 
   5602       1.1  christos   /* Mark the opcode just after a start_memory, so we can test for an
   5603       1.1  christos      empty subpattern when we get to the stop_memory.  */
   5604       1.1  christos   UCHAR_T *just_past_start_mem = 0;
   5605       1.1  christos 
   5606       1.1  christos   /* We use this to map every character in the string.  */
   5607       1.1  christos   RE_TRANSLATE_TYPE translate = bufp->translate;
   5608       1.1  christos 
   5609       1.1  christos   /* Failure point stack.  Each place that can handle a failure further
   5610       1.1  christos      down the line pushes a failure point on this stack.  It consists of
   5611       1.1  christos      restart, regend, and reg_info for all registers corresponding to
   5612       1.1  christos      the subexpressions we're currently inside, plus the number of such
   5613       1.1  christos      registers, and, finally, two char *'s.  The first char * is where
   5614       1.1  christos      to resume scanning the pattern; the second one is where to resume
   5615       1.1  christos      scanning the strings.  If the latter is zero, the failure point is
   5616       1.1  christos      a ``dummy''; if a failure happens and the failure point is a dummy,
   5617       1.1  christos      it gets discarded and the next one is tried.  */
   5618       1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5619       1.1  christos   PREFIX(fail_stack_type) fail_stack;
   5620       1.1  christos #endif
   5621       1.1  christos #ifdef DEBUG
   5622       1.1  christos   static unsigned failure_id;
   5623       1.1  christos   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
   5624       1.1  christos #endif
   5625       1.1  christos 
   5626       1.1  christos #ifdef REL_ALLOC
   5627       1.1  christos   /* This holds the pointer to the failure stack, when
   5628       1.1  christos      it is allocated relocatably.  */
   5629       1.1  christos   fail_stack_elt_t *failure_stack_ptr;
   5630       1.1  christos #endif
   5631       1.1  christos 
   5632       1.1  christos   /* We fill all the registers internally, independent of what we
   5633       1.1  christos      return, for use in backreferences.  The number here includes
   5634       1.1  christos      an element for register zero.  */
   5635       1.1  christos   size_t num_regs = bufp->re_nsub + 1;
   5636       1.1  christos 
   5637       1.1  christos   /* The currently active registers.  */
   5638       1.1  christos   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   5639       1.1  christos   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   5640       1.1  christos 
   5641       1.1  christos   /* Information on the contents of registers. These are pointers into
   5642       1.1  christos      the input strings; they record just what was matched (on this
   5643       1.1  christos      attempt) by a subexpression part of the pattern, that is, the
   5644       1.1  christos      regnum-th regstart pointer points to where in the pattern we began
   5645       1.1  christos      matching and the regnum-th regend points to right after where we
   5646       1.1  christos      stopped matching the regnum-th subexpression.  (The zeroth register
   5647       1.1  christos      keeps track of what the whole pattern matches.)  */
   5648       1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5649       1.1  christos   const CHAR_T **regstart, **regend;
   5650       1.1  christos #endif
   5651       1.1  christos 
   5652       1.1  christos   /* If a group that's operated upon by a repetition operator fails to
   5653       1.1  christos      match anything, then the register for its start will need to be
   5654       1.1  christos      restored because it will have been set to wherever in the string we
   5655       1.1  christos      are when we last see its open-group operator.  Similarly for a
   5656       1.1  christos      register's end.  */
   5657       1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5658       1.1  christos   const CHAR_T **old_regstart, **old_regend;
   5659       1.1  christos #endif
   5660       1.1  christos 
   5661       1.1  christos   /* The is_active field of reg_info helps us keep track of which (possibly
   5662       1.1  christos      nested) subexpressions we are currently in. The matched_something
   5663       1.1  christos      field of reg_info[reg_num] helps us tell whether or not we have
   5664       1.1  christos      matched any of the pattern so far this time through the reg_num-th
   5665       1.1  christos      subexpression.  These two fields get reset each time through any
   5666       1.1  christos      loop their register is in.  */
   5667       1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5668       1.1  christos   PREFIX(register_info_type) *reg_info;
   5669       1.1  christos #endif
   5670       1.1  christos 
   5671       1.1  christos   /* The following record the register info as found in the above
   5672       1.1  christos      variables when we find a match better than any we've seen before.
   5673       1.1  christos      This happens as we backtrack through the failure points, which in
   5674       1.1  christos      turn happens only if we have not yet matched the entire string. */
   5675       1.1  christos   unsigned best_regs_set = false;
   5676       1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5677       1.1  christos   const CHAR_T **best_regstart, **best_regend;
   5678       1.1  christos #endif
   5679       1.1  christos 
   5680       1.1  christos   /* Logically, this is `best_regend[0]'.  But we don't want to have to
   5681       1.1  christos      allocate space for that if we're not allocating space for anything
   5682       1.1  christos      else (see below).  Also, we never need info about register 0 for
   5683       1.1  christos      any of the other register vectors, and it seems rather a kludge to
   5684       1.1  christos      treat `best_regend' differently than the rest.  So we keep track of
   5685       1.1  christos      the end of the best match so far in a separate variable.  We
   5686       1.1  christos      initialize this to NULL so that when we backtrack the first time
   5687       1.1  christos      and need to test it, it's not garbage.  */
   5688       1.1  christos   const CHAR_T *match_end = NULL;
   5689       1.1  christos 
   5690       1.1  christos   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   5691       1.1  christos   int set_regs_matched_done = 0;
   5692       1.1  christos 
   5693       1.1  christos   /* Used when we pop values we don't care about.  */
   5694       1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5695       1.1  christos   const CHAR_T **reg_dummy;
   5696       1.1  christos   PREFIX(register_info_type) *reg_info_dummy;
   5697       1.1  christos #endif
   5698       1.1  christos 
   5699       1.1  christos #ifdef DEBUG
   5700       1.1  christos   /* Counts the total number of registers pushed.  */
   5701       1.1  christos   unsigned num_regs_pushed = 0;
   5702       1.1  christos #endif
   5703       1.1  christos 
   5704       1.1  christos   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
   5705       1.1  christos 
   5706       1.1  christos   INIT_FAIL_STACK ();
   5707       1.1  christos 
   5708       1.1  christos #ifdef MATCH_MAY_ALLOCATE
   5709       1.1  christos   /* Do not bother to initialize all the register variables if there are
   5710       1.1  christos      no groups in the pattern, as it takes a fair amount of time.  If
   5711       1.1  christos      there are groups, we include space for register 0 (the whole
   5712       1.1  christos      pattern), even though we never use it, since it simplifies the
   5713       1.1  christos      array indexing.  We should fix this.  */
   5714       1.1  christos   if (bufp->re_nsub)
   5715       1.1  christos     {
   5716       1.1  christos       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5717       1.1  christos       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5718       1.1  christos       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5719       1.1  christos       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5720       1.1  christos       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5721       1.1  christos       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5722       1.1  christos       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5723       1.1  christos       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
   5724       1.1  christos       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5725       1.1  christos 
   5726       1.1  christos       if (!(regstart && regend && old_regstart && old_regend && reg_info
   5727       1.1  christos             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
   5728       1.1  christos         {
   5729       1.1  christos           FREE_VARIABLES ();
   5730       1.1  christos           return -2;
   5731       1.1  christos         }
   5732       1.1  christos     }
   5733       1.1  christos   else
   5734       1.1  christos     {
   5735       1.1  christos       /* We must initialize all our variables to NULL, so that
   5736       1.1  christos          `FREE_VARIABLES' doesn't try to free them.  */
   5737       1.1  christos       regstart = regend = old_regstart = old_regend = best_regstart
   5738       1.1  christos         = best_regend = reg_dummy = NULL;
   5739       1.1  christos       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
   5740       1.1  christos     }
   5741       1.1  christos #endif /* MATCH_MAY_ALLOCATE */
   5742       1.1  christos 
   5743       1.1  christos   /* The starting position is bogus.  */
   5744       1.1  christos #ifdef WCHAR
   5745       1.1  christos   if (pos < 0 || pos > csize1 + csize2)
   5746       1.1  christos #else /* BYTE */
   5747       1.1  christos   if (pos < 0 || pos > size1 + size2)
   5748       1.1  christos #endif
   5749       1.1  christos     {
   5750       1.1  christos       FREE_VARIABLES ();
   5751       1.1  christos       return -1;
   5752       1.1  christos     }
   5753       1.1  christos 
   5754       1.1  christos #ifdef WCHAR
   5755       1.1  christos   /* Allocate wchar_t array for string1 and string2 and
   5756       1.1  christos      fill them with converted string.  */
   5757       1.1  christos   if (string1 == NULL && string2 == NULL)
   5758       1.1  christos     {
   5759       1.1  christos       /* We need seting up buffers here.  */
   5760       1.1  christos 
   5761       1.1  christos       /* We must free wcs buffers in this function.  */
   5762       1.1  christos       cant_free_wcs_buf = 0;
   5763       1.1  christos 
   5764       1.1  christos       if (csize1 != 0)
   5765       1.1  christos 	{
   5766       1.1  christos 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
   5767       1.1  christos 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
   5768       1.1  christos 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
   5769       1.1  christos 	  if (!string1 || !mbs_offset1 || !is_binary)
   5770       1.1  christos 	    {
   5771       1.1  christos 	      FREE_VAR (string1);
   5772       1.1  christos 	      FREE_VAR (mbs_offset1);
   5773       1.1  christos 	      FREE_VAR (is_binary);
   5774       1.1  christos 	      return -2;
   5775       1.1  christos 	    }
   5776       1.1  christos 	}
   5777       1.1  christos       if (csize2 != 0)
   5778       1.1  christos 	{
   5779       1.1  christos 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
   5780       1.1  christos 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
   5781       1.1  christos 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
   5782       1.1  christos 	  if (!string2 || !mbs_offset2 || !is_binary)
   5783       1.1  christos 	    {
   5784       1.1  christos 	      FREE_VAR (string1);
   5785       1.1  christos 	      FREE_VAR (mbs_offset1);
   5786       1.1  christos 	      FREE_VAR (string2);
   5787       1.1  christos 	      FREE_VAR (mbs_offset2);
   5788       1.1  christos 	      FREE_VAR (is_binary);
   5789       1.1  christos 	      return -2;
   5790       1.1  christos 	    }
   5791       1.1  christos 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
   5792       1.1  christos 				     mbs_offset2, is_binary);
   5793       1.1  christos 	  string2[size2] = L'\0'; /* for a sentinel  */
   5794       1.1  christos 	  FREE_VAR (is_binary);
   5795       1.1  christos 	}
   5796       1.1  christos     }
   5797       1.1  christos 
   5798       1.1  christos   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   5799       1.1  christos      pattern to (char*) in regex_compile.  */
   5800       1.1  christos   p = pattern = (CHAR_T*)bufp->buffer;
   5801       1.1  christos   pend = (CHAR_T*)(bufp->buffer + bufp->used);
   5802       1.1  christos 
   5803       1.1  christos #endif /* WCHAR */
   5804       1.1  christos 
   5805       1.1  christos   /* Initialize subexpression text positions to -1 to mark ones that no
   5806       1.1  christos      start_memory/stop_memory has been seen for. Also initialize the
   5807       1.1  christos      register information struct.  */
   5808       1.1  christos   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5809       1.1  christos     {
   5810       1.1  christos       regstart[mcnt] = regend[mcnt]
   5811       1.1  christos         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
   5812       1.1  christos 
   5813       1.1  christos       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
   5814       1.1  christos       IS_ACTIVE (reg_info[mcnt]) = 0;
   5815       1.1  christos       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   5816       1.1  christos       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   5817       1.1  christos     }
   5818       1.1  christos 
   5819       1.1  christos   /* We move `string1' into `string2' if the latter's empty -- but not if
   5820       1.1  christos      `string1' is null.  */
   5821       1.1  christos   if (size2 == 0 && string1 != NULL)
   5822       1.1  christos     {
   5823       1.1  christos       string2 = string1;
   5824       1.1  christos       size2 = size1;
   5825       1.1  christos       string1 = 0;
   5826       1.1  christos       size1 = 0;
   5827       1.1  christos #ifdef WCHAR
   5828       1.1  christos       mbs_offset2 = mbs_offset1;
   5829       1.1  christos       csize2 = csize1;
   5830       1.1  christos       mbs_offset1 = NULL;
   5831       1.1  christos       csize1 = 0;
   5832       1.1  christos #endif
   5833       1.1  christos     }
   5834       1.1  christos   end1 = string1 + size1;
   5835       1.1  christos   end2 = string2 + size2;
   5836       1.1  christos 
   5837       1.1  christos   /* Compute where to stop matching, within the two strings.  */
   5838       1.1  christos #ifdef WCHAR
   5839       1.1  christos   if (stop <= csize1)
   5840       1.1  christos     {
   5841       1.1  christos       mcnt = count_mbs_length(mbs_offset1, stop);
   5842       1.1  christos       end_match_1 = string1 + mcnt;
   5843       1.1  christos       end_match_2 = string2;
   5844       1.1  christos     }
   5845       1.1  christos   else
   5846       1.1  christos     {
   5847       1.1  christos       if (stop > csize1 + csize2)
   5848       1.1  christos 	stop = csize1 + csize2;
   5849       1.1  christos       end_match_1 = end1;
   5850       1.1  christos       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
   5851       1.1  christos       end_match_2 = string2 + mcnt;
   5852       1.1  christos     }
   5853       1.1  christos   if (mcnt < 0)
   5854       1.1  christos     { /* count_mbs_length return error.  */
   5855       1.1  christos       FREE_VARIABLES ();
   5856       1.1  christos       return -1;
   5857       1.1  christos     }
   5858       1.1  christos #else
   5859       1.1  christos   if (stop <= size1)
   5860       1.1  christos     {
   5861       1.1  christos       end_match_1 = string1 + stop;
   5862       1.1  christos       end_match_2 = string2;
   5863       1.1  christos     }
   5864       1.1  christos   else
   5865       1.1  christos     {
   5866       1.1  christos       end_match_1 = end1;
   5867       1.1  christos       end_match_2 = string2 + stop - size1;
   5868       1.1  christos     }
   5869       1.1  christos #endif /* WCHAR */
   5870       1.1  christos 
   5871       1.1  christos   /* `p' scans through the pattern as `d' scans through the data.
   5872       1.1  christos      `dend' is the end of the input string that `d' points within.  `d'
   5873       1.1  christos      is advanced into the following input string whenever necessary, but
   5874       1.1  christos      this happens before fetching; therefore, at the beginning of the
   5875       1.1  christos      loop, `d' can be pointing at the end of a string, but it cannot
   5876       1.1  christos      equal `string2'.  */
   5877       1.1  christos #ifdef WCHAR
   5878       1.1  christos   if (size1 > 0 && pos <= csize1)
   5879       1.1  christos     {
   5880       1.1  christos       mcnt = count_mbs_length(mbs_offset1, pos);
   5881       1.1  christos       d = string1 + mcnt;
   5882       1.1  christos       dend = end_match_1;
   5883       1.1  christos     }
   5884       1.1  christos   else
   5885       1.1  christos     {
   5886       1.1  christos       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
   5887       1.1  christos       d = string2 + mcnt;
   5888       1.1  christos       dend = end_match_2;
   5889       1.1  christos     }
   5890       1.1  christos 
   5891       1.1  christos   if (mcnt < 0)
   5892       1.1  christos     { /* count_mbs_length return error.  */
   5893       1.1  christos       FREE_VARIABLES ();
   5894       1.1  christos       return -1;
   5895       1.1  christos     }
   5896       1.1  christos #else
   5897       1.1  christos   if (size1 > 0 && pos <= size1)
   5898       1.1  christos     {
   5899       1.1  christos       d = string1 + pos;
   5900       1.1  christos       dend = end_match_1;
   5901       1.1  christos     }
   5902       1.1  christos   else
   5903       1.1  christos     {
   5904       1.1  christos       d = string2 + pos - size1;
   5905       1.1  christos       dend = end_match_2;
   5906       1.1  christos     }
   5907       1.1  christos #endif /* WCHAR */
   5908       1.1  christos 
   5909       1.1  christos   DEBUG_PRINT1 ("The compiled pattern is:\n");
   5910       1.1  christos   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   5911       1.1  christos   DEBUG_PRINT1 ("The string to match is: `");
   5912       1.1  christos   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
   5913       1.1  christos   DEBUG_PRINT1 ("'\n");
   5914       1.1  christos 
   5915       1.1  christos   /* This loops over pattern commands.  It exits by returning from the
   5916       1.1  christos      function if the match is complete, or it drops through if the match
   5917       1.1  christos      fails at this starting point in the input data.  */
   5918       1.1  christos   for (;;)
   5919       1.1  christos     {
   5920       1.1  christos #ifdef _LIBC
   5921       1.1  christos       DEBUG_PRINT2 ("\n%p: ", p);
   5922       1.1  christos #else
   5923       1.1  christos       DEBUG_PRINT2 ("\n0x%x: ", p);
   5924       1.1  christos #endif
   5925       1.1  christos 
   5926       1.1  christos       if (p == pend)
   5927       1.1  christos 	{ /* End of pattern means we might have succeeded.  */
   5928       1.1  christos           DEBUG_PRINT1 ("end of pattern ... ");
   5929       1.1  christos 
   5930       1.1  christos 	  /* If we haven't matched the entire string, and we want the
   5931       1.1  christos              longest match, try backtracking.  */
   5932       1.1  christos           if (d != end_match_2)
   5933       1.1  christos 	    {
   5934       1.1  christos 	      /* 1 if this match ends in the same string (string1 or string2)
   5935       1.1  christos 		 as the best previous match.  */
   5936       1.1  christos 	      boolean same_str_p;
   5937       1.1  christos 
   5938       1.1  christos 	      /* 1 if this match is the best seen so far.  */
   5939       1.1  christos 	      boolean best_match_p;
   5940       1.1  christos 
   5941       1.1  christos               same_str_p = (FIRST_STRING_P (match_end)
   5942       1.1  christos                             == MATCHING_IN_FIRST_STRING);
   5943       1.1  christos 
   5944       1.1  christos 	      /* AIX compiler got confused when this was combined
   5945       1.1  christos 		 with the previous declaration.  */
   5946       1.1  christos 	      if (same_str_p)
   5947       1.1  christos 		best_match_p = d > match_end;
   5948       1.1  christos 	      else
   5949       1.1  christos 		best_match_p = !MATCHING_IN_FIRST_STRING;
   5950       1.1  christos 
   5951       1.1  christos               DEBUG_PRINT1 ("backtracking.\n");
   5952       1.1  christos 
   5953       1.1  christos               if (!FAIL_STACK_EMPTY ())
   5954       1.1  christos                 { /* More failure points to try.  */
   5955       1.1  christos 
   5956       1.1  christos                   /* If exceeds best match so far, save it.  */
   5957       1.1  christos                   if (!best_regs_set || best_match_p)
   5958       1.1  christos                     {
   5959       1.1  christos                       best_regs_set = true;
   5960       1.1  christos                       match_end = d;
   5961       1.1  christos 
   5962       1.1  christos                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
   5963       1.1  christos 
   5964       1.1  christos                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5965       1.1  christos                         {
   5966       1.1  christos                           best_regstart[mcnt] = regstart[mcnt];
   5967       1.1  christos                           best_regend[mcnt] = regend[mcnt];
   5968       1.1  christos                         }
   5969       1.1  christos                     }
   5970       1.1  christos                   goto fail;
   5971       1.1  christos                 }
   5972       1.1  christos 
   5973       1.1  christos               /* If no failure points, don't restore garbage.  And if
   5974       1.1  christos                  last match is real best match, don't restore second
   5975       1.1  christos                  best one. */
   5976       1.1  christos               else if (best_regs_set && !best_match_p)
   5977       1.1  christos                 {
   5978       1.1  christos   	        restore_best_regs:
   5979       1.1  christos                   /* Restore best match.  It may happen that `dend ==
   5980       1.1  christos                      end_match_1' while the restored d is in string2.
   5981       1.1  christos                      For example, the pattern `x.*y.*z' against the
   5982       1.1  christos                      strings `x-' and `y-z-', if the two strings are
   5983       1.1  christos                      not consecutive in memory.  */
   5984       1.1  christos                   DEBUG_PRINT1 ("Restoring best registers.\n");
   5985       1.1  christos 
   5986       1.1  christos                   d = match_end;
   5987       1.1  christos                   dend = ((d >= string1 && d <= end1)
   5988       1.1  christos 		           ? end_match_1 : end_match_2);
   5989       1.1  christos 
   5990       1.1  christos 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   5991       1.1  christos 		    {
   5992       1.1  christos 		      regstart[mcnt] = best_regstart[mcnt];
   5993       1.1  christos 		      regend[mcnt] = best_regend[mcnt];
   5994       1.1  christos 		    }
   5995       1.1  christos                 }
   5996       1.1  christos             } /* d != end_match_2 */
   5997       1.1  christos 
   5998       1.1  christos 	succeed_label:
   5999       1.1  christos           DEBUG_PRINT1 ("Accepting match.\n");
   6000       1.1  christos           /* If caller wants register contents data back, do it.  */
   6001       1.1  christos           if (regs && !bufp->no_sub)
   6002       1.1  christos 	    {
   6003       1.1  christos 	      /* Have the register data arrays been allocated?  */
   6004       1.1  christos               if (bufp->regs_allocated == REGS_UNALLOCATED)
   6005       1.1  christos                 { /* No.  So allocate them with malloc.  We need one
   6006       1.1  christos                      extra element beyond `num_regs' for the `-1' marker
   6007       1.1  christos                      GNU code uses.  */
   6008       1.1  christos                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
   6009       1.1  christos                   regs->start = TALLOC (regs->num_regs, regoff_t);
   6010       1.1  christos                   regs->end = TALLOC (regs->num_regs, regoff_t);
   6011       1.1  christos                   if (regs->start == NULL || regs->end == NULL)
   6012       1.1  christos 		    {
   6013       1.1  christos 		      FREE_VARIABLES ();
   6014       1.1  christos 		      return -2;
   6015       1.1  christos 		    }
   6016       1.1  christos                   bufp->regs_allocated = REGS_REALLOCATE;
   6017       1.1  christos                 }
   6018       1.1  christos               else if (bufp->regs_allocated == REGS_REALLOCATE)
   6019       1.1  christos                 { /* Yes.  If we need more elements than were already
   6020       1.1  christos                      allocated, reallocate them.  If we need fewer, just
   6021       1.1  christos                      leave it alone.  */
   6022       1.1  christos                   if (regs->num_regs < num_regs + 1)
   6023       1.1  christos                     {
   6024       1.1  christos                       regs->num_regs = num_regs + 1;
   6025       1.1  christos                       RETALLOC (regs->start, regs->num_regs, regoff_t);
   6026       1.1  christos                       RETALLOC (regs->end, regs->num_regs, regoff_t);
   6027       1.1  christos                       if (regs->start == NULL || regs->end == NULL)
   6028       1.1  christos 			{
   6029       1.1  christos 			  FREE_VARIABLES ();
   6030       1.1  christos 			  return -2;
   6031       1.1  christos 			}
   6032       1.1  christos                     }
   6033       1.1  christos                 }
   6034       1.1  christos               else
   6035       1.1  christos 		{
   6036       1.1  christos 		  /* These braces fend off an "empty body in an else-statement"
   6037       1.1  christos 		     warning under GCC when assert expands to nothing.  */
   6038       1.1  christos 		  assert (bufp->regs_allocated == REGS_FIXED);
   6039       1.1  christos 		}
   6040       1.1  christos 
   6041       1.1  christos               /* Convert the pointer data in `regstart' and `regend' to
   6042       1.1  christos                  indices.  Register zero has to be set differently,
   6043       1.1  christos                  since we haven't kept track of any info for it.  */
   6044       1.1  christos               if (regs->num_regs > 0)
   6045       1.1  christos                 {
   6046       1.1  christos                   regs->start[0] = pos;
   6047       1.1  christos #ifdef WCHAR
   6048       1.1  christos 		  if (MATCHING_IN_FIRST_STRING)
   6049       1.1  christos 		    regs->end[0] = mbs_offset1 != NULL ?
   6050       1.1  christos 					mbs_offset1[d-string1] : 0;
   6051       1.1  christos 		  else
   6052       1.1  christos 		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
   6053       1.1  christos 					     mbs_offset2[d-string2] : 0);
   6054       1.1  christos #else
   6055       1.1  christos                   regs->end[0] = (MATCHING_IN_FIRST_STRING
   6056       1.1  christos 				  ? ((regoff_t) (d - string1))
   6057       1.1  christos 			          : ((regoff_t) (d - string2 + size1)));
   6058       1.1  christos #endif /* WCHAR */
   6059       1.1  christos                 }
   6060       1.1  christos 
   6061       1.1  christos               /* Go through the first `min (num_regs, regs->num_regs)'
   6062       1.1  christos                  registers, since that is all we initialized.  */
   6063       1.1  christos 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
   6064       1.1  christos 		   mcnt++)
   6065       1.1  christos 		{
   6066       1.1  christos                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
   6067       1.1  christos                     regs->start[mcnt] = regs->end[mcnt] = -1;
   6068       1.1  christos                   else
   6069       1.1  christos                     {
   6070       1.1  christos 		      regs->start[mcnt]
   6071       1.1  christos 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
   6072       1.1  christos                       regs->end[mcnt]
   6073       1.1  christos 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
   6074       1.1  christos                     }
   6075       1.1  christos 		}
   6076       1.1  christos 
   6077       1.1  christos               /* If the regs structure we return has more elements than
   6078       1.1  christos                  were in the pattern, set the extra elements to -1.  If
   6079       1.1  christos                  we (re)allocated the registers, this is the case,
   6080       1.1  christos                  because we always allocate enough to have at least one
   6081       1.1  christos                  -1 at the end.  */
   6082       1.1  christos               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
   6083       1.1  christos                 regs->start[mcnt] = regs->end[mcnt] = -1;
   6084       1.1  christos 	    } /* regs && !bufp->no_sub */
   6085       1.1  christos 
   6086       1.1  christos           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
   6087       1.1  christos                         nfailure_points_pushed, nfailure_points_popped,
   6088       1.1  christos                         nfailure_points_pushed - nfailure_points_popped);
   6089       1.1  christos           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
   6090       1.1  christos 
   6091       1.1  christos #ifdef WCHAR
   6092       1.1  christos 	  if (MATCHING_IN_FIRST_STRING)
   6093       1.1  christos 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
   6094       1.1  christos 	  else
   6095       1.1  christos 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
   6096       1.1  christos 			csize1;
   6097       1.1  christos           mcnt -= pos;
   6098       1.1  christos #else
   6099       1.1  christos           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
   6100       1.1  christos 			    ? string1
   6101       1.1  christos 			    : string2 - size1);
   6102       1.1  christos #endif /* WCHAR */
   6103       1.1  christos 
   6104       1.1  christos           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
   6105       1.1  christos 
   6106       1.1  christos           FREE_VARIABLES ();
   6107       1.1  christos           return mcnt;
   6108       1.1  christos         }
   6109       1.1  christos 
   6110       1.1  christos       /* Otherwise match next pattern command.  */
   6111       1.1  christos       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   6112       1.1  christos 	{
   6113       1.1  christos         /* Ignore these.  Used to ignore the n of succeed_n's which
   6114       1.1  christos            currently have n == 0.  */
   6115       1.1  christos         case no_op:
   6116       1.1  christos           DEBUG_PRINT1 ("EXECUTING no_op.\n");
   6117       1.1  christos           break;
   6118       1.1  christos 
   6119       1.1  christos 	case succeed:
   6120       1.1  christos           DEBUG_PRINT1 ("EXECUTING succeed.\n");
   6121       1.1  christos 	  goto succeed_label;
   6122       1.1  christos 
   6123       1.1  christos         /* Match the next n pattern characters exactly.  The following
   6124       1.1  christos            byte in the pattern defines n, and the n bytes after that
   6125       1.1  christos            are the characters to match.  */
   6126       1.1  christos 	case exactn:
   6127       1.1  christos #ifdef MBS_SUPPORT
   6128       1.1  christos 	case exactn_bin:
   6129       1.1  christos #endif
   6130       1.1  christos 	  mcnt = *p++;
   6131       1.1  christos           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
   6132       1.1  christos 
   6133       1.1  christos           /* This is written out as an if-else so we don't waste time
   6134       1.1  christos              testing `translate' inside the loop.  */
   6135       1.1  christos           if (translate)
   6136       1.1  christos 	    {
   6137       1.1  christos 	      do
   6138       1.1  christos 		{
   6139       1.1  christos 		  PREFETCH ();
   6140       1.1  christos #ifdef WCHAR
   6141       1.1  christos 		  if (*d <= 0xff)
   6142       1.1  christos 		    {
   6143       1.1  christos 		      if ((UCHAR_T) translate[(unsigned char) *d++]
   6144       1.1  christos 			  != (UCHAR_T) *p++)
   6145       1.1  christos 			goto fail;
   6146       1.1  christos 		    }
   6147       1.1  christos 		  else
   6148       1.1  christos 		    {
   6149       1.1  christos 		      if (*d++ != (CHAR_T) *p++)
   6150       1.1  christos 			goto fail;
   6151       1.1  christos 		    }
   6152       1.1  christos #else
   6153       1.1  christos 		  if ((UCHAR_T) translate[(unsigned char) *d++]
   6154       1.1  christos 		      != (UCHAR_T) *p++)
   6155       1.1  christos                     goto fail;
   6156       1.1  christos #endif /* WCHAR */
   6157       1.1  christos 		}
   6158       1.1  christos 	      while (--mcnt);
   6159       1.1  christos 	    }
   6160       1.1  christos 	  else
   6161       1.1  christos 	    {
   6162       1.1  christos 	      do
   6163       1.1  christos 		{
   6164       1.1  christos 		  PREFETCH ();
   6165       1.1  christos 		  if (*d++ != (CHAR_T) *p++) goto fail;
   6166       1.1  christos 		}
   6167       1.1  christos 	      while (--mcnt);
   6168       1.1  christos 	    }
   6169       1.1  christos 	  SET_REGS_MATCHED ();
   6170       1.1  christos           break;
   6171       1.1  christos 
   6172       1.1  christos 
   6173       1.1  christos         /* Match any character except possibly a newline or a null.  */
   6174       1.1  christos 	case anychar:
   6175       1.1  christos           DEBUG_PRINT1 ("EXECUTING anychar.\n");
   6176       1.1  christos 
   6177       1.1  christos           PREFETCH ();
   6178       1.1  christos 
   6179       1.1  christos           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
   6180       1.1  christos               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
   6181       1.1  christos 	    goto fail;
   6182       1.1  christos 
   6183       1.1  christos           SET_REGS_MATCHED ();
   6184       1.1  christos           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
   6185       1.1  christos           d++;
   6186       1.1  christos 	  break;
   6187       1.1  christos 
   6188       1.1  christos 
   6189       1.1  christos 	case charset:
   6190       1.1  christos 	case charset_not:
   6191       1.1  christos 	  {
   6192       1.1  christos 	    register UCHAR_T c;
   6193       1.1  christos #ifdef WCHAR
   6194       1.1  christos 	    unsigned int i, char_class_length, coll_symbol_length,
   6195       1.1  christos               equiv_class_length, ranges_length, chars_length, length;
   6196       1.1  christos 	    CHAR_T *workp, *workp2, *charset_top;
   6197       1.1  christos #define WORK_BUFFER_SIZE 128
   6198       1.1  christos             CHAR_T str_buf[WORK_BUFFER_SIZE];
   6199       1.1  christos # ifdef _LIBC
   6200       1.1  christos 	    uint32_t nrules;
   6201       1.1  christos # endif /* _LIBC */
   6202       1.1  christos #endif /* WCHAR */
   6203       1.1  christos 	    boolean negate = (re_opcode_t) *(p - 1) == charset_not;
   6204       1.1  christos 
   6205       1.1  christos             DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
   6206       1.1  christos 	    PREFETCH ();
   6207       1.1  christos 	    c = TRANSLATE (*d); /* The character to match.  */
   6208       1.1  christos #ifdef WCHAR
   6209       1.1  christos # ifdef _LIBC
   6210       1.1  christos 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   6211       1.1  christos # endif /* _LIBC */
   6212       1.1  christos 	    charset_top = p - 1;
   6213       1.1  christos 	    char_class_length = *p++;
   6214       1.1  christos 	    coll_symbol_length = *p++;
   6215       1.1  christos 	    equiv_class_length = *p++;
   6216       1.1  christos 	    ranges_length = *p++;
   6217       1.1  christos 	    chars_length = *p++;
   6218       1.1  christos 	    /* p now points to charset[6], so the address of the next
   6219       1.1  christos 	       instruction (charset[6+l+m+n+2*o+p']) equals p[l+m+n+2*o+p'],
   6220       1.1  christos 	       where l=length of char_classes, m=length of collating_symbol,
   6221       1.1  christos 	       n=length of equivalence_class, o=length of char_range,
   6222       1.1  christos 	       p'=length of character.  */
   6223       1.1  christos 	    workp = p;
   6224       1.1  christos 	    /* Update p to indicate the next instruction.  */
   6225       1.1  christos 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
   6226       1.1  christos               2*ranges_length + chars_length;
   6227       1.1  christos 
   6228       1.1  christos             /* match with char_class?  */
   6229       1.1  christos 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
   6230       1.1  christos 	      {
   6231       1.1  christos 		wctype_t wctype;
   6232       1.1  christos 		uintptr_t alignedp = ((uintptr_t)workp
   6233       1.1  christos 				      + __alignof__(wctype_t) - 1)
   6234       1.1  christos 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   6235       1.1  christos 		wctype = *((wctype_t*)alignedp);
   6236       1.1  christos 		workp += CHAR_CLASS_SIZE;
   6237       1.1  christos # ifdef _LIBC
   6238       1.1  christos 		if (__iswctype((wint_t)c, wctype))
   6239       1.1  christos 		  goto char_set_matched;
   6240       1.1  christos # else
   6241       1.1  christos 		if (iswctype((wint_t)c, wctype))
   6242       1.1  christos 		  goto char_set_matched;
   6243       1.1  christos # endif
   6244       1.1  christos 	      }
   6245       1.1  christos 
   6246       1.1  christos             /* match with collating_symbol?  */
   6247       1.1  christos # ifdef _LIBC
   6248       1.1  christos 	    if (nrules != 0)
   6249       1.1  christos 	      {
   6250       1.1  christos 		const unsigned char *extra = (const unsigned char *)
   6251       1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   6252       1.1  christos 
   6253       1.1  christos 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
   6254       1.1  christos 		     workp++)
   6255       1.1  christos 		  {
   6256       1.1  christos 		    int32_t *wextra;
   6257       1.1  christos 		    wextra = (int32_t*)(extra + *workp++);
   6258       1.1  christos 		    for (i = 0; i < *wextra; ++i)
   6259       1.1  christos 		      if (TRANSLATE(d[i]) != wextra[1 + i])
   6260       1.1  christos 			break;
   6261       1.1  christos 
   6262       1.1  christos 		    if (i == *wextra)
   6263       1.1  christos 		      {
   6264       1.1  christos 			/* Update d; since d will be incremented again at
   6265       1.1  christos 			   char_set_matched:, advance it by one less here.  */
   6266       1.1  christos 			d += i - 1;
   6267       1.1  christos 			goto char_set_matched;
   6268       1.1  christos 		      }
   6269       1.1  christos 		  }
   6270       1.1  christos 	      }
   6271       1.1  christos 	    else /* (nrules == 0) */
   6272       1.1  christos # endif
   6273       1.1  christos 	      /* If we can't look up collation data, we use wcscoll
   6274       1.1  christos 		 instead.  */
   6275       1.1  christos 	      {
   6276       1.1  christos 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
   6277       1.1  christos 		  {
   6278       1.1  christos 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6279       1.1  christos # ifdef _LIBC
   6280       1.1  christos 		    length = __wcslen (workp);
   6281       1.1  christos # else
   6282       1.1  christos 		    length = wcslen (workp);
   6283       1.1  christos # endif
   6284       1.1  christos 
   6285       1.1  christos 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6286       1.1  christos 		       no substring of the string can match the
   6287       1.1  christos 		       collating symbol.  */
   6288       1.1  christos # ifdef _LIBC
   6289       1.1  christos 		    if (__wcscoll (workp, d) > 0)
   6290       1.1  christos # else
   6291       1.1  christos 		    if (wcscoll (workp, d) > 0)
   6292       1.1  christos # endif
   6293       1.1  christos 		      {
   6294       1.1  christos 			workp += length + 1;
   6295       1.1  christos 			continue;
   6296       1.1  christos 		      }
   6297       1.1  christos 
   6298       1.1  christos 		    /* First, we compare the collating symbol with
   6299       1.1  christos 		       the first character of the string.
   6300       1.1  christos 		       If it doesn't match, we add the next character to
   6301       1.1  christos 		       the compare buffer in turn.  */
   6302       1.1  christos 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
   6303       1.1  christos 		      {
   6304       1.1  christos 			int match;
   6305       1.1  christos 			if (d == dend)
   6306       1.1  christos 			  {
   6307       1.1  christos 			    if (dend == end_match_2)
   6308       1.1  christos 			      break;
   6309       1.1  christos 			    d = string2;
   6310       1.1  christos 			    dend = end_match_2;
   6311       1.1  christos 			  }
   6312       1.1  christos 
   6313       1.1  christos 			/* add next character to the compare buffer.  */
   6314       1.1  christos 			str_buf[i] = TRANSLATE(*d);
   6315       1.1  christos 			str_buf[i+1] = '\0';
   6316       1.1  christos 
   6317       1.1  christos # ifdef _LIBC
   6318       1.1  christos 			match = __wcscoll (workp, str_buf);
   6319       1.1  christos # else
   6320       1.1  christos 			match = wcscoll (workp, str_buf);
   6321       1.1  christos # endif
   6322       1.1  christos 			if (match == 0)
   6323       1.1  christos 			  goto char_set_matched;
   6324       1.1  christos 
   6325       1.1  christos 			if (match < 0)
   6326       1.1  christos 			  /* (str_buf > workp) implies (str_buf + X > workp),
   6327       1.1  christos 			     because for all X (str_buf + X > str_buf).
   6328       1.1  christos 			     So we don't need to continue this loop.  */
   6329       1.1  christos 			  break;
   6330       1.1  christos 
   6331       1.1  christos 			/* Otherwise (str_buf < workp),
   6332       1.1  christos 			   (str_buf+next_character) may equal (workp).
   6333       1.1  christos 			   So we continue this loop.  */
   6334       1.1  christos 		      }
   6335       1.1  christos 		    /* not matched */
   6336       1.1  christos 		    d = backup_d;
   6337       1.1  christos 		    dend = backup_dend;
   6338       1.1  christos 		    workp += length + 1;
   6339       1.1  christos 		  }
   6340       1.1  christos               }
   6341       1.1  christos             /* match with equivalence_class?  */
   6342       1.1  christos # ifdef _LIBC
   6343       1.1  christos 	    if (nrules != 0)
   6344       1.1  christos 	      {
   6345       1.1  christos                 const CHAR_T *backup_d = d, *backup_dend = dend;
   6346       1.1  christos 		/* Try to match the equivalence class against
   6347       1.1  christos 		   those known to the collate implementation.  */
   6348       1.1  christos 		const int32_t *table;
   6349       1.1  christos 		const int32_t *weights;
   6350       1.1  christos 		const int32_t *extra;
   6351       1.1  christos 		const int32_t *indirect;
   6352       1.1  christos 		int32_t idx, idx2;
   6353       1.1  christos 		wint_t *cp;
   6354       1.1  christos 		size_t len;
   6355       1.1  christos 
   6356       1.1  christos 		/* This #include defines a local function!  */
   6357       1.1  christos #  include <locale/weightwc.h>
   6358       1.1  christos 
   6359       1.1  christos 		table = (const int32_t *)
   6360       1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
   6361       1.1  christos 		weights = (const wint_t *)
   6362       1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
   6363       1.1  christos 		extra = (const wint_t *)
   6364       1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
   6365       1.1  christos 		indirect = (const int32_t *)
   6366       1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
   6367       1.1  christos 
   6368       1.1  christos 		/* Write 1 collating element to str_buf, and
   6369       1.1  christos 		   get its index.  */
   6370       1.1  christos 		idx2 = 0;
   6371       1.1  christos 
   6372       1.1  christos 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
   6373       1.1  christos 		  {
   6374       1.1  christos 		    cp = (wint_t*)str_buf;
   6375       1.1  christos 		    if (d == dend)
   6376       1.1  christos 		      {
   6377       1.1  christos 			if (dend == end_match_2)
   6378       1.1  christos 			  break;
   6379       1.1  christos 			d = string2;
   6380       1.1  christos 			dend = end_match_2;
   6381       1.1  christos 		      }
   6382       1.1  christos 		    str_buf[i] = TRANSLATE(*(d+i));
   6383       1.1  christos 		    str_buf[i+1] = '\0'; /* sentinel */
   6384       1.1  christos 		    idx2 = findidx ((const wint_t**)&cp);
   6385       1.1  christos 		  }
   6386       1.1  christos 
   6387       1.1  christos 		/* Update d; since d will be incremented again at
   6388       1.1  christos 		   char_set_matched:, position it one character short here.  */
   6389       1.1  christos 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
   6390       1.1  christos 		if (d >= dend)
   6391       1.1  christos 		  {
   6392       1.1  christos 		    if (dend == end_match_2)
   6393       1.1  christos 			d = dend;
   6394       1.1  christos 		    else
   6395       1.1  christos 		      {
   6396       1.1  christos 			d = string2;
   6397       1.1  christos 			dend = end_match_2;
   6398       1.1  christos 		      }
   6399       1.1  christos 		  }
   6400       1.1  christos 
   6401       1.1  christos 		len = weights[idx2];
   6402       1.1  christos 
   6403       1.1  christos 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
   6404       1.1  christos 		     workp++)
   6405       1.1  christos 		  {
   6406       1.1  christos 		    idx = (int32_t)*workp;
   6407       1.1  christos 		    /* We already checked idx != 0 in regex_compile. */
   6408       1.1  christos 
   6409       1.1  christos 		    if (idx2 != 0 && len == weights[idx])
   6410       1.1  christos 		      {
   6411       1.1  christos 			int cnt = 0;
   6412       1.1  christos 			while (cnt < len && (weights[idx + 1 + cnt]
   6413       1.1  christos 					     == weights[idx2 + 1 + cnt]))
   6414       1.1  christos 			  ++cnt;
   6415       1.1  christos 
   6416       1.1  christos 			if (cnt == len)
   6417       1.1  christos 			  goto char_set_matched;
   6418       1.1  christos 		      }
   6419       1.1  christos 		  }
   6420       1.1  christos 		/* not matched */
   6421       1.1  christos                 d = backup_d;
   6422       1.1  christos                 dend = backup_dend;
   6423       1.1  christos 	      }
   6424       1.1  christos 	    else /* (nrules == 0) */
   6425       1.1  christos # endif
   6426       1.1  christos 	      /* If we can't look up collation data, we use wcscoll
   6427       1.1  christos 		 instead.  */
   6428       1.1  christos 	      {
   6429       1.1  christos 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
   6430       1.1  christos 		  {
   6431       1.1  christos 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6432       1.1  christos # ifdef _LIBC
   6433       1.1  christos 		    length = __wcslen (workp);
   6434       1.1  christos # else
   6435       1.1  christos 		    length = wcslen (workp);
   6436       1.1  christos # endif
   6437       1.1  christos 
   6438       1.1  christos 		    /* If wcscoll(the equivalence class, whole string) > 0,
   6439       1.1  christos 		       no substring of the string can match the
   6440       1.1  christos 		       equivalence class.  */
   6441       1.1  christos # ifdef _LIBC
   6442       1.1  christos 		    if (__wcscoll (workp, d) > 0)
   6443       1.1  christos # else
   6444       1.1  christos 		    if (wcscoll (workp, d) > 0)
   6445       1.1  christos # endif
   6446       1.1  christos 		      {
   6447       1.1  christos 			workp += length + 1;
   6448       1.1  christos 			break;
   6449       1.1  christos 		      }
   6450       1.1  christos 
   6451       1.1  christos 		    /* First, we compare the equivalence class with
   6452       1.1  christos 		       the first character of the string.
   6453       1.1  christos 		       If it doesn't match, we add the next character to
   6454       1.1  christos 		       the compare buffer in turn.  */
   6455       1.1  christos 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
   6456       1.1  christos 		      {
   6457       1.1  christos 			int match;
   6458       1.1  christos 			if (d == dend)
   6459       1.1  christos 			  {
   6460       1.1  christos 			    if (dend == end_match_2)
   6461       1.1  christos 			      break;
   6462       1.1  christos 			    d = string2;
   6463       1.1  christos 			    dend = end_match_2;
   6464       1.1  christos 			  }
   6465       1.1  christos 
   6466       1.1  christos 			/* add next character to the compare buffer.  */
   6467       1.1  christos 			str_buf[i] = TRANSLATE(*d);
   6468       1.1  christos 			str_buf[i+1] = '\0';
   6469       1.1  christos 
   6470       1.1  christos # ifdef _LIBC
   6471       1.1  christos 			match = __wcscoll (workp, str_buf);
   6472       1.1  christos # else
   6473       1.1  christos 			match = wcscoll (workp, str_buf);
   6474       1.1  christos # endif
   6475       1.1  christos 
   6476       1.1  christos 			if (match == 0)
   6477       1.1  christos 			  goto char_set_matched;
   6478       1.1  christos 
   6479       1.1  christos 			if (match < 0)
   6480       1.1  christos 			/* (str_buf > workp) implies (str_buf + X > workp),
   6481       1.1  christos 			   because for all X (str_buf + X > str_buf).
   6482       1.1  christos 			   So we don't need to continue this loop.  */
   6483       1.1  christos 			  break;
   6484       1.1  christos 
   6485       1.1  christos 			/* Otherwise (str_buf < workp),
   6486       1.1  christos 			   (str_buf+next_character) may equal (workp).
   6487       1.1  christos 			   So we continue this loop.  */
   6488       1.1  christos 		      }
   6489       1.1  christos 		    /* not matched */
   6490       1.1  christos 		    d = backup_d;
   6491       1.1  christos 		    dend = backup_dend;
   6492       1.1  christos 		    workp += length + 1;
   6493       1.1  christos 		  }
   6494       1.1  christos 	      }
   6495       1.1  christos 
   6496       1.1  christos             /* match with char_range?  */
   6497       1.1  christos # ifdef _LIBC
   6498       1.1  christos 	    if (nrules != 0)
   6499       1.1  christos 	      {
   6500       1.1  christos 		uint32_t collseqval;
   6501       1.1  christos 		const char *collseq = (const char *)
   6502       1.1  christos 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   6503       1.1  christos 
   6504       1.1  christos 		collseqval = collseq_table_lookup (collseq, c);
   6505       1.1  christos 
   6506       1.1  christos 		for (; workp < p - chars_length ;)
   6507       1.1  christos 		  {
   6508       1.1  christos 		    uint32_t start_val, end_val;
   6509       1.1  christos 
   6510       1.1  christos 		    /* We already compute the collation sequence value
   6511       1.1  christos 		       of the characters (or collating symbols).  */
   6512       1.1  christos 		    start_val = (uint32_t) *workp++; /* range_start */
   6513       1.1  christos 		    end_val = (uint32_t) *workp++; /* range_end */
   6514       1.1  christos 
   6515       1.1  christos 		    if (start_val <= collseqval && collseqval <= end_val)
   6516       1.1  christos 		      goto char_set_matched;
   6517       1.1  christos 		  }
   6518       1.1  christos 	      }
   6519       1.1  christos 	    else
   6520       1.1  christos # endif
   6521       1.1  christos 	      {
   6522       1.1  christos 		/* We set range_start_char at str_buf[0], range_end_char
   6523       1.1  christos 		   at str_buf[4], and compared char at str_buf[2].  */
   6524       1.1  christos 		str_buf[1] = 0;
   6525       1.1  christos 		str_buf[2] = c;
   6526       1.1  christos 		str_buf[3] = 0;
   6527       1.1  christos 		str_buf[5] = 0;
   6528       1.1  christos 		for (; workp < p - chars_length ;)
   6529       1.1  christos 		  {
   6530       1.1  christos 		    wchar_t *range_start_char, *range_end_char;
   6531       1.1  christos 
   6532       1.1  christos 		    /* match if (range_start_char <= c <= range_end_char).  */
   6533       1.1  christos 
   6534       1.1  christos 		    /* If range_start(or end) < 0, we assume -range_start(end)
   6535       1.1  christos 		       is the offset of the collating symbol which is specified
   6536       1.1  christos 		       as the character of the range start(end).  */
   6537       1.1  christos 
   6538       1.1  christos 		    /* range_start */
   6539       1.1  christos 		    if (*workp < 0)
   6540       1.1  christos 		      range_start_char = charset_top - (*workp++);
   6541       1.1  christos 		    else
   6542       1.1  christos 		      {
   6543       1.1  christos 			str_buf[0] = *workp++;
   6544       1.1  christos 			range_start_char = str_buf;
   6545       1.1  christos 		      }
   6546       1.1  christos 
   6547       1.1  christos 		    /* range_end */
   6548       1.1  christos 		    if (*workp < 0)
   6549       1.1  christos 		      range_end_char = charset_top - (*workp++);
   6550       1.1  christos 		    else
   6551       1.1  christos 		      {
   6552       1.1  christos 			str_buf[4] = *workp++;
   6553       1.1  christos 			range_end_char = str_buf + 4;
   6554       1.1  christos 		      }
   6555       1.1  christos 
   6556       1.1  christos # ifdef _LIBC
   6557       1.1  christos 		    if (__wcscoll (range_start_char, str_buf+2) <= 0
   6558       1.1  christos 			&& __wcscoll (str_buf+2, range_end_char) <= 0)
   6559       1.1  christos # else
   6560       1.1  christos 		    if (wcscoll (range_start_char, str_buf+2) <= 0
   6561       1.1  christos 			&& wcscoll (str_buf+2, range_end_char) <= 0)
   6562       1.1  christos # endif
   6563       1.1  christos 		      goto char_set_matched;
   6564       1.1  christos 		  }
   6565       1.1  christos 	      }
   6566       1.1  christos 
   6567       1.1  christos             /* match with char?  */
   6568       1.1  christos 	    for (; workp < p ; workp++)
   6569       1.1  christos 	      if (c == *workp)
   6570       1.1  christos 		goto char_set_matched;
   6571       1.1  christos 
   6572       1.1  christos 	    negate = !negate;
   6573       1.1  christos 
   6574       1.1  christos 	  char_set_matched:
   6575       1.1  christos 	    if (negate) goto fail;
   6576       1.1  christos #else
   6577       1.1  christos             /* Cast to `unsigned' instead of `unsigned char' in case the
   6578       1.1  christos                bit list is a full 32 bytes long.  */
   6579       1.1  christos 	    if (c < (unsigned) (*p * BYTEWIDTH)
   6580       1.1  christos 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   6581       1.1  christos 	      negate = !negate;
   6582       1.1  christos 
   6583       1.1  christos 	    p += 1 + *p;
   6584       1.1  christos 
   6585       1.1  christos 	    if (!negate) goto fail;
   6586       1.1  christos #undef WORK_BUFFER_SIZE
   6587       1.1  christos #endif /* WCHAR */
   6588       1.1  christos 	    SET_REGS_MATCHED ();
   6589       1.1  christos             d++;
   6590       1.1  christos 	    break;
   6591       1.1  christos 	  }
   6592       1.1  christos 
   6593       1.1  christos 
   6594       1.1  christos         /* The beginning of a group is represented by start_memory.
   6595       1.1  christos            The arguments are the register number in the next byte, and the
   6596       1.1  christos            number of groups inner to this one in the next.  The text
   6597       1.1  christos            matched within the group is recorded (in the internal
   6598       1.1  christos            registers data structure) under the register number.  */
   6599       1.1  christos         case start_memory:
   6600       1.1  christos 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
   6601       1.1  christos 			(long int) *p, (long int) p[1]);
   6602       1.1  christos 
   6603       1.1  christos           /* Find out if this group can match the empty string.  */
   6604       1.1  christos 	  p1 = p;		/* To send to group_match_null_string_p.  */
   6605       1.1  christos 
   6606       1.1  christos           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
   6607       1.1  christos             REG_MATCH_NULL_STRING_P (reg_info[*p])
   6608       1.1  christos               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
   6609       1.1  christos 
   6610       1.1  christos           /* Save the position in the string where we were the last time
   6611       1.1  christos              we were at this open-group operator in case the group is
   6612       1.1  christos              operated upon by a repetition operator, e.g., with `(a*)*b'
   6613       1.1  christos              against `ab'; then we want to ignore where we are now in
   6614       1.1  christos              the string in case this attempt to match fails.  */
   6615       1.1  christos           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6616       1.1  christos                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
   6617       1.1  christos                              : regstart[*p];
   6618       1.1  christos 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
   6619       1.1  christos 			 POINTER_TO_OFFSET (old_regstart[*p]));
   6620       1.1  christos 
   6621       1.1  christos           regstart[*p] = d;
   6622       1.1  christos 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
   6623       1.1  christos 
   6624       1.1  christos           IS_ACTIVE (reg_info[*p]) = 1;
   6625       1.1  christos           MATCHED_SOMETHING (reg_info[*p]) = 0;
   6626       1.1  christos 
   6627       1.1  christos 	  /* Clear this whenever we change the register activity status.  */
   6628       1.1  christos 	  set_regs_matched_done = 0;
   6629       1.1  christos 
   6630       1.1  christos           /* This is the new highest active register.  */
   6631       1.1  christos           highest_active_reg = *p;
   6632       1.1  christos 
   6633       1.1  christos           /* If nothing was active before, this is the new lowest active
   6634       1.1  christos              register.  */
   6635       1.1  christos           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6636       1.1  christos             lowest_active_reg = *p;
   6637       1.1  christos 
   6638       1.1  christos           /* Move past the register number and inner group count.  */
   6639       1.1  christos           p += 2;
   6640       1.1  christos 	  just_past_start_mem = p;
   6641       1.1  christos 
   6642       1.1  christos           break;
   6643       1.1  christos 
   6644       1.1  christos 
   6645       1.1  christos         /* The stop_memory opcode represents the end of a group.  Its
   6646       1.1  christos            arguments are the same as start_memory's: the register
   6647       1.1  christos            number, and the number of inner groups.  */
   6648       1.1  christos 	case stop_memory:
   6649       1.1  christos 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
   6650       1.1  christos 			(long int) *p, (long int) p[1]);
   6651       1.1  christos 
   6652       1.1  christos           /* We need to save the string position the last time we were at
   6653       1.1  christos              this close-group operator in case the group is operated
   6654       1.1  christos              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
   6655       1.1  christos              against `aba'; then we want to ignore where we are now in
   6656       1.1  christos              the string in case this attempt to match fails.  */
   6657       1.1  christos           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6658       1.1  christos                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
   6659       1.1  christos 			   : regend[*p];
   6660       1.1  christos 	  DEBUG_PRINT2 ("      old_regend: %d\n",
   6661       1.1  christos 			 POINTER_TO_OFFSET (old_regend[*p]));
   6662       1.1  christos 
   6663       1.1  christos           regend[*p] = d;
   6664       1.1  christos 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
   6665       1.1  christos 
   6666       1.1  christos           /* This register isn't active anymore.  */
   6667       1.1  christos           IS_ACTIVE (reg_info[*p]) = 0;
   6668       1.1  christos 
   6669       1.1  christos 	  /* Clear this whenever we change the register activity status.  */
   6670       1.1  christos 	  set_regs_matched_done = 0;
   6671       1.1  christos 
   6672       1.1  christos           /* If this was the only register active, nothing is active
   6673       1.1  christos              anymore.  */
   6674       1.1  christos           if (lowest_active_reg == highest_active_reg)
   6675       1.1  christos             {
   6676       1.1  christos               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6677       1.1  christos               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6678       1.1  christos             }
   6679       1.1  christos           else
   6680       1.1  christos             { /* We must scan for the new highest active register, since
   6681       1.1  christos                  it isn't necessarily one less than now: consider
   6682       1.1  christos                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
   6683       1.1  christos                  new highest active register is 1.  */
   6684       1.1  christos               UCHAR_T r = *p - 1;
   6685       1.1  christos               while (r > 0 && !IS_ACTIVE (reg_info[r]))
   6686       1.1  christos                 r--;
   6687       1.1  christos 
   6688       1.1  christos               /* If we end up at register zero, that means that we saved
   6689       1.1  christos                  the registers as the result of an `on_failure_jump', not
   6690       1.1  christos                  a `start_memory', and we jumped to past the innermost
   6691       1.1  christos                  `stop_memory'.  For example, in ((.)*) we save
   6692       1.1  christos                  registers 1 and 2 as a result of the *, but when we pop
   6693       1.1  christos                  back to the second ), we are at the stop_memory 1.
   6694       1.1  christos                  Thus, nothing is active.  */
   6695       1.1  christos 	      if (r == 0)
   6696       1.1  christos                 {
   6697       1.1  christos                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6698       1.1  christos                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6699       1.1  christos                 }
   6700       1.1  christos               else
   6701       1.1  christos                 highest_active_reg = r;
   6702       1.1  christos             }
   6703       1.1  christos 
   6704   1.1.1.6  christos           /* If just failed to match something this time around with a
   6705       1.1  christos              group that's operated on by a repetition operator, try to
   6706       1.1  christos              force exit from the ``loop'', and restore the register
   6707       1.1  christos              information for this group that we had before trying this
   6708       1.1  christos              last match.  */
   6709       1.1  christos           if ((!MATCHED_SOMETHING (reg_info[*p])
   6710       1.1  christos                || just_past_start_mem == p - 1)
   6711       1.1  christos 	      && (p + 2) < pend)
   6712       1.1  christos             {
   6713       1.1  christos               boolean is_a_jump_n = false;
   6714       1.1  christos 
   6715       1.1  christos               p1 = p + 2;
   6716       1.1  christos               mcnt = 0;
   6717       1.1  christos               switch ((re_opcode_t) *p1++)
   6718       1.1  christos                 {
   6719       1.1  christos                   case jump_n:
   6720       1.1  christos 		    is_a_jump_n = true;
   6721       1.1  christos 		    /* Fall through.  */
   6722       1.1  christos                   case pop_failure_jump:
   6723       1.1  christos 		  case maybe_pop_jump:
   6724       1.1  christos 		  case jump:
   6725       1.1  christos 		  case dummy_failure_jump:
   6726       1.1  christos                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6727       1.1  christos 		    if (is_a_jump_n)
   6728       1.1  christos 		      p1 += OFFSET_ADDRESS_SIZE;
   6729       1.1  christos                     break;
   6730       1.1  christos 
   6731       1.1  christos                   default:
   6732       1.1  christos                     /* do nothing */ ;
   6733       1.1  christos                 }
   6734       1.1  christos 	      p1 += mcnt;
   6735       1.1  christos 
   6736       1.1  christos               /* If the next operation is a jump backwards in the pattern
   6737       1.1  christos 	         to an on_failure_jump right before the start_memory
   6738       1.1  christos                  corresponding to this stop_memory, exit from the loop
   6739       1.1  christos                  by forcing a failure after pushing on the stack the
   6740       1.1  christos                  on_failure_jump's jump in the pattern, and d.  */
   6741       1.1  christos               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
   6742       1.1  christos                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
   6743       1.1  christos 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
   6744       1.1  christos 		{
   6745       1.1  christos                   /* If this group ever matched anything, then restore
   6746       1.1  christos                      what its registers were before trying this last
   6747       1.1  christos                      failed match, e.g., with `(a*)*b' against `ab' for
   6748       1.1  christos                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
   6749       1.1  christos                      against `aba' for regend[3].
   6750       1.1  christos 
   6751       1.1  christos                      Also restore the registers for inner groups for,
   6752       1.1  christos                      e.g., `((a*)(b*))*' against `aba' (register 3 would
   6753       1.1  christos                      otherwise get trashed).  */
   6754       1.1  christos 
   6755       1.1  christos                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
   6756       1.1  christos 		    {
   6757       1.1  christos 		      unsigned r;
   6758       1.1  christos 
   6759       1.1  christos                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
   6760       1.1  christos 
   6761       1.1  christos 		      /* Restore this and inner groups' (if any) registers.  */
   6762       1.1  christos                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
   6763       1.1  christos 			   r++)
   6764       1.1  christos                         {
   6765       1.1  christos                           regstart[r] = old_regstart[r];
   6766       1.1  christos 
   6767       1.1  christos                           /* xx why this test?  */
   6768       1.1  christos                           if (old_regend[r] >= regstart[r])
   6769       1.1  christos                             regend[r] = old_regend[r];
   6770       1.1  christos                         }
   6771       1.1  christos                     }
   6772       1.1  christos 		  p1++;
   6773       1.1  christos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6774       1.1  christos                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
   6775       1.1  christos 
   6776       1.1  christos                   goto fail;
   6777       1.1  christos                 }
   6778       1.1  christos             }
   6779       1.1  christos 
   6780       1.1  christos           /* Move past the register number and the inner group count.  */
   6781       1.1  christos           p += 2;
   6782       1.1  christos           break;
   6783       1.1  christos 
   6784       1.1  christos 
   6785       1.1  christos 	/* \<digit> has been turned into a `duplicate' command which is
   6786       1.1  christos            followed by the numeric value of <digit> as the register number.  */
   6787       1.1  christos         case duplicate:
   6788       1.1  christos 	  {
   6789       1.1  christos 	    register const CHAR_T *d2, *dend2;
   6790       1.1  christos 	    int regno = *p++;   /* Get which register to match against.  */
   6791       1.1  christos 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
   6792       1.1  christos 
   6793       1.1  christos 	    /* Can't back reference a group which we've never matched.  */
   6794       1.1  christos             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
   6795       1.1  christos               goto fail;
   6796       1.1  christos 
   6797       1.1  christos             /* Where in input to try to start matching.  */
   6798       1.1  christos             d2 = regstart[regno];
   6799       1.1  christos 
   6800       1.1  christos             /* Where to stop matching; if both the place to start and
   6801       1.1  christos                the place to stop matching are in the same string, then
   6802       1.1  christos                set to the place to stop, otherwise, for now have to use
   6803       1.1  christos                the end of the first string.  */
   6804       1.1  christos 
   6805       1.1  christos             dend2 = ((FIRST_STRING_P (regstart[regno])
   6806       1.1  christos 		      == FIRST_STRING_P (regend[regno]))
   6807       1.1  christos 		     ? regend[regno] : end_match_1);
   6808       1.1  christos 	    for (;;)
   6809       1.1  christos 	      {
   6810       1.1  christos 		/* If necessary, advance to next segment in register
   6811       1.1  christos                    contents.  */
   6812       1.1  christos 		while (d2 == dend2)
   6813       1.1  christos 		  {
   6814       1.1  christos 		    if (dend2 == end_match_2) break;
   6815       1.1  christos 		    if (dend2 == regend[regno]) break;
   6816       1.1  christos 
   6817       1.1  christos                     /* End of string1 => advance to string2. */
   6818       1.1  christos                     d2 = string2;
   6819       1.1  christos                     dend2 = regend[regno];
   6820       1.1  christos 		  }
   6821       1.1  christos 		/* At end of register contents => success */
   6822       1.1  christos 		if (d2 == dend2) break;
   6823       1.1  christos 
   6824       1.1  christos 		/* If necessary, advance to next segment in data.  */
   6825       1.1  christos 		PREFETCH ();
   6826       1.1  christos 
   6827       1.1  christos 		/* How many characters left in this segment to match.  */
   6828       1.1  christos 		mcnt = dend - d;
   6829       1.1  christos 
   6830       1.1  christos 		/* Want how many consecutive characters we can match in
   6831       1.1  christos                    one shot, so, if necessary, adjust the count.  */
   6832       1.1  christos                 if (mcnt > dend2 - d2)
   6833       1.1  christos 		  mcnt = dend2 - d2;
   6834       1.1  christos 
   6835       1.1  christos 		/* Compare that many; failure if mismatch, else move
   6836       1.1  christos                    past them.  */
   6837       1.1  christos 		if (translate
   6838       1.1  christos                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
   6839       1.1  christos                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
   6840       1.1  christos 		  goto fail;
   6841       1.1  christos 		d += mcnt, d2 += mcnt;
   6842       1.1  christos 
   6843       1.1  christos 		/* Do this because we've matched some characters.  */
   6844       1.1  christos 		SET_REGS_MATCHED ();
   6845       1.1  christos 	      }
   6846       1.1  christos 	  }
   6847       1.1  christos 	  break;
   6848       1.1  christos 
   6849       1.1  christos 
   6850       1.1  christos         /* begline matches the empty string at the beginning of the string
   6851       1.1  christos            (unless `not_bol' is set in `bufp'), and, if
   6852       1.1  christos            `newline_anchor' is set, after newlines.  */
   6853       1.1  christos 	case begline:
   6854       1.1  christos           DEBUG_PRINT1 ("EXECUTING begline.\n");
   6855       1.1  christos 
   6856       1.1  christos           if (AT_STRINGS_BEG (d))
   6857       1.1  christos             {
   6858       1.1  christos               if (!bufp->not_bol) break;
   6859       1.1  christos             }
   6860       1.1  christos           else if (d[-1] == '\n' && bufp->newline_anchor)
   6861       1.1  christos             {
   6862       1.1  christos               break;
   6863       1.1  christos             }
   6864       1.1  christos           /* In all other cases, we fail.  */
   6865       1.1  christos           goto fail;
   6866       1.1  christos 
   6867       1.1  christos 
   6868       1.1  christos         /* endline is the dual of begline.  */
   6869       1.1  christos 	case endline:
   6870       1.1  christos           DEBUG_PRINT1 ("EXECUTING endline.\n");
   6871       1.1  christos 
   6872       1.1  christos           if (AT_STRINGS_END (d))
   6873       1.1  christos             {
   6874       1.1  christos               if (!bufp->not_eol) break;
   6875       1.1  christos             }
   6876       1.1  christos 
   6877       1.1  christos           /* We have to ``prefetch'' the next character.  */
   6878       1.1  christos           else if ((d == end1 ? *string2 : *d) == '\n'
   6879       1.1  christos                    && bufp->newline_anchor)
   6880       1.1  christos             {
   6881       1.1  christos               break;
   6882       1.1  christos             }
   6883       1.1  christos           goto fail;
   6884       1.1  christos 
   6885       1.1  christos 
   6886       1.1  christos 	/* Match at the very beginning of the data.  */
   6887       1.1  christos         case begbuf:
   6888       1.1  christos           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
   6889       1.1  christos           if (AT_STRINGS_BEG (d))
   6890       1.1  christos             break;
   6891       1.1  christos           goto fail;
   6892       1.1  christos 
   6893       1.1  christos 
   6894       1.1  christos 	/* Match at the very end of the data.  */
   6895       1.1  christos         case endbuf:
   6896       1.1  christos           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
   6897       1.1  christos 	  if (AT_STRINGS_END (d))
   6898       1.1  christos 	    break;
   6899       1.1  christos           goto fail;
   6900       1.1  christos 
   6901       1.1  christos 
   6902       1.1  christos         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
   6903       1.1  christos            pushes NULL as the value for the string on the stack.  Then
   6904       1.1  christos            `pop_failure_point' will keep the current value for the
   6905       1.1  christos            string, instead of restoring it.  To see why, consider
   6906       1.1  christos            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
   6907       1.1  christos            then the . fails against the \n.  But the next thing we want
   6908       1.1  christos            to do is match the \n against the \n; if we restored the
   6909       1.1  christos            string value, we would be back at the foo.
   6910       1.1  christos 
   6911       1.1  christos            Because this is used only in specific cases, we don't need to
   6912       1.1  christos            check all the things that `on_failure_jump' does, to make
   6913       1.1  christos            sure the right things get saved on the stack.  Hence we don't
   6914       1.1  christos            share its code.  The only reason to push anything on the
   6915       1.1  christos            stack at all is that otherwise we would have to change
   6916       1.1  christos            `anychar's code to do something besides goto fail in this
   6917       1.1  christos            case; that seems worse than this.  */
   6918       1.1  christos         case on_failure_keep_string_jump:
   6919       1.1  christos           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
   6920       1.1  christos 
   6921       1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6922       1.1  christos #ifdef _LIBC
   6923       1.1  christos           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
   6924       1.1  christos #else
   6925       1.1  christos           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
   6926       1.1  christos #endif
   6927       1.1  christos 
   6928       1.1  christos           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
   6929       1.1  christos           break;
   6930       1.1  christos 
   6931       1.1  christos 
   6932       1.1  christos 	/* Uses of on_failure_jump:
   6933       1.1  christos 
   6934       1.1  christos            Each alternative starts with an on_failure_jump that points
   6935       1.1  christos            to the beginning of the next alternative.  Each alternative
   6936       1.1  christos            except the last ends with a jump that in effect jumps past
   6937       1.1  christos            the rest of the alternatives.  (They really jump to the
   6938       1.1  christos            ending jump of the following alternative, because tensioning
   6939       1.1  christos            these jumps is a hassle.)
   6940       1.1  christos 
   6941       1.1  christos            Repeats start with an on_failure_jump that points past both
   6942       1.1  christos            the repetition text and either the following jump or
   6943       1.1  christos            pop_failure_jump back to this on_failure_jump.  */
   6944       1.1  christos 	case on_failure_jump:
   6945       1.1  christos         on_failure:
   6946       1.1  christos           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
   6947       1.1  christos 
   6948       1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6949       1.1  christos #ifdef _LIBC
   6950       1.1  christos           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
   6951       1.1  christos #else
   6952       1.1  christos           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
   6953       1.1  christos #endif
   6954       1.1  christos 
   6955       1.1  christos           /* If this on_failure_jump comes right before a group (i.e.,
   6956       1.1  christos              the original * applied to a group), save the information
   6957       1.1  christos              for that group and all inner ones, so that if we fail back
   6958       1.1  christos              to this point, the group's information will be correct.
   6959       1.1  christos              For example, in \(a*\)*\1, we need the preceding group,
   6960       1.1  christos              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
   6961       1.1  christos 
   6962       1.1  christos           /* We can't use `p' to check ahead because we push
   6963       1.1  christos              a failure point to `p + mcnt' after we do this.  */
   6964       1.1  christos           p1 = p;
   6965       1.1  christos 
   6966       1.1  christos           /* We need to skip no_op's before we look for the
   6967       1.1  christos              start_memory in case this on_failure_jump is happening as
   6968       1.1  christos              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
   6969       1.1  christos              against aba.  */
   6970       1.1  christos           while (p1 < pend && (re_opcode_t) *p1 == no_op)
   6971       1.1  christos             p1++;
   6972       1.1  christos 
   6973       1.1  christos           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
   6974       1.1  christos             {
   6975       1.1  christos               /* We have a new highest active register now.  This will
   6976       1.1  christos                  get reset at the start_memory we are about to get to,
   6977       1.1  christos                  but we will have saved all the registers relevant to
   6978       1.1  christos                  this repetition op, as described above.  */
   6979       1.1  christos               highest_active_reg = *(p1 + 1) + *(p1 + 2);
   6980       1.1  christos               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6981       1.1  christos                 lowest_active_reg = *(p1 + 1);
   6982       1.1  christos             }
   6983       1.1  christos 
   6984       1.1  christos           DEBUG_PRINT1 (":\n");
   6985       1.1  christos           PUSH_FAILURE_POINT (p + mcnt, d, -2);
   6986       1.1  christos           break;
   6987       1.1  christos 
   6988       1.1  christos 
   6989       1.1  christos         /* A smart repeat ends with `maybe_pop_jump'.
   6990       1.1  christos 	   We change it to either `pop_failure_jump' or `jump'.  */
   6991       1.1  christos         case maybe_pop_jump:
   6992       1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   6993       1.1  christos           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
   6994       1.1  christos           {
   6995       1.1  christos 	    register UCHAR_T *p2 = p;
   6996       1.1  christos 
   6997       1.1  christos             /* Compare the beginning of the repeat with what in the
   6998       1.1  christos                pattern follows its end.  If we can establish that there
   6999       1.1  christos                is nothing that they would both match, i.e., nothing that
   7000       1.1  christos                would force us to backtrack (as `a*a' would, for example),
   7001       1.1  christos                then we can change to pop_failure_jump, because we'll
   7002       1.1  christos                never have to backtrack.
   7003       1.1  christos 
   7004       1.1  christos                This is not true in the case of alternatives: in
   7005       1.1  christos                `(a|ab)*' we do need to backtrack to the `ab' alternative
   7006       1.1  christos                (e.g., if the string was `ab').  But instead of trying to
   7007       1.1  christos                detect that here, the alternative has put on a dummy
   7008       1.1  christos                failure point which is what we will end up popping.  */
   7009       1.1  christos 
   7010       1.1  christos 	    /* Skip over open/close-group commands.
   7011       1.1  christos 	       If what follows this loop is a ...+ construct,
   7012       1.1  christos 	       look at what begins its body, since we will have to
   7013       1.1  christos 	       match at least one of that.  */
   7014       1.1  christos 	    while (1)
   7015       1.1  christos 	      {
   7016       1.1  christos 		if (p2 + 2 < pend
   7017       1.1  christos 		    && ((re_opcode_t) *p2 == stop_memory
   7018       1.1  christos 			|| (re_opcode_t) *p2 == start_memory))
   7019       1.1  christos 		  p2 += 3;
   7020       1.1  christos 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
   7021       1.1  christos 			 && (re_opcode_t) *p2 == dummy_failure_jump)
   7022       1.1  christos 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
   7023       1.1  christos 		else
   7024       1.1  christos 		  break;
   7025       1.1  christos 	      }
   7026       1.1  christos 
   7027       1.1  christos 	    p1 = p + mcnt;
   7028       1.1  christos 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
   7029       1.1  christos 	       to the `maybe_pop_jump' of this case.  Examine what
   7030       1.1  christos 	       follows.  */
   7031       1.1  christos 
   7032       1.1  christos             /* If we're at the end of the pattern, we can change.  */
   7033       1.1  christos             if (p2 == pend)
   7034       1.1  christos 	      {
   7035       1.1  christos 		/* Consider what happens when matching ":\(.*\)"
   7036       1.1  christos 		   against ":/".  I don't really understand this code
   7037       1.1  christos 		   yet.  */
   7038       1.1  christos   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7039       1.1  christos 		  pop_failure_jump;
   7040       1.1  christos                 DEBUG_PRINT1
   7041       1.1  christos                   ("  End of pattern: change to `pop_failure_jump'.\n");
   7042       1.1  christos               }
   7043       1.1  christos 
   7044       1.1  christos             else if ((re_opcode_t) *p2 == exactn
   7045       1.1  christos #ifdef MBS_SUPPORT
   7046       1.1  christos 		     || (re_opcode_t) *p2 == exactn_bin
   7047       1.1  christos #endif
   7048       1.1  christos 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
   7049       1.1  christos 	      {
   7050       1.1  christos 		register UCHAR_T c
   7051       1.1  christos                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
   7052       1.1  christos 
   7053       1.1  christos                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
   7054       1.1  christos #ifdef MBS_SUPPORT
   7055       1.1  christos 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
   7056       1.1  christos #endif
   7057       1.1  christos 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
   7058       1.1  christos                   {
   7059       1.1  christos   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7060       1.1  christos 		      pop_failure_jump;
   7061       1.1  christos #ifdef WCHAR
   7062       1.1  christos 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
   7063       1.1  christos 				    (wint_t) c,
   7064       1.1  christos 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
   7065       1.1  christos #else
   7066       1.1  christos 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   7067       1.1  christos 				    (char) c,
   7068       1.1  christos 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
   7069       1.1  christos #endif
   7070       1.1  christos                   }
   7071       1.1  christos 
   7072       1.1  christos #ifndef WCHAR
   7073       1.1  christos 		else if ((re_opcode_t) p1[3] == charset
   7074       1.1  christos 			 || (re_opcode_t) p1[3] == charset_not)
   7075       1.1  christos 		  {
   7076       1.1  christos 		    int negate = (re_opcode_t) p1[3] == charset_not;
   7077       1.1  christos 
   7078       1.1  christos 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
   7079       1.1  christos 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   7080       1.1  christos 		      negate = !negate;
   7081       1.1  christos 
   7082       1.1  christos                     /* `negate' is equal to 1 if c would match, which means
   7083       1.1  christos                         that we can't change to pop_failure_jump.  */
   7084       1.1  christos 		    if (!negate)
   7085       1.1  christos                       {
   7086       1.1  christos   		        p[-3] = (unsigned char) pop_failure_jump;
   7087       1.1  christos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7088       1.1  christos                       }
   7089       1.1  christos 		  }
   7090       1.1  christos #endif /* not WCHAR */
   7091       1.1  christos 	      }
   7092       1.1  christos #ifndef WCHAR
   7093       1.1  christos             else if ((re_opcode_t) *p2 == charset)
   7094       1.1  christos 	      {
   7095       1.1  christos 		/* We win if the first character of the loop is not part
   7096       1.1  christos                    of the charset.  */
   7097       1.1  christos                 if ((re_opcode_t) p1[3] == exactn
   7098       1.1  christos  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
   7099       1.1  christos  			  && (p2[2 + p1[5] / BYTEWIDTH]
   7100       1.1  christos  			      & (1 << (p1[5] % BYTEWIDTH)))))
   7101       1.1  christos 		  {
   7102       1.1  christos 		    p[-3] = (unsigned char) pop_failure_jump;
   7103       1.1  christos 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7104       1.1  christos                   }
   7105       1.1  christos 
   7106       1.1  christos 		else if ((re_opcode_t) p1[3] == charset_not)
   7107       1.1  christos 		  {
   7108       1.1  christos 		    int idx;
   7109       1.1  christos 		    /* We win if the charset_not inside the loop
   7110       1.1  christos 		       lists every character listed in the charset after.  */
   7111       1.1  christos 		    for (idx = 0; idx < (int) p2[1]; idx++)
   7112       1.1  christos 		      if (! (p2[2 + idx] == 0
   7113       1.1  christos 			     || (idx < (int) p1[4]
   7114       1.1  christos 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
   7115       1.1  christos 			break;
   7116       1.1  christos 
   7117       1.1  christos 		    if (idx == p2[1])
   7118       1.1  christos                       {
   7119       1.1  christos   		        p[-3] = (unsigned char) pop_failure_jump;
   7120       1.1  christos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7121       1.1  christos                       }
   7122       1.1  christos 		  }
   7123       1.1  christos 		else if ((re_opcode_t) p1[3] == charset)
   7124       1.1  christos 		  {
   7125       1.1  christos 		    int idx;
   7126       1.1  christos 		    /* We win if the charset inside the loop
   7127       1.1  christos 		       has no overlap with the one after the loop.  */
   7128       1.1  christos 		    for (idx = 0;
   7129       1.1  christos 			 idx < (int) p2[1] && idx < (int) p1[4];
   7130       1.1  christos 			 idx++)
   7131       1.1  christos 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
   7132       1.1  christos 			break;
   7133   1.1.1.6  christos 
   7134       1.1  christos 		    if (idx == p2[1] || idx == p1[4])
   7135       1.1  christos                       {
   7136       1.1  christos   		        p[-3] = (unsigned char) pop_failure_jump;
   7137       1.1  christos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7138       1.1  christos                       }
   7139       1.1  christos 		  }
   7140       1.1  christos 	      }
   7141       1.1  christos #endif /* not WCHAR */
   7142       1.1  christos 	  }
   7143       1.1  christos 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
   7144       1.1  christos 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
   7145       1.1  christos 	    {
   7146       1.1  christos 	      p[-1] = (UCHAR_T) jump;
   7147       1.1  christos               DEBUG_PRINT1 ("  Match => jump.\n");
   7148       1.1  christos 	      goto unconditional_jump;
   7149       1.1  christos 	    }
   7150       1.1  christos         /* Fall through.  */
   7151       1.1  christos 
   7152       1.1  christos 
   7153       1.1  christos 	/* The end of a simple repeat has a pop_failure_jump back to
   7154       1.1  christos            its matching on_failure_jump, where the latter will push a
   7155       1.1  christos            failure point.  The pop_failure_jump takes off failure
   7156       1.1  christos            points put on by this pop_failure_jump's matching
   7157       1.1  christos            on_failure_jump; we got through the pattern to here from the
   7158   1.1.1.6  christos            matching on_failure_jump, so didn't fail.  */
   7159       1.1  christos         case pop_failure_jump:
   7160       1.1  christos           {
   7161       1.1  christos             /* We need to pass separate storage for the lowest and
   7162       1.1  christos                highest registers, even though we don't care about the
   7163       1.1  christos                actual values.  Otherwise, we will restore only one
   7164       1.1  christos                register from the stack, since lowest will == highest in
   7165       1.1  christos                `pop_failure_point'.  */
   7166       1.1  christos             active_reg_t dummy_low_reg, dummy_high_reg;
   7167       1.1  christos             UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL;
   7168       1.1  christos             const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL;
   7169       1.1  christos 
   7170       1.1  christos             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
   7171       1.1  christos             POP_FAILURE_POINT (sdummy, pdummy,
   7172       1.1  christos                                dummy_low_reg, dummy_high_reg,
   7173       1.1  christos                                reg_dummy, reg_dummy, reg_info_dummy);
   7174       1.1  christos           }
   7175       1.1  christos 	  /* Fall through.  */
   7176       1.1  christos 
   7177       1.1  christos 	unconditional_jump:
   7178       1.1  christos #ifdef _LIBC
   7179       1.1  christos 	  DEBUG_PRINT2 ("\n%p: ", p);
   7180       1.1  christos #else
   7181       1.1  christos 	  DEBUG_PRINT2 ("\n0x%x: ", p);
   7182       1.1  christos #endif
   7183       1.1  christos           /* Note fall through.  */
   7184       1.1  christos 
   7185       1.1  christos         /* Unconditionally jump (without popping any failure points).  */
   7186       1.1  christos         case jump:
   7187       1.1  christos 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
   7188       1.1  christos           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
   7189       1.1  christos 	  p += mcnt;				/* Do the jump.  */
   7190       1.1  christos #ifdef _LIBC
   7191       1.1  christos           DEBUG_PRINT2 ("(to %p).\n", p);
   7192       1.1  christos #else
   7193       1.1  christos           DEBUG_PRINT2 ("(to 0x%x).\n", p);
   7194       1.1  christos #endif
   7195       1.1  christos 	  break;
   7196       1.1  christos 
   7197       1.1  christos 
   7198       1.1  christos         /* We need this opcode so we can detect where alternatives end
   7199       1.1  christos            in `group_match_null_string_p' et al.  */
   7200       1.1  christos         case jump_past_alt:
   7201       1.1  christos           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
   7202       1.1  christos           goto unconditional_jump;
   7203       1.1  christos 
   7204       1.1  christos 
   7205       1.1  christos         /* Normally, the on_failure_jump pushes a failure point, which
   7206       1.1  christos            then gets popped at pop_failure_jump.  We will end up at
   7207       1.1  christos            pop_failure_jump, also, and with a pattern of, say, `a+', we
   7208       1.1  christos            are skipping over the on_failure_jump, so we have to push
   7209       1.1  christos            something meaningless for pop_failure_jump to pop.  */
   7210       1.1  christos         case dummy_failure_jump:
   7211       1.1  christos           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
   7212       1.1  christos           /* It doesn't matter what we push for the string here.  What
   7213       1.1  christos              the code at `fail' tests is the value for the pattern.  */
   7214       1.1  christos           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7215       1.1  christos           goto unconditional_jump;
   7216       1.1  christos 
   7217       1.1  christos 
   7218       1.1  christos         /* At the end of an alternative, we need to push a dummy failure
   7219       1.1  christos            point in case we are followed by a `pop_failure_jump', because
   7220       1.1  christos            we don't want the failure point for the alternative to be
   7221       1.1  christos            popped.  For example, matching `(a|ab)*' against `aab'
   7222       1.1  christos            requires that we match the `ab' alternative.  */
   7223       1.1  christos         case push_dummy_failure:
   7224       1.1  christos           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
   7225       1.1  christos           /* See comments just above at `dummy_failure_jump' about the
   7226       1.1  christos              two NULL arguments.  */
   7227       1.1  christos           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7228       1.1  christos           break;
   7229       1.1  christos 
   7230       1.1  christos         /* Have to succeed matching what follows at least n times.
   7231       1.1  christos            After that, handle like `on_failure_jump'.  */
   7232       1.1  christos         case succeed_n:
   7233       1.1  christos           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7234       1.1  christos           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
   7235       1.1  christos 
   7236       1.1  christos           assert (mcnt >= 0);
   7237       1.1  christos           /* Originally, this is how many times we HAVE to succeed.  */
   7238       1.1  christos           if (mcnt > 0)
   7239       1.1  christos             {
   7240       1.1  christos                mcnt--;
   7241       1.1  christos 	       p += OFFSET_ADDRESS_SIZE;
   7242       1.1  christos                STORE_NUMBER_AND_INCR (p, mcnt);
   7243       1.1  christos #ifdef _LIBC
   7244       1.1  christos                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
   7245       1.1  christos 			     , mcnt);
   7246       1.1  christos #else
   7247       1.1  christos                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
   7248       1.1  christos 			     , mcnt);
   7249       1.1  christos #endif
   7250       1.1  christos             }
   7251       1.1  christos 	  else if (mcnt == 0)
   7252       1.1  christos             {
   7253       1.1  christos #ifdef _LIBC
   7254       1.1  christos               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
   7255       1.1  christos 			    p + OFFSET_ADDRESS_SIZE);
   7256       1.1  christos #else
   7257       1.1  christos               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
   7258       1.1  christos 			    p + OFFSET_ADDRESS_SIZE);
   7259       1.1  christos #endif /* _LIBC */
   7260       1.1  christos 
   7261       1.1  christos #ifdef WCHAR
   7262       1.1  christos 	      p[1] = (UCHAR_T) no_op;
   7263       1.1  christos #else
   7264       1.1  christos 	      p[2] = (UCHAR_T) no_op;
   7265       1.1  christos               p[3] = (UCHAR_T) no_op;
   7266       1.1  christos #endif /* WCHAR */
   7267       1.1  christos               goto on_failure;
   7268       1.1  christos             }
   7269       1.1  christos           break;
   7270       1.1  christos 
   7271       1.1  christos         case jump_n:
   7272       1.1  christos           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7273       1.1  christos           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
   7274       1.1  christos 
   7275       1.1  christos           /* Originally, this is how many times we CAN jump.  */
   7276       1.1  christos           if (mcnt)
   7277       1.1  christos             {
   7278       1.1  christos                mcnt--;
   7279       1.1  christos                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
   7280       1.1  christos 
   7281       1.1  christos #ifdef _LIBC
   7282       1.1  christos                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7283       1.1  christos 			     mcnt);
   7284       1.1  christos #else
   7285       1.1  christos                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7286       1.1  christos 			     mcnt);
   7287       1.1  christos #endif /* _LIBC */
   7288       1.1  christos 	       goto unconditional_jump;
   7289       1.1  christos             }
   7290       1.1  christos           /* If don't have to jump any more, skip over the rest of command.  */
   7291       1.1  christos 	  else
   7292       1.1  christos 	    p += 2 * OFFSET_ADDRESS_SIZE;
   7293       1.1  christos           break;
   7294       1.1  christos 
   7295       1.1  christos 	case set_number_at:
   7296       1.1  christos 	  {
   7297       1.1  christos             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
   7298       1.1  christos 
   7299       1.1  christos             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7300       1.1  christos             p1 = p + mcnt;
   7301       1.1  christos             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7302       1.1  christos #ifdef _LIBC
   7303       1.1  christos             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
   7304       1.1  christos #else
   7305       1.1  christos             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
   7306       1.1  christos #endif
   7307       1.1  christos 	    STORE_NUMBER (p1, mcnt);
   7308       1.1  christos             break;
   7309       1.1  christos           }
   7310       1.1  christos 
   7311       1.1  christos #if 0
   7312       1.1  christos 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
   7313       1.1  christos 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   7314       1.1  christos 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
   7315       1.1  christos 	   macro and introducing temporary variables works around the bug.  */
   7316       1.1  christos 
   7317       1.1  christos 	case wordbound:
   7318       1.1  christos 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7319       1.1  christos 	  if (AT_WORD_BOUNDARY (d))
   7320       1.1  christos 	    break;
   7321       1.1  christos 	  goto fail;
   7322       1.1  christos 
   7323       1.1  christos 	case notwordbound:
   7324       1.1  christos 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7325       1.1  christos 	  if (AT_WORD_BOUNDARY (d))
   7326       1.1  christos 	    goto fail;
   7327       1.1  christos 	  break;
   7328       1.1  christos #else
   7329       1.1  christos 	case wordbound:
   7330       1.1  christos 	{
   7331       1.1  christos 	  boolean prevchar, thischar;
   7332       1.1  christos 
   7333       1.1  christos 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7334       1.1  christos 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7335       1.1  christos 	    break;
   7336       1.1  christos 
   7337       1.1  christos 	  prevchar = WORDCHAR_P (d - 1);
   7338       1.1  christos 	  thischar = WORDCHAR_P (d);
   7339       1.1  christos 	  if (prevchar != thischar)
   7340       1.1  christos 	    break;
   7341       1.1  christos 	  goto fail;
   7342       1.1  christos 	}
   7343       1.1  christos 
   7344       1.1  christos       case notwordbound:
   7345       1.1  christos 	{
   7346       1.1  christos 	  boolean prevchar, thischar;
   7347       1.1  christos 
   7348       1.1  christos 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7349       1.1  christos 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7350       1.1  christos 	    goto fail;
   7351       1.1  christos 
   7352       1.1  christos 	  prevchar = WORDCHAR_P (d - 1);
   7353       1.1  christos 	  thischar = WORDCHAR_P (d);
   7354       1.1  christos 	  if (prevchar != thischar)
   7355       1.1  christos 	    goto fail;
   7356       1.1  christos 	  break;
   7357       1.1  christos 	}
   7358       1.1  christos #endif
   7359       1.1  christos 
   7360       1.1  christos 	case wordbeg:
   7361       1.1  christos           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
   7362       1.1  christos 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
   7363       1.1  christos 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
   7364       1.1  christos 	    break;
   7365       1.1  christos           goto fail;
   7366       1.1  christos 
   7367       1.1  christos 	case wordend:
   7368       1.1  christos           DEBUG_PRINT1 ("EXECUTING wordend.\n");
   7369       1.1  christos 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
   7370       1.1  christos               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
   7371       1.1  christos 	    break;
   7372       1.1  christos           goto fail;
   7373       1.1  christos 
   7374       1.1  christos #ifdef emacs
   7375       1.1  christos   	case before_dot:
   7376       1.1  christos           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
   7377       1.1  christos  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
   7378       1.1  christos   	    goto fail;
   7379       1.1  christos   	  break;
   7380       1.1  christos 
   7381       1.1  christos   	case at_dot:
   7382       1.1  christos           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
   7383       1.1  christos  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
   7384       1.1  christos   	    goto fail;
   7385       1.1  christos   	  break;
   7386       1.1  christos 
   7387       1.1  christos   	case after_dot:
   7388       1.1  christos           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
   7389       1.1  christos           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
   7390       1.1  christos   	    goto fail;
   7391       1.1  christos   	  break;
   7392       1.1  christos 
   7393       1.1  christos 	case syntaxspec:
   7394       1.1  christos           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
   7395       1.1  christos 	  mcnt = *p++;
   7396       1.1  christos 	  goto matchsyntax;
   7397       1.1  christos 
   7398       1.1  christos         case wordchar:
   7399       1.1  christos           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
   7400       1.1  christos 	  mcnt = (int) Sword;
   7401       1.1  christos         matchsyntax:
   7402       1.1  christos 	  PREFETCH ();
   7403       1.1  christos 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7404       1.1  christos 	  d++;
   7405       1.1  christos 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
   7406       1.1  christos 	    goto fail;
   7407       1.1  christos           SET_REGS_MATCHED ();
   7408       1.1  christos 	  break;
   7409       1.1  christos 
   7410       1.1  christos 	case notsyntaxspec:
   7411       1.1  christos           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
   7412       1.1  christos 	  mcnt = *p++;
   7413       1.1  christos 	  goto matchnotsyntax;
   7414       1.1  christos 
   7415       1.1  christos         case notwordchar:
   7416       1.1  christos           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
   7417       1.1  christos 	  mcnt = (int) Sword;
   7418       1.1  christos         matchnotsyntax:
   7419       1.1  christos 	  PREFETCH ();
   7420       1.1  christos 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7421       1.1  christos 	  d++;
   7422       1.1  christos 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
   7423       1.1  christos 	    goto fail;
   7424       1.1  christos 	  SET_REGS_MATCHED ();
   7425       1.1  christos           break;
   7426       1.1  christos 
   7427       1.1  christos #else /* not emacs */
   7428       1.1  christos 	case wordchar:
   7429       1.1  christos           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
   7430       1.1  christos 	  PREFETCH ();
   7431       1.1  christos           if (!WORDCHAR_P (d))
   7432       1.1  christos             goto fail;
   7433       1.1  christos 	  SET_REGS_MATCHED ();
   7434       1.1  christos           d++;
   7435       1.1  christos 	  break;
   7436       1.1  christos 
   7437       1.1  christos 	case notwordchar:
   7438       1.1  christos           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
   7439       1.1  christos 	  PREFETCH ();
   7440       1.1  christos 	  if (WORDCHAR_P (d))
   7441       1.1  christos             goto fail;
   7442       1.1  christos           SET_REGS_MATCHED ();
   7443       1.1  christos           d++;
   7444       1.1  christos 	  break;
   7445       1.1  christos #endif /* not emacs */
   7446       1.1  christos 
   7447       1.1  christos         default:
   7448       1.1  christos           abort ();
   7449       1.1  christos 	}
   7450       1.1  christos       continue;  /* Successfully executed one pattern command; keep going.  */
   7451       1.1  christos 
   7452       1.1  christos 
   7453       1.1  christos     /* We goto here if a matching operation fails. */
   7454       1.1  christos     fail:
   7455       1.1  christos       if (!FAIL_STACK_EMPTY ())
   7456       1.1  christos 	{ /* A restart point is known.  Restore to that state.  */
   7457       1.1  christos           DEBUG_PRINT1 ("\nFAIL:\n");
   7458       1.1  christos           POP_FAILURE_POINT (d, p,
   7459       1.1  christos                              lowest_active_reg, highest_active_reg,
   7460       1.1  christos                              regstart, regend, reg_info);
   7461   1.1.1.6  christos 
   7462       1.1  christos           /* If this failure point is a dummy, try the next one.  */
   7463       1.1  christos           if (!p)
   7464       1.1  christos 	    goto fail;
   7465       1.1  christos 
   7466       1.1  christos           /* If we failed to the end of the pattern, don't examine *p.  */
   7467       1.1  christos 	  assert (p <= pend);
   7468       1.1  christos           if (p < pend)
   7469       1.1  christos             {
   7470       1.1  christos               boolean is_a_jump_n = false;
   7471       1.1  christos 
   7472       1.1  christos               /* If failed to a backwards jump that's part of a repetition
   7473       1.1  christos                  loop, need to pop this failure point and use the next one.  */
   7474       1.1  christos               switch ((re_opcode_t) *p)
   7475       1.1  christos                 {
   7476       1.1  christos                 case jump_n:
   7477       1.1  christos                   is_a_jump_n = true;
   7478       1.1  christos 		  /* Fall through.  */
   7479       1.1  christos                 case maybe_pop_jump:
   7480       1.1  christos                 case pop_failure_jump:
   7481       1.1  christos                 case jump:
   7482       1.1  christos                   p1 = p + 1;
   7483       1.1  christos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7484       1.1  christos                   p1 += mcnt;
   7485       1.1  christos 
   7486       1.1  christos                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
   7487       1.1  christos                       || (!is_a_jump_n
   7488       1.1  christos                           && (re_opcode_t) *p1 == on_failure_jump))
   7489       1.1  christos                     goto fail;
   7490       1.1  christos                   break;
   7491       1.1  christos                 default:
   7492       1.1  christos                   /* do nothing */ ;
   7493       1.1  christos                 }
   7494       1.1  christos             }
   7495       1.1  christos 
   7496       1.1  christos           if (d >= string1 && d <= end1)
   7497       1.1  christos 	    dend = end_match_1;
   7498       1.1  christos         }
   7499       1.1  christos       else
   7500       1.1  christos         break;   /* Matching at this starting point really fails.  */
   7501       1.1  christos     } /* for (;;) */
   7502       1.1  christos 
   7503       1.1  christos   if (best_regs_set)
   7504       1.1  christos     goto restore_best_regs;
   7505       1.1  christos 
   7506       1.1  christos   FREE_VARIABLES ();
   7507       1.1  christos 
   7508       1.1  christos   return -1;         			/* Failure to match.  */
   7509       1.1  christos } /* re_match_2 */
   7510       1.1  christos 
   7511       1.1  christos /* Subroutine definitions for re_match_2.  */
   7513       1.1  christos 
   7514       1.1  christos 
   7515       1.1  christos /* We are passed P pointing to a register number after a start_memory.
   7516       1.1  christos 
   7517       1.1  christos    Return true if the pattern up to the corresponding stop_memory can
   7518       1.1  christos    match the empty string, and false otherwise.
   7519       1.1  christos 
   7520       1.1  christos    If we find the matching stop_memory, sets P to point to one past its number.
   7521       1.1  christos    Otherwise, sets P to an undefined byte less than or equal to END.
   7522       1.1  christos 
   7523       1.1  christos    We don't handle duplicates properly (yet).  */
   7524       1.1  christos 
   7525       1.1  christos static boolean
   7526       1.1  christos PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
   7527       1.1  christos                                    PREFIX(register_info_type) *reg_info)
   7528       1.1  christos {
   7529       1.1  christos   int mcnt;
   7530       1.1  christos   /* Point to after the args to the start_memory.  */
   7531       1.1  christos   UCHAR_T *p1 = *p + 2;
   7532       1.1  christos 
   7533       1.1  christos   while (p1 < end)
   7534       1.1  christos     {
   7535       1.1  christos       /* Skip over opcodes that can match nothing, and return true or
   7536       1.1  christos 	 false, as appropriate, when we get to one that can't, or to the
   7537       1.1  christos          matching stop_memory.  */
   7538       1.1  christos 
   7539       1.1  christos       switch ((re_opcode_t) *p1)
   7540       1.1  christos         {
   7541       1.1  christos         /* Could be either a loop or a series of alternatives.  */
   7542       1.1  christos         case on_failure_jump:
   7543       1.1  christos           p1++;
   7544       1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7545       1.1  christos 
   7546       1.1  christos           /* If the next operation is not a jump backwards in the
   7547       1.1  christos 	     pattern.  */
   7548       1.1  christos 
   7549       1.1  christos 	  if (mcnt >= 0)
   7550       1.1  christos 	    {
   7551       1.1  christos               /* Go through the on_failure_jumps of the alternatives,
   7552       1.1  christos                  seeing if any of the alternatives cannot match nothing.
   7553       1.1  christos                  The last alternative starts with only a jump,
   7554       1.1  christos                  whereas the rest start with on_failure_jump and end
   7555       1.1  christos                  with a jump, e.g., here is the pattern for `a|b|c':
   7556       1.1  christos 
   7557       1.1  christos                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
   7558       1.1  christos                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
   7559       1.1  christos                  /exactn/1/c
   7560       1.1  christos 
   7561       1.1  christos                  So, we have to first go through the first (n-1)
   7562       1.1  christos                  alternatives and then deal with the last one separately.  */
   7563       1.1  christos 
   7564       1.1  christos 
   7565       1.1  christos               /* Deal with the first (n-1) alternatives, which start
   7566       1.1  christos                  with an on_failure_jump (see above) that jumps to right
   7567       1.1  christos                  past a jump_past_alt.  */
   7568       1.1  christos 
   7569       1.1  christos               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
   7570       1.1  christos 		     jump_past_alt)
   7571       1.1  christos                 {
   7572       1.1  christos                   /* `mcnt' holds how many bytes long the alternative
   7573       1.1  christos                      is, including the ending `jump_past_alt' and
   7574       1.1  christos                      its number.  */
   7575       1.1  christos 
   7576       1.1  christos                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
   7577       1.1  christos 						(1 + OFFSET_ADDRESS_SIZE),
   7578       1.1  christos 						reg_info))
   7579       1.1  christos                     return false;
   7580       1.1  christos 
   7581       1.1  christos                   /* Move to right after this alternative, including the
   7582       1.1  christos 		     jump_past_alt.  */
   7583       1.1  christos                   p1 += mcnt;
   7584       1.1  christos 
   7585       1.1  christos                   /* Break if it's the beginning of an n-th alternative
   7586       1.1  christos                      that doesn't begin with an on_failure_jump.  */
   7587       1.1  christos                   if ((re_opcode_t) *p1 != on_failure_jump)
   7588       1.1  christos                     break;
   7589       1.1  christos 
   7590       1.1  christos 		  /* Still have to check that it's not an n-th
   7591       1.1  christos 		     alternative that starts with an on_failure_jump.  */
   7592       1.1  christos 		  p1++;
   7593       1.1  christos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7594       1.1  christos                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
   7595       1.1  christos 		      jump_past_alt)
   7596       1.1  christos                     {
   7597       1.1  christos 		      /* Get to the beginning of the n-th alternative.  */
   7598       1.1  christos                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
   7599       1.1  christos                       break;
   7600       1.1  christos                     }
   7601       1.1  christos                 }
   7602       1.1  christos 
   7603       1.1  christos               /* Deal with the last alternative: go back and get number
   7604       1.1  christos                  of the `jump_past_alt' just before it.  `mcnt' contains
   7605       1.1  christos                  the length of the alternative.  */
   7606       1.1  christos               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
   7607       1.1  christos 
   7608       1.1  christos               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
   7609       1.1  christos                 return false;
   7610       1.1  christos 
   7611       1.1  christos               p1 += mcnt;	/* Get past the n-th alternative.  */
   7612       1.1  christos             } /* if mcnt > 0 */
   7613       1.1  christos           break;
   7614       1.1  christos 
   7615       1.1  christos 
   7616       1.1  christos         case stop_memory:
   7617       1.1  christos 	  assert (p1[1] == **p);
   7618       1.1  christos           *p = p1 + 2;
   7619       1.1  christos           return true;
   7620       1.1  christos 
   7621       1.1  christos 
   7622       1.1  christos         default:
   7623       1.1  christos           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7624       1.1  christos             return false;
   7625       1.1  christos         }
   7626       1.1  christos     } /* while p1 < end */
   7627       1.1  christos 
   7628       1.1  christos   return false;
   7629       1.1  christos } /* group_match_null_string_p */
   7630       1.1  christos 
   7631       1.1  christos 
   7632       1.1  christos /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   7633       1.1  christos    It expects P to be the first byte of a single alternative and END one
   7634       1.1  christos    byte past the last. The alternative can contain groups.  */
   7635       1.1  christos 
   7636       1.1  christos static boolean
   7637       1.1  christos PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
   7638       1.1  christos                                  PREFIX(register_info_type) *reg_info)
   7639       1.1  christos {
   7640       1.1  christos   int mcnt;
   7641       1.1  christos   UCHAR_T *p1 = p;
   7642       1.1  christos 
   7643       1.1  christos   while (p1 < end)
   7644       1.1  christos     {
   7645       1.1  christos       /* Skip over opcodes that can match nothing, and break when we get
   7646       1.1  christos          to one that can't.  */
   7647       1.1  christos 
   7648       1.1  christos       switch ((re_opcode_t) *p1)
   7649       1.1  christos         {
   7650       1.1  christos 	/* It's a loop.  */
   7651       1.1  christos         case on_failure_jump:
   7652       1.1  christos           p1++;
   7653       1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7654       1.1  christos           p1 += mcnt;
   7655       1.1  christos           break;
   7656       1.1  christos 
   7657       1.1  christos 	default:
   7658       1.1  christos           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7659       1.1  christos             return false;
   7660       1.1  christos         }
   7661       1.1  christos     }  /* while p1 < end */
   7662       1.1  christos 
   7663       1.1  christos   return true;
   7664       1.1  christos } /* alt_match_null_string_p */
   7665       1.1  christos 
   7666       1.1  christos 
   7667       1.1  christos /* Deals with the ops common to group_match_null_string_p and
   7668       1.1  christos    alt_match_null_string_p.
   7669       1.1  christos 
   7670       1.1  christos    Sets P to one after the op and its arguments, if any.  */
   7671       1.1  christos 
   7672       1.1  christos static boolean
   7673       1.1  christos PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
   7674       1.1  christos                                        PREFIX(register_info_type) *reg_info)
   7675       1.1  christos {
   7676       1.1  christos   int mcnt;
   7677       1.1  christos   boolean ret;
   7678       1.1  christos   int reg_no;
   7679       1.1  christos   UCHAR_T *p1 = *p;
   7680       1.1  christos 
   7681       1.1  christos   switch ((re_opcode_t) *p1++)
   7682       1.1  christos     {
   7683       1.1  christos     case no_op:
   7684       1.1  christos     case begline:
   7685       1.1  christos     case endline:
   7686       1.1  christos     case begbuf:
   7687       1.1  christos     case endbuf:
   7688       1.1  christos     case wordbeg:
   7689       1.1  christos     case wordend:
   7690       1.1  christos     case wordbound:
   7691       1.1  christos     case notwordbound:
   7692       1.1  christos #ifdef emacs
   7693       1.1  christos     case before_dot:
   7694       1.1  christos     case at_dot:
   7695       1.1  christos     case after_dot:
   7696       1.1  christos #endif
   7697       1.1  christos       break;
   7698       1.1  christos 
   7699       1.1  christos     case start_memory:
   7700       1.1  christos       reg_no = *p1;
   7701       1.1  christos       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
   7702       1.1  christos       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
   7703       1.1  christos 
   7704       1.1  christos       /* Have to set this here in case we're checking a group which
   7705       1.1  christos          contains a group and a back reference to it.  */
   7706       1.1  christos 
   7707       1.1  christos       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
   7708       1.1  christos         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
   7709       1.1  christos 
   7710       1.1  christos       if (!ret)
   7711       1.1  christos         return false;
   7712       1.1  christos       break;
   7713       1.1  christos 
   7714       1.1  christos     /* If this is an optimized succeed_n for zero times, make the jump.  */
   7715       1.1  christos     case jump:
   7716       1.1  christos       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7717       1.1  christos       if (mcnt >= 0)
   7718       1.1  christos         p1 += mcnt;
   7719       1.1  christos       else
   7720       1.1  christos         return false;
   7721       1.1  christos       break;
   7722       1.1  christos 
   7723       1.1  christos     case succeed_n:
   7724       1.1  christos       /* Get to the number of times to succeed.  */
   7725       1.1  christos       p1 += OFFSET_ADDRESS_SIZE;
   7726       1.1  christos       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7727   1.1.1.6  christos 
   7728       1.1  christos       if (mcnt == 0)
   7729       1.1  christos         {
   7730       1.1  christos           p1 -= 2 * OFFSET_ADDRESS_SIZE;
   7731       1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7732       1.1  christos           p1 += mcnt;
   7733       1.1  christos         }
   7734       1.1  christos       else
   7735       1.1  christos         return false;
   7736       1.1  christos       break;
   7737       1.1  christos 
   7738       1.1  christos     case duplicate:
   7739       1.1  christos       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
   7740       1.1  christos         return false;
   7741       1.1  christos       break;
   7742       1.1  christos 
   7743       1.1  christos     case set_number_at:
   7744       1.1  christos       p1 += 2 * OFFSET_ADDRESS_SIZE;
   7745       1.1  christos       return false;
   7746       1.1  christos 
   7747       1.1  christos     default:
   7748       1.1  christos       /* All other opcodes mean we cannot match the empty string.  */
   7749       1.1  christos       return false;
   7750       1.1  christos   }
   7751       1.1  christos 
   7752       1.1  christos   *p = p1;
   7753       1.1  christos   return true;
   7754       1.1  christos } /* common_op_match_null_string_p */
   7755       1.1  christos 
   7756       1.1  christos 
   7757       1.1  christos /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   7758       1.1  christos    bytes; nonzero otherwise.  */
   7759       1.1  christos 
   7760       1.1  christos static int
   7761       1.1  christos PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
   7762       1.1  christos                         RE_TRANSLATE_TYPE translate)
   7763       1.1  christos {
   7764       1.1  christos   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
   7765       1.1  christos   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
   7766       1.1  christos   while (len)
   7767       1.1  christos     {
   7768       1.1  christos #ifdef WCHAR
   7769       1.1  christos       if (((*p1<=0xff)?translate[*p1++]:*p1++)
   7770       1.1  christos 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
   7771       1.1  christos 	return 1;
   7772       1.1  christos #else /* BYTE */
   7773       1.1  christos       if (translate[*p1++] != translate[*p2++]) return 1;
   7774       1.1  christos #endif /* WCHAR */
   7775       1.1  christos       len--;
   7776       1.1  christos     }
   7777       1.1  christos   return 0;
   7778       1.1  christos }
   7779       1.1  christos 
   7780       1.1  christos 
   7782       1.1  christos #else /* not INSIDE_RECURSION */
   7783       1.1  christos 
   7784       1.1  christos /* Entry points for GNU code.  */
   7785       1.1  christos 
   7786       1.1  christos /* re_compile_pattern is the GNU regular expression compiler: it
   7787       1.1  christos    compiles PATTERN (of length SIZE) and puts the result in BUFP.
   7788       1.1  christos    Returns 0 if the pattern was valid, otherwise an error string.
   7789       1.1  christos 
   7790       1.1  christos    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
   7791       1.1  christos    are set in BUFP on entry.
   7792       1.1  christos 
   7793       1.1  christos    We call regex_compile to do the actual compilation.  */
   7794       1.1  christos 
   7795       1.1  christos const char *
   7796       1.1  christos re_compile_pattern (const char *pattern, size_t length,
   7797       1.1  christos                     struct re_pattern_buffer *bufp)
   7798       1.1  christos {
   7799       1.1  christos   reg_errcode_t ret;
   7800       1.1  christos 
   7801       1.1  christos   /* GNU code is written to assume at least RE_NREGS registers will be set
   7802       1.1  christos      (and at least one extra will be -1).  */
   7803       1.1  christos   bufp->regs_allocated = REGS_UNALLOCATED;
   7804       1.1  christos 
   7805       1.1  christos   /* And GNU code determines whether or not to get register information
   7806       1.1  christos      by passing null for the REGS argument to re_match, etc., not by
   7807       1.1  christos      setting no_sub.  */
   7808       1.1  christos   bufp->no_sub = 0;
   7809       1.1  christos 
   7810       1.1  christos   /* Match anchors at newline.  */
   7811       1.1  christos   bufp->newline_anchor = 1;
   7812       1.1  christos 
   7813       1.1  christos # ifdef MBS_SUPPORT
   7814       1.1  christos   if (MB_CUR_MAX != 1)
   7815       1.1  christos     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
   7816       1.1  christos   else
   7817       1.1  christos # endif
   7818       1.1  christos     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
   7819       1.1  christos 
   7820       1.1  christos   if (!ret)
   7821       1.1  christos     return NULL;
   7822       1.1  christos   return gettext (re_error_msgid[(int) ret]);
   7823       1.1  christos }
   7824       1.1  christos #ifdef _LIBC
   7825       1.1  christos weak_alias (__re_compile_pattern, re_compile_pattern)
   7826       1.1  christos #endif
   7827       1.1  christos 
   7828       1.1  christos /* Entry points compatible with 4.2 BSD regex library.  We don't define
   7830       1.1  christos    them unless specifically requested.  */
   7831       1.1  christos 
   7832       1.1  christos #if defined _REGEX_RE_COMP || defined _LIBC
   7833       1.1  christos 
   7834       1.1  christos /* BSD has one and only one pattern buffer.  */
   7835       1.1  christos static struct re_pattern_buffer re_comp_buf;
   7836       1.1  christos 
   7837       1.1  christos char *
   7838       1.1  christos #ifdef _LIBC
   7839       1.1  christos /* Make these definitions weak in libc, so POSIX programs can redefine
   7840       1.1  christos    these names if they don't use our functions, and still use
   7841       1.1  christos    regcomp/regexec below without link errors.  */
   7842       1.1  christos weak_function
   7843       1.1  christos #endif
   7844       1.1  christos re_comp (const char *s)
   7845       1.1  christos {
   7846       1.1  christos   reg_errcode_t ret;
   7847       1.1  christos 
   7848       1.1  christos   if (!s)
   7849       1.1  christos     {
   7850       1.1  christos       if (!re_comp_buf.buffer)
   7851       1.1  christos 	return (char *) gettext ("No previous regular expression");
   7852       1.1  christos       return 0;
   7853       1.1  christos     }
   7854       1.1  christos 
   7855       1.1  christos   if (!re_comp_buf.buffer)
   7856       1.1  christos     {
   7857       1.1  christos       re_comp_buf.buffer = (unsigned char *) malloc (200);
   7858       1.1  christos       if (re_comp_buf.buffer == NULL)
   7859       1.1  christos         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
   7860       1.1  christos       re_comp_buf.allocated = 200;
   7861       1.1  christos 
   7862       1.1  christos       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
   7863       1.1  christos       if (re_comp_buf.fastmap == NULL)
   7864       1.1  christos 	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
   7865       1.1  christos     }
   7866       1.1  christos 
   7867       1.1  christos   /* Since `re_exec' always passes NULL for the `regs' argument, we
   7868       1.1  christos      don't need to initialize the pattern buffer fields which affect it.  */
   7869       1.1  christos 
   7870       1.1  christos   /* Match anchors at newlines.  */
   7871       1.1  christos   re_comp_buf.newline_anchor = 1;
   7872       1.1  christos 
   7873       1.1  christos # ifdef MBS_SUPPORT
   7874       1.1  christos   if (MB_CUR_MAX != 1)
   7875       1.1  christos     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   7876       1.1  christos   else
   7877       1.1  christos # endif
   7878       1.1  christos     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   7879       1.1  christos 
   7880       1.1  christos   if (!ret)
   7881       1.1  christos     return NULL;
   7882       1.1  christos 
   7883       1.1  christos   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
   7884       1.1  christos   return (char *) gettext (re_error_msgid[(int) ret]);
   7885       1.1  christos }
   7886       1.1  christos 
   7887       1.1  christos 
   7888       1.1  christos int
   7889       1.1  christos #ifdef _LIBC
   7890       1.1  christos weak_function
   7891       1.1  christos #endif
   7892       1.1  christos re_exec (const char *s)
   7893       1.1  christos {
   7894       1.1  christos   const int len = strlen (s);
   7895       1.1  christos   return
   7896       1.1  christos     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
   7897       1.1  christos }
   7898       1.1  christos 
   7899       1.1  christos #endif /* _REGEX_RE_COMP */
   7900       1.1  christos 
   7901       1.1  christos /* POSIX.2 functions.  Don't define these for Emacs.  */
   7903       1.1  christos 
   7904       1.1  christos #ifndef emacs
   7905       1.1  christos 
   7906       1.1  christos /* regcomp takes a regular expression as a string and compiles it.
   7907       1.1  christos 
   7908       1.1  christos    PREG is a regex_t *.  We do not expect any fields to be initialized,
   7909       1.1  christos    since POSIX says we shouldn't.  Thus, we set
   7910       1.1  christos 
   7911       1.1  christos      `buffer' to the compiled pattern;
   7912       1.1  christos      `used' to the length of the compiled pattern;
   7913       1.1  christos      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
   7914       1.1  christos        REG_EXTENDED bit in CFLAGS is set; otherwise, to
   7915       1.1  christos        RE_SYNTAX_POSIX_BASIC;
   7916       1.1  christos      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
   7917       1.1  christos      `fastmap' to an allocated space for the fastmap;
   7918       1.1  christos      `fastmap_accurate' to zero;
   7919       1.1  christos      `re_nsub' to the number of subexpressions in PATTERN.
   7920       1.1  christos 
   7921       1.1  christos    PATTERN is the address of the pattern string.
   7922       1.1  christos 
   7923       1.1  christos    CFLAGS is a series of bits which affect compilation.
   7924       1.1  christos 
   7925       1.1  christos      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
   7926       1.1  christos      use POSIX basic syntax.
   7927       1.1  christos 
   7928       1.1  christos      If REG_NEWLINE is set, then . and [^...] don't match newline.
   7929       1.1  christos      Also, regexec will try a match beginning after every newline.
   7930       1.1  christos 
   7931       1.1  christos      If REG_ICASE is set, then we considers upper- and lowercase
   7932       1.1  christos      versions of letters to be equivalent when matching.
   7933       1.1  christos 
   7934       1.1  christos      If REG_NOSUB is set, then when PREG is passed to regexec, that
   7935       1.1  christos      routine will report only success or failure, and nothing about the
   7936       1.1  christos      registers.
   7937       1.1  christos 
   7938       1.1  christos    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
   7939       1.1  christos    the return codes and their meanings.)  */
   7940       1.1  christos 
   7941       1.1  christos int
   7942       1.1  christos regcomp (regex_t *preg, const char *pattern, int cflags)
   7943       1.1  christos {
   7944       1.1  christos   reg_errcode_t ret;
   7945       1.1  christos   reg_syntax_t syntax
   7946       1.1  christos     = (cflags & REG_EXTENDED) ?
   7947       1.1  christos       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
   7948       1.1  christos 
   7949       1.1  christos   /* regex_compile will allocate the space for the compiled pattern.  */
   7950       1.1  christos   preg->buffer = 0;
   7951       1.1  christos   preg->allocated = 0;
   7952       1.1  christos   preg->used = 0;
   7953       1.1  christos 
   7954       1.1  christos   /* Try to allocate space for the fastmap.  */
   7955       1.1  christos   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
   7956       1.1  christos 
   7957       1.1  christos   if (cflags & REG_ICASE)
   7958       1.1  christos     {
   7959       1.1  christos       int i;
   7960       1.1  christos 
   7961       1.1  christos       preg->translate
   7962       1.1  christos 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
   7963       1.1  christos 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
   7964       1.1  christos       if (preg->translate == NULL)
   7965       1.1  christos         return (int) REG_ESPACE;
   7966       1.1  christos 
   7967       1.1  christos       /* Map uppercase characters to corresponding lowercase ones.  */
   7968       1.1  christos       for (i = 0; i < CHAR_SET_SIZE; i++)
   7969       1.1  christos         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
   7970       1.1  christos     }
   7971       1.1  christos   else
   7972       1.1  christos     preg->translate = NULL;
   7973       1.1  christos 
   7974       1.1  christos   /* If REG_NEWLINE is set, newlines are treated differently.  */
   7975       1.1  christos   if (cflags & REG_NEWLINE)
   7976       1.1  christos     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
   7977       1.1  christos       syntax &= ~RE_DOT_NEWLINE;
   7978       1.1  christos       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
   7979       1.1  christos       /* It also changes the matching behavior.  */
   7980       1.1  christos       preg->newline_anchor = 1;
   7981       1.1  christos     }
   7982       1.1  christos   else
   7983       1.1  christos     preg->newline_anchor = 0;
   7984       1.1  christos 
   7985       1.1  christos   preg->no_sub = !!(cflags & REG_NOSUB);
   7986       1.1  christos 
   7987       1.1  christos   /* POSIX says a null character in the pattern terminates it, so we
   7988       1.1  christos      can use strlen here in compiling the pattern.  */
   7989       1.1  christos # ifdef MBS_SUPPORT
   7990       1.1  christos   if (MB_CUR_MAX != 1)
   7991       1.1  christos     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
   7992       1.1  christos   else
   7993       1.1  christos # endif
   7994       1.1  christos     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
   7995       1.1  christos 
   7996       1.1  christos   /* POSIX doesn't distinguish between an unmatched open-group and an
   7997       1.1  christos      unmatched close-group: both are REG_EPAREN.  */
   7998       1.1  christos   if (ret == REG_ERPAREN) ret = REG_EPAREN;
   7999       1.1  christos 
   8000       1.1  christos   if (ret == REG_NOERROR && preg->fastmap)
   8001       1.1  christos     {
   8002       1.1  christos       /* Compute the fastmap now, since regexec cannot modify the pattern
   8003       1.1  christos 	 buffer.  */
   8004       1.1  christos       if (re_compile_fastmap (preg) == -2)
   8005       1.1  christos 	{
   8006       1.1  christos 	  /* Some error occurred while computing the fastmap, just forget
   8007       1.1  christos 	     about it.  */
   8008       1.1  christos 	  free (preg->fastmap);
   8009       1.1  christos 	  preg->fastmap = NULL;
   8010       1.1  christos 	}
   8011       1.1  christos     }
   8012       1.1  christos 
   8013       1.1  christos   return (int) ret;
   8014       1.1  christos }
   8015       1.1  christos #ifdef _LIBC
   8016       1.1  christos weak_alias (__regcomp, regcomp)
   8017       1.1  christos #endif
   8018       1.1  christos 
   8019       1.1  christos 
   8020       1.1  christos /* regexec searches for a given pattern, specified by PREG, in the
   8021       1.1  christos    string STRING.
   8022       1.1  christos 
   8023       1.1  christos    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   8024       1.1  christos    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   8025       1.1  christos    least NMATCH elements, and we set them to the offsets of the
   8026       1.1  christos    corresponding matched substrings.
   8027       1.1  christos 
   8028       1.1  christos    EFLAGS specifies `execution flags' which affect matching: if
   8029       1.1  christos    REG_NOTBOL is set, then ^ does not match at the beginning of the
   8030       1.1  christos    string; if REG_NOTEOL is set, then $ does not match at the end.
   8031       1.1  christos 
   8032       1.1  christos    We return 0 if we find a match and REG_NOMATCH if not.  */
   8033       1.1  christos 
   8034       1.1  christos int
   8035       1.1  christos regexec (const regex_t *preg, const char *string, size_t nmatch,
   8036       1.1  christos          regmatch_t pmatch[], int eflags)
   8037       1.1  christos {
   8038       1.1  christos   int ret;
   8039       1.1  christos   struct re_registers regs;
   8040       1.1  christos   regex_t private_preg;
   8041       1.1  christos   int len = strlen (string);
   8042       1.1  christos   boolean want_reg_info = !preg->no_sub && nmatch > 0;
   8043       1.1  christos 
   8044       1.1  christos   private_preg = *preg;
   8045       1.1  christos 
   8046       1.1  christos   private_preg.not_bol = !!(eflags & REG_NOTBOL);
   8047       1.1  christos   private_preg.not_eol = !!(eflags & REG_NOTEOL);
   8048       1.1  christos 
   8049       1.1  christos   /* The user has told us exactly how many registers to return
   8050       1.1  christos      information about, via `nmatch'.  We have to pass that on to the
   8051       1.1  christos      matching routines.  */
   8052       1.1  christos   private_preg.regs_allocated = REGS_FIXED;
   8053       1.1  christos 
   8054       1.1  christos   if (want_reg_info)
   8055       1.1  christos     {
   8056       1.1  christos       regs.num_regs = nmatch;
   8057       1.1  christos       regs.start = TALLOC (nmatch * 2, regoff_t);
   8058       1.1  christos       if (regs.start == NULL)
   8059       1.1  christos         return (int) REG_NOMATCH;
   8060       1.1  christos       regs.end = regs.start + nmatch;
   8061       1.1  christos     }
   8062       1.1  christos 
   8063       1.1  christos   /* Perform the searching operation.  */
   8064       1.1  christos   ret = re_search (&private_preg, string, len,
   8065       1.1  christos                    /* start: */ 0, /* range: */ len,
   8066       1.1  christos                    want_reg_info ? &regs : (struct re_registers *) 0);
   8067       1.1  christos 
   8068       1.1  christos   /* Copy the register information to the POSIX structure.  */
   8069       1.1  christos   if (want_reg_info)
   8070       1.1  christos     {
   8071       1.1  christos       if (ret >= 0)
   8072       1.1  christos         {
   8073       1.1  christos           unsigned r;
   8074       1.1  christos 
   8075       1.1  christos           for (r = 0; r < nmatch; r++)
   8076       1.1  christos             {
   8077       1.1  christos               pmatch[r].rm_so = regs.start[r];
   8078       1.1  christos               pmatch[r].rm_eo = regs.end[r];
   8079       1.1  christos             }
   8080       1.1  christos         }
   8081       1.1  christos 
   8082       1.1  christos       /* If we needed the temporary register info, free the space now.  */
   8083       1.1  christos       free (regs.start);
   8084       1.1  christos     }
   8085       1.1  christos 
   8086       1.1  christos   /* We want zero return to mean success, unlike `re_search'.  */
   8087       1.1  christos   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
   8088       1.1  christos }
   8089       1.1  christos #ifdef _LIBC
   8090       1.1  christos weak_alias (__regexec, regexec)
   8091       1.1  christos #endif
   8092       1.1  christos 
   8093       1.1  christos 
   8094       1.1  christos /* Returns a message corresponding to an error code, ERRCODE, returned
   8095       1.1  christos    from either regcomp or regexec.   We don't use PREG here.  */
   8096       1.1  christos 
   8097       1.1  christos size_t
   8098       1.1  christos regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
   8099       1.1  christos           char *errbuf, size_t errbuf_size)
   8100       1.1  christos {
   8101       1.1  christos   const char *msg;
   8102       1.1  christos   size_t msg_size;
   8103   1.1.1.4  christos 
   8104       1.1  christos   if (errcode < 0
   8105       1.1  christos       || errcode >= (int) (sizeof (re_error_msgid)
   8106       1.1  christos 			   / sizeof (re_error_msgid[0])))
   8107       1.1  christos     /* Only error codes returned by the rest of the code should be passed
   8108   1.1.1.4  christos        to this routine.  If we are given anything else, or if other regex
   8109       1.1  christos        code generates an invalid error code, then the program has a bug.
   8110       1.1  christos        Dump core so we can fix it.  */
   8111       1.1  christos     abort ();
   8112       1.1  christos 
   8113       1.1  christos   msg = gettext (re_error_msgid[errcode]);
   8114       1.1  christos 
   8115       1.1  christos   msg_size = strlen (msg) + 1; /* Includes the null.  */
   8116       1.1  christos 
   8117       1.1  christos   if (errbuf_size != 0)
   8118       1.1  christos     {
   8119       1.1  christos       if (msg_size > errbuf_size)
   8120       1.1  christos         {
   8121       1.1  christos #if defined HAVE_MEMPCPY || defined _LIBC
   8122       1.1  christos 	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
   8123   1.1.1.2  christos #else
   8124       1.1  christos           (void) memcpy (errbuf, msg, errbuf_size - 1);
   8125       1.1  christos           errbuf[errbuf_size - 1] = 0;
   8126       1.1  christos #endif
   8127       1.1  christos         }
   8128       1.1  christos       else
   8129   1.1.1.2  christos         (void) memcpy (errbuf, msg, msg_size);
   8130       1.1  christos     }
   8131       1.1  christos 
   8132       1.1  christos   return msg_size;
   8133   1.1.1.2  christos }
   8134       1.1  christos #ifdef _LIBC
   8135       1.1  christos weak_alias (__regerror, regerror)
   8136       1.1  christos #endif
   8137       1.1  christos 
   8138       1.1  christos 
   8139       1.1  christos /* Free dynamically allocated space used by PREG.  */
   8140       1.1  christos 
   8141       1.1  christos void
   8142       1.1  christos regfree (regex_t *preg)
   8143       1.1  christos {
   8144       1.1  christos   free (preg->buffer);
   8145       1.1  christos   preg->buffer = NULL;
   8146       1.1  christos 
   8147       1.1  christos   preg->allocated = 0;
   8148       1.1  christos   preg->used = 0;
   8149       1.1  christos 
   8150       1.1  christos   free (preg->fastmap);
   8151       1.1  christos   preg->fastmap = NULL;
   8152       1.1  christos   preg->fastmap_accurate = 0;
   8153       1.1  christos 
   8154       1.1  christos   free (preg->translate);
   8155       1.1  christos   preg->translate = NULL;
   8156       1.1  christos }
   8157       1.1  christos #ifdef _LIBC
   8158       1.1  christos weak_alias (__regfree, regfree)
   8159       1.1  christos #endif
   8160       1.1  christos 
   8161       1.1  christos #endif /* not emacs  */
   8162       1.1  christos 
   8163       1.1  christos #endif /* not INSIDE_RECURSION */
   8164       1.1  christos 
   8165       1.1  christos 
/* Remove the helper macros used by the code above.  Presumably this lets
   the file be processed again (see the INSIDE_RECURSION conditional and
   the BYTE / WCHAR selectors undefined below) without redefinition
   clashes -- confirm against the include logic at the top of the file.  */

#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
/* NOTE(review): the macro is assumed to be defined as
   MATCHING_IN_FIRST_STRING; a misspelled name here would make the
   #undef a silent no-op.  Confirm against the definition.  */
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-type parameters and the BYTE / WCHAR selectors.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Presumably tested elsewhere in the file to guard definitions that
   must be emitted only once -- confirm at its point of use.  */
# define DEFINED_ONCE
   8234