Home | History | Annotate | Line # | Download | only in lib
      1  1.1  christos /*	$NetBSD: regex.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* Extended regular expression matching and search library,
      4  1.1  christos    version 0.12.
      5  1.1  christos    (Implements POSIX draft P1003.2/D11.2, except for some of the
      6  1.1  christos    internationalization features.)
      7  1.1  christos    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
      8  1.1  christos 
      9  1.1  christos    This program is free software; you can redistribute it and/or modify
     10  1.1  christos    it under the terms of the GNU General Public License as published by
     11  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
     12  1.1  christos    any later version.
     13  1.1  christos 
     14  1.1  christos    This program is distributed in the hope that it will be useful,
     15  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17  1.1  christos    GNU General Public License for more details.
     18  1.1  christos 
     19  1.1  christos    You should have received a copy of the GNU General Public License
     20  1.1  christos    along with this program; if not, write to the Free Software Foundation,
     21  1.1  christos    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
     22  1.1  christos 
     23  1.1  christos /* AIX requires this to be the first thing in the file. */
     24  1.1  christos #if defined _AIX && !defined REGEX_MALLOC
     25  1.1  christos   #pragma alloca
     26  1.1  christos #endif
     27  1.1  christos 
     28  1.1  christos #undef	_GNU_SOURCE
     29  1.1  christos #define _GNU_SOURCE
     30  1.1  christos 
     31  1.1  christos #ifdef HAVE_CONFIG_H
     32  1.1  christos # include <config.h>
     33  1.1  christos #endif
     34  1.1  christos 
/* PARAMS wraps the parameter list of a prototype.  The list is kept
   when compiling with GCC or with a compiler that sets __STDC__ to a
   nonzero value, and is replaced by an empty list otherwise.  A
   definition of PARAMS made before this point is left alone.  */
#if !defined PARAMS
# if !defined __GNUC__ && !(defined __STDC__ && __STDC__)
#  define PARAMS(args) ()
# else
#  define PARAMS(args) args
# endif  /* Prototype support.  */
#endif  /* !PARAMS */
     42  1.1  christos 
     43  1.1  christos #ifndef INSIDE_RECURSION
     44  1.1  christos 
     45  1.1  christos # if defined STDC_HEADERS && !defined emacs
     46  1.1  christos #  include <stddef.h>
     47  1.1  christos # else
     48  1.1  christos /* We need this for `regex.h', and perhaps for the Emacs include files.  */
     49  1.1  christos #  include <sys/types.h>
     50  1.1  christos # endif
     51  1.1  christos 
     52  1.1  christos # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
     53  1.1  christos 
/* For platforms which support the ISO C Amendment 1 functionality, we
   support user-defined character classes.  */
     56  1.1  christos # if defined _LIBC || WIDE_CHAR_SUPPORT
     57  1.1  christos /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
     58  1.1  christos #  include <wchar.h>
     59  1.1  christos #  include <wctype.h>
     60  1.1  christos # endif
     61  1.1  christos 
     62  1.1  christos # ifdef _LIBC
     63  1.1  christos /* We have to keep the namespace clean.  */
     64  1.1  christos #  define regfree(preg) __regfree (preg)
     65  1.1  christos #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
     66  1.1  christos #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
     67  1.1  christos #  define regerror(errcode, preg, errbuf, errbuf_size) \
     68  1.1  christos 	__regerror(errcode, preg, errbuf, errbuf_size)
     69  1.1  christos #  define re_set_registers(bu, re, nu, st, en) \
     70  1.1  christos 	__re_set_registers (bu, re, nu, st, en)
     71  1.1  christos #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
     72  1.1  christos 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     73  1.1  christos #  define re_match(bufp, string, size, pos, regs) \
     74  1.1  christos 	__re_match (bufp, string, size, pos, regs)
     75  1.1  christos #  define re_search(bufp, string, size, startpos, range, regs) \
     76  1.1  christos 	__re_search (bufp, string, size, startpos, range, regs)
     77  1.1  christos #  define re_compile_pattern(pattern, length, bufp) \
     78  1.1  christos 	__re_compile_pattern (pattern, length, bufp)
     79  1.1  christos #  define re_set_syntax(syntax) __re_set_syntax (syntax)
     80  1.1  christos #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
     81  1.1  christos 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
     82  1.1  christos #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
     83  1.1  christos 
     84  1.1  christos #  define btowc __btowc
     85  1.1  christos #  define iswctype __iswctype
     86  1.1  christos #  define mbrtowc __mbrtowc
     87  1.1  christos #  define wcslen __wcslen
     88  1.1  christos #  define wcscoll __wcscoll
     89  1.1  christos #  define wcrtomb __wcrtomb
     90  1.1  christos 
     91  1.1  christos /* We are also using some library internals.  */
     92  1.1  christos #  include <locale/localeinfo.h>
     93  1.1  christos #  include <locale/elem-hash.h>
     94  1.1  christos #  include <langinfo.h>
     95  1.1  christos #  include <locale/coll-lookup.h>
     96  1.1  christos # endif
     97  1.1  christos 
     98  1.1  christos /* This is for other GNU distributions with internationalized messages.  */
     99  1.1  christos # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
    100  1.1  christos #  include <libintl.h>
    101  1.1  christos #  ifdef _LIBC
    102  1.1  christos #   undef gettext
    103  1.1  christos #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
    104  1.1  christos #  endif
    105  1.1  christos # else
    106  1.1  christos #  define gettext(msgid) (msgid)
    107  1.1  christos # endif
    108  1.1  christos 
    109  1.1  christos # ifndef gettext_noop
    110  1.1  christos /* This define is so xgettext can find the internationalizable
    111  1.1  christos    strings.  */
    112  1.1  christos #  define gettext_noop(String) String
    113  1.1  christos # endif
    114  1.1  christos 
    115  1.1  christos /* Support for bounded pointers.  */
    116  1.1  christos # if !defined _LIBC && !defined __BOUNDED_POINTERS__
    117  1.1  christos #  define __bounded	/* nothing */
    118  1.1  christos #  define __unbounded	/* nothing */
    119  1.1  christos #  define __ptrvalue	/* nothing */
    120  1.1  christos # endif
    121  1.1  christos 
    122  1.1  christos /* The `emacs' switch turns on certain matching commands
    123  1.1  christos    that make sense only in Emacs. */
    124  1.1  christos # ifdef emacs
    125  1.1  christos 
    126  1.1  christos #  include "lisp.h"
    127  1.1  christos #  include "buffer.h"
    128  1.1  christos #  include "syntax.h"
    129  1.1  christos 
    130  1.1  christos # else  /* not emacs */
    131  1.1  christos 
    132  1.1  christos /* If we are not linking with Emacs proper,
    133  1.1  christos    we can't use the relocating allocator
    134  1.1  christos    even if config.h says that we can.  */
    135  1.1  christos #  undef REL_ALLOC
    136  1.1  christos 
    137  1.1  christos #  if defined STDC_HEADERS || defined _LIBC
    138  1.1  christos #   include <stdlib.h>
    139  1.1  christos #  else
    140  1.1  christos char *malloc ();
    141  1.1  christos char *realloc ();
    142  1.1  christos #  endif
    143  1.1  christos 
    144  1.1  christos /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    145  1.1  christos    If nothing else has been done, use the method below.  */
    146  1.1  christos #  ifdef INHIBIT_STRING_HEADER
    147  1.1  christos #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
    148  1.1  christos #    if !defined bzero && !defined bcopy
    149  1.1  christos #     undef INHIBIT_STRING_HEADER
    150  1.1  christos #    endif
    151  1.1  christos #   endif
    152  1.1  christos #  endif
    153  1.1  christos 
    154  1.1  christos /* This is the normal way of making sure we have a bcopy and a bzero.
    155  1.1  christos    This is used in most programs--a few other programs avoid this
    156  1.1  christos    by defining INHIBIT_STRING_HEADER.  */
    157  1.1  christos #  ifndef INHIBIT_STRING_HEADER
    158  1.1  christos #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
    159  1.1  christos #    include <string.h>
    160  1.1  christos #    ifndef bzero
    161  1.1  christos #     ifndef _LIBC
    162  1.1  christos #      define bzero(s, n)	(memset (s, '\0', n), (s))
    163  1.1  christos #     else
    164  1.1  christos #      define bzero(s, n)	__bzero (s, n)
    165  1.1  christos #     endif
    166  1.1  christos #    endif
    167  1.1  christos #   else
    168  1.1  christos #    include <strings.h>
    169  1.1  christos #    ifndef memcmp
    170  1.1  christos #     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
    171  1.1  christos #    endif
    172  1.1  christos #    ifndef memcpy
    173  1.1  christos #     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
    174  1.1  christos #    endif
    175  1.1  christos #   endif
    176  1.1  christos #  endif
    177  1.1  christos 
    178  1.1  christos /* Define the syntax stuff for \<, \>, etc.  */
    179  1.1  christos 
    180  1.1  christos /* This must be nonzero for the wordchar and notwordchar pattern
    181  1.1  christos    commands in re_match_2.  */
    182  1.1  christos #  ifndef Sword
    183  1.1  christos #   define Sword 1
    184  1.1  christos #  endif
    185  1.1  christos 
    186  1.1  christos #  ifdef SWITCH_ENUM_BUG
    187  1.1  christos #   define SWITCH_ENUM_CAST(x) ((int)(x))
    188  1.1  christos #  else
    189  1.1  christos #   define SWITCH_ENUM_CAST(x) (x)
    190  1.1  christos #  endif
    191  1.1  christos 
    192  1.1  christos # endif /* not emacs */
    193  1.1  christos 
    194  1.1  christos # if defined _LIBC || HAVE_LIMITS_H
    195  1.1  christos #  include <limits.h>
    196  1.1  christos # endif
    197  1.1  christos 
    198  1.1  christos # ifndef MB_LEN_MAX
    199  1.1  christos #  define MB_LEN_MAX 1
    200  1.1  christos # endif
    201  1.1  christos 
    202  1.1  christos /* Get the interface, including the syntax bits.  */
    204  1.1  christos # include <regex.h>
    205  1.1  christos 
    206  1.1  christos /* isalpha etc. are used for the character classes.  */
    207  1.1  christos # include <ctype.h>
    208  1.1  christos 
    209  1.1  christos /* Jim Meyering writes:
    210  1.1  christos 
    211  1.1  christos    "... Some ctype macros are valid only for character codes that
    212  1.1  christos    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
    213  1.1  christos    using /bin/cc or gcc but without giving an ansi option).  So, all
    214  1.1  christos    ctype uses should be through macros like ISPRINT...  If
    215  1.1  christos    STDC_HEADERS is defined, then autoconf has verified that the ctype
    216  1.1  christos    macros don't need to be guarded with references to isascii. ...
    217  1.1  christos    Defining isascii to 1 should let any compiler worth its salt
    218  1.1  christos    eliminate the && through constant folding."
    219  1.1  christos    Solaris defines some of these symbols so we must undefine them first.  */
    220  1.1  christos 
/* Guard applied before consulting a <ctype.h> classifier.  It is the
   constant 1 when STDC_HEADERS is defined, or when no isascii is known
   (neither as a macro nor through HAVE_ISASCII); otherwise the
   character code must first satisfy isascii.  */
# if !defined STDC_HEADERS && (defined isascii || defined HAVE_ISASCII)
#  define IN_CTYPE_DOMAIN(c) isascii(c)
# else
#  define IN_CTYPE_DOMAIN(c) 1
# endif

/* isblank and isgraph are used only when <ctype.h> provides them as
   macros; otherwise an equivalent is spelled out by hand.  */
# ifndef isblank
#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# else
#  define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
# endif
# ifndef isgraph
#  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
# else
#  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
# endif

/* Domain-guarded wrappers around the remaining classifiers.  Any
   earlier definition of ISPRINT is dropped before ours is installed.  */
# undef ISPRINT
# define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
# define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
# define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
# define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
# define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
# define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
# define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
# define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
# define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
# define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
    249  1.1  christos 
/* Lower-case conversion: use the `_tolower' macro when the system
   headers provide one, and the standard `tolower' otherwise.  */
# ifdef _tolower
#  define TOLOWER(c) _tolower(c)
# else
#  define TOLOWER(c) tolower(c)
# endif

/* Fallback null pointer constant for when no header has defined NULL.
   The replacement list is fully parenthesized so that it remains a
   single operand in every expression context (for example
   `sizeof NULL', which would otherwise parse as `sizeof (void *)'
   followed by a stray `0').  */
# ifndef NULL
#  define NULL ((void *)0)
# endif
    259  1.1  christos 
/* Sign-extend a character value.  Any earlier definition of
   `SIGN_EXTEND_CHAR' is discarded, since this one (we hope) behaves
   properly for every combination of machine, compiler, and `char' or
   `unsigned char' argument type.  (The basic approach was suggested
   by Per Bothner.)  */
# undef SIGN_EXTEND_CHAR
# if !__STDC__
/* No ISO C `signed char': use the arithmetic form given in
   Harbison and Steele.  */
#  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
# else  /* __STDC__ */
#  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
# endif
    271  1.1  christos 
    272  1.1  christos # ifndef emacs
    274  1.1  christos /* How many characters in the character set.  */
    275  1.1  christos #  define CHAR_SET_SIZE 256
    276  1.1  christos 
    277  1.1  christos #  ifdef SYNTAX_TABLE
    278  1.1  christos 
    279  1.1  christos extern char *re_syntax_table;
    280  1.1  christos 
    281  1.1  christos #  else /* not SYNTAX_TABLE */
    282  1.1  christos 
    283  1.1  christos static char re_syntax_table[CHAR_SET_SIZE];
    284  1.1  christos 
    285  1.1  christos static void init_syntax_once PARAMS ((void));
    286  1.1  christos 
    287  1.1  christos static void
    288  1.1  christos init_syntax_once ()
    289  1.1  christos {
    290  1.1  christos    register int c;
    291  1.1  christos    static int done = 0;
    292  1.1  christos 
    293  1.1  christos    if (done)
    294  1.1  christos      return;
    295  1.1  christos    bzero (re_syntax_table, sizeof re_syntax_table);
    296  1.1  christos 
    297  1.1  christos    for (c = 0; c < CHAR_SET_SIZE; ++c)
    298  1.1  christos      if (ISALNUM (c))
    299  1.1  christos 	re_syntax_table[c] = Sword;
    300  1.1  christos 
    301  1.1  christos    re_syntax_table['_'] = Sword;
    302  1.1  christos 
    303  1.1  christos    done = 1;
    304  1.1  christos }
    305  1.1  christos 
    306  1.1  christos #  endif /* not SYNTAX_TABLE */
    307  1.1  christos 
    308  1.1  christos #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
    309  1.1  christos 
    310  1.1  christos # endif /* emacs */
    311  1.1  christos 
    312  1.1  christos /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    314  1.1  christos    use `alloca' instead of `malloc'.  This is because using malloc in
    315  1.1  christos    re_search* or re_match* could cause memory leaks when C-g is used in
    316  1.1  christos    Emacs; also, malloc is slower and causes storage fragmentation.  On
    317  1.1  christos    the other hand, malloc is more portable, and easier to debug.
    318  1.1  christos 
    319  1.1  christos    Because we sometimes use alloca, some routines have to be macros,
    320  1.1  christos    not functions -- `alloca'-allocated space disappears at the end of the
    321  1.1  christos    function it is called in.  */
    322  1.1  christos 
# ifndef REGEX_MALLOC

/* alloca-based allocation.  Emacs already defines alloca, sometimes,
   so only set it up when it is not a macro yet.  */
#  if !defined alloca

/* Make alloca work the best possible way: the GCC builtin when
   compiling with GCC, otherwise <alloca.h> if it is available.  */
#   if defined __GNUC__
#    define alloca __builtin_alloca
#   else /* not __GNUC__ */
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    endif /* HAVE_ALLOCA_H */
#   endif /* not __GNUC__ */

#  endif /* !alloca */

#  define REGEX_ALLOCATE alloca

/* "Reallocate" by taking a fresh alloca block of NSIZE bytes and
   copying the first OSIZE bytes of SOURCE into it.  The new block is
   stored in a `char *destination' variable that must be in scope at
   the point of use.  */
#  define REGEX_REALLOCATE(source, osize, nsize)			\
  (destination = (char *) alloca (nsize),				\
   memcpy (destination, source, osize))

/* Nothing to release after alloca; the void expression only keeps
   gcc from warning.  */
#  define REGEX_FREE(arg) ((void)0)

# else /* REGEX_MALLOC */

/* Heap-based allocation: map straight onto malloc, realloc and free.
   The old size is not needed by realloc and is ignored.  */
#  define REGEX_ALLOCATE malloc
#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#  define REGEX_FREE free

# endif /* REGEX_MALLOC */
    356  1.1  christos 
    357  1.1  christos /* Define how to allocate the failure stack.  */
    358  1.1  christos 
# if defined REL_ALLOC && defined REGEX_MALLOC

/* Relocating allocator: all three operations act on the
   `failure_stack_ptr' variable, so the SOURCE/OSIZE/PTR arguments are
   not used.  */
#  define REGEX_ALLOCATE_STACK(size)				\
  r_alloc (&failure_stack_ptr, (size))
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
  r_re_alloc (&failure_stack_ptr, (nsize))
#  define REGEX_FREE_STACK(ptr)					\
  r_alloc_free (&failure_stack_ptr)

# else /* not using relocating allocator */

#  ifdef REGEX_MALLOC

/* Heap-allocated failure stack.  */
#   define REGEX_ALLOCATE_STACK malloc
#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#   define REGEX_FREE_STACK free

#  else /* not REGEX_MALLOC */

/* alloca-allocated failure stack.  */
#   define REGEX_ALLOCATE_STACK alloca

#   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything.  Expand to a void expression,
   the same way REGEX_FREE does, so that the macro is usable wherever
   the other variants (which are function calls) are: as an operand of
   the comma operator, or as the sole body of an `if' without leaving
   an empty statement behind.  */
#   define REGEX_FREE_STACK(arg) ((void)0)

#  endif /* not REGEX_MALLOC */
# endif /* not using relocating allocator */
    387  1.1  christos 
    388  1.1  christos 
/* True if `size1' is nonzero and PTR points anywhere inside `string1'
   or just past its end.  `string1' and `size1' are taken from the
   scope in which the macro is used.  This works if PTR is NULL, which
   is a good thing.  */
# define FIRST_STRING_P(ptr) 					\
  (size1 != 0 && (ptr) >= string1 && (ptr) <= string1 + size1)

/* (Re)Allocate N items of type T using malloc, or fail.
   RETALLOC stores the realloc result back into ADDR; RETALLOC_IF is a
   statement that allocates afresh when ADDR is null and reallocates
   otherwise.  REGEX_TALLOC goes through REGEX_ALLOCATE instead.  */
# define TALLOC(n, t) ((t *) malloc (sizeof (t) * (n)))
# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, sizeof (t) * (n)))
# define RETALLOC_IF(addr, n, t) \
  if (!(addr)) (addr) = TALLOC ((n), t); else RETALLOC((addr), (n), t)
# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE (sizeof (t) * (n)))

/* Width of a byte, in bits.  */
# define BYTEWIDTH 8

/* Nonzero when the strings S1 and S2 compare equal.  */
# define STREQ(s1, s2) (strcmp (s1, s2) == 0)

/* Larger and smaller of two values.  Either argument may be evaluated
   twice.  */
# undef MIN
# undef MAX
# define MIN(a, b) ((b) > (a) ? (a) : (b))
# define MAX(a, b) ((b) < (a) ? (a) : (b))

/* Truth values used throughout this file.  */
typedef char boolean;
# define true 1
# define false 0
    414  1.1  christos 
    415  1.1  christos static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
    416  1.1  christos                                                    reg_syntax_t syntax,
    417  1.1  christos                                                    struct re_pattern_buffer *bufp));
    418  1.1  christos 
    419  1.1  christos static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
    420  1.1  christos 					     const char *string1, int size1,
    421  1.1  christos 					     const char *string2, int size2,
    422  1.1  christos 					     int pos,
    423  1.1  christos 					     struct re_registers *regs,
    424  1.1  christos 					     int stop));
    425  1.1  christos static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
    426  1.1  christos 				     const char *string1, int size1,
    427  1.1  christos 				     const char *string2, int size2,
    428  1.1  christos 				     int startpos, int range,
    429  1.1  christos 				     struct re_registers *regs, int stop));
    430  1.1  christos static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
    431  1.1  christos 
    432  1.1  christos #ifdef MBS_SUPPORT
    433  1.1  christos static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
    434  1.1  christos                                                    reg_syntax_t syntax,
    435  1.1  christos                                                    struct re_pattern_buffer *bufp));
    436  1.1  christos 
    437  1.1  christos 
    438  1.1  christos static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
    439  1.1  christos 					    const char *cstring1, int csize1,
    440  1.1  christos 					    const char *cstring2, int csize2,
    441  1.1  christos 					    int pos,
    442  1.1  christos 					    struct re_registers *regs,
    443  1.1  christos 					    int stop,
    444  1.1  christos 					    wchar_t *string1, int size1,
    445  1.1  christos 					    wchar_t *string2, int size2,
    446  1.1  christos 					    int *mbs_offset1, int *mbs_offset2));
    447  1.1  christos static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
    448  1.1  christos 				    const char *string1, int size1,
    449  1.1  christos 				    const char *string2, int size2,
    450  1.1  christos 				    int startpos, int range,
    451  1.1  christos 				    struct re_registers *regs, int stop));
    452  1.1  christos static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
    453  1.1  christos #endif
    454  1.1  christos 
    455  1.1  christos /* These are the command codes that appear in compiled regular
    457  1.1  christos    expressions.  Some opcodes are followed by argument bytes.  A
    458  1.1  christos    command code can specify any interpretation whatsoever for its
    459  1.1  christos    arguments.  Zero bytes may appear in the compiled regular expression.  */
    460  1.1  christos 
    461  1.1  christos typedef enum
    462  1.1  christos {
    463  1.1  christos   no_op = 0,
    464  1.1  christos 
    465  1.1  christos   /* Succeed right away--no more backtracking.  */
    466  1.1  christos   succeed,
    467  1.1  christos 
    468  1.1  christos         /* Followed by one byte giving n, then by n literal bytes.  */
    469  1.1  christos   exactn,
    470  1.1  christos 
    471  1.1  christos # ifdef MBS_SUPPORT
    472  1.1  christos 	/* Same as exactn, but contains binary data.  */
    473  1.1  christos   exactn_bin,
    474  1.1  christos # endif
    475  1.1  christos 
    476  1.1  christos         /* Matches any (more or less) character.  */
    477  1.1  christos   anychar,
    478  1.1  christos 
    479  1.1  christos         /* Matches any one char belonging to specified set.  First
    480  1.1  christos            following byte is number of bitmap bytes.  Then come bytes
    481  1.1  christos            for a bitmap saying which chars are in.  Bits in each byte
    482  1.1  christos            are ordered low-bit-first.  A character is in the set if its
    483  1.1  christos            bit is 1.  A character too large to have a bit in the map is
    484  1.1  christos            automatically not in the set.  */
    485  1.1  christos         /* ifdef MBS_SUPPORT, following element is length of character
    486  1.1  christos 	   classes, length of collating symbols, length of equivalence
    487  1.1  christos 	   classes, length of character ranges, and length of characters.
    488  1.1  christos 	   Next, character class element, collating symbols elements,
    489  1.1  christos 	   equivalence class elements, range elements, and character
    490  1.1  christos 	   elements follow.
    491  1.1  christos 	   See regex_compile function.  */
    492  1.1  christos   charset,
    493  1.1  christos 
    494  1.1  christos         /* Same parameters as charset, but match any character that is
    495  1.1  christos            not one of those specified.  */
    496  1.1  christos   charset_not,
    497  1.1  christos 
    498  1.1  christos         /* Start remembering the text that is matched, for storing in a
    499  1.1  christos            register.  Followed by one byte with the register number, in
    500  1.1  christos            the range 0 to one less than the pattern buffer's re_nsub
    501  1.1  christos            field.  Then followed by one byte with the number of groups
    502  1.1  christos            inner to this one.  (This last has to be part of the
    503  1.1  christos            start_memory only because we need it in the on_failure_jump
    504  1.1  christos            of re_match_2.)  */
    505  1.1  christos   start_memory,
    506  1.1  christos 
    507  1.1  christos         /* Stop remembering the text that is matched and store it in a
    508  1.1  christos            memory register.  Followed by one byte with the register
    509  1.1  christos            number, in the range 0 to one less than `re_nsub' in the
    510  1.1  christos            pattern buffer, and one byte with the number of inner groups,
    511  1.1  christos            just like `start_memory'.  (We need the number of inner
    512  1.1  christos            groups here because we don't have any easy way of finding the
    513  1.1  christos            corresponding start_memory when we're at a stop_memory.)  */
    514  1.1  christos   stop_memory,
    515  1.1  christos 
    516  1.1  christos         /* Match a duplicate of something remembered. Followed by one
    517  1.1  christos            byte containing the register number.  */
    518  1.1  christos   duplicate,
    519  1.1  christos 
    520  1.1  christos         /* Fail unless at beginning of line.  */
    521  1.1  christos   begline,
    522  1.1  christos 
    523  1.1  christos         /* Fail unless at end of line.  */
    524  1.1  christos   endline,
    525  1.1  christos 
    526  1.1  christos         /* Succeeds if at beginning of buffer (if emacs) or at beginning
    527  1.1  christos            of string to be matched (if not).  */
    528  1.1  christos   begbuf,
    529  1.1  christos 
    530  1.1  christos         /* Analogously, for end of buffer/string.  */
    531  1.1  christos   endbuf,
    532  1.1  christos 
    533  1.1  christos         /* Followed by two byte relative address to which to jump.  */
    534  1.1  christos   jump,
    535  1.1  christos 
    536  1.1  christos 	/* Same as jump, but marks the end of an alternative.  */
    537  1.1  christos   jump_past_alt,
    538  1.1  christos 
    539  1.1  christos         /* Followed by two-byte relative address of place to resume at
    540  1.1  christos            in case of failure.  */
    541  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    542  1.1  christos   on_failure_jump,
    543  1.1  christos 
    544  1.1  christos         /* Like on_failure_jump, but pushes a placeholder instead of the
    545  1.1  christos            current string position when executed.  */
    546  1.1  christos   on_failure_keep_string_jump,
    547  1.1  christos 
    548  1.1  christos         /* Throw away latest failure point and then jump to following
    549  1.1  christos            two-byte relative address.  */
    550  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    551  1.1  christos   pop_failure_jump,
    552  1.1  christos 
        /* Change to pop_failure_jump if we know we won't have to
           backtrack to match; otherwise change to jump.  This is used to jump
    555  1.1  christos            back to the beginning of a repeat.  If what follows this jump
    556  1.1  christos            clearly won't match what the repeat does, such that we can be
    557  1.1  christos            sure that there is no use backtracking out of repetitions
    558  1.1  christos            already matched, then we change it to a pop_failure_jump.
    559  1.1  christos            Followed by two-byte address.  */
    560  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    561  1.1  christos   maybe_pop_jump,
    562  1.1  christos 
    563  1.1  christos         /* Jump to following two-byte address, and push a dummy failure
    564  1.1  christos            point. This failure point will be thrown away if an attempt
    565  1.1  christos            is made to use it for a failure.  A `+' construct makes this
    566  1.1  christos            before the first repeat.  Also used as an intermediary kind
    567  1.1  christos            of jump when compiling an alternative.  */
    568  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    569  1.1  christos   dummy_failure_jump,
    570  1.1  christos 
    571  1.1  christos 	/* Push a dummy failure point and continue.  Used at the end of
    572  1.1  christos 	   alternatives.  */
    573  1.1  christos   push_dummy_failure,
    574  1.1  christos 
    575  1.1  christos         /* Followed by two-byte relative address and two-byte number n.
    576  1.1  christos            After matching N times, jump to the address upon failure.  */
    577  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    578  1.1  christos   succeed_n,
    579  1.1  christos 
    580  1.1  christos         /* Followed by two-byte relative address, and two-byte number n.
    581  1.1  christos            Jump to the address N times, then fail.  */
    582  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    583  1.1  christos   jump_n,
    584  1.1  christos 
    585  1.1  christos         /* Set the following two-byte relative address to the
    586  1.1  christos            subsequent two-byte number.  The address *includes* the two
    587  1.1  christos            bytes of number.  */
    588  1.1  christos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
    589  1.1  christos   set_number_at,
    590  1.1  christos 
    591  1.1  christos   wordchar,	/* Matches any word-constituent character.  */
    592  1.1  christos   notwordchar,	/* Matches any char that is not a word-constituent.  */
    593  1.1  christos 
    594  1.1  christos   wordbeg,	/* Succeeds if at word beginning.  */
    595  1.1  christos   wordend,	/* Succeeds if at word end.  */
    596  1.1  christos 
    597  1.1  christos   wordbound,	/* Succeeds if at a word boundary.  */
    598  1.1  christos   notwordbound	/* Succeeds if not at a word boundary.  */
    599  1.1  christos 
    600  1.1  christos # ifdef emacs
    601  1.1  christos   ,before_dot,	/* Succeeds if before point.  */
    602  1.1  christos   at_dot,	/* Succeeds if at point.  */
    603  1.1  christos   after_dot,	/* Succeeds if after point.  */
    604  1.1  christos 
    605  1.1  christos 	/* Matches any character whose syntax is specified.  Followed by
    606  1.1  christos            a byte which contains a syntax code, e.g., Sword.  */
    607  1.1  christos   syntaxspec,
    608  1.1  christos 
    609  1.1  christos 	/* Matches any character whose syntax is not that specified.  */
    610  1.1  christos   notsyntaxspec
    611  1.1  christos # endif /* emacs */
    612  1.1  christos } re_opcode_t;
    613  1.1  christos #endif /* not INSIDE_RECURSION */
    614  1.1  christos 
    615  1.1  christos 
    617  1.1  christos #ifdef BYTE
    618  1.1  christos # define CHAR_T char
    619  1.1  christos # define UCHAR_T unsigned char
    620  1.1  christos # define COMPILED_BUFFER_VAR bufp->buffer
    621  1.1  christos # define OFFSET_ADDRESS_SIZE 2
    622  1.1  christos # define PREFIX(name) byte_##name
    623  1.1  christos # define ARG_PREFIX(name) name
    624  1.1  christos # define PUT_CHAR(c) putchar (c)
    625  1.1  christos #else
    626  1.1  christos # ifdef WCHAR
    627  1.1  christos #  define CHAR_T wchar_t
    628  1.1  christos #  define UCHAR_T wchar_t
    629  1.1  christos #  define COMPILED_BUFFER_VAR wc_buffer
    630  1.1  christos #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
    631  1.1  christos #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
    632  1.1  christos #  define PREFIX(name) wcs_##name
    633  1.1  christos #  define ARG_PREFIX(name) c##name
    634  1.1  christos /* Should we use wide stream??  */
    635  1.1  christos #  define PUT_CHAR(c) printf ("%C", c);
    636  1.1  christos #  define TRUE 1
    637  1.1  christos #  define FALSE 0
    638  1.1  christos # else
    639  1.1  christos #  ifdef MBS_SUPPORT
    640  1.1  christos #   define WCHAR
    641  1.1  christos #   define INSIDE_RECURSION
    642  1.1  christos #   include "regex.c"
    643  1.1  christos #   undef INSIDE_RECURSION
    644  1.1  christos #  endif
    645  1.1  christos #  define BYTE
    646  1.1  christos #  define INSIDE_RECURSION
    647  1.1  christos #  include "regex.c"
    648  1.1  christos #  undef INSIDE_RECURSION
    649  1.1  christos # endif
    650  1.1  christos #endif
    651  1.1  christos #include "unlocked-io.h"
    652  1.1  christos 
    653  1.1  christos #ifdef INSIDE_RECURSION
    654  1.1  christos /* Common operations on the compiled pattern.  */
    655  1.1  christos 
    656  1.1  christos /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
    657  1.1  christos /* In the WCHAR instantiation, we store NUMBER in 1 element.  */
                       /* BYTE form: DESTINATION[0] receives the low-order eight bits of
                          NUMBER and DESTINATION[1] receives NUMBER >> 8.  WCHAR form:
                          NUMBER is cast to UCHAR_T and stored in *DESTINATION.  The BYTE
                          form evaluates each argument twice, so arguments should be free
                          of side effects.  */
    658  1.1  christos 
    659  1.1  christos # ifdef WCHAR
    660  1.1  christos #  define STORE_NUMBER(destination, number)				\
    661  1.1  christos   do {									\
    662  1.1  christos     *(destination) = (UCHAR_T)(number);				\
    663  1.1  christos   } while (0)
    664  1.1  christos # else /* BYTE */
    665  1.1  christos #  define STORE_NUMBER(destination, number)				\
    666  1.1  christos   do {									\
    667  1.1  christos     (destination)[0] = (number) & 0377;					\
    668  1.1  christos     (destination)[1] = (number) >> 8;					\
    669  1.1  christos   } while (0)
    670  1.1  christos # endif /* WCHAR */
    671  1.1  christos 
    672  1.1  christos /* Same as STORE_NUMBER, except increment DESTINATION to
    673  1.1  christos    the byte after where the number is stored.  Therefore, DESTINATION
    674  1.1  christos    must be an lvalue.  */
    675  1.1  christos /* In the WCHAR instantiation, we store NUMBER in 1 element.  */
                       /* DESTINATION is advanced by OFFSET_ADDRESS_SIZE elements after the
                          store.  DESTINATION is expanded more than once.  */
    676  1.1  christos 
    677  1.1  christos # define STORE_NUMBER_AND_INCR(destination, number)			\
    678  1.1  christos   do {									\
    679  1.1  christos     STORE_NUMBER (destination, number);					\
    680  1.1  christos     (destination) += OFFSET_ADDRESS_SIZE;				\
    681  1.1  christos   } while (0)
    682  1.1  christos 
    683  1.1  christos /* Put into DESTINATION a number stored in two contiguous bytes starting
    684  1.1  christos    at SOURCE.  */
    685  1.1  christos /* In the WCHAR instantiation, the number is read from 1 element.  */
                       /* BYTE form: the low byte is SOURCE[0] masked with 0377; the high
                          byte SOURCE[1] is passed through SIGN_EXTEND_CHAR (defined
                          elsewhere) and shifted left by 8 before being added.  WCHAR
                          form: DESTINATION is assigned *SOURCE directly.  DESTINATION
                          must be an lvalue; the BYTE form expands both arguments twice.  */
    686  1.1  christos 
    687  1.1  christos # ifdef WCHAR
    688  1.1  christos #  define EXTRACT_NUMBER(destination, source)				\
    689  1.1  christos   do {									\
    690  1.1  christos     (destination) = *(source);						\
    691  1.1  christos   } while (0)
    692  1.1  christos # else /* BYTE */
    693  1.1  christos #  define EXTRACT_NUMBER(destination, source)				\
    694  1.1  christos   do {									\
    695  1.1  christos     (destination) = *(source) & 0377;					\
    696  1.1  christos     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
    697  1.1  christos   } while (0)
    698  1.1  christos # endif
    699  1.1  christos 
    700  1.1  christos # ifdef DEBUG
                       /* Function form of EXTRACT_NUMBER, compiled only when DEBUG is
                          defined.  Reads the number stored at SOURCE and writes it to
                          *DEST; SOURCE itself is not advanced.  */
    701  1.1  christos static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
    702  1.1  christos static void
    703  1.1  christos PREFIX(extract_number) (dest, source)
    704  1.1  christos     int *dest;
    705  1.1  christos     UCHAR_T *source;
    706  1.1  christos {
    707  1.1  christos #  ifdef WCHAR
    708  1.1  christos   *dest = *source;
    709  1.1  christos #  else /* BYTE */
                         /* High byte is sign-extended, low byte is taken unsigned.  */
    710  1.1  christos   int temp = SIGN_EXTEND_CHAR (*(source + 1));
    711  1.1  christos   *dest = *source & 0377;
    712  1.1  christos   *dest += temp << 8;
    713  1.1  christos #  endif
    714  1.1  christos }
    715  1.1  christos 
                       /* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
                          function above instead of the inline macro.  DEST has its
                          address taken, so it must be an lvalue.  */
    716  1.1  christos #  ifndef EXTRACT_MACROS /* To debug the macros.  */
    717  1.1  christos #   undef EXTRACT_NUMBER
    718  1.1  christos #   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
    719  1.1  christos #  endif /* not EXTRACT_MACROS */
    720  1.1  christos 
    721  1.1  christos # endif /* DEBUG */
    722  1.1  christos 
    723  1.1  christos /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
    724  1.1  christos    SOURCE must be an lvalue.  */
                       /* SOURCE is advanced by OFFSET_ADDRESS_SIZE elements after the
                          number has been read.  */
    725  1.1  christos 
    726  1.1  christos # define EXTRACT_NUMBER_AND_INCR(destination, source)			\
    727  1.1  christos   do {									\
    728  1.1  christos     EXTRACT_NUMBER (destination, source);				\
    729  1.1  christos     (source) += OFFSET_ADDRESS_SIZE; 					\
    730  1.1  christos   } while (0)
    731  1.1  christos 
    732  1.1  christos # ifdef DEBUG
                       /* Function form of EXTRACT_NUMBER_AND_INCR, compiled only when DEBUG
                          is defined.  Stores the number found at *SOURCE in *DESTINATION
                          and then advances *SOURCE by OFFSET_ADDRESS_SIZE elements.  */
    733  1.1  christos static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
    734  1.1  christos 						       UCHAR_T **source));
    735  1.1  christos static void
    736  1.1  christos PREFIX(extract_number_and_incr) (destination, source)
    737  1.1  christos     int *destination;
    738  1.1  christos     UCHAR_T **source;
    739  1.1  christos {
    740  1.1  christos   PREFIX(extract_number) (destination, *source);
    741  1.1  christos   *source += OFFSET_ADDRESS_SIZE;
    742  1.1  christos }
    743  1.1  christos 
                       /* Unless EXTRACT_MACROS is defined, route the macro through the
                          function above.  Both arguments have their address taken, so
                          both must be lvalues.  */
    744  1.1  christos #  ifndef EXTRACT_MACROS
    745  1.1  christos #   undef EXTRACT_NUMBER_AND_INCR
    746  1.1  christos #   define EXTRACT_NUMBER_AND_INCR(dest, src) \
    747  1.1  christos   PREFIX(extract_number_and_incr) (&dest, &src)
    748  1.1  christos #  endif /* not EXTRACT_MACROS */
    749  1.1  christos 
    750  1.1  christos # endif /* DEBUG */
    751  1.1  christos 
    752  1.1  christos 
    753  1.1  christos 
    755  1.1  christos /* If DEBUG is defined, Regex prints many voluminous messages about what
    756  1.1  christos    it is doing (if the variable `debug' is nonzero).  If linked with the
    757  1.1  christos    main program in `iregex.c', you can enter patterns and strings
    758  1.1  christos    interactively.  And if linked with the main program in `main.c' and
    759  1.1  christos    the other test files, you can run the already-written tests.  */
    760  1.1  christos 
    761  1.1  christos # ifdef DEBUG
    762  1.1  christos 
    763  1.1  christos #  ifndef DEFINED_ONCE
                       /* Definitions that do not depend on PREFIX(); guarded by
                          DEFINED_ONCE so that they are emitted only once.  */
    764  1.1  christos 
    765  1.1  christos /* We use standard I/O for debugging.  */
    766  1.1  christos #   include <stdio.h>
    767  1.1  christos 
    768  1.1  christos /* It is useful to test things that ``must'' be true when debugging.  */
    769  1.1  christos #   include <assert.h>
    770  1.1  christos 
                       /* Run-time switch: the DEBUG_PRINT* macros print only when this is
                          nonzero.  */
    771  1.1  christos static int debug;
    772  1.1  christos 
                       /* DEBUG_STATEMENT (E) expands to E itself.  DEBUG_PRINTn passes its
                          n arguments to printf when `debug' is nonzero; the first argument
                          is used as the format string.
                          NOTE(review): these expand to a bare `if' without braces, so an
                          `else' following a use would bind to the macro's `if'.  Confirm
                          no caller relies on that before wrapping them in do/while.  */
    773  1.1  christos #   define DEBUG_STATEMENT(e) e
    774  1.1  christos #   define DEBUG_PRINT1(x) if (debug) printf (x)
    775  1.1  christos #   define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
    776  1.1  christos #   define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
    777  1.1  christos #   define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
    778  1.1  christos #  endif /* not DEFINED_ONCE */
    779  1.1  christos 
                       /* Per-instantiation wrappers: when `debug' is nonzero, call the
                          PREFIX()ed printing functions.  The P argument of
                          DEBUG_PRINT_COMPILED_PATTERN is not used in the expansion.  */
    780  1.1  christos #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
    781  1.1  christos   if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
    782  1.1  christos #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
    783  1.1  christos   if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
    784  1.1  christos 
    785  1.1  christos 
    786  1.1  christos /* Print the fastmap in human-readable form.  */
                       /* FASTMAP is read at indices 0 .. (1 << BYTEWIDTH) - 1.  Every index
                          whose entry is nonzero is written to stdout as a raw character;
                          a run of two or more consecutive nonzero entries is written as
                          FIRST-LAST.  The output ends with a newline.  */
    787  1.1  christos 
    788  1.1  christos #  ifndef DEFINED_ONCE
    789  1.1  christos void
    790  1.1  christos print_fastmap (fastmap)
    791  1.1  christos     char *fastmap;
    792  1.1  christos {
    793  1.1  christos   unsigned was_a_range = 0;
    794  1.1  christos   unsigned i = 0;
    795  1.1  christos 
    796  1.1  christos   while (i < (1 << BYTEWIDTH))
    797  1.1  christos     {
                             /* I is incremented as part of the test, so inside the body
                                the entry just examined is I - 1.  */
    798  1.1  christos       if (fastmap[i++])
    799  1.1  christos 	{
    800  1.1  christos 	  was_a_range = 0;
    801  1.1  christos           putchar (i - 1);
                                 /* Skip over the rest of a run of set entries.  */
    802  1.1  christos           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
    803  1.1  christos             {
    804  1.1  christos               was_a_range = 1;
    805  1.1  christos               i++;
    806  1.1  christos             }
    807  1.1  christos 	  if (was_a_range)
    808  1.1  christos             {
                                     /* I - 1 is now the last set entry of the run.  */
    809  1.1  christos               printf ("-");
    810  1.1  christos               putchar (i - 1);
    811  1.1  christos             }
    812  1.1  christos         }
    813  1.1  christos     }
    814  1.1  christos   putchar ('\n');
    815  1.1  christos }
    816  1.1  christos #  endif /* not DEFINED_ONCE */
    817  1.1  christos 
    818  1.1  christos 
    819  1.1  christos /* Print a compiled pattern string in human-readable form, starting at
    820  1.1  christos    the START pointer into it and ending just before the pointer END.  */
    821  1.1  christos 
    822  1.1  christos void
    823  1.1  christos PREFIX(print_partial_compiled_pattern) (start, end)
    824  1.1  christos     UCHAR_T *start;
    825  1.1  christos     UCHAR_T *end;
    826  1.1  christos {
    827  1.1  christos   int mcnt, mcnt2;
    828  1.1  christos   UCHAR_T *p1;
    829  1.1  christos   UCHAR_T *p = start;
    830  1.1  christos   UCHAR_T *pend = end;
    831  1.1  christos 
    832  1.1  christos   if (start == NULL)
    833  1.1  christos     {
    834  1.1  christos       printf ("(null)\n");
    835  1.1  christos       return;
    836  1.1  christos     }
    837  1.1  christos 
    838  1.1  christos   /* Loop over pattern commands.  */
    839  1.1  christos   while (p < pend)
    840  1.1  christos     {
    841  1.1  christos #  ifdef _LIBC
    842  1.1  christos       printf ("%td:\t", p - start);
    843  1.1  christos #  else
    844  1.1  christos       printf ("%ld:\t", (long int) (p - start));
    845  1.1  christos #  endif
    846  1.1  christos 
    847  1.1  christos       switch ((re_opcode_t) *p++)
    848  1.1  christos 	{
    849  1.1  christos         case no_op:
    850  1.1  christos           printf ("/no_op");
    851  1.1  christos           break;
    852  1.1  christos 
    853  1.1  christos 	case exactn:
    854  1.1  christos 	  mcnt = *p++;
    855  1.1  christos           printf ("/exactn/%d", mcnt);
    856  1.1  christos           do
    857  1.1  christos 	    {
    858  1.1  christos               putchar ('/');
    859  1.1  christos 	      PUT_CHAR (*p++);
    860  1.1  christos             }
    861  1.1  christos           while (--mcnt);
    862  1.1  christos           break;
    863  1.1  christos 
    864  1.1  christos #  ifdef MBS_SUPPORT
    865  1.1  christos 	case exactn_bin:
    866  1.1  christos 	  mcnt = *p++;
    867  1.1  christos 	  printf ("/exactn_bin/%d", mcnt);
    868  1.1  christos           do
    869  1.1  christos 	    {
    870  1.1  christos 	      printf("/%lx", (long int) *p++);
    871  1.1  christos             }
    872  1.1  christos           while (--mcnt);
    873  1.1  christos           break;
    874  1.1  christos #  endif /* MBS_SUPPORT */
    875  1.1  christos 
    876  1.1  christos 	case start_memory:
    877  1.1  christos           mcnt = *p++;
    878  1.1  christos           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
    879  1.1  christos           break;
    880  1.1  christos 
    881  1.1  christos 	case stop_memory:
    882  1.1  christos           mcnt = *p++;
    883  1.1  christos 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
    884  1.1  christos           break;
    885  1.1  christos 
    886  1.1  christos 	case duplicate:
    887  1.1  christos 	  printf ("/duplicate/%ld", (long int) *p++);
    888  1.1  christos 	  break;
    889  1.1  christos 
    890  1.1  christos 	case anychar:
    891  1.1  christos 	  printf ("/anychar");
    892  1.1  christos 	  break;
    893  1.1  christos 
    894  1.1  christos 	case charset:
    895  1.1  christos         case charset_not:
    896  1.1  christos           {
    897  1.1  christos #  ifdef WCHAR
    898  1.1  christos 	    int i, length;
    899  1.1  christos 	    wchar_t *workp = p;
    900  1.1  christos 	    printf ("/charset [%s",
    901  1.1  christos 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
    902  1.1  christos 	    p += 5;
    903  1.1  christos 	    length = *workp++; /* the length of char_classes */
    904  1.1  christos 	    for (i=0 ; i<length ; i++)
    905  1.1  christos 	      printf("[:%lx:]", (long int) *p++);
    906  1.1  christos 	    length = *workp++; /* the length of collating_symbol */
    907  1.1  christos 	    for (i=0 ; i<length ;)
    908  1.1  christos 	      {
    909  1.1  christos 		printf("[.");
    910  1.1  christos 		while(*p != 0)
    911  1.1  christos 		  PUT_CHAR((i++,*p++));
    912  1.1  christos 		i++,p++;
    913  1.1  christos 		printf(".]");
    914  1.1  christos 	      }
    915  1.1  christos 	    length = *workp++; /* the length of equivalence_class */
    916  1.1  christos 	    for (i=0 ; i<length ;)
    917  1.1  christos 	      {
    918  1.1  christos 		printf("[=");
    919  1.1  christos 		while(*p != 0)
    920  1.1  christos 		  PUT_CHAR((i++,*p++));
    921  1.1  christos 		i++,p++;
    922  1.1  christos 		printf("=]");
    923  1.1  christos 	      }
    924  1.1  christos 	    length = *workp++; /* the length of char_range */
    925  1.1  christos 	    for (i=0 ; i<length ; i++)
    926  1.1  christos 	      {
    927  1.1  christos 		wchar_t range_start = *p++;
    928  1.1  christos 		wchar_t range_end = *p++;
    929  1.1  christos 		printf("%C-%C", range_start, range_end);
    930  1.1  christos 	      }
    931  1.1  christos 	    length = *workp++; /* the length of char */
    932  1.1  christos 	    for (i=0 ; i<length ; i++)
    933  1.1  christos 	      printf("%C", *p++);
    934  1.1  christos 	    putchar (']');
    935  1.1  christos #  else
    936  1.1  christos             register int c, last = -100;
    937  1.1  christos 	    register int in_range = 0;
    938  1.1  christos 
    939  1.1  christos 	    printf ("/charset [%s",
    940  1.1  christos 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
    941  1.1  christos 
    942  1.1  christos             assert (p + *p < pend);
    943  1.1  christos 
    944  1.1  christos             for (c = 0; c < 256; c++)
    945  1.1  christos 	      if (c / 8 < *p
    946  1.1  christos 		  && (p[1 + (c/8)] & (1 << (c % 8))))
    947  1.1  christos 		{
    948  1.1  christos 		  /* Are we starting a range?  */
    949  1.1  christos 		  if (last + 1 == c && ! in_range)
    950  1.1  christos 		    {
    951  1.1  christos 		      putchar ('-');
    952  1.1  christos 		      in_range = 1;
    953  1.1  christos 		    }
    954  1.1  christos 		  /* Have we broken a range?  */
    955  1.1  christos 		  else if (last + 1 != c && in_range)
    956  1.1  christos               {
    957  1.1  christos 		      putchar (last);
    958  1.1  christos 		      in_range = 0;
    959  1.1  christos 		    }
    960  1.1  christos 
    961  1.1  christos 		  if (! in_range)
    962  1.1  christos 		    putchar (c);
    963  1.1  christos 
    964  1.1  christos 		  last = c;
    965  1.1  christos               }
    966  1.1  christos 
    967  1.1  christos 	    if (in_range)
    968  1.1  christos 	      putchar (last);
    969  1.1  christos 
    970  1.1  christos 	    putchar (']');
    971  1.1  christos 
    972  1.1  christos 	    p += 1 + *p;
    973  1.1  christos #  endif /* WCHAR */
    974  1.1  christos 	  }
    975  1.1  christos 	  break;
    976  1.1  christos 
    977  1.1  christos 	case begline:
    978  1.1  christos 	  printf ("/begline");
    979  1.1  christos           break;
    980  1.1  christos 
    981  1.1  christos 	case endline:
    982  1.1  christos           printf ("/endline");
    983  1.1  christos           break;
    984  1.1  christos 
    985  1.1  christos 	case on_failure_jump:
    986  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
    987  1.1  christos #  ifdef _LIBC
    988  1.1  christos   	  printf ("/on_failure_jump to %td", p + mcnt - start);
    989  1.1  christos #  else
    990  1.1  christos   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
    991  1.1  christos #  endif
    992  1.1  christos           break;
    993  1.1  christos 
    994  1.1  christos 	case on_failure_keep_string_jump:
    995  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
    996  1.1  christos #  ifdef _LIBC
    997  1.1  christos   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
    998  1.1  christos #  else
    999  1.1  christos   	  printf ("/on_failure_keep_string_jump to %ld",
   1000  1.1  christos 		  (long int) (p + mcnt - start));
   1001  1.1  christos #  endif
   1002  1.1  christos           break;
   1003  1.1  christos 
   1004  1.1  christos 	case dummy_failure_jump:
   1005  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1006  1.1  christos #  ifdef _LIBC
   1007  1.1  christos   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
   1008  1.1  christos #  else
   1009  1.1  christos   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
   1010  1.1  christos #  endif
   1011  1.1  christos           break;
   1012  1.1  christos 
   1013  1.1  christos 	case push_dummy_failure:
   1014  1.1  christos           printf ("/push_dummy_failure");
   1015  1.1  christos           break;
   1016  1.1  christos 
   1017  1.1  christos         case maybe_pop_jump:
   1018  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1019  1.1  christos #  ifdef _LIBC
   1020  1.1  christos   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
   1021  1.1  christos #  else
   1022  1.1  christos   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
   1023  1.1  christos #  endif
   1024  1.1  christos 	  break;
   1025  1.1  christos 
   1026  1.1  christos         case pop_failure_jump:
   1027  1.1  christos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1028  1.1  christos #  ifdef _LIBC
   1029  1.1  christos   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
   1030  1.1  christos #  else
   1031  1.1  christos   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
   1032  1.1  christos #  endif
   1033  1.1  christos 	  break;
   1034  1.1  christos 
   1035  1.1  christos         case jump_past_alt:
   1036  1.1  christos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1037  1.1  christos #  ifdef _LIBC
   1038  1.1  christos   	  printf ("/jump_past_alt to %td", p + mcnt - start);
   1039  1.1  christos #  else
   1040  1.1  christos   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
   1041  1.1  christos #  endif
   1042  1.1  christos 	  break;
   1043  1.1  christos 
   1044  1.1  christos         case jump:
   1045  1.1  christos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
   1046  1.1  christos #  ifdef _LIBC
   1047  1.1  christos   	  printf ("/jump to %td", p + mcnt - start);
   1048  1.1  christos #  else
   1049  1.1  christos   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
   1050  1.1  christos #  endif
   1051  1.1  christos 	  break;
   1052  1.1  christos 
   1053  1.1  christos         case succeed_n:
   1054  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1055  1.1  christos 	  p1 = p + mcnt;
   1056  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1057  1.1  christos #  ifdef _LIBC
   1058  1.1  christos 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
   1059  1.1  christos #  else
   1060  1.1  christos 	  printf ("/succeed_n to %ld, %d times",
   1061  1.1  christos 		  (long int) (p1 - start), mcnt2);
   1062  1.1  christos #  endif
   1063  1.1  christos           break;
   1064  1.1  christos 
   1065  1.1  christos         case jump_n:
   1066  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1067  1.1  christos 	  p1 = p + mcnt;
   1068  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1069  1.1  christos 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
   1070  1.1  christos           break;
   1071  1.1  christos 
   1072  1.1  christos         case set_number_at:
   1073  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt, &p);
   1074  1.1  christos 	  p1 = p + mcnt;
   1075  1.1  christos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
   1076  1.1  christos #  ifdef _LIBC
   1077  1.1  christos 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
   1078  1.1  christos #  else
   1079  1.1  christos 	  printf ("/set_number_at location %ld to %d",
   1080  1.1  christos 		  (long int) (p1 - start), mcnt2);
   1081  1.1  christos #  endif
   1082  1.1  christos           break;
   1083  1.1  christos 
   1084  1.1  christos         case wordbound:
   1085  1.1  christos 	  printf ("/wordbound");
   1086  1.1  christos 	  break;
   1087  1.1  christos 
   1088  1.1  christos 	case notwordbound:
   1089  1.1  christos 	  printf ("/notwordbound");
   1090  1.1  christos           break;
   1091  1.1  christos 
   1092  1.1  christos 	case wordbeg:
   1093  1.1  christos 	  printf ("/wordbeg");
   1094  1.1  christos 	  break;
   1095  1.1  christos 
   1096  1.1  christos 	case wordend:
   1097  1.1  christos 	  printf ("/wordend");
   1098  1.1  christos 	  break;
   1099  1.1  christos 
   1100  1.1  christos #  ifdef emacs
   1101  1.1  christos 	case before_dot:
   1102  1.1  christos 	  printf ("/before_dot");
   1103  1.1  christos           break;
   1104  1.1  christos 
   1105  1.1  christos 	case at_dot:
   1106  1.1  christos 	  printf ("/at_dot");
   1107  1.1  christos           break;
   1108  1.1  christos 
   1109  1.1  christos 	case after_dot:
   1110  1.1  christos 	  printf ("/after_dot");
   1111  1.1  christos           break;
   1112  1.1  christos 
   1113  1.1  christos 	case syntaxspec:
   1114  1.1  christos           printf ("/syntaxspec");
   1115  1.1  christos 	  mcnt = *p++;
   1116  1.1  christos 	  printf ("/%d", mcnt);
   1117  1.1  christos           break;
   1118  1.1  christos 
   1119  1.1  christos 	case notsyntaxspec:
   1120  1.1  christos           printf ("/notsyntaxspec");
   1121  1.1  christos 	  mcnt = *p++;
   1122  1.1  christos 	  printf ("/%d", mcnt);
   1123  1.1  christos 	  break;
   1124  1.1  christos #  endif /* emacs */
   1125  1.1  christos 
   1126  1.1  christos 	case wordchar:
   1127  1.1  christos 	  printf ("/wordchar");
   1128  1.1  christos           break;
   1129  1.1  christos 
   1130  1.1  christos 	case notwordchar:
   1131  1.1  christos 	  printf ("/notwordchar");
   1132  1.1  christos           break;
   1133  1.1  christos 
   1134  1.1  christos 	case begbuf:
   1135  1.1  christos 	  printf ("/begbuf");
   1136  1.1  christos           break;
   1137  1.1  christos 
   1138  1.1  christos 	case endbuf:
   1139  1.1  christos 	  printf ("/endbuf");
   1140  1.1  christos           break;
   1141  1.1  christos 
   1142  1.1  christos         default:
   1143  1.1  christos           printf ("?%ld", (long int) *(p-1));
   1144  1.1  christos 	}
   1145  1.1  christos 
   1146  1.1  christos       putchar ('\n');
   1147  1.1  christos     }
   1148  1.1  christos 
   1149  1.1  christos #  ifdef _LIBC
   1150  1.1  christos   printf ("%td:\tend of pattern.\n", p - start);
   1151  1.1  christos #  else
   1152  1.1  christos   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
   1153  1.1  christos #  endif
   1154  1.1  christos }
   1155  1.1  christos 
   1156  1.1  christos 
   1157  1.1  christos void
   1158  1.1  christos PREFIX(print_compiled_pattern) (bufp)
   1159  1.1  christos     struct re_pattern_buffer *bufp;
   1160  1.1  christos {
   1161  1.1  christos   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
   1162  1.1  christos 
   1163  1.1  christos   PREFIX(print_partial_compiled_pattern) (buffer, buffer
   1164  1.1  christos 				  + bufp->used / sizeof(UCHAR_T));
   1165  1.1  christos   printf ("%ld bytes used/%ld bytes allocated.\n",
   1166  1.1  christos 	  bufp->used, bufp->allocated);
   1167  1.1  christos 
   1168  1.1  christos   if (bufp->fastmap_accurate && bufp->fastmap)
   1169  1.1  christos     {
   1170  1.1  christos       printf ("fastmap: ");
   1171  1.1  christos       print_fastmap (bufp->fastmap);
   1172  1.1  christos     }
   1173  1.1  christos 
   1174  1.1  christos #  ifdef _LIBC
   1175  1.1  christos   printf ("re_nsub: %Zd\t", bufp->re_nsub);
   1176  1.1  christos #  else
   1177  1.1  christos   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
   1178  1.1  christos #  endif
   1179  1.1  christos   printf ("regs_alloc: %d\t", bufp->regs_allocated);
   1180  1.1  christos   printf ("can_be_null: %d\t", bufp->can_be_null);
   1181  1.1  christos   printf ("newline_anchor: %d\n", bufp->newline_anchor);
   1182  1.1  christos   printf ("no_sub: %d\t", bufp->no_sub);
   1183  1.1  christos   printf ("not_bol: %d\t", bufp->not_bol);
   1184  1.1  christos   printf ("not_eol: %d\t", bufp->not_eol);
   1185  1.1  christos   printf ("syntax: %lx\n", bufp->syntax);
   1186  1.1  christos   /* Perhaps we should print the translate table?  */
   1187  1.1  christos }
   1188  1.1  christos 
    1189  1.1  christos 
                        /* Print the text from position WHERE to the end of the pair of
                           strings STRING1 (SIZE1 elements) and STRING2 (SIZE2 elements).
                           If WHERE is NULL, "(null)" is printed instead.  When
                           FIRST_STRING_P (WHERE) holds (macro defined elsewhere;
                           presumably it tests whether WHERE points into STRING1), the
                           rest of STRING1 is printed in full and printing continues at
                           the start of STRING2.  Output from STRING2 is cut off with
                           "..." once more than 100 elements have been printed.
                           Compiled only when DEBUG is defined.  */
    1190  1.1  christos void
    1191  1.1  christos PREFIX(print_double_string) (where, string1, size1, string2, size2)
    1192  1.1  christos     const CHAR_T *where;
    1193  1.1  christos     const CHAR_T *string1;
    1194  1.1  christos     const CHAR_T *string2;
    1195  1.1  christos     int size1;
    1196  1.1  christos     int size2;
    1197  1.1  christos {
    1198  1.1  christos   int this_char;
    1199  1.1  christos 
    1200  1.1  christos   if (where == NULL)
    1201  1.1  christos     printf ("(null)");
    1202  1.1  christos   else
    1203  1.1  christos     {
    1204  1.1  christos       int cnt;
    1205  1.1  christos 
    1206  1.1  christos       if (FIRST_STRING_P (where))
    1207  1.1  christos         {
    1208  1.1  christos           for (this_char = where - string1; this_char < size1; this_char++)
    1209  1.1  christos 	    PUT_CHAR (string1[this_char]);
    1210  1.1  christos 
                                  /* Continue from the beginning of the second string.  */
    1211  1.1  christos           where = string2;
    1212  1.1  christos         }
    1213  1.1  christos 
                              /* CNT limits only the output taken from STRING2.  */
    1214  1.1  christos       cnt = 0;
    1215  1.1  christos       for (this_char = where - string2; this_char < size2; this_char++)
    1216  1.1  christos 	{
    1217  1.1  christos 	  PUT_CHAR (string2[this_char]);
    1218  1.1  christos 	  if (++cnt > 100)
    1219  1.1  christos 	    {
    1220  1.1  christos 	      fputs ("...", stdout);
    1221  1.1  christos 	      break;
    1222  1.1  christos 	    }
    1223  1.1  christos 	}
    1224  1.1  christos     }
    1225  1.1  christos }
   1226  1.1  christos 
    1227  1.1  christos #  ifndef DEFINED_ONCE
                        /* Write the single character C to stderr.  Compiled only when
                           DEBUG is defined, and guarded by DEFINED_ONCE so that it is
                           emitted only once.  */
    1228  1.1  christos void
    1229  1.1  christos printchar (c)
    1230  1.1  christos      int c;
    1231  1.1  christos {
    1232  1.1  christos   putc (c, stderr);
    1233  1.1  christos }
    1234  1.1  christos #  endif
   1235  1.1  christos 
   1236  1.1  christos # else /* not DEBUG */
   1237  1.1  christos 
    1238  1.1  christos #  ifndef DEFINED_ONCE
                        /* DEBUG is not defined: assert and every debugging macro expand
                           to nothing, so their arguments are never evaluated.  */
    1239  1.1  christos #   undef assert
    1240  1.1  christos #   define assert(e)
    1241  1.1  christos 
    1242  1.1  christos #   define DEBUG_STATEMENT(e)
    1243  1.1  christos #   define DEBUG_PRINT1(x)
    1244  1.1  christos #   define DEBUG_PRINT2(x1, x2)
    1245  1.1  christos #   define DEBUG_PRINT3(x1, x2, x3)
    1246  1.1  christos #   define DEBUG_PRINT4(x1, x2, x3, x4)
    1247  1.1  christos #  endif /* not DEFINED_ONCE */
                        /* These two are defined outside the DEFINED_ONCE guard, i.e. on
                           every pass over this section.  */
    1248  1.1  christos #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
    1249  1.1  christos #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   1250  1.1  christos 
   1251  1.1  christos # endif /* not DEBUG */
   1252  1.1  christos 
   1253  1.1  christos 
   1254  1.1  christos 
   1256  1.1  christos # ifdef WCHAR
   1257  1.1  christos /* Convert the multibyte string SRC (LEN bytes) to wide characters
   1258  1.1  christos    stored in DEST, and return the number of wide characters written.
   1259  1.1  christos    OFFSET_BUFFER receives, for each wide character, the byte offset in
   1260  1.1  christos    SRC at which it starts (see the parameter comment below), and
   1261  1.1  christos    IS_BINARY[i] is set to TRUE when element i was copied as a raw byte
   1262  1.1  christos    because mbrtowc did not convert it, FALSE otherwise.
                          Invalid multibyte sequences are therefore treated as binary data.
                          DEST, OFFSET_BUFFER and IS_BINARY must already have been allocated
                          with enough space by the caller; nothing here checks their sizes.  */
   1263  1.1  christos 
   1264  1.1  christos static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
   1265  1.1  christos 				  size_t len, int *offset_buffer,
   1266  1.1  christos 				  char *is_binary);
   1267  1.1  christos static size_t
   1268  1.1  christos convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
   1269  1.1  christos      CHAR_T *dest;
   1270  1.1  christos      const unsigned char* src;
   1271  1.1  christos      size_t len; /* Length of the multibyte string SRC, in bytes.  */
   1272  1.1  christos 
   1273  1.1  christos      /* Holds the correspondence between positions in src (char string)
   1274  1.1  christos 	and dest (wchar_t string), kept as an optimization.
   1275  1.1  christos 	e.g. src  = "xxxyzz"
   1276  1.1  christos              dest = {'X', 'Y', 'Z'}
   1277  1.1  christos 	      (each of "xxx", "y" and "zz" represents one multibyte
   1278  1.1  christos 	       character corresponding to 'X', 'Y' and 'Z'.)
   1279  1.1  christos 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
   1280  1.1  christos 	  	        = {0, 3, 4, 6}
   1281  1.1  christos      */
   1282  1.1  christos      int *offset_buffer;
   1283  1.1  christos      char *is_binary;
   1284  1.1  christos {
   1285  1.1  christos   wchar_t *pdest = dest;
   1286  1.1  christos   const unsigned char *psrc = src;
   1287  1.1  christos   size_t wc_count = 0;
   1288  1.1  christos 
   1289  1.1  christos   mbstate_t mbs;
   1290  1.1  christos   int i, consumed;
   1291  1.1  christos   size_t mb_remain = len;
   1292  1.1  christos   size_t mb_count = 0;
   1293  1.1  christos 
   1294  1.1  christos   /* Initialize the conversion state.  */
   1295  1.1  christos   memset (&mbs, 0, sizeof (mbstate_t));
   1296  1.1  christos 
   1297  1.1  christos   offset_buffer[0] = 0;
   1298  1.1  christos   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
   1299  1.1  christos 	 psrc += consumed)
   1300  1.1  christos     {
   1301  1.1  christos       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
   1302  1.1  christos 
                             /* NOTE(review): mbrtowc returns a size_t; its error values
                                (size_t) -1 and (size_t) -2 are expected to turn negative when
                                stored in the int `consumed', which the test below relies on.
                                A return of 0 (a converted null character) also takes the
                                raw-byte path.  The conversion state `mbs' is initialized once
                                above and is not reset after a failed call -- confirm that this
                                is acceptable for stateful encodings.  */
   1303  1.1  christos       if (consumed <= 0)
   1304  1.1  christos 	/* Failed to convert; maybe src contains binary data.
   1305  1.1  christos 	   So we consume 1 byte manually.  */
   1306  1.1  christos 	{
   1307  1.1  christos 	  *pdest = *psrc;
   1308  1.1  christos 	  consumed = 1;
   1309  1.1  christos 	  is_binary[wc_count] = TRUE;
   1310  1.1  christos 	}
   1311  1.1  christos       else
   1312  1.1  christos 	is_binary[wc_count] = FALSE;
   1313  1.1  christos       /* In the sjis encoding the yen sign is used as the escape character
   1314  1.1  christos 	 in place of the reverse solidus.  So a single byte 0x5c (yen
   1315  1.1  christos 	 sign in sjis) that was converted to 0xa5 (yen sign in UCS2) is
   1316  1.1  christos 	 replaced by 0x5c (reverse solidus in UCS2).  */
   1317  1.1  christos       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
   1318  1.1  christos 	*pdest = (wchar_t) *psrc;
   1319  1.1  christos 
                             /* Record the byte offset at which the next wide character
                                starts, accumulating the bytes consumed so far.  */
   1320  1.1  christos       offset_buffer[wc_count + 1] = mb_count += consumed;
   1321  1.1  christos     }
   1322  1.1  christos 
   1323  1.1  christos   /* Fill the remainder of the buffer (indices wc_count + 1 through len)
                            with a sentinel, mb_count + 1.  Note that `i' is an int while
                            `len' is a size_t, so the comparison below mixes signedness.  */
   1324  1.1  christos   for (i = wc_count + 1 ; i <= len ; i++)
   1325  1.1  christos     offset_buffer[i] = mb_count + 1;
   1326  1.1  christos 
   1327  1.1  christos   return wc_count;
   1328  1.1  christos }
   1329  1.1  christos 
   1330  1.1  christos # endif /* WCHAR */
   1331  1.1  christos 
   1332  1.1  christos #else /* not INSIDE_RECURSION */
   1333  1.1  christos 
   1334  1.1  christos /* The regexp syntax currently recognized; `re_set_syntax' stores its
   1335  1.1  christos    argument here.  It can also be assigned to directly: each pattern
   1336  1.1  christos    buffer stores its own syntax, so it can be changed between regex
                          compilations.  */
   1337  1.1  christos /* This has no initializer because initialized variables in Emacs
   1338  1.1  christos    become read-only after dumping.  */
   1339  1.1  christos reg_syntax_t re_syntax_options;
   1340  1.1  christos 
   1341  1.1  christos 
   1342  1.1  christos /* Specify the precise syntax of regexps for compilation.  This provides
   1343  1.1  christos    for compatibility with various utilities which historically have
   1344  1.1  christos    different, incompatible syntaxes.
   1345  1.1  christos 
   1346  1.1  christos    The argument SYNTAX is a bit mask composed of the various bits
   1347  1.1  christos    defined in regex.h.  It is stored in `re_syntax_options', and the
                          previous value of that variable is returned.  */
   1348  1.1  christos 
   1349  1.1  christos reg_syntax_t
   1350  1.1  christos re_set_syntax (syntax)
   1351  1.1  christos     reg_syntax_t syntax;
   1352  1.1  christos {
   1353  1.1  christos   reg_syntax_t ret = re_syntax_options;
   1354  1.1  christos 
   1355  1.1  christos   re_syntax_options = syntax;
   1356  1.1  christos # ifdef DEBUG
                         /* In DEBUG builds the RE_DEBUG bit of SYNTAX also switches the
                            `debug' flag on or off.  */
   1357  1.1  christos   if (syntax & RE_DEBUG)
   1358  1.1  christos     debug = 1;
   1359  1.1  christos   else if (debug) /* was on but now is not */
   1360  1.1  christos     debug = 0;
   1361  1.1  christos # endif /* DEBUG */
   1362  1.1  christos   return ret;
   1363  1.1  christos }
   1364  1.1  christos # ifdef _LIBC
   1365  1.1  christos weak_alias (__re_set_syntax, re_set_syntax)
   1366  1.1  christos # endif
   1367  1.1  christos 
   1368  1.1  christos /* This table gives an error message for each of the error codes listed
   1370  1.1  christos    in regex.h.  Obviously the order here has to be the same as there.
   1371  1.1  christos    POSIX doesn't require that we do anything for REG_NOERROR,
   1372  1.1  christos    but why not be nice?

                          All messages are packed into one char array by string-literal
                          concatenation, with an explicit "\0" literal between consecutive
                          messages (this presumes gettext_noop leaves its argument a plain
                          string literal).  Each REG_*_IDX macro is the byte offset of its
                          message in the array: the previous offset plus sizeof of the
                          previous message literal, which counts that message's NUL.  */
   1373  1.1  christos 
   1374  1.1  christos static const char re_error_msgid[] =
   1375  1.1  christos   {
   1376  1.1  christos # define REG_NOERROR_IDX	0
   1377  1.1  christos     gettext_noop ("Success")	/* REG_NOERROR */
   1378  1.1  christos     "\0"
   1379  1.1  christos # define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
   1380  1.1  christos     gettext_noop ("No match")	/* REG_NOMATCH */
   1381  1.1  christos     "\0"
   1382  1.1  christos # define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
   1383  1.1  christos     gettext_noop ("Invalid regular expression") /* REG_BADPAT */
   1384  1.1  christos     "\0"
   1385  1.1  christos # define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
   1386  1.1  christos     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
   1387  1.1  christos     "\0"
   1388  1.1  christos # define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
   1389  1.1  christos     gettext_noop ("Invalid character class name") /* REG_ECTYPE */
   1390  1.1  christos     "\0"
   1391  1.1  christos # define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
   1392  1.1  christos     gettext_noop ("Trailing backslash") /* REG_EESCAPE */
   1393  1.1  christos     "\0"
   1394  1.1  christos # define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
   1395  1.1  christos     gettext_noop ("Invalid back reference") /* REG_ESUBREG */
   1396  1.1  christos     "\0"
   1397  1.1  christos # define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
   1398  1.1  christos     gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
   1399  1.1  christos     "\0"
   1400  1.1  christos # define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
   1401  1.1  christos     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
   1402  1.1  christos     "\0"
   1403  1.1  christos # define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
   1404  1.1  christos     gettext_noop ("Unmatched \\{") /* REG_EBRACE */
   1405  1.1  christos     "\0"
   1406  1.1  christos # define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
   1407  1.1  christos     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
   1408  1.1  christos     "\0"
   1409  1.1  christos # define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
   1410  1.1  christos     gettext_noop ("Invalid range end")	/* REG_ERANGE */
   1411  1.1  christos     "\0"
   1412  1.1  christos # define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
   1413  1.1  christos     gettext_noop ("Memory exhausted") /* REG_ESPACE */
   1414  1.1  christos     "\0"
   1415  1.1  christos # define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
   1416  1.1  christos     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
   1417  1.1  christos     "\0"
   1418  1.1  christos # define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
   1419  1.1  christos     gettext_noop ("Premature end of regular expression") /* REG_EEND */
   1420  1.1  christos     "\0"
   1421  1.1  christos # define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
   1422  1.1  christos     gettext_noop ("Regular expression too big") /* REG_ESIZE */
   1423  1.1  christos     "\0"
   1424  1.1  christos # define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
   1425  1.1  christos     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
   1426  1.1  christos   };
   1427  1.1  christos 
                       /* Byte offsets into re_error_msgid, listed in the same order as the
                          messages above.  Presumably indexed by the regex.h error code --
                          confirm against the code that uses this table.  */
   1428  1.1  christos static const size_t re_error_msgid_idx[] =
   1429  1.1  christos   {
   1430  1.1  christos     REG_NOERROR_IDX,
   1431  1.1  christos     REG_NOMATCH_IDX,
   1432  1.1  christos     REG_BADPAT_IDX,
   1433  1.1  christos     REG_ECOLLATE_IDX,
   1434  1.1  christos     REG_ECTYPE_IDX,
   1435  1.1  christos     REG_EESCAPE_IDX,
   1436  1.1  christos     REG_ESUBREG_IDX,
   1437  1.1  christos     REG_EBRACK_IDX,
   1438  1.1  christos     REG_EPAREN_IDX,
   1439  1.1  christos     REG_EBRACE_IDX,
   1440  1.1  christos     REG_BADBR_IDX,
   1441  1.1  christos     REG_ERANGE_IDX,
   1442  1.1  christos     REG_ESPACE_IDX,
   1443  1.1  christos     REG_BADRPT_IDX,
   1444  1.1  christos     REG_EEND_IDX,
   1445  1.1  christos     REG_ESIZE_IDX,
   1446  1.1  christos     REG_ERPAREN_IDX
   1447  1.1  christos   };
   1448  1.1  christos 
   1449  1.1  christos #endif /* INSIDE_RECURSION */
   1451  1.1  christos 
   1452  1.1  christos #ifndef DEFINED_ONCE
   1453  1.1  christos /* Avoiding alloca during matching, to placate r_alloc.  */
   1454  1.1  christos 
   1455  1.1  christos /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   1456  1.1  christos    searching and matching functions do not call alloca.  On some
   1457  1.1  christos    systems, alloca is implemented in terms of malloc, and if we're
   1458  1.1  christos    using the relocating allocator routines, then malloc could cause a
   1459  1.1  christos    relocation, which might (if the strings being searched are in the
   1460  1.1  christos    ralloc heap) shift the data out from underneath the regexp
   1461  1.1  christos    routines.
   1462  1.1  christos 
   1463  1.1  christos    Here's another reason to avoid allocation: Emacs
   1464  1.1  christos    processes input from X in a signal handler; processing X input may
   1465  1.1  christos    call malloc; if input arrives while a matching routine is calling
   1466  1.1  christos    malloc, then we are in trouble.  But Emacs can't just block input
   1467  1.1  christos    while calling matching routines; then we wouldn't notice interrupts
   1468  1.1  christos    when they come in.  So, Emacs blocks input around all regexp calls
   1469  1.1  christos    except the matching calls, which it leaves unprotected, in the
   1470  1.1  christos    faith that they will not malloc.  */
   1471  1.1  christos 
   1472  1.1  christos /* Normally, this is fine.  */
   1473  1.1  christos # define MATCH_MAY_ALLOCATE
   1474  1.1  christos 
   1475  1.1  christos /* When using GNU C, we are not REALLY using the C alloca, no matter
   1476  1.1  christos    what config.h may say.  So don't take precautions for it.  */
   1477  1.1  christos # ifdef __GNUC__
   1478  1.1  christos #  undef C_ALLOCA
   1479  1.1  christos # endif
   1480  1.1  christos 
   1481  1.1  christos /* The match routines may not allocate if (1) they would do it with malloc
   1482  1.1  christos    and (2) it's not safe for them to use malloc.
   1483  1.1  christos    Note that if REL_ALLOC is defined, matching would not use malloc for the
   1484  1.1  christos    failure stack, but we would still use it for the register vectors;
   1485  1.1  christos    so REL_ALLOC should not affect this.
                          Concretely: MATCH_MAY_ALLOCATE is undefined again when either
                          C_ALLOCA or REGEX_MALLOC is defined and `emacs' is defined.  */
   1486  1.1  christos # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
   1487  1.1  christos #  undef MATCH_MAY_ALLOCATE
   1488  1.1  christos # endif
   1489  1.1  christos #endif /* not DEFINED_ONCE */
   1490  1.1  christos 
   1491  1.1  christos #ifdef INSIDE_RECURSION
   1493  1.1  christos /* Failure stack declarations and macros; both re_compile_fastmap and
   1494  1.1  christos    re_match_2 use a failure stack.  These have to be macros because of
   1495  1.1  christos    REGEX_ALLOCATE_STACK.  */
   1496  1.1  christos 
   1497  1.1  christos 
   1498  1.1  christos /* Number of failure points for which to initially allocate space
   1499  1.1  christos    when matching.  If this number is exceeded, we allocate more
   1500  1.1  christos    space, so it is not a hard limit.  The default of 5 applies only
                          when INIT_FAILURE_ALLOC has not already been defined.  */
   1501  1.1  christos # ifndef INIT_FAILURE_ALLOC
   1502  1.1  christos #  define INIT_FAILURE_ALLOC 5
   1503  1.1  christos # endif
   1504  1.1  christos 
   1505  1.1  christos /* Roughly the maximum number of failure points on the stack.  It would
   1506  1.1  christos    be exactly that if every failure point used MAX_FAILURE_ITEMS items.
   1507  1.1  christos    This is a variable only so users of regex can assign to it; we never
   1508  1.1  christos    change it ourselves.

                          The two branches below declare the same things; the INT_IS_16BIT
                          branch uses long int / unsigned long int where the other branch
                          uses int / unsigned.  */
   1509  1.1  christos 
   1510  1.1  christos # ifdef INT_IS_16BIT
   1511  1.1  christos 
   1512  1.1  christos #  ifndef DEFINED_ONCE
   1513  1.1  christos #   if defined MATCH_MAY_ALLOCATE
   1514  1.1  christos /* 4400 was enough to cause a crash on Alpha OSF/1,
   1515  1.1  christos    whose default stack limit is 2mb.  */
   1516  1.1  christos long int re_max_failures = 4000;
   1517  1.1  christos #   else
   1518  1.1  christos long int re_max_failures = 2000;
   1519  1.1  christos #   endif
   1520  1.1  christos #  endif
   1521  1.1  christos 
                       /* One failure-stack slot: holds either a pointer or an integer.  */
   1522  1.1  christos union PREFIX(fail_stack_elt)
   1523  1.1  christos {
   1524  1.1  christos   UCHAR_T *pointer;
   1525  1.1  christos   long int integer;
   1526  1.1  christos };
   1527  1.1  christos 
   1528  1.1  christos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
   1529  1.1  christos 
                       /* The failure stack itself: STACK is the element array, SIZE the
                          number of elements allocated, AVAIL the index of the next free
                          element.  */
   1530  1.1  christos typedef struct
   1531  1.1  christos {
   1532  1.1  christos   PREFIX(fail_stack_elt_t) *stack;
   1533  1.1  christos   unsigned long int size;
   1534  1.1  christos   unsigned long int avail;		/* Offset of next open position.  */
   1535  1.1  christos } PREFIX(fail_stack_type);
   1536  1.1  christos 
   1537  1.1  christos # else /* not INT_IS_16BIT */
   1538  1.1  christos 
   1539  1.1  christos #  ifndef DEFINED_ONCE
   1540  1.1  christos #   if defined MATCH_MAY_ALLOCATE
   1541  1.1  christos /* 4400 was enough to cause a crash on Alpha OSF/1,
   1542  1.1  christos    whose default stack limit is 2mb.  */
   1543  1.1  christos int re_max_failures = 4000;
   1544  1.1  christos #   else
   1545  1.1  christos int re_max_failures = 2000;
   1546  1.1  christos #   endif
   1547  1.1  christos #  endif
   1548  1.1  christos 
                       /* One failure-stack slot: holds either a pointer or an integer.  */
   1549  1.1  christos union PREFIX(fail_stack_elt)
   1550  1.1  christos {
   1551  1.1  christos   UCHAR_T *pointer;
   1552  1.1  christos   int integer;
   1553  1.1  christos };
   1554  1.1  christos 
   1555  1.1  christos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
   1556  1.1  christos 
                       /* The failure stack itself: STACK is the element array, SIZE the
                          number of elements allocated, AVAIL the index of the next free
                          element.  */
   1557  1.1  christos typedef struct
   1558  1.1  christos {
   1559  1.1  christos   PREFIX(fail_stack_elt_t) *stack;
   1560  1.1  christos   unsigned size;
   1561  1.1  christos   unsigned avail;			/* Offset of next open position.  */
   1562  1.1  christos } PREFIX(fail_stack_type);
   1563  1.1  christos 
   1564  1.1  christos # endif /* INT_IS_16BIT */
   1565  1.1  christos 
   1566  1.1  christos # ifndef DEFINED_ONCE
                       /* State tests for the failure stack.  FAIL_STACK_EMPTY and
                          FAIL_STACK_FULL take no argument and always inspect a variable
                          named `fail_stack'; FAIL_STACK_PTR_EMPTY inspects the stack that a
                          pointer named `fail_stack_ptr' points to.  */
   1567  1.1  christos #  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
   1568  1.1  christos #  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
   1569  1.1  christos #  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
   1570  1.1  christos # endif
   1571  1.1  christos 
   1572  1.1  christos 
   1573  1.1  christos /* Define macros to initialize and free the failure stack.
   1574  1.1  christos    Both operate on a variable named `fail_stack'.

                          With MATCH_MAY_ALLOCATE, INIT_FAIL_STACK obtains room for
                          INIT_FAILURE_ALLOC elements through REGEX_ALLOCATE_STACK and does
                          `return -2' from the enclosing function if that yields NULL;
                          RESET_FAIL_STACK hands the array to REGEX_FREE_STACK.
                          Without MATCH_MAY_ALLOCATE, INIT_FAIL_STACK only resets `avail'
                          to 0 and RESET_FAIL_STACK expands to nothing.  */
   1575  1.1  christos 
   1576  1.1  christos # ifdef MATCH_MAY_ALLOCATE
   1577  1.1  christos #  define INIT_FAIL_STACK()						\
   1578  1.1  christos   do {									\
   1579  1.1  christos     fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
   1580  1.1  christos       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
   1581  1.1  christos 									\
   1582  1.1  christos     if (fail_stack.stack == NULL)				\
   1583  1.1  christos       return -2;							\
   1584  1.1  christos 									\
   1585  1.1  christos     fail_stack.size = INIT_FAILURE_ALLOC;			\
   1586  1.1  christos     fail_stack.avail = 0;					\
   1587  1.1  christos   } while (0)
   1588  1.1  christos 
   1589  1.1  christos #  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
   1590  1.1  christos # else
   1591  1.1  christos #  define INIT_FAIL_STACK()						\
   1592  1.1  christos   do {									\
   1593  1.1  christos     fail_stack.avail = 0;					\
   1594  1.1  christos   } while (0)
   1595  1.1  christos 
   1596  1.1  christos #  define RESET_FAIL_STACK()
   1597  1.1  christos # endif
   1598  1.1  christos 
   1599  1.1  christos 
   1600  1.1  christos /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
   1601  1.1  christos 
   1602  1.1  christos    Return 1 if it succeeds, and 0 if either we ran out of memory
   1603  1.1  christos    allocating space for it or it was already too large.
   1604  1.1  christos 
   1605  1.1  christos    REGEX_REALLOCATE_STACK requires `destination' be declared.

                          The size limit (re_max_failures * MAX_FAILURE_ITEMS) is tested
                          against the current size, before doubling, so the stack can end up
                          larger than that product.  The result of REGEX_REALLOCATE_STACK is
                          stored into (fail_stack).stack before it is tested, so when 0 is
                          returned for lack of memory that member is already NULL.  */
   1606  1.1  christos 
   1607  1.1  christos # define DOUBLE_FAIL_STACK(fail_stack)					\
   1608  1.1  christos   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   1609  1.1  christos    ? 0									\
   1610  1.1  christos    : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
   1611  1.1  christos         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
   1612  1.1  christos           (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
   1613  1.1  christos           ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
   1614  1.1  christos 									\
   1615  1.1  christos       (fail_stack).stack == NULL					\
   1616  1.1  christos       ? 0								\
   1617  1.1  christos       : ((fail_stack).size <<= 1, 					\
   1618  1.1  christos          1)))
   1619  1.1  christos 
   1620  1.1  christos 
   1621  1.1  christos /* Push pointer POINTER on FAIL_STACK.
   1622  1.1  christos    Return 1 if able to do so and 0 if the stack was full and
   1623  1.1  christos    DOUBLE_FAIL_STACK could not enlarge it.
                          NOTE(review): the fullness test goes through FAIL_STACK_FULL (),
                          which inspects the variable named `fail_stack' rather than the
                          FAIL_STACK argument; the two agree only when the caller passes
                          that variable.  */
   1624  1.1  christos # define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
   1625  1.1  christos   ((FAIL_STACK_FULL ()							\
   1626  1.1  christos     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   1627  1.1  christos    ? 0									\
   1628  1.1  christos    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
   1629  1.1  christos       1))
   1630  1.1  christos 
   1631  1.1  christos /* Push a pointer value onto the failure stack.
   1632  1.1  christos    Assumes the variable `fail_stack'.  Probably should only
   1633  1.1  christos    be called from within `PUSH_FAILURE_POINT'.
                          None of the three push macros checks for free space; the caller
                          must have made room beforehand.  */
   1634  1.1  christos # define PUSH_FAILURE_POINTER(item)					\
   1635  1.1  christos   fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
   1636  1.1  christos 
   1637  1.1  christos /* This pushes an integer-valued item onto the failure stack.
   1638  1.1  christos    Assumes the variable `fail_stack'.  Probably should only
   1639  1.1  christos    be called from within `PUSH_FAILURE_POINT'.  */
   1640  1.1  christos # define PUSH_FAILURE_INT(item)					\
   1641  1.1  christos   fail_stack.stack[fail_stack.avail++].integer = (item)
   1642  1.1  christos 
   1643  1.1  christos /* Push a fail_stack_elt_t value onto the failure stack.
   1644  1.1  christos    Assumes the variable `fail_stack'.  Probably should only
   1645  1.1  christos    be called from within `PUSH_FAILURE_POINT'.  */
   1646  1.1  christos # define PUSH_FAILURE_ELT(item)					\
   1647  1.1  christos   fail_stack.stack[fail_stack.avail++] =  (item)
   1648  1.1  christos 
   1649  1.1  christos /* These three POP... operations complement the three PUSH... operations.
   1650  1.1  christos    All assume that `fail_stack' is nonempty; each decrements `avail'
                          and yields the element (or the named member of it) now on top.  */
   1651  1.1  christos # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
   1652  1.1  christos # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
   1653  1.1  christos # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
   1654  1.1  christos 
   1655  1.1  christos /* Used to omit pushing failure point id's when we're not debugging:
                          with DEBUG the id is pushed and popped as an integer item,
                          otherwise both macros expand to nothing.  */
   1656  1.1  christos # ifdef DEBUG
   1657  1.1  christos #  define DEBUG_PUSH PUSH_FAILURE_INT
   1658  1.1  christos #  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
   1659  1.1  christos # else
   1660  1.1  christos #  define DEBUG_PUSH(item)
   1661  1.1  christos #  define DEBUG_POP(item_addr)
   1662  1.1  christos # endif
   1663  1.1  christos 
   1664  1.1  christos 
   1665  1.1  christos /* Push the information about the state we will need
   1666  1.1  christos    if we ever fail back to it.  Items go on the stack in this order:
                          for each register from lowest_active_reg through
                          highest_active_reg, its regstart, regend and reg_info word; then
                          lowest_active_reg, highest_active_reg, PATTERN_PLACE,
                          STRING_PLACE and, through DEBUG_PUSH, the failure id.
   1667  1.1  christos 
   1668  1.1  christos    Requires variables fail_stack, regstart, regend, reg_info,
   1669  1.1  christos    lowest_active_reg and highest_active_reg be declared; failure_id,
   1670  1.1  christos    nfailure_points_pushed and num_regs_pushed are referenced only
                          through the DEBUG_* macros.  The `destination' variable that
                          DOUBLE_FAIL_STACK requires is declared by this macro itself.
   1671  1.1  christos 
   1672  1.1  christos    Does `return FAILURE_CODE' if the stack cannot be enlarged.  */
   1673  1.1  christos 
   1674  1.1  christos # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
   1675  1.1  christos   do {									\
   1676  1.1  christos     char *destination;							\
   1677  1.1  christos     /* Must be int, so when we don't save any registers, the arithmetic	\
   1678  1.1  christos        of 0 + -1 isn't done as unsigned.  */				\
   1679  1.1  christos     /* Can't be int, since there is not a shred of a guarantee that int	\
   1680  1.1  christos        is wide enough to hold a value of something to which pointer can	\
   1681  1.1  christos        be assigned */							\
   1682  1.1  christos     active_reg_t this_reg;						\
   1683  1.1  christos     									\
   1684  1.1  christos     DEBUG_STATEMENT (failure_id++);					\
   1685  1.1  christos     DEBUG_STATEMENT (nfailure_points_pushed++);				\
   1686  1.1  christos     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
   1687  1.1  christos     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
   1688  1.1  christos     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
   1689  1.1  christos 									\
   1690  1.1  christos     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
   1691  1.1  christos     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
   1692  1.1  christos 									\
   1693  1.1  christos     /* Ensure we have enough space allocated for what we will push.  */	\
   1694  1.1  christos     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
   1695  1.1  christos       {									\
   1696  1.1  christos         if (!DOUBLE_FAIL_STACK (fail_stack))				\
   1697  1.1  christos           return failure_code;						\
   1698  1.1  christos 									\
   1699  1.1  christos         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
   1700  1.1  christos 		       (fail_stack).size);				\
   1701  1.1  christos         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
   1702  1.1  christos       }									\
   1703  1.1  christos 									\
   1704  1.1  christos     /* Push the info, starting with the registers.  */			\
   1705  1.1  christos     DEBUG_PRINT1 ("\n");						\
   1706  1.1  christos 									\
   1707  1.1  christos     if (1)								\
   1708  1.1  christos       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
   1709  1.1  christos 	   this_reg++)							\
   1710  1.1  christos 	{								\
   1711  1.1  christos 	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
   1712  1.1  christos 	  DEBUG_STATEMENT (num_regs_pushed++);				\
   1713  1.1  christos 									\
   1714  1.1  christos 	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
   1715  1.1  christos 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
   1716  1.1  christos 									\
   1717  1.1  christos 	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
   1718  1.1  christos 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
   1719  1.1  christos 									\
   1720  1.1  christos 	  DEBUG_PRINT2 ("    info: %p\n      ",				\
   1721  1.1  christos 			reg_info[this_reg].word.pointer);		\
   1722  1.1  christos 	  DEBUG_PRINT2 (" match_null=%d",				\
   1723  1.1  christos 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
   1724  1.1  christos 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
   1725  1.1  christos 	  DEBUG_PRINT2 (" matched_something=%d",			\
   1726  1.1  christos 			MATCHED_SOMETHING (reg_info[this_reg]));	\
   1727  1.1  christos 	  DEBUG_PRINT2 (" ever_matched=%d",				\
   1728  1.1  christos 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
   1729  1.1  christos 	  DEBUG_PRINT1 ("\n");						\
   1730  1.1  christos 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
   1731  1.1  christos 	}								\
   1732  1.1  christos 									\
   1733  1.1  christos     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
   1734  1.1  christos     PUSH_FAILURE_INT (lowest_active_reg);				\
   1735  1.1  christos 									\
   1736  1.1  christos     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
   1737  1.1  christos     PUSH_FAILURE_INT (highest_active_reg);				\
   1738  1.1  christos 									\
   1739  1.1  christos     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
   1740  1.1  christos     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
   1741  1.1  christos     PUSH_FAILURE_POINTER (pattern_place);				\
   1742  1.1  christos 									\
   1743  1.1  christos     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
   1744  1.1  christos     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
   1745  1.1  christos 				 size2);				\
   1746  1.1  christos     DEBUG_PRINT1 ("'\n");						\
   1747  1.1  christos     PUSH_FAILURE_POINTER (string_place);				\
   1748  1.1  christos 									\
   1749  1.1  christos     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
   1750  1.1  christos     DEBUG_PUSH (failure_id);						\
   1751  1.1  christos   } while (0)
   1752  1.1  christos 
   1753  1.1  christos # ifndef DEFINED_ONCE
   1754  1.1  christos /* This is the number of items that are pushed and popped on the stack
   1755  1.1  christos    for each register: its regstart, regend and reg_info word.  */
   1756  1.1  christos #  define NUM_REG_ITEMS  3
   1757  1.1  christos 
   1758  1.1  christos /* Individual items aside from the registers: lowest and highest active
                          register, pattern position and string position.  */
   1759  1.1  christos #  ifdef DEBUG
   1760  1.1  christos #   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
   1761  1.1  christos #  else
   1762  1.1  christos #   define NUM_NONREG_ITEMS 4
   1763  1.1  christos #  endif
   1764  1.1  christos 
   1765  1.1  christos /* We push at most this many items on the stack.  */
   1766  1.1  christos /* We used to use (num_regs - 1), which is the number of registers
   1767  1.1  christos    this regexp will save; but that was changed to 5
   1768  1.1  christos    to avoid stack overflow for a regexp with lots of parens.  */
   1769  1.1  christos #  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
   1770  1.1  christos 
   1771  1.1  christos /* We actually push this many items.  The `0 ? 0 : ...' conditional
                          always selects its second arm, so the register count is
                          highest_active_reg - lowest_active_reg + 1.  */
   1772  1.1  christos #  define NUM_FAILURE_ITEMS				\
   1773  1.1  christos   (((0							\
   1774  1.1  christos      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
   1775  1.1  christos     * NUM_REG_ITEMS)					\
   1776  1.1  christos    + NUM_NONREG_ITEMS)
   1777  1.1  christos 
   1778  1.1  christos /* How many items can still be added to the stack without overflowing it.  */
   1779  1.1  christos #  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
   1780  1.1  christos # endif /* not DEFINED_ONCE */
   1781  1.1  christos 
   1782  1.1  christos 
   1783  1.1  christos /* Pops what PUSH_FAILURE_POINT pushes.
   1784  1.1  christos 
   1785  1.1  christos    We restore into the parameters, all of which should be lvalues:
   1786  1.1  christos      STR -- the saved data position.
   1787  1.1  christos      PAT -- the saved pattern position.
   1788  1.1  christos      LOW_REG, HIGH_REG -- the lowest and highest active registers.
   1789  1.1  christos      REGSTART, REGEND -- arrays of string positions.
   1790  1.1  christos      REG_INFO -- array of information about each subexpression.
   1791  1.1  christos 
   1792  1.1  christos    Also assumes the variables `fail_stack' and (if debugging), `bufp',
   1793  1.1  christos    `pend', `string1', `size1', `string2', and `size2'.  */
   1794  1.1  christos # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
   1795  1.1  christos {									\
   1796  1.1  christos   DEBUG_STATEMENT (unsigned failure_id;)				\
   1797  1.1  christos   active_reg_t this_reg;						\
   1798  1.1  christos   const UCHAR_T *string_temp;						\
   1799  1.1  christos 									\
   1800  1.1  christos   assert (!FAIL_STACK_EMPTY ());					\
   1801  1.1  christos 									\
   1802  1.1  christos   /* Remove failure points and point to how many regs pushed.  */	\
   1803  1.1  christos   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
   1804  1.1  christos   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
   1805  1.1  christos   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
   1806  1.1  christos 									\
   1807  1.1  christos   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
   1808  1.1  christos 									\
   1809  1.1  christos   DEBUG_POP (&failure_id);						\
   1810  1.1  christos   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
   1811  1.1  christos 									\
   1812  1.1  christos   /* If the saved string location is NULL, it came from an		\
   1813  1.1  christos      on_failure_keep_string_jump opcode, and we want to throw away the	\
   1814  1.1  christos      saved NULL, thus retaining our current position in the string.  */	\
   1815  1.1  christos   string_temp = POP_FAILURE_POINTER ();					\
   1816  1.1  christos   if (string_temp != NULL)						\
   1817  1.1  christos     str = (const CHAR_T *) string_temp;					\
   1818  1.1  christos 									\
   1819  1.1  christos   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   1820  1.1  christos   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   1821  1.1  christos   DEBUG_PRINT1 ("'\n");							\
   1822  1.1  christos 									\
   1823  1.1  christos   pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
   1824  1.1  christos   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
   1825  1.1  christos   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
   1826  1.1  christos 									\
   1827  1.1  christos   /* Restore register info.  */						\
   1828  1.1  christos   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1829  1.1  christos   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
   1830  1.1  christos 									\
   1831  1.1  christos   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
   1832  1.1  christos   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
   1833  1.1  christos 									\
   1834  1.1  christos   if (1)								\
   1835  1.1  christos     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
   1836  1.1  christos       {									\
   1837  1.1  christos 	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
   1838  1.1  christos 									\
   1839  1.1  christos 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
   1840  1.1  christos 	DEBUG_PRINT2 ("      info: %p\n",				\
   1841  1.1  christos 		      reg_info[this_reg].word.pointer);			\
   1842  1.1  christos 									\
   1843  1.1  christos 	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1844  1.1  christos 	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
   1845  1.1  christos 									\
   1846  1.1  christos 	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
   1847  1.1  christos 	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
   1848  1.1  christos       }									\
   1849  1.1  christos   else									\
   1850  1.1  christos     {									\
   1851  1.1  christos       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
   1852  1.1  christos 	{								\
   1853  1.1  christos 	  reg_info[this_reg].word.integer = 0;				\
   1854  1.1  christos 	  regend[this_reg] = 0;						\
   1855  1.1  christos 	  regstart[this_reg] = 0;					\
   1856  1.1  christos 	}								\
   1857  1.1  christos       highest_active_reg = high_reg;					\
   1858  1.1  christos     }									\
   1859  1.1  christos 									\
   1860  1.1  christos   set_regs_matched_done = 0;						\
   1861  1.1  christos   DEBUG_STATEMENT (nfailure_points_popped++);				\
   1862  1.1  christos } /* POP_FAILURE_POINT */
   1863  1.1  christos 
   1864  1.1  christos /* Structure for per-register (a.k.a. per-group) information.
   1866  1.1  christos    Other register information, such as the
   1867  1.1  christos    starting and ending positions (which are addresses), and the list of
   1868  1.1  christos    inner groups (which is a bits list) are maintained in separate
   1869  1.1  christos    variables.
   1870  1.1  christos 
   1871  1.1  christos    We are making a (strictly speaking) nonportable assumption here: that
   1872  1.1  christos    the compiler will pack our bit fields into something that fits into
   1873  1.1  christos    the type of `word', i.e., is something that fits into one item on the
   1874  1.1  christos    failure stack.  */
   1875  1.1  christos 
   1876  1.1  christos 
   1877  1.1  christos /* Declarations and macros for re_match_2.  */
   1878  1.1  christos 
                       /* Per-register information, viewable either as one failure-stack
                          element (`word') or as individual flag bits (`bits').  */
   1879  1.1  christos typedef union
   1880  1.1  christos {
                         /* Whole-value view: POP_FAILURE_POINT restores a register's
                            information by assigning a popped stack element to this member.  */
   1881  1.1  christos   PREFIX(fail_stack_elt_t) word;
   1882  1.1  christos   struct
   1883  1.1  christos   {
   1884  1.1  christos       /* This field is one if this group can match the empty string,
   1885  1.1  christos          zero if not.  If not yet determined, `MATCH_NULL_UNSET_VALUE'.  */
   1886  1.1  christos # define MATCH_NULL_UNSET_VALUE 3
   1887  1.1  christos     unsigned match_null_string_p : 2;
   1888  1.1  christos     unsigned is_active : 1;
                           /* The next two flags are both set to 1 by SET_REGS_MATCHED for
                              every register from `lowest_active_reg' to `highest_active_reg'.  */
   1889  1.1  christos     unsigned matched_something : 1;
   1890  1.1  christos     unsigned ever_matched_something : 1;
   1891  1.1  christos   } bits;
   1892  1.1  christos } PREFIX(register_info_type);
   1893  1.1  christos 
   1894  1.1  christos # ifndef DEFINED_ONCE
   1895  1.1  christos #  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
   1896  1.1  christos #  define IS_ACTIVE(R)  ((R).bits.is_active)
   1897  1.1  christos #  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
   1898  1.1  christos #  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
   1899  1.1  christos 
   1900  1.1  christos 
   1901  1.1  christos /* Call this when have matched a real character; it sets `matched' flags
   1902  1.1  christos    for the subexpressions which we are currently inside.  Also records
   1903  1.1  christos    that those subexprs have matched.  */
   1904  1.1  christos #  define SET_REGS_MATCHED()						\
   1905  1.1  christos   do									\
   1906  1.1  christos     {									\
   1907  1.1  christos       if (!set_regs_matched_done)					\
   1908  1.1  christos 	{								\
   1909  1.1  christos 	  active_reg_t r;						\
   1910  1.1  christos 	  set_regs_matched_done = 1;					\
   1911  1.1  christos 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
   1912  1.1  christos 	    {								\
   1913  1.1  christos 	      MATCHED_SOMETHING (reg_info[r])				\
   1914  1.1  christos 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
   1915  1.1  christos 		= 1;							\
   1916  1.1  christos 	    }								\
   1917  1.1  christos 	}								\
   1918  1.1  christos     }									\
   1919  1.1  christos   while (0)
   1920  1.1  christos # endif /* not DEFINED_ONCE */
   1921  1.1  christos 
   1922  1.1  christos /* Registers are set to a sentinel when they haven't yet matched.  */
   1923  1.1  christos static CHAR_T PREFIX(reg_unset_dummy);
   1924  1.1  christos # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
   1925  1.1  christos # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
   1926  1.1  christos 
   1927  1.1  christos /* Subroutine declarations and macros for regex_compile.  */
   1928  1.1  christos static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
   1929  1.1  christos static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
   1930  1.1  christos 				 int arg1, int arg2));
   1931  1.1  christos static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
   1932  1.1  christos 				  int arg, UCHAR_T *end));
   1933  1.1  christos static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
   1934  1.1  christos 				  int arg1, int arg2, UCHAR_T *end));
   1935  1.1  christos static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
   1936  1.1  christos 					   const CHAR_T *p,
   1937  1.1  christos 					   reg_syntax_t syntax));
   1938  1.1  christos static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
   1939  1.1  christos 					   const CHAR_T *pend,
   1940  1.1  christos 					   reg_syntax_t syntax));
   1941  1.1  christos # ifdef WCHAR
   1942  1.1  christos static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
   1943  1.1  christos 						  const CHAR_T **p_ptr,
   1944  1.1  christos 						  const CHAR_T *pend,
   1945  1.1  christos 						  char *translate,
   1946  1.1  christos 						  reg_syntax_t syntax,
   1947  1.1  christos 						  UCHAR_T *b,
   1948  1.1  christos 						  CHAR_T *char_set));
   1949  1.1  christos static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end));
   1950  1.1  christos # else /* BYTE */
   1951  1.1  christos static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
   1952  1.1  christos 						   const char **p_ptr,
   1953  1.1  christos 						   const char *pend,
   1954  1.1  christos 						   char *translate,
   1955  1.1  christos 						   reg_syntax_t syntax,
   1956  1.1  christos 						   unsigned char *b));
   1957  1.1  christos # endif /* WCHAR */
   1958  1.1  christos 
   1959  1.1  christos /* Fetch the next character in the uncompiled pattern---translating it
   1960  1.1  christos    if necessary.  Also cast from a signed character in the constant
   1961  1.1  christos    string passed to us by the user to an unsigned char that we can use
   1962  1.1  christos    as an array index (in, e.g., `translate').  */
   1963  1.1  christos /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1964  1.1  christos    because it is impossible to allocate 4GB array for some encodings
   1965  1.1  christos    which have 4 byte character_set like UCS4.  */
   1966  1.1  christos # ifndef PATFETCH
   1967  1.1  christos #  ifdef WCHAR
   1968  1.1  christos #   define PATFETCH(c)							\
   1969  1.1  christos   do {if (p == pend) return REG_EEND;					\
   1970  1.1  christos     c = (UCHAR_T) *p++;							\
   1971  1.1  christos     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
   1972  1.1  christos   } while (0)
   1973  1.1  christos #  else /* BYTE */
   1974  1.1  christos #   define PATFETCH(c)							\
   1975  1.1  christos   do {if (p == pend) return REG_EEND;					\
   1976  1.1  christos     c = (unsigned char) *p++;						\
   1977  1.1  christos     if (translate) c = (unsigned char) translate[c];			\
   1978  1.1  christos   } while (0)
   1979  1.1  christos #  endif /* WCHAR */
   1980  1.1  christos # endif
   1981  1.1  christos 
   1982  1.1  christos /* Fetch the next character in the uncompiled pattern, with no
   1983  1.1  christos    translation.  */
   1984  1.1  christos # define PATFETCH_RAW(c)						\
   1985  1.1  christos   do {if (p == pend) return REG_EEND;					\
   1986  1.1  christos     c = (UCHAR_T) *p++; 	       					\
   1987  1.1  christos   } while (0)
   1988  1.1  christos 
   1989  1.1  christos /* Go backwards one character in the pattern.  */
   1990  1.1  christos # define PATUNFETCH p--
   1991  1.1  christos 
   1992  1.1  christos 
   1993  1.1  christos /* If `translate' is non-null, return translate[D], else just D.  We
   1994  1.1  christos    cast the subscript to translate because some data is declared as
   1995  1.1  christos    `char *', to avoid warnings when a string constant is passed.  But
   1996  1.1  christos    when we use a character as a subscript we must make it unsigned.  */
   1997  1.1  christos /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   1998  1.1  christos    because it is impossible to allocate 4GB array for some encodings
   1999  1.1  christos    which have 4 byte character_set like UCS4.  */
   2000  1.1  christos 
   2001  1.1  christos # ifndef TRANSLATE
   2002  1.1  christos #  ifdef WCHAR
   2003  1.1  christos #   define TRANSLATE(d) \
   2004  1.1  christos   ((translate && ((UCHAR_T) (d)) <= 0xff) \
   2005  1.1  christos    ? (char) translate[(unsigned char) (d)] : (d))
   2006  1.1  christos # else /* BYTE */
   2007  1.1  christos #   define TRANSLATE(d) \
   2008  1.1  christos   (translate ? (char) translate[(unsigned char) (d)] : (d))
   2009  1.1  christos #  endif /* WCHAR */
   2010  1.1  christos # endif
   2011  1.1  christos 
   2012  1.1  christos 
   2013  1.1  christos /* Macros for outputting the compiled pattern into `buffer'.  */
   2014  1.1  christos 
   2015  1.1  christos /* If the buffer isn't allocated when it comes in, use this.  */
   2016  1.1  christos # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
   2017  1.1  christos 
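   2018  1.1  christos /* Make sure the buffer has room for at least N more elements,
                          extending it as needed.  The WCHAR variant scales N by
                          sizeof(CHAR_T) before comparing against `bufp->allocated'.  */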
   2019  1.1  christos # ifdef WCHAR
   2020  1.1  christos #  define GET_BUFFER_SPACE(n)						\
   2021  1.1  christos     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
   2022  1.1  christos             + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
   2023  1.1  christos       EXTEND_BUFFER ()
   2024  1.1  christos # else /* BYTE */
   2025  1.1  christos #  define GET_BUFFER_SPACE(n)						\
   2026  1.1  christos     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
   2027  1.1  christos       EXTEND_BUFFER ()
   2028  1.1  christos # endif /* WCHAR */
   2029  1.1  christos 
   2030  1.1  christos /* Make sure we have one more byte of buffer space and then add C to it.  */
   2031  1.1  christos # define BUF_PUSH(c)							\
   2032  1.1  christos   do {									\
   2033  1.1  christos     GET_BUFFER_SPACE (1);						\
   2034  1.1  christos     *b++ = (UCHAR_T) (c);						\
   2035  1.1  christos   } while (0)
   2036  1.1  christos 
   2037  1.1  christos 
   2038  1.1  christos /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
   2039  1.1  christos # define BUF_PUSH_2(c1, c2)						\
   2040  1.1  christos   do {									\
   2041  1.1  christos     GET_BUFFER_SPACE (2);						\
   2042  1.1  christos     *b++ = (UCHAR_T) (c1);						\
   2043  1.1  christos     *b++ = (UCHAR_T) (c2);						\
   2044  1.1  christos   } while (0)
   2045  1.1  christos 
   2046  1.1  christos 
   2047  1.1  christos /* As with BUF_PUSH_2, except for three bytes.  */
   2048  1.1  christos # define BUF_PUSH_3(c1, c2, c3)						\
   2049  1.1  christos   do {									\
   2050  1.1  christos     GET_BUFFER_SPACE (3);						\
   2051  1.1  christos     *b++ = (UCHAR_T) (c1);						\
   2052  1.1  christos     *b++ = (UCHAR_T) (c2);						\
   2053  1.1  christos     *b++ = (UCHAR_T) (c3);						\
   2054  1.1  christos   } while (0)
   2055  1.1  christos 
   2056  1.1  christos /* Store a jump with opcode OP at LOC to location TO.  We store a
   2057  1.1  christos    relative address offset by the (1 + OFFSET_ADDRESS_SIZE) elements
                          the jump itself occupies.  */
   2058  1.1  christos # define STORE_JUMP(op, loc, to) \
   2059  1.1  christos  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
   2060  1.1  christos 
   2061  1.1  christos /* Likewise, for a two-argument jump.  */
   2062  1.1  christos # define STORE_JUMP2(op, loc, to, arg) \
   2063  1.1  christos   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
   2064  1.1  christos 
   2065  1.1  christos /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
   2066  1.1  christos # define INSERT_JUMP(op, loc, to) \
   2067  1.1  christos   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
   2068  1.1  christos 
   2069  1.1  christos /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
   2070  1.1  christos # define INSERT_JUMP2(op, loc, to, arg) \
   2071  1.1  christos   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
   2072  1.1  christos 	      arg, b)
   2073  1.1  christos 
   2074  1.1  christos /* This is not an arbitrary limit: the arguments which represent offsets
   2075  1.1  christos    into the pattern are two bytes long.  So if 2^16 bytes turns out to
   2076  1.1  christos    be too small, many things would have to change.  */
   2077  1.1  christos /* Any other compiler which, like MSC, has allocation limit below 2^16
   2078  1.1  christos    bytes will have to use approach similar to what was done below for
   2079  1.1  christos    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   2080  1.1  christos    reallocating to 0 bytes.  Such thing is not going to work too well.
   2081  1.1  christos    You have been warned!!  */
   2082  1.1  christos # ifndef DEFINED_ONCE
   2083  1.1  christos #  if defined _MSC_VER  && !defined WIN32
   2084  1.1  christos /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   2085  1.1  christos    The REALLOC define eliminates a flurry of conversion warnings,
   2086  1.1  christos    but is not required. */
   2087  1.1  christos #   define MAX_BUF_SIZE  65500L
   2088  1.1  christos #   define REALLOC(p,s) realloc ((p), (size_t) (s))
   2089  1.1  christos #  else
   2090  1.1  christos #   define MAX_BUF_SIZE (1L << 16)
   2091  1.1  christos #   define REALLOC(p,s) realloc ((p), (s))
   2092  1.1  christos #  endif
   2093  1.1  christos 
   2094  1.1  christos /* Extend the buffer to twice its current size (but no larger than
   2095  1.1  christos    MAX_BUF_SIZE) via realloc and reset the pointers that pointed into
   2096  1.1  christos    the old block to point to the correct places in the new one.  If the
   2097  1.1  christos    buffer is already at its size limit, then flag REG_ESIZE.  */
   2098  1.1  christos #  if __BOUNDED_POINTERS__
   2099  1.1  christos #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
   2100  1.1  christos #   define MOVE_BUFFER_POINTER(P) \
   2101  1.1  christos   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
   2102  1.1  christos #   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
   2103  1.1  christos   else						\
   2104  1.1  christos     {						\
   2105  1.1  christos       SET_HIGH_BOUND (b);			\
   2106  1.1  christos       SET_HIGH_BOUND (begalt);			\
   2107  1.1  christos       if (fixup_alt_jump)			\
   2108  1.1  christos 	SET_HIGH_BOUND (fixup_alt_jump);	\
   2109  1.1  christos       if (laststart)				\
   2110  1.1  christos 	SET_HIGH_BOUND (laststart);		\
   2111  1.1  christos       if (pending_exact)			\
   2112  1.1  christos 	SET_HIGH_BOUND (pending_exact);		\
   2113  1.1  christos     }
   2114  1.1  christos #  else
   2115  1.1  christos #   define MOVE_BUFFER_POINTER(P) (P) += incr
   2116  1.1  christos #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
   2117  1.1  christos #  endif
   2118  1.1  christos # endif /* not DEFINED_ONCE */
   2119  1.1  christos 
   2120  1.1  christos # ifdef WCHAR
   2121  1.1  christos #  define EXTEND_BUFFER()						\
   2122  1.1  christos   do {									\
   2123  1.1  christos     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
   2124  1.1  christos     int wchar_count;							\
   2125  1.1  christos     if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
   2126  1.1  christos       return REG_ESIZE;							\
   2127  1.1  christos     bufp->allocated <<= 1;						\
   2128  1.1  christos     if (bufp->allocated > MAX_BUF_SIZE)					\
   2129  1.1  christos       bufp->allocated = MAX_BUF_SIZE;					\
   2130  1.1  christos     /* How many characters the new buffer can have?  */			\
   2131  1.1  christos     wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
   2132  1.1  christos     if (wchar_count == 0) wchar_count = 1;				\
   2133  1.1  christos     /* Truncate the buffer to CHAR_T align.  */			\
   2134  1.1  christos     bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
   2135  1.1  christos     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
   2136  1.1  christos     bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
   2137  1.1  christos     if (COMPILED_BUFFER_VAR == NULL)					\
   2138  1.1  christos       return REG_ESPACE;						\
   2139  1.1  christos     /* If the buffer moved, move all the pointers into it.  */		\
   2140  1.1  christos     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2141  1.1  christos       {									\
   2142  1.1  christos 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2143  1.1  christos 	MOVE_BUFFER_POINTER (b);					\
   2144  1.1  christos 	MOVE_BUFFER_POINTER (begalt);					\
   2145  1.1  christos 	if (fixup_alt_jump)						\
   2146  1.1  christos 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2147  1.1  christos 	if (laststart)							\
   2148  1.1  christos 	  MOVE_BUFFER_POINTER (laststart);				\
   2149  1.1  christos 	if (pending_exact)						\
   2150  1.1  christos 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2151  1.1  christos       }									\
   2152  1.1  christos     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2153  1.1  christos   } while (0)
   2154  1.1  christos # else /* BYTE */
   2155  1.1  christos #  define EXTEND_BUFFER()						\
   2156  1.1  christos   do {									\
   2157  1.1  christos     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
   2158  1.1  christos     if (bufp->allocated == MAX_BUF_SIZE)				\
   2159  1.1  christos       return REG_ESIZE;							\
   2160  1.1  christos     bufp->allocated <<= 1;						\
   2161  1.1  christos     if (bufp->allocated > MAX_BUF_SIZE)					\
   2162  1.1  christos       bufp->allocated = MAX_BUF_SIZE;					\
   2163  1.1  christos     bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
   2164  1.1  christos 						bufp->allocated);	\
   2165  1.1  christos     if (COMPILED_BUFFER_VAR == NULL)					\
   2166  1.1  christos       return REG_ESPACE;						\
   2167  1.1  christos     /* If the buffer moved, move all the pointers into it.  */		\
   2168  1.1  christos     if (old_buffer != COMPILED_BUFFER_VAR)				\
   2169  1.1  christos       {									\
   2170  1.1  christos 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
   2171  1.1  christos 	MOVE_BUFFER_POINTER (b);					\
   2172  1.1  christos 	MOVE_BUFFER_POINTER (begalt);					\
   2173  1.1  christos 	if (fixup_alt_jump)						\
   2174  1.1  christos 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
   2175  1.1  christos 	if (laststart)							\
   2176  1.1  christos 	  MOVE_BUFFER_POINTER (laststart);				\
   2177  1.1  christos 	if (pending_exact)						\
   2178  1.1  christos 	  MOVE_BUFFER_POINTER (pending_exact);				\
   2179  1.1  christos       }									\
   2180  1.1  christos     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
   2181  1.1  christos   } while (0)
   2182  1.1  christos # endif /* WCHAR */
   2183  1.1  christos 
   2184  1.1  christos # ifndef DEFINED_ONCE
   2185  1.1  christos /* Since we have one byte reserved for the register number argument to
   2186  1.1  christos    {start,stop}_memory, the maximum number of groups we can report
   2187  1.1  christos    things about is what fits in that byte.  */
   2188  1.1  christos #  define MAX_REGNUM 255
   2189  1.1  christos 
   2190  1.1  christos /* But patterns can have more than `MAX_REGNUM' registers.  We just
   2191  1.1  christos    ignore the excess.  */
   2192  1.1  christos typedef unsigned regnum_t;
   2193  1.1  christos 
   2194  1.1  christos 
   2195  1.1  christos /* Macros for the compile stack.  */
   2196  1.1  christos 
   2197  1.1  christos /* Since offsets can go either forwards or backwards, this type needs to
   2198  1.1  christos    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
   2199  1.1  christos /* int may be not enough when sizeof(int) == 2.  */
   2200  1.1  christos typedef long pattern_offset_t;
   2201  1.1  christos 
   2202  1.1  christos typedef struct
   2203  1.1  christos {
   2204  1.1  christos   pattern_offset_t begalt_offset;
   2205  1.1  christos   pattern_offset_t fixup_alt_jump;
   2206  1.1  christos   pattern_offset_t inner_group_offset;
   2207  1.1  christos   pattern_offset_t laststart_offset;
   2208  1.1  christos   regnum_t regnum;
   2209  1.1  christos } compile_stack_elt_t;
   2210  1.1  christos 
   2211  1.1  christos 
   2212  1.1  christos typedef struct
   2213  1.1  christos {
   2214  1.1  christos   compile_stack_elt_t *stack;
   2215  1.1  christos   unsigned size;
   2216  1.1  christos   unsigned avail;			/* Offset of next open position.  */
   2217  1.1  christos } compile_stack_type;
   2218  1.1  christos 
   2219  1.1  christos 
   2220  1.1  christos #  define INIT_COMPILE_STACK_SIZE 32
   2221  1.1  christos 
   2222  1.1  christos #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
   2223  1.1  christos #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
   2224  1.1  christos 
   2225  1.1  christos /* The next available element.  */
   2226  1.1  christos #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
   2227  1.1  christos 
   2228  1.1  christos # endif /* not DEFINED_ONCE */
   2229  1.1  christos 
   2230  1.1  christos /* Set the bit for character C in a list.  */
   2231  1.1  christos # ifndef DEFINED_ONCE
   2232  1.1  christos #  define SET_LIST_BIT(c)                               \
   2233  1.1  christos   (b[((unsigned char) (c)) / BYTEWIDTH]               \
   2234  1.1  christos    |= 1 << (((unsigned char) c) % BYTEWIDTH))
   2235  1.1  christos # endif /* DEFINED_ONCE */
   2236  1.1  christos 
   2237  1.1  christos /* Get the next unsigned number in the uncompiled pattern.

                          NUM must be an integer lvalue.  Characters are read with PATFETCH
                          into `c' until a non-digit is fetched or `p' reaches `pend'; after
                          a non-digit, `c' holds that character and `p' already points past
                          it.  A negative NUM is reset to 0 before the first digit is added,
                          so NUM keeps its entry value when no digit is seen.  Once NUM
                          exceeds RE_DUP_MAX, further digits are consumed but no longer
                          accumulated.

                          Expands to a bare brace block, not a do/while (0) statement.  */
   2238  1.1  christos # define GET_UNSIGNED_NUMBER(num) \
   2239  1.1  christos   {									\
   2240  1.1  christos     while (p != pend)							\
   2241  1.1  christos       {									\
   2242  1.1  christos 	PATFETCH (c);							\
   2243  1.1  christos 	if (c < '0' || c > '9')						\
   2244  1.1  christos 	  break;							\
   2245  1.1  christos 	if (num <= RE_DUP_MAX)						\
   2246  1.1  christos 	  {								\
   2247  1.1  christos 	    if (num < 0)						\
   2248  1.1  christos 	      num = 0;							\
   2249  1.1  christos 	    num = num * 10 + c - '0';					\
   2250  1.1  christos 	  }								\
   2251  1.1  christos       }									\
   2252  1.1  christos   }
   2253  1.1  christos 
   2254  1.1  christos # ifndef DEFINED_ONCE
   2255  1.1  christos #  if defined _LIBC || WIDE_CHAR_SUPPORT
   2256  1.1  christos /* The GNU C library provides support for user-defined character classes
   2257  1.1  christos    and the functions from ISO C amendment 1.  */
   2258  1.1  christos #   ifdef CHARCLASS_NAME_MAX
   2259  1.1  christos #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
   2260  1.1  christos #   else
   2261  1.1  christos /* This shouldn't happen but some implementation might still have this
   2262  1.1  christos    problem.  Use a reasonable default value.  */
   2263  1.1  christos #    define CHAR_CLASS_MAX_LENGTH 256
   2264  1.1  christos #   endif
   2265  1.1  christos 
   2266  1.1  christos #   ifdef _LIBC
   2267  1.1  christos #    define IS_CHAR_CLASS(string) __wctype (string)
   2268  1.1  christos #   else
   2269  1.1  christos #    define IS_CHAR_CLASS(string) wctype (string)
   2270  1.1  christos #   endif
   2271  1.1  christos #  else
   2272  1.1  christos #   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
   2273  1.1  christos 
   2274  1.1  christos #   define IS_CHAR_CLASS(string)					\
   2275  1.1  christos    (STREQ (string, "alpha") || STREQ (string, "upper")			\
   2276  1.1  christos     || STREQ (string, "lower") || STREQ (string, "digit")		\
   2277  1.1  christos     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   2278  1.1  christos     || STREQ (string, "space") || STREQ (string, "print")		\
   2279  1.1  christos     || STREQ (string, "punct") || STREQ (string, "graph")		\
   2280  1.1  christos     || STREQ (string, "cntrl") || STREQ (string, "blank"))
   2281  1.1  christos #  endif
   2282  1.1  christos # endif /* DEFINED_ONCE */
   2283  1.1  christos 
   2284  1.1  christos # ifndef MATCH_MAY_ALLOCATE
   2286  1.1  christos 
   2287  1.1  christos /* If we cannot allocate large objects within re_match_2_internal,
   2288  1.1  christos    we make the fail stack and register vectors global.
   2289  1.1  christos    The fail stack, we grow to the maximum size when a regexp
   2290  1.1  christos    is compiled.
   2291  1.1  christos    The register vectors, we adjust in size each time we
   2292  1.1  christos    compile a regexp, according to the number of registers it needs.  */
   2293  1.1  christos 
   2294  1.1  christos static PREFIX(fail_stack_type) fail_stack;
   2295  1.1  christos 
   2296  1.1  christos /* Size with which the following vectors are currently allocated.
   2297  1.1  christos    That is so we can make them bigger as needed,
   2298  1.1  christos    but never make them smaller.  */
   2299  1.1  christos #  ifdef DEFINED_ONCE
   2300  1.1  christos static int regs_allocated_size;
   2301  1.1  christos 
   2302  1.1  christos static const char **     regstart, **     regend;
   2303  1.1  christos static const char ** old_regstart, ** old_regend;
   2304  1.1  christos static const char **best_regstart, **best_regend;
   2305  1.1  christos static const char **reg_dummy;
   2306  1.1  christos #  endif /* DEFINED_ONCE */
   2307  1.1  christos 
   2308  1.1  christos static PREFIX(register_info_type) *PREFIX(reg_info);
   2309  1.1  christos static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
   2310  1.1  christos 
   2311  1.1  christos /* Make the register vectors big enough for NUM_REGS registers,
   2312  1.1  christos    but don't make them smaller.

                          When NUM_REGS exceeds `regs_allocated_size', each register vector
                          (regstart, regend, old_regstart, old_regend, best_regstart,
                          best_regend, reg_dummy) and both register-info arrays are passed
                          to RETALLOC_IF with the new element count, and
                          `regs_allocated_size' is then set to NUM_REGS.  Otherwise nothing
                          is done.  The results of RETALLOC_IF are not checked here.  */
   2313  1.1  christos 
   2314  1.1  christos static void
   2315  1.1  christos PREFIX(regex_grow_registers) (num_regs)
   2316  1.1  christos      int num_regs;
   2317  1.1  christos {
   2318  1.1  christos   if (num_regs > regs_allocated_size)
   2319  1.1  christos     {
   2320  1.1  christos       RETALLOC_IF (regstart,	 num_regs, const char *);
   2321  1.1  christos       RETALLOC_IF (regend,	 num_regs, const char *);
   2322  1.1  christos       RETALLOC_IF (old_regstart, num_regs, const char *);
   2323  1.1  christos       RETALLOC_IF (old_regend,	 num_regs, const char *);
   2324  1.1  christos       RETALLOC_IF (best_regstart, num_regs, const char *);
   2325  1.1  christos       RETALLOC_IF (best_regend,	 num_regs, const char *);
   2326  1.1  christos       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
   2327  1.1  christos       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
   2328  1.1  christos       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
   2329  1.1  christos 
   2330  1.1  christos       regs_allocated_size = num_regs;
   2331  1.1  christos     }
   2332  1.1  christos }
   2333  1.1  christos 
   2334  1.1  christos # endif /* not MATCH_MAY_ALLOCATE */
   2335  1.1  christos 
   2336  1.1  christos # ifndef DEFINED_ONCE
   2338  1.1  christos static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
   2339  1.1  christos 						 compile_stack,
   2340  1.1  christos 						 regnum_t regnum));
   2341  1.1  christos # endif /* not DEFINED_ONCE */
   2342  1.1  christos 
   2343  1.1  christos /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   2344  1.1  christos    Returns one of error codes defined in `regex.h', or zero for success.
   2345  1.1  christos 
   2346  1.1  christos    Assumes the `allocated' (and perhaps `buffer') and `translate'
   2347  1.1  christos    fields are set in BUFP on entry.
   2348  1.1  christos 
   2349  1.1  christos    If it succeeds, results are put in BUFP (if it returns an error, the
   2350  1.1  christos    contents of BUFP are undefined):
   2351  1.1  christos      `buffer' is the compiled pattern;
   2352  1.1  christos      `syntax' is set to SYNTAX;
   2353  1.1  christos      `used' is set to the length of the compiled pattern;
   2354  1.1  christos      `fastmap_accurate' is zero;
   2355  1.1  christos      `re_nsub' is the number of subexpressions in PATTERN;
   2356  1.1  christos      `not_bol' and `not_eol' are zero;
   2357  1.1  christos 
   2358  1.1  christos    The `fastmap' and `newline_anchor' fields are neither
   2359  1.1  christos    examined nor set.  */
   2360  1.1  christos 
   2361  1.1  christos /* Return VALUE from the enclosing function, freeing storage we
                          allocated.  Expands to a complete `return' statement whose comma
                          expression frees `compile_stack.stack' (and, in the WCHAR build,
                          `pattern', `mbs_offset' and `is_binary' first) before yielding
                          VALUE.  */
   2362  1.1  christos # ifdef WCHAR
   2363  1.1  christos #  define FREE_STACK_RETURN(value)		\
   2364  1.1  christos   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
   2365  1.1  christos # else
   2366  1.1  christos #  define FREE_STACK_RETURN(value)		\
   2367  1.1  christos   return (free (compile_stack.stack), value)
   2368  1.1  christos # endif /* WCHAR */
   2369  1.1  christos 
   2370  1.1  christos static reg_errcode_t
   2371  1.1  christos PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
   2372  1.1  christos      const char *ARG_PREFIX(pattern);
   2373  1.1  christos      size_t ARG_PREFIX(size);
   2374  1.1  christos      reg_syntax_t syntax;
   2375  1.1  christos      struct re_pattern_buffer *bufp;
   2376  1.1  christos {
   2377  1.1  christos   /* We fetch characters from PATTERN here.  Even though PATTERN is
   2378  1.1  christos      `char *' (i.e., signed), we declare these variables as unsigned, so
   2379  1.1  christos      they can be reliably used as array indices.  */
   2380  1.1  christos   register UCHAR_T c, c1;
   2381  1.1  christos 
   2382  1.1  christos #ifdef WCHAR
   2383  1.1  christos   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
   2384  1.1  christos   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
   2385  1.1  christos   size_t size;
   2386  1.1  christos   /* offset buffer for optimization. See convert_mbs_to_wc.  */
   2387  1.1  christos   int *mbs_offset = NULL;
   2388  1.1  christos   /* It hold whether each wchar_t is binary data or not.  */
   2389  1.1  christos   char *is_binary = NULL;
   2390  1.1  christos   /* A flag whether exactn is handling binary data or not.  */
   2391  1.1  christos   char is_exactn_bin = FALSE;
   2392  1.1  christos #endif /* WCHAR */
   2393  1.1  christos 
   2394  1.1  christos   /* A random temporary spot in PATTERN.  */
   2395  1.1  christos   const CHAR_T *p1;
   2396  1.1  christos 
   2397  1.1  christos   /* Points to the end of the buffer, where we should append.  */
   2398  1.1  christos   register UCHAR_T *b;
   2399  1.1  christos 
   2400  1.1  christos   /* Keeps track of unclosed groups.  */
   2401  1.1  christos   compile_stack_type compile_stack;
   2402  1.1  christos 
   2403  1.1  christos   /* Points to the current (ending) position in the pattern.  */
   2404  1.1  christos #ifdef WCHAR
   2405  1.1  christos   const CHAR_T *p;
   2406  1.1  christos   const CHAR_T *pend;
   2407  1.1  christos #else /* BYTE */
   2408  1.1  christos   const CHAR_T *p = pattern;
   2409  1.1  christos   const CHAR_T *pend = pattern + size;
   2410  1.1  christos #endif /* WCHAR */
   2411  1.1  christos 
   2412  1.1  christos   /* How to translate the characters in the pattern.  */
   2413  1.1  christos   RE_TRANSLATE_TYPE translate = bufp->translate;
   2414  1.1  christos 
   2415  1.1  christos   /* Address of the count-byte of the most recently inserted `exactn'
   2416  1.1  christos      command.  This makes it possible to tell if a new exact-match
   2417  1.1  christos      character can be added to that command or if the character requires
   2418  1.1  christos      a new `exactn' command.  */
   2419  1.1  christos   UCHAR_T *pending_exact = 0;
   2420  1.1  christos 
   2421  1.1  christos   /* Address of start of the most recently finished expression.
   2422  1.1  christos      This tells, e.g., postfix * where to find the start of its
   2423  1.1  christos      operand.  Reset at the beginning of groups and alternatives.  */
   2424  1.1  christos   UCHAR_T *laststart = 0;
   2425  1.1  christos 
   2426  1.1  christos   /* Address of beginning of regexp, or inside of last group.  */
   2427  1.1  christos   UCHAR_T *begalt;
   2428  1.1  christos 
   2429  1.1  christos   /* Address of the place where a forward jump should go to the end of
   2430  1.1  christos      the containing expression.  Each alternative of an `or' -- except the
   2431  1.1  christos      last -- ends with a forward jump of this sort.  */
   2432  1.1  christos   UCHAR_T *fixup_alt_jump = 0;
   2433  1.1  christos 
   2434  1.1  christos   /* Counts open-groups as they are encountered.  Remembered for the
   2435  1.1  christos      matching close-group on the compile stack, so the same register
   2436  1.1  christos      number is put in the stop_memory as the start_memory.  */
   2437  1.1  christos   regnum_t regnum = 0;
   2438  1.1  christos 
   2439  1.1  christos #ifdef WCHAR
   2440  1.1  christos   /* Initialize the wchar_t PATTERN and offset_buffer.  */
   2441  1.1  christos   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
   2442  1.1  christos   mbs_offset = TALLOC(csize + 1, int);
   2443  1.1  christos   is_binary = TALLOC(csize + 1, char);
   2444  1.1  christos   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
   2445  1.1  christos     {
   2446  1.1  christos       free(pattern);
   2447  1.1  christos       free(mbs_offset);
   2448  1.1  christos       free(is_binary);
   2449  1.1  christos       return REG_ESPACE;
   2450  1.1  christos     }
   2451  1.1  christos   pattern[csize] = L'\0';	/* sentinel */
   2452  1.1  christos   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
   2453  1.1  christos   pend = p + size;
   2454  1.1  christos   if (size < 0)
   2455  1.1  christos     {
   2456  1.1  christos       free(pattern);
   2457  1.1  christos       free(mbs_offset);
   2458  1.1  christos       free(is_binary);
   2459  1.1  christos       return REG_BADPAT;
   2460  1.1  christos     }
   2461  1.1  christos #endif
   2462  1.1  christos 
   2463  1.1  christos #ifdef DEBUG
   2464  1.1  christos   DEBUG_PRINT1 ("\nCompiling pattern: ");
   2465  1.1  christos   if (debug)
   2466  1.1  christos     {
   2467  1.1  christos       unsigned debug_count;
   2468  1.1  christos 
   2469  1.1  christos       for (debug_count = 0; debug_count < size; debug_count++)
   2470  1.1  christos         PUT_CHAR (pattern[debug_count]);
   2471  1.1  christos       putchar ('\n');
   2472  1.1  christos     }
   2473  1.1  christos #endif /* DEBUG */
   2474  1.1  christos 
   2475  1.1  christos   /* Initialize the compile stack.  */
   2476  1.1  christos   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
   2477  1.1  christos   if (compile_stack.stack == NULL)
   2478  1.1  christos     {
   2479  1.1  christos #ifdef WCHAR
   2480  1.1  christos       free(pattern);
   2481  1.1  christos       free(mbs_offset);
   2482  1.1  christos       free(is_binary);
   2483  1.1  christos #endif
   2484  1.1  christos       return REG_ESPACE;
   2485  1.1  christos     }
   2486  1.1  christos 
   2487  1.1  christos   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   2488  1.1  christos   compile_stack.avail = 0;
   2489  1.1  christos 
   2490  1.1  christos   /* Initialize the pattern buffer.  */
   2491  1.1  christos   bufp->syntax = syntax;
   2492  1.1  christos   bufp->fastmap_accurate = 0;
   2493  1.1  christos   bufp->not_bol = bufp->not_eol = 0;
   2494  1.1  christos 
   2495  1.1  christos   /* Set `used' to zero, so that if we return an error, the pattern
   2496  1.1  christos      printer (for debugging) will think there's no pattern.  We reset it
   2497  1.1  christos      at the end.  */
   2498  1.1  christos   bufp->used = 0;
   2499  1.1  christos 
   2500  1.1  christos   /* Always count groups, whether or not bufp->no_sub is set.  */
   2501  1.1  christos   bufp->re_nsub = 0;
   2502  1.1  christos 
   2503  1.1  christos #if !defined emacs && !defined SYNTAX_TABLE
   2504  1.1  christos   /* Initialize the syntax table.  */
   2505  1.1  christos    init_syntax_once ();
   2506  1.1  christos #endif
   2507  1.1  christos 
   2508  1.1  christos   if (bufp->allocated == 0)
   2509  1.1  christos     {
   2510  1.1  christos       if (bufp->buffer)
   2511  1.1  christos 	{ /* If zero allocated, but buffer is non-null, try to realloc
   2512  1.1  christos              enough space.  This loses if buffer's address is bogus, but
   2513  1.1  christos              that is the user's responsibility.  */
   2514  1.1  christos #ifdef WCHAR
   2515  1.1  christos 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
   2516  1.1  christos 	     buffer.  */
   2517  1.1  christos           free(bufp->buffer);
   2518  1.1  christos           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
   2519  1.1  christos 					UCHAR_T);
   2520  1.1  christos #else
   2521  1.1  christos           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
   2522  1.1  christos #endif /* WCHAR */
   2523  1.1  christos         }
   2524  1.1  christos       else
   2525  1.1  christos         { /* Caller did not allocate a buffer.  Do it for them.  */
   2526  1.1  christos           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
   2527  1.1  christos 					UCHAR_T);
   2528  1.1  christos         }
   2529  1.1  christos 
   2530  1.1  christos       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
   2531  1.1  christos #ifdef WCHAR
   2532  1.1  christos       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
   2533  1.1  christos #endif /* WCHAR */
   2534  1.1  christos       bufp->allocated = INIT_BUF_SIZE;
   2535  1.1  christos     }
   2536  1.1  christos #ifdef WCHAR
   2537  1.1  christos   else
   2538  1.1  christos     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
   2539  1.1  christos #endif
   2540  1.1  christos 
   2541  1.1  christos   begalt = b = COMPILED_BUFFER_VAR;
   2542  1.1  christos 
   2543  1.1  christos   /* Loop through the uncompiled pattern until we're at the end.  */
   2544  1.1  christos   while (p != pend)
   2545  1.1  christos     {
   2546  1.1  christos       PATFETCH (c);
   2547  1.1  christos 
   2548  1.1  christos       switch (c)
   2549  1.1  christos         {
   2550  1.1  christos         case '^':
   2551  1.1  christos           {
   2552  1.1  christos             if (   /* If at start of pattern, it's an operator.  */
   2553  1.1  christos                    p == pattern + 1
   2554  1.1  christos                    /* If context independent, it's an operator.  */
   2555  1.1  christos                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2556  1.1  christos                    /* Otherwise, depends on what's come before.  */
   2557  1.1  christos                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
   2558  1.1  christos               BUF_PUSH (begline);
   2559  1.1  christos             else
   2560  1.1  christos               goto normal_char;
   2561  1.1  christos           }
   2562  1.1  christos           break;
   2563  1.1  christos 
   2564  1.1  christos 
   2565  1.1  christos         case '$':
   2566  1.1  christos           {
   2567  1.1  christos             if (   /* If at end of pattern, it's an operator.  */
   2568  1.1  christos                    p == pend
   2569  1.1  christos                    /* If context independent, it's an operator.  */
   2570  1.1  christos                 || syntax & RE_CONTEXT_INDEP_ANCHORS
   2571  1.1  christos                    /* Otherwise, depends on what's next.  */
   2572  1.1  christos                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
   2573  1.1  christos                BUF_PUSH (endline);
   2574  1.1  christos              else
   2575  1.1  christos                goto normal_char;
   2576  1.1  christos            }
   2577  1.1  christos            break;
   2578  1.1  christos 
   2579  1.1  christos 
   2580  1.1  christos 	case '+':
   2581  1.1  christos         case '?':
   2582  1.1  christos           if ((syntax & RE_BK_PLUS_QM)
   2583  1.1  christos               || (syntax & RE_LIMITED_OPS))
   2584  1.1  christos             goto normal_char;
   2585  1.1  christos         handle_plus:
   2586  1.1  christos         case '*':
   2587  1.1  christos           /* If there is no previous pattern... */
   2588  1.1  christos           if (!laststart)
   2589  1.1  christos             {
   2590  1.1  christos               if (syntax & RE_CONTEXT_INVALID_OPS)
   2591  1.1  christos                 FREE_STACK_RETURN (REG_BADRPT);
   2592  1.1  christos               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
   2593  1.1  christos                 goto normal_char;
   2594  1.1  christos             }
   2595  1.1  christos 
   2596  1.1  christos           {
   2597  1.1  christos             /* Are we optimizing this jump?  */
   2598  1.1  christos             boolean keep_string_p = false;
   2599  1.1  christos 
   2600  1.1  christos             /* 1 means zero (many) matches is allowed.  */
   2601  1.1  christos             char zero_times_ok = 0, many_times_ok = 0;
   2602  1.1  christos 
   2603  1.1  christos             /* If there is a sequence of repetition chars, collapse it
   2604  1.1  christos                down to just one (the right one).  We can't combine
   2605  1.1  christos                interval operators with these because of, e.g., `a{2}*',
   2606  1.1  christos                which should only match an even number of `a's.  */
   2607  1.1  christos 
   2608  1.1  christos             for (;;)
   2609  1.1  christos               {
   2610  1.1  christos                 zero_times_ok |= c != '+';
   2611  1.1  christos                 many_times_ok |= c != '?';
   2612  1.1  christos 
   2613  1.1  christos                 if (p == pend)
   2614  1.1  christos                   break;
   2615  1.1  christos 
   2616  1.1  christos                 PATFETCH (c);
   2617  1.1  christos 
   2618  1.1  christos                 if (c == '*'
   2619  1.1  christos                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
   2620  1.1  christos                   ;
   2621  1.1  christos 
   2622  1.1  christos                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
   2623  1.1  christos                   {
   2624  1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2625  1.1  christos 
   2626  1.1  christos                     PATFETCH (c1);
   2627  1.1  christos                     if (!(c1 == '+' || c1 == '?'))
   2628  1.1  christos                       {
   2629  1.1  christos                         PATUNFETCH;
   2630  1.1  christos                         PATUNFETCH;
   2631  1.1  christos                         break;
   2632  1.1  christos                       }
   2633  1.1  christos 
   2634  1.1  christos                     c = c1;
   2635  1.1  christos                   }
   2636  1.1  christos                 else
   2637  1.1  christos                   {
   2638  1.1  christos                     PATUNFETCH;
   2639  1.1  christos                     break;
   2640  1.1  christos                   }
   2641  1.1  christos 
   2642  1.1  christos                 /* If we get here, we found another repeat character.  */
   2643  1.1  christos                }
   2644  1.1  christos 
   2645  1.1  christos             /* Star, etc. applied to an empty pattern is equivalent
   2646  1.1  christos                to an empty pattern.  */
   2647  1.1  christos             if (!laststart)
   2648  1.1  christos               break;
   2649  1.1  christos 
   2650  1.1  christos             /* Now we know whether or not zero matches is allowed
   2651  1.1  christos                and also whether or not two or more matches is allowed.  */
   2652  1.1  christos             if (many_times_ok)
   2653  1.1  christos               { /* More than one repetition is allowed, so put in at the
   2654  1.1  christos                    end a backward relative jump from `b' to before the next
   2655  1.1  christos                    jump we're going to put in below (which jumps from
   2656  1.1  christos                    laststart to after this jump).
   2657  1.1  christos 
   2658  1.1  christos                    But if we are at the `*' in the exact sequence `.*\n',
   2659  1.1  christos                    insert an unconditional jump backwards to the .,
   2660  1.1  christos                    instead of the beginning of the loop.  This way we only
   2661  1.1  christos                    push a failure point once, instead of every time
   2662  1.1  christos                    through the loop.  */
   2663  1.1  christos                 assert (p - 1 > pattern);
   2664  1.1  christos 
   2665  1.1  christos                 /* Allocate the space for the jump.  */
   2666  1.1  christos                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2667  1.1  christos 
   2668  1.1  christos                 /* We know we are not at the first character of the pattern,
   2669  1.1  christos                    because laststart was nonzero.  And we've already
   2670  1.1  christos                    incremented `p', by the way, to be the character after
   2671  1.1  christos                    the `*'.  Do we have to do something analogous here
   2672  1.1  christos                    for null bytes, because of RE_DOT_NOT_NULL?  */
   2673  1.1  christos                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
   2674  1.1  christos 		    && zero_times_ok
   2675  1.1  christos                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
   2676  1.1  christos                     && !(syntax & RE_DOT_NEWLINE))
   2677  1.1  christos                   { /* We have .*\n.  */
   2678  1.1  christos                     STORE_JUMP (jump, b, laststart);
   2679  1.1  christos                     keep_string_p = true;
   2680  1.1  christos                   }
   2681  1.1  christos                 else
   2682  1.1  christos                   /* Anything else.  */
   2683  1.1  christos                   STORE_JUMP (maybe_pop_jump, b, laststart -
   2684  1.1  christos 			      (1 + OFFSET_ADDRESS_SIZE));
   2685  1.1  christos 
   2686  1.1  christos                 /* We've added more stuff to the buffer.  */
   2687  1.1  christos                 b += 1 + OFFSET_ADDRESS_SIZE;
   2688  1.1  christos               }
   2689  1.1  christos 
   2690  1.1  christos             /* On failure, jump from laststart to b + 3, which will be the
   2691  1.1  christos                end of the buffer after this jump is inserted.  */
   2692  1.1  christos 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
   2693  1.1  christos 	       'b + 3'.  */
   2694  1.1  christos             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2695  1.1  christos             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
   2696  1.1  christos                                        : on_failure_jump,
   2697  1.1  christos                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
   2698  1.1  christos             pending_exact = 0;
   2699  1.1  christos             b += 1 + OFFSET_ADDRESS_SIZE;
   2700  1.1  christos 
   2701  1.1  christos             if (!zero_times_ok)
   2702  1.1  christos               {
   2703  1.1  christos                 /* At least one repetition is required, so insert a
   2704  1.1  christos                    `dummy_failure_jump' before the initial
   2705  1.1  christos                    `on_failure_jump' instruction of the loop. This
   2706  1.1  christos                    effects a skip over that instruction the first time
   2707  1.1  christos                    we hit that loop.  */
   2708  1.1  christos                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   2709  1.1  christos                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
   2710  1.1  christos 			     2 + 2 * OFFSET_ADDRESS_SIZE);
   2711  1.1  christos                 b += 1 + OFFSET_ADDRESS_SIZE;
   2712  1.1  christos               }
   2713  1.1  christos             }
   2714  1.1  christos 	  break;
   2715  1.1  christos 
   2716  1.1  christos 
   2717  1.1  christos 	case '.':
   2718  1.1  christos           laststart = b;
   2719  1.1  christos           BUF_PUSH (anychar);
   2720  1.1  christos           break;
   2721  1.1  christos 
   2722  1.1  christos 
   2723  1.1  christos         case '[':
   2724  1.1  christos           {
   2725  1.1  christos             boolean had_char_class = false;
   2726  1.1  christos #ifdef WCHAR
   2727  1.1  christos 	    CHAR_T range_start = 0xffffffff;
   2728  1.1  christos #else
   2729  1.1  christos 	    unsigned int range_start = 0xffffffff;
   2730  1.1  christos #endif
   2731  1.1  christos             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2732  1.1  christos 
   2733  1.1  christos #ifdef WCHAR
   2734  1.1  christos 	    /* We assume a charset(_not) structure as a wchar_t array.
   2735  1.1  christos 	       charset[0] = (re_opcode_t) charset(_not)
   2736  1.1  christos                charset[1] = l (= length of char_classes)
   2737  1.1  christos                charset[2] = m (= length of collating_symbols)
   2738  1.1  christos                charset[3] = n (= length of equivalence_classes)
   2739  1.1  christos 	       charset[4] = o (= length of char_ranges)
   2740  1.1  christos 	       charset[5] = p (= length of chars)
   2741  1.1  christos 
   2742  1.1  christos                charset[6] = char_class (wctype_t)
   2743  1.1  christos                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
   2744  1.1  christos                          ...
   2745  1.1  christos                charset[l+5]  = char_class (wctype_t)
   2746  1.1  christos 
   2747  1.1  christos                charset[l+6]  = collating_symbol (wchar_t)
   2748  1.1  christos                             ...
   2749  1.1  christos                charset[l+m+5]  = collating_symbol (wchar_t)
   2750  1.1  christos 					ifdef _LIBC we use the index if
   2751  1.1  christos 					_NL_COLLATE_SYMB_EXTRAMB instead of
   2752  1.1  christos 					wchar_t string.
   2753  1.1  christos 
   2754  1.1  christos                charset[l+m+6]  = equivalence_classes (wchar_t)
   2755  1.1  christos                               ...
   2756  1.1  christos                charset[l+m+n+5]  = equivalence_classes (wchar_t)
   2757  1.1  christos 					ifdef _LIBC we use the index in
   2758  1.1  christos 					_NL_COLLATE_WEIGHT instead of
   2759  1.1  christos 					wchar_t string.
   2760  1.1  christos 
   2761  1.1  christos 	       charset[l+m+n+6] = range_start
   2762  1.1  christos 	       charset[l+m+n+7] = range_end
   2763  1.1  christos 	                       ...
   2764  1.1  christos 	       charset[l+m+n+2o+4] = range_start
   2765  1.1  christos 	       charset[l+m+n+2o+5] = range_end
   2766  1.1  christos 					ifdef _LIBC we use the value looked up
   2767  1.1  christos 					in _NL_COLLATE_COLLSEQ instead of
   2768  1.1  christos 					wchar_t character.
   2769  1.1  christos 
   2770  1.1  christos 	       charset[l+m+n+2o+6] = char
   2771  1.1  christos 	                          ...
   2772  1.1  christos 	       charset[l+m+n+2o+p+5] = char
   2773  1.1  christos 
   2774  1.1  christos 	     */
   2775  1.1  christos 
   2776  1.1  christos 	    /* We need at least 6 spaces: the opcode, the length of
   2777  1.1  christos                char_classes, the length of collating_symbols, the length of
   2778  1.1  christos                equivalence_classes, the length of char_ranges, the length of
   2779  1.1  christos                chars.  */
   2780  1.1  christos 	    GET_BUFFER_SPACE (6);
   2781  1.1  christos 
   2782  1.1  christos 	    /* Save b as laststart. And We use laststart as the pointer
   2783  1.1  christos 	       to the first element of the charset here.
   2784  1.1  christos 	       In other words, laststart[i] indicates charset[i].  */
   2785  1.1  christos             laststart = b;
   2786  1.1  christos 
   2787  1.1  christos             /* We test `*p == '^' twice, instead of using an if
   2788  1.1  christos                statement, so we only need one BUF_PUSH.  */
   2789  1.1  christos             BUF_PUSH (*p == '^' ? charset_not : charset);
   2790  1.1  christos             if (*p == '^')
   2791  1.1  christos               p++;
   2792  1.1  christos 
   2793  1.1  christos             /* Push the length of char_classes, the length of
   2794  1.1  christos                collating_symbols, the length of equivalence_classes, the
   2795  1.1  christos                length of char_ranges and the length of chars.  */
   2796  1.1  christos             BUF_PUSH_3 (0, 0, 0);
   2797  1.1  christos             BUF_PUSH_2 (0, 0);
   2798  1.1  christos 
   2799  1.1  christos             /* Remember the first position in the bracket expression.  */
   2800  1.1  christos             p1 = p;
   2801  1.1  christos 
   2802  1.1  christos             /* charset_not matches newline according to a syntax bit.  */
   2803  1.1  christos             if ((re_opcode_t) b[-6] == charset_not
   2804  1.1  christos                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   2805  1.1  christos 	      {
   2806  1.1  christos 		BUF_PUSH('\n');
   2807  1.1  christos 		laststart[5]++; /* Update the length of characters  */
   2808  1.1  christos 	      }
   2809  1.1  christos 
   2810  1.1  christos             /* Read in characters and ranges, setting map bits.  */
   2811  1.1  christos             for (;;)
   2812  1.1  christos               {
   2813  1.1  christos                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2814  1.1  christos 
   2815  1.1  christos                 PATFETCH (c);
   2816  1.1  christos 
   2817  1.1  christos                 /* \ might escape characters inside [...] and [^...].  */
   2818  1.1  christos                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   2819  1.1  christos                   {
   2820  1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2821  1.1  christos 
   2822  1.1  christos                     PATFETCH (c1);
   2823  1.1  christos 		    BUF_PUSH(c1);
   2824  1.1  christos 		    laststart[5]++; /* Update the length of chars  */
   2825  1.1  christos 		    range_start = c1;
   2826  1.1  christos                     continue;
   2827  1.1  christos                   }
   2828  1.1  christos 
   2829  1.1  christos                 /* Could be the end of the bracket expression.  If it's
   2830  1.1  christos                    not (i.e., when the bracket expression is `[]' so
   2831  1.1  christos                    far), the ']' character bit gets set way below.  */
   2832  1.1  christos                 if (c == ']' && p != p1 + 1)
   2833  1.1  christos                   break;
   2834  1.1  christos 
   2835  1.1  christos                 /* Look ahead to see if it's a range when the last thing
   2836  1.1  christos                    was a character class.  */
   2837  1.1  christos                 if (had_char_class && c == '-' && *p != ']')
   2838  1.1  christos                   FREE_STACK_RETURN (REG_ERANGE);
   2839  1.1  christos 
   2840  1.1  christos                 /* Look ahead to see if it's a range when the last thing
   2841  1.1  christos                    was a character: if this is a hyphen not at the
   2842  1.1  christos                    beginning or the end of a list, then it's the range
   2843  1.1  christos                    operator.  */
   2844  1.1  christos                 if (c == '-'
   2845  1.1  christos                     && !(p - 2 >= pattern && p[-2] == '[')
   2846  1.1  christos                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   2847  1.1  christos                     && *p != ']')
   2848  1.1  christos                   {
   2849  1.1  christos                     reg_errcode_t ret;
   2850  1.1  christos 		    /* Allocate the space for range_start and range_end.  */
   2851  1.1  christos 		    GET_BUFFER_SPACE (2);
   2852  1.1  christos 		    /* Update the pointer to indicate end of buffer.  */
   2853  1.1  christos                     b += 2;
   2854  1.1  christos                     ret = wcs_compile_range (range_start, &p, pend, translate,
   2855  1.1  christos                                          syntax, b, laststart);
   2856  1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2857  1.1  christos                     range_start = 0xffffffff;
   2858  1.1  christos                   }
   2859  1.1  christos                 else if (p[0] == '-' && p[1] != ']')
   2860  1.1  christos                   { /* This handles ranges made up of characters only.  */
   2861  1.1  christos                     reg_errcode_t ret;
   2862  1.1  christos 
   2863  1.1  christos 		    /* Move past the `-'.  */
   2864  1.1  christos                     PATFETCH (c1);
   2865  1.1  christos 		    /* Allocate the space for range_start and range_end.  */
   2866  1.1  christos 		    GET_BUFFER_SPACE (2);
   2867  1.1  christos 		    /* Update the pointer to indicate end of buffer.  */
   2868  1.1  christos                     b += 2;
   2869  1.1  christos                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
   2870  1.1  christos                                          laststart);
   2871  1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2872  1.1  christos 		    range_start = 0xffffffff;
   2873  1.1  christos                   }
   2874  1.1  christos 
   2875  1.1  christos                 /* See if we're at the beginning of a possible character
   2876  1.1  christos                    class.  */
   2877  1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   2878  1.1  christos                   { /* Leave room for the null.  */
   2879  1.1  christos                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   2880  1.1  christos 
   2881  1.1  christos                     PATFETCH (c);
   2882  1.1  christos                     c1 = 0;
   2883  1.1  christos 
   2884  1.1  christos                     /* If pattern is `[[:'.  */
   2885  1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2886  1.1  christos 
   2887  1.1  christos                     for (;;)
   2888  1.1  christos                       {
   2889  1.1  christos                         PATFETCH (c);
   2890  1.1  christos                         if ((c == ':' && *p == ']') || p == pend)
   2891  1.1  christos                           break;
   2892  1.1  christos 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   2893  1.1  christos 			  str[c1++] = c;
   2894  1.1  christos 			else
   2895  1.1  christos 			  /* This is in any case an invalid class name.  */
   2896  1.1  christos 			  str[0] = '\0';
   2897  1.1  christos                       }
   2898  1.1  christos                     str[c1] = '\0';
   2899  1.1  christos 
   2900  1.1  christos                     /* If isn't a word bracketed by `[:' and `:]':
   2901  1.1  christos                        undo the ending character, the letters, and leave
   2902  1.1  christos                        the leading `:' and `[' (but store them as character).  */
   2903  1.1  christos                     if (c == ':' && *p == ']')
   2904  1.1  christos                       {
   2905  1.1  christos 			wctype_t wt;
   2906  1.1  christos 			uintptr_t alignedp;
   2907  1.1  christos 
   2908  1.1  christos 			/* Query the character class as wctype_t.  */
   2909  1.1  christos 			wt = IS_CHAR_CLASS (str);
   2910  1.1  christos 			if (wt == 0)
   2911  1.1  christos 			  FREE_STACK_RETURN (REG_ECTYPE);
   2912  1.1  christos 
   2913  1.1  christos                         /* Throw away the ] at the end of the character
   2914  1.1  christos                            class.  */
   2915  1.1  christos                         PATFETCH (c);
   2916  1.1  christos 
   2917  1.1  christos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2918  1.1  christos 
   2919  1.1  christos 			/* Allocate the space for character class.  */
   2920  1.1  christos                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
   2921  1.1  christos 			/* Update the pointer to indicate end of buffer.  */
   2922  1.1  christos                         b += CHAR_CLASS_SIZE;
   2923  1.1  christos 			/* Move data which follow character classes
   2924  1.1  christos 			    not to violate the data.  */
   2925  1.1  christos                         insert_space(CHAR_CLASS_SIZE,
   2926  1.1  christos 				     laststart + 6 + laststart[1],
   2927  1.1  christos 				     b - 1);
   2928  1.1  christos 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
   2929  1.1  christos 				    + __alignof__(wctype_t) - 1)
   2930  1.1  christos 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   2931  1.1  christos 			/* Store the character class.  */
   2932  1.1  christos                         *((wctype_t*)alignedp) = wt;
   2933  1.1  christos                         /* Update length of char_classes */
   2934  1.1  christos                         laststart[1] += CHAR_CLASS_SIZE;
   2935  1.1  christos 
   2936  1.1  christos                         had_char_class = true;
   2937  1.1  christos                       }
   2938  1.1  christos                     else
   2939  1.1  christos                       {
   2940  1.1  christos                         c1++;
   2941  1.1  christos                         while (c1--)
   2942  1.1  christos                           PATUNFETCH;
   2943  1.1  christos                         BUF_PUSH ('[');
   2944  1.1  christos                         BUF_PUSH (':');
   2945  1.1  christos                         laststart[5] += 2; /* Update the length of characters  */
   2946  1.1  christos 			range_start = ':';
   2947  1.1  christos                         had_char_class = false;
   2948  1.1  christos                       }
   2949  1.1  christos                   }
   2950  1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
   2951  1.1  christos 							  || *p == '.'))
   2952  1.1  christos 		  {
   2953  1.1  christos 		    CHAR_T str[128];	/* Should be large enough.  */
   2954  1.1  christos 		    CHAR_T delim = *p; /* '=' or '.'  */
   2955  1.1  christos # ifdef _LIBC
   2956  1.1  christos 		    uint32_t nrules =
   2957  1.1  christos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   2958  1.1  christos # endif
   2959  1.1  christos 		    PATFETCH (c);
   2960  1.1  christos 		    c1 = 0;
   2961  1.1  christos 
   2962  1.1  christos 		    /* If pattern is `[[=' or '[[.'.  */
   2963  1.1  christos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2964  1.1  christos 
   2965  1.1  christos 		    for (;;)
   2966  1.1  christos 		      {
   2967  1.1  christos 			PATFETCH (c);
   2968  1.1  christos 			if ((c == delim && *p == ']') || p == pend)
   2969  1.1  christos 			  break;
   2970  1.1  christos 			if (c1 < sizeof (str) - 1)
   2971  1.1  christos 			  str[c1++] = c;
   2972  1.1  christos 			else
   2973  1.1  christos 			  /* This is in any case an invalid class name.  */
   2974  1.1  christos 			  str[0] = '\0';
   2975  1.1  christos                       }
   2976  1.1  christos 		    str[c1] = '\0';
   2977  1.1  christos 
   2978  1.1  christos 		    if (c == delim && *p == ']' && str[0] != '\0')
   2979  1.1  christos 		      {
   2980  1.1  christos                         unsigned int i, offset;
   2981  1.1  christos 			/* If we have no collation data we use the default
   2982  1.1  christos 			   collation in which each character is in a class
   2983  1.1  christos 			   by itself.  It also means that ASCII is the
   2984  1.1  christos 			   character set and therefore we cannot have character
   2985  1.1  christos 			   with more than one byte in the multibyte
   2986  1.1  christos 			   representation.  */
   2987  1.1  christos 
   2988  1.1  christos                         /* If not defined _LIBC, we push the name and
   2989  1.1  christos 			   `\0' for the sake of matching performance.  */
   2990  1.1  christos 			int datasize = c1 + 1;
   2991  1.1  christos 
   2992  1.1  christos # ifdef _LIBC
   2993  1.1  christos 			int32_t idx = 0;
   2994  1.1  christos 			if (nrules == 0)
   2995  1.1  christos # endif
   2996  1.1  christos 			  {
   2997  1.1  christos 			    if (c1 != 1)
   2998  1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   2999  1.1  christos 			  }
   3000  1.1  christos # ifdef _LIBC
   3001  1.1  christos 			else
   3002  1.1  christos 			  {
   3003  1.1  christos 			    const int32_t *table;
   3004  1.1  christos 			    const int32_t *weights;
   3005  1.1  christos 			    const int32_t *extra;
   3006  1.1  christos 			    const int32_t *indirect;
   3007  1.1  christos 			    wint_t *cp;
   3008  1.1  christos 
   3009  1.1  christos 			    /* This #include defines a local function!  */
   3010  1.1  christos #  include <locale/weightwc.h>
   3011  1.1  christos 
   3012  1.1  christos 			    if(delim == '=')
   3013  1.1  christos 			      {
   3014  1.1  christos 				/* We push the index for equivalence class.  */
   3015  1.1  christos 				cp = (wint_t*)str;
   3016  1.1  christos 
   3017  1.1  christos 				table = (const int32_t *)
   3018  1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   3019  1.1  christos 					       _NL_COLLATE_TABLEWC);
   3020  1.1  christos 				weights = (const int32_t *)
   3021  1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   3022  1.1  christos 					       _NL_COLLATE_WEIGHTWC);
   3023  1.1  christos 				extra = (const int32_t *)
   3024  1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   3025  1.1  christos 					       _NL_COLLATE_EXTRAWC);
   3026  1.1  christos 				indirect = (const int32_t *)
   3027  1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   3028  1.1  christos 					       _NL_COLLATE_INDIRECTWC);
   3029  1.1  christos 
   3030  1.1  christos 				idx = findidx ((const wint_t**)&cp);
   3031  1.1  christos 				if (idx == 0 || cp < (wint_t*) str + c1)
   3032  1.1  christos 				  /* This is no valid character.  */
   3033  1.1  christos 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3034  1.1  christos 
   3035  1.1  christos 				str[0] = (wchar_t)idx;
   3036  1.1  christos 			      }
   3037  1.1  christos 			    else /* delim == '.' */
   3038  1.1  christos 			      {
   3039  1.1  christos 				/* We push collation sequence value
   3040  1.1  christos 				   for collating symbol.  */
   3041  1.1  christos 				int32_t table_size;
   3042  1.1  christos 				const int32_t *symb_table;
   3043  1.1  christos 				const unsigned char *extra;
   3044  1.1  christos 				int32_t idx;
   3045  1.1  christos 				int32_t elem;
   3046  1.1  christos 				int32_t second;
   3047  1.1  christos 				int32_t hash;
   3048  1.1  christos 				char char_str[c1];
   3049  1.1  christos 
   3050  1.1  christos 				/* We have to convert the name to a single-byte
   3051  1.1  christos 				   string.  This is possible since the names
   3052  1.1  christos 				   consist of ASCII characters and the internal
   3053  1.1  christos 				   representation is UCS4.  */
   3054  1.1  christos 				for (i = 0; i < c1; ++i)
   3055  1.1  christos 				  char_str[i] = str[i];
   3056  1.1  christos 
   3057  1.1  christos 				table_size =
   3058  1.1  christos 				  _NL_CURRENT_WORD (LC_COLLATE,
   3059  1.1  christos 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
   3060  1.1  christos 				symb_table = (const int32_t *)
   3061  1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   3062  1.1  christos 					       _NL_COLLATE_SYMB_TABLEMB);
   3063  1.1  christos 				extra = (const unsigned char *)
   3064  1.1  christos 				  _NL_CURRENT (LC_COLLATE,
   3065  1.1  christos 					       _NL_COLLATE_SYMB_EXTRAMB);
   3066  1.1  christos 
   3067  1.1  christos 				/* Locate the character in the hashing table.  */
   3068  1.1  christos 				hash = elem_hash (char_str, c1);
   3069  1.1  christos 
   3070  1.1  christos 				idx = 0;
   3071  1.1  christos 				elem = hash % table_size;
   3072  1.1  christos 				second = hash % (table_size - 2);
   3073  1.1  christos 				while (symb_table[2 * elem] != 0)
   3074  1.1  christos 				  {
   3075  1.1  christos 				    /* First compare the hashing value.  */
   3076  1.1  christos 				    if (symb_table[2 * elem] == hash
   3077  1.1  christos 					&& c1 == extra[symb_table[2 * elem + 1]]
   3078  1.1  christos 					&& memcmp (char_str,
   3079  1.1  christos 						   &extra[symb_table[2 * elem + 1]
   3080  1.1  christos 							 + 1], c1) == 0)
   3081  1.1  christos 				      {
   3082  1.1  christos 					/* Yep, this is the entry.  */
   3083  1.1  christos 					idx = symb_table[2 * elem + 1];
   3084  1.1  christos 					idx += 1 + extra[idx];
   3085  1.1  christos 					break;
   3086  1.1  christos 				      }
   3087  1.1  christos 
   3088  1.1  christos 				    /* Next entry.  */
   3089  1.1  christos 				    elem += second;
   3090  1.1  christos 				  }
   3091  1.1  christos 
   3092  1.1  christos 				if (symb_table[2 * elem] != 0)
   3093  1.1  christos 				  {
   3094  1.1  christos 				    /* Compute the index of the byte sequence
   3095  1.1  christos 				       in the table.  */
   3096  1.1  christos 				    idx += 1 + extra[idx];
   3097  1.1  christos 				    /* Adjust for the alignment.  */
   3098  1.1  christos 				    idx = (idx + 3) & ~3;
   3099  1.1  christos 
   3100  1.1  christos 				    str[0] = (wchar_t) idx + 4;
   3101  1.1  christos 				  }
   3102  1.1  christos 				else if (symb_table[2 * elem] == 0 && c1 == 1)
   3103  1.1  christos 				  {
   3104  1.1  christos 				    /* No valid character.  Match it as a
   3105  1.1  christos 				       single byte character.  */
   3106  1.1  christos 				    had_char_class = false;
   3107  1.1  christos 				    BUF_PUSH(str[0]);
   3108  1.1  christos 				    /* Update the length of characters  */
   3109  1.1  christos 				    laststart[5]++;
   3110  1.1  christos 				    range_start = str[0];
   3111  1.1  christos 
   3112  1.1  christos 				    /* Throw away the ] at the end of the
   3113  1.1  christos 				       collating symbol.  */
   3114  1.1  christos 				    PATFETCH (c);
   3115  1.1  christos 				    /* exit from the switch block.  */
   3116  1.1  christos 				    continue;
   3117  1.1  christos 				  }
   3118  1.1  christos 				else
   3119  1.1  christos 				  FREE_STACK_RETURN (REG_ECOLLATE);
   3120  1.1  christos 			      }
   3121  1.1  christos 			    datasize = 1;
   3122  1.1  christos 			  }
   3123  1.1  christos # endif
   3124  1.1  christos                         /* Throw away the ] at the end of the equivalence
   3125  1.1  christos                            class (or collating symbol).  */
   3126  1.1  christos                         PATFETCH (c);
   3127  1.1  christos 
   3128  1.1  christos 			/* Allocate the space for the equivalence class
   3129  1.1  christos 			   (or collating symbol) (and '\0' if needed).  */
   3130  1.1  christos                         GET_BUFFER_SPACE(datasize);
   3131  1.1  christos 			/* Update the pointer to indicate end of buffer.  */
   3132  1.1  christos                         b += datasize;
   3133  1.1  christos 
   3134  1.1  christos 			if (delim == '=')
   3135  1.1  christos 			  { /* equivalence class  */
   3136  1.1  christos 			    /* Calculate the offset of char_ranges,
   3137  1.1  christos 			       which is next to equivalence_classes.  */
   3138  1.1  christos 			    offset = laststart[1] + laststart[2]
   3139  1.1  christos 			      + laststart[3] +6;
   3140  1.1  christos 			    /* Insert space.  */
   3141  1.1  christos 			    insert_space(datasize, laststart + offset, b - 1);
   3142  1.1  christos 
   3143  1.1  christos 			    /* Write the equivalence_class and \0.  */
   3144  1.1  christos 			    for (i = 0 ; i < datasize ; i++)
   3145  1.1  christos 			      laststart[offset + i] = str[i];
   3146  1.1  christos 
   3147  1.1  christos 			    /* Update the length of equivalence_classes.  */
   3148  1.1  christos 			    laststart[3] += datasize;
   3149  1.1  christos 			    had_char_class = true;
   3150  1.1  christos 			  }
   3151  1.1  christos 			else /* delim == '.' */
   3152  1.1  christos 			  { /* collating symbol  */
   3153  1.1  christos 			    /* Calculate the offset of the equivalence_classes,
   3154  1.1  christos 			       which is next to collating_symbols.  */
   3155  1.1  christos 			    offset = laststart[1] + laststart[2] + 6;
   3156  1.1  christos 			    /* Insert space and write the collating_symbol
   3157  1.1  christos 			       and \0.  */
   3158  1.1  christos 			    insert_space(datasize, laststart + offset, b-1);
   3159  1.1  christos 			    for (i = 0 ; i < datasize ; i++)
   3160  1.1  christos 			      laststart[offset + i] = str[i];
   3161  1.1  christos 
   3162  1.1  christos 			    /* In re_match_2_internal if range_start < -1, we
   3163  1.1  christos 			       assume -range_start is the offset of the
   3164  1.1  christos 			       collating symbol which is specified as
   3165  1.1  christos 			       the character of the range start.  So we assign
   3166  1.1  christos 			       -(laststart[1] + laststart[2] + 6) to
   3167  1.1  christos 			       range_start.  */
   3168  1.1  christos 			    range_start = -(laststart[1] + laststart[2] + 6);
   3169  1.1  christos 			    /* Update the length of collating_symbol.  */
   3170  1.1  christos 			    laststart[2] += datasize;
   3171  1.1  christos 			    had_char_class = false;
   3172  1.1  christos 			  }
   3173  1.1  christos 		      }
   3174  1.1  christos                     else
   3175  1.1  christos                       {
   3176  1.1  christos                         c1++;
   3177  1.1  christos                         while (c1--)
   3178  1.1  christos                           PATUNFETCH;
   3179  1.1  christos                         BUF_PUSH ('[');
   3180  1.1  christos                         BUF_PUSH (delim);
   3181  1.1  christos                         laststart[5] += 2; /* Update the length of characters  */
   3182  1.1  christos 			range_start = delim;
   3183  1.1  christos                         had_char_class = false;
   3184  1.1  christos                       }
   3185  1.1  christos 		  }
   3186  1.1  christos                 else
   3187  1.1  christos                   {
   3188  1.1  christos                     had_char_class = false;
   3189  1.1  christos 		    BUF_PUSH(c);
   3190  1.1  christos 		    laststart[5]++;  /* Update the length of characters  */
   3191  1.1  christos 		    range_start = c;
   3192  1.1  christos                   }
   3193  1.1  christos 	      }
   3194  1.1  christos 
   3195  1.1  christos #else /* BYTE */
   3196  1.1  christos             /* Ensure that we have enough space to push a charset: the
   3197  1.1  christos                opcode, the length count, and the bitset; 34 bytes in all.  */
   3198  1.1  christos 	    GET_BUFFER_SPACE (34);
   3199  1.1  christos 
   3200  1.1  christos             laststart = b;
   3201  1.1  christos 
   3202  1.1  christos             /* We test `*p == '^' twice, instead of using an if
   3203  1.1  christos                statement, so we only need one BUF_PUSH.  */
   3204  1.1  christos             BUF_PUSH (*p == '^' ? charset_not : charset);
   3205  1.1  christos             if (*p == '^')
   3206  1.1  christos               p++;
   3207  1.1  christos 
   3208  1.1  christos             /* Remember the first position in the bracket expression.  */
   3209  1.1  christos             p1 = p;
   3210  1.1  christos 
   3211  1.1  christos             /* Push the number of bytes in the bitmap.  */
   3212  1.1  christos             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
   3213  1.1  christos 
   3214  1.1  christos             /* Clear the whole map.  */
   3215  1.1  christos             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
   3216  1.1  christos 
   3217  1.1  christos             /* charset_not matches newline according to a syntax bit.  */
   3218  1.1  christos             if ((re_opcode_t) b[-2] == charset_not
   3219  1.1  christos                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   3220  1.1  christos               SET_LIST_BIT ('\n');
   3221  1.1  christos 
   3222  1.1  christos             /* Read in characters and ranges, setting map bits.  */
   3223  1.1  christos             for (;;)
   3224  1.1  christos               {
   3225  1.1  christos                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3226  1.1  christos 
   3227  1.1  christos                 PATFETCH (c);
   3228  1.1  christos 
   3229  1.1  christos                 /* \ might escape characters inside [...] and [^...].  */
   3230  1.1  christos                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   3231  1.1  christos                   {
   3232  1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3233  1.1  christos 
   3234  1.1  christos                     PATFETCH (c1);
   3235  1.1  christos                     SET_LIST_BIT (c1);
   3236  1.1  christos 		    range_start = c1;
   3237  1.1  christos                     continue;
   3238  1.1  christos                   }
   3239  1.1  christos 
   3240  1.1  christos                 /* Could be the end of the bracket expression.  If it's
   3241  1.1  christos                    not (i.e., when the bracket expression is `[]' so
   3242  1.1  christos                    far), the ']' character bit gets set way below.  */
   3243  1.1  christos                 if (c == ']' && p != p1 + 1)
   3244  1.1  christos                   break;
   3245  1.1  christos 
   3246  1.1  christos                 /* Look ahead to see if it's a range when the last thing
   3247  1.1  christos                    was a character class.  */
   3248  1.1  christos                 if (had_char_class && c == '-' && *p != ']')
   3249  1.1  christos                   FREE_STACK_RETURN (REG_ERANGE);
   3250  1.1  christos 
   3251  1.1  christos                 /* Look ahead to see if it's a range when the last thing
   3252  1.1  christos                    was a character: if this is a hyphen not at the
   3253  1.1  christos                    beginning or the end of a list, then it's the range
   3254  1.1  christos                    operator.  */
   3255  1.1  christos                 if (c == '-'
   3256  1.1  christos                     && !(p - 2 >= pattern && p[-2] == '[')
   3257  1.1  christos                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   3258  1.1  christos                     && *p != ']')
   3259  1.1  christos                   {
   3260  1.1  christos                     reg_errcode_t ret
   3261  1.1  christos                       = byte_compile_range (range_start, &p, pend, translate,
   3262  1.1  christos 					    syntax, b);
   3263  1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3264  1.1  christos 		    range_start = 0xffffffff;
   3265  1.1  christos                   }
   3266  1.1  christos 
   3267  1.1  christos                 else if (p[0] == '-' && p[1] != ']')
   3268  1.1  christos                   { /* This handles ranges made up of characters only.  */
   3269  1.1  christos                     reg_errcode_t ret;
   3270  1.1  christos 
   3271  1.1  christos 		    /* Move past the `-'.  */
   3272  1.1  christos                     PATFETCH (c1);
   3273  1.1  christos 
   3274  1.1  christos                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
   3275  1.1  christos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   3276  1.1  christos 		    range_start = 0xffffffff;
   3277  1.1  christos                   }
   3278  1.1  christos 
   3279  1.1  christos                 /* See if we're at the beginning of a possible character
   3280  1.1  christos                    class.  */
   3281  1.1  christos 
   3282  1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   3283  1.1  christos                   { /* Leave room for the null.  */
   3284  1.1  christos                     char str[CHAR_CLASS_MAX_LENGTH + 1];
   3285  1.1  christos 
   3286  1.1  christos                     PATFETCH (c);
   3287  1.1  christos                     c1 = 0;
   3288  1.1  christos 
   3289  1.1  christos                     /* If pattern is `[[:'.  */
   3290  1.1  christos                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3291  1.1  christos 
   3292  1.1  christos                     for (;;)
   3293  1.1  christos                       {
   3294  1.1  christos                         PATFETCH (c);
   3295  1.1  christos                         if ((c == ':' && *p == ']') || p == pend)
   3296  1.1  christos                           break;
   3297  1.1  christos 			if (c1 < CHAR_CLASS_MAX_LENGTH)
   3298  1.1  christos 			  str[c1++] = c;
   3299  1.1  christos 			else
   3300  1.1  christos 			  /* This is in any case an invalid class name.  */
   3301  1.1  christos 			  str[0] = '\0';
   3302  1.1  christos                       }
   3303  1.1  christos                     str[c1] = '\0';
   3304  1.1  christos 
   3305  1.1  christos                     /* If it isn't a word bracketed by `[:' and `:]':
   3306  1.1  christos                        undo the ending character, the letters, and leave
   3307  1.1  christos                        the leading `:' and `[' (but set bits for them).  */
   3308  1.1  christos                     if (c == ':' && *p == ']')
   3309  1.1  christos                       {
   3310  1.1  christos # if defined _LIBC || WIDE_CHAR_SUPPORT
   3311  1.1  christos                         boolean is_lower = STREQ (str, "lower");
   3312  1.1  christos                         boolean is_upper = STREQ (str, "upper");
   3313  1.1  christos 			wctype_t wt;
   3314  1.1  christos                         int ch;
   3315  1.1  christos 
   3316  1.1  christos 			wt = IS_CHAR_CLASS (str);
   3317  1.1  christos 			if (wt == 0)
   3318  1.1  christos 			  FREE_STACK_RETURN (REG_ECTYPE);
   3319  1.1  christos 
   3320  1.1  christos                         /* Throw away the ] at the end of the character
   3321  1.1  christos                            class.  */
   3322  1.1  christos                         PATFETCH (c);
   3323  1.1  christos 
   3324  1.1  christos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3325  1.1  christos 
   3326  1.1  christos                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
   3327  1.1  christos 			  {
   3328  1.1  christos 			    if (iswctype (btowc (ch), wt))
   3329  1.1  christos 			      SET_LIST_BIT (ch);
   3330  1.1  christos 
   3331  1.1  christos 			    if (translate && (is_upper || is_lower)
   3332  1.1  christos 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3333  1.1  christos 			      SET_LIST_BIT (ch);
   3334  1.1  christos 			  }
   3335  1.1  christos 
   3336  1.1  christos                         had_char_class = true;
   3337  1.1  christos # else
   3338  1.1  christos                         int ch;
   3339  1.1  christos                         boolean is_alnum = STREQ (str, "alnum");
   3340  1.1  christos                         boolean is_alpha = STREQ (str, "alpha");
   3341  1.1  christos                         boolean is_blank = STREQ (str, "blank");
   3342  1.1  christos                         boolean is_cntrl = STREQ (str, "cntrl");
   3343  1.1  christos                         boolean is_digit = STREQ (str, "digit");
   3344  1.1  christos                         boolean is_graph = STREQ (str, "graph");
   3345  1.1  christos                         boolean is_lower = STREQ (str, "lower");
   3346  1.1  christos                         boolean is_print = STREQ (str, "print");
   3347  1.1  christos                         boolean is_punct = STREQ (str, "punct");
   3348  1.1  christos                         boolean is_space = STREQ (str, "space");
   3349  1.1  christos                         boolean is_upper = STREQ (str, "upper");
   3350  1.1  christos                         boolean is_xdigit = STREQ (str, "xdigit");
   3351  1.1  christos 
   3352  1.1  christos                         if (!IS_CHAR_CLASS (str))
   3353  1.1  christos 			  FREE_STACK_RETURN (REG_ECTYPE);
   3354  1.1  christos 
   3355  1.1  christos                         /* Throw away the ] at the end of the character
   3356  1.1  christos                            class.  */
   3357  1.1  christos                         PATFETCH (c);
   3358  1.1  christos 
   3359  1.1  christos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3360  1.1  christos 
   3361  1.1  christos                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
   3362  1.1  christos                           {
   3363  1.1  christos 			    /* This was split into 3 if's to
   3364  1.1  christos 			       avoid an arbitrary limit in some compiler.  */
   3365  1.1  christos                             if (   (is_alnum  && ISALNUM (ch))
   3366  1.1  christos                                 || (is_alpha  && ISALPHA (ch))
   3367  1.1  christos                                 || (is_blank  && ISBLANK (ch))
   3368  1.1  christos                                 || (is_cntrl  && ISCNTRL (ch)))
   3369  1.1  christos 			      SET_LIST_BIT (ch);
   3370  1.1  christos 			    if (   (is_digit  && ISDIGIT (ch))
   3371  1.1  christos                                 || (is_graph  && ISGRAPH (ch))
   3372  1.1  christos                                 || (is_lower  && ISLOWER (ch))
   3373  1.1  christos                                 || (is_print  && ISPRINT (ch)))
   3374  1.1  christos 			      SET_LIST_BIT (ch);
   3375  1.1  christos 			    if (   (is_punct  && ISPUNCT (ch))
   3376  1.1  christos                                 || (is_space  && ISSPACE (ch))
   3377  1.1  christos                                 || (is_upper  && ISUPPER (ch))
   3378  1.1  christos                                 || (is_xdigit && ISXDIGIT (ch)))
   3379  1.1  christos 			      SET_LIST_BIT (ch);
   3380  1.1  christos 			    if (   translate && (is_upper || is_lower)
   3381  1.1  christos 				&& (ISUPPER (ch) || ISLOWER (ch)))
   3382  1.1  christos 			      SET_LIST_BIT (ch);
   3383  1.1  christos                           }
   3384  1.1  christos                         had_char_class = true;
   3385  1.1  christos # endif	/* libc || wctype.h */
   3386  1.1  christos                       }
   3387  1.1  christos                     else
   3388  1.1  christos                       {
   3389  1.1  christos                         c1++;
   3390  1.1  christos                         while (c1--)
   3391  1.1  christos                           PATUNFETCH;
   3392  1.1  christos                         SET_LIST_BIT ('[');
   3393  1.1  christos                         SET_LIST_BIT (':');
   3394  1.1  christos 			range_start = ':';
   3395  1.1  christos                         had_char_class = false;
   3396  1.1  christos                       }
   3397  1.1  christos                   }
   3398  1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
   3399  1.1  christos 		  {
   3400  1.1  christos 		    unsigned char str[MB_LEN_MAX + 1];
   3401  1.1  christos # ifdef _LIBC
   3402  1.1  christos 		    uint32_t nrules =
   3403  1.1  christos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3404  1.1  christos # endif
   3405  1.1  christos 
   3406  1.1  christos 		    PATFETCH (c);
   3407  1.1  christos 		    c1 = 0;
   3408  1.1  christos 
   3409  1.1  christos 		    /* If pattern is `[[='.  */
   3410  1.1  christos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3411  1.1  christos 
   3412  1.1  christos 		    for (;;)
   3413  1.1  christos 		      {
   3414  1.1  christos 			PATFETCH (c);
   3415  1.1  christos 			if ((c == '=' && *p == ']') || p == pend)
   3416  1.1  christos 			  break;
   3417  1.1  christos 			if (c1 < MB_LEN_MAX)
   3418  1.1  christos 			  str[c1++] = c;
   3419  1.1  christos 			else
   3420  1.1  christos 			  /* This is in any case an invalid class name.  */
   3421  1.1  christos 			  str[0] = '\0';
   3422  1.1  christos                       }
   3423  1.1  christos 		    str[c1] = '\0';
   3424  1.1  christos 
   3425  1.1  christos 		    if (c == '=' && *p == ']' && str[0] != '\0')
   3426  1.1  christos 		      {
   3427  1.1  christos 			/* If we have no collation data we use the default
   3428  1.1  christos 			   collation in which each character is in a class
   3429  1.1  christos 			   by itself.  It also means that ASCII is the
   3430  1.1  christos 			   character set and therefore we cannot have character
   3431  1.1  christos 			   with more than one byte in the multibyte
   3432  1.1  christos 			   representation.  */
   3433  1.1  christos # ifdef _LIBC
   3434  1.1  christos 			if (nrules == 0)
   3435  1.1  christos # endif
   3436  1.1  christos 			  {
   3437  1.1  christos 			    if (c1 != 1)
   3438  1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3439  1.1  christos 
   3440  1.1  christos 			    /* Throw away the ] at the end of the equivalence
   3441  1.1  christos 			       class.  */
   3442  1.1  christos 			    PATFETCH (c);
   3443  1.1  christos 
   3444  1.1  christos 			    /* Set the bit for the character.  */
   3445  1.1  christos 			    SET_LIST_BIT (str[0]);
   3446  1.1  christos 			  }
   3447  1.1  christos # ifdef _LIBC
   3448  1.1  christos 			else
   3449  1.1  christos 			  {
   3450  1.1  christos 			    /* Try to match the byte sequence in `str' against
   3451  1.1  christos 			       those known to the collate implementation.
   3452  1.1  christos 			       First find out whether the bytes in `str' are
   3453  1.1  christos 			       actually from exactly one character.  */
   3454  1.1  christos 			    const int32_t *table;
   3455  1.1  christos 			    const unsigned char *weights;
   3456  1.1  christos 			    const unsigned char *extra;
   3457  1.1  christos 			    const int32_t *indirect;
   3458  1.1  christos 			    int32_t idx;
   3459  1.1  christos 			    const unsigned char *cp = str;
   3460  1.1  christos 			    int ch;
   3461  1.1  christos 
   3462  1.1  christos 			    /* This #include defines a local function!  */
   3463  1.1  christos #  include <locale/weight.h>
   3464  1.1  christos 
   3465  1.1  christos 			    table = (const int32_t *)
   3466  1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3467  1.1  christos 			    weights = (const unsigned char *)
   3468  1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3469  1.1  christos 			    extra = (const unsigned char *)
   3470  1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3471  1.1  christos 			    indirect = (const int32_t *)
   3472  1.1  christos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3473  1.1  christos 
   3474  1.1  christos 			    idx = findidx (&cp);
   3475  1.1  christos 			    if (idx == 0 || cp < str + c1)
   3476  1.1  christos 			      /* This is no valid character.  */
   3477  1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3478  1.1  christos 
   3479  1.1  christos 			    /* Throw away the ] at the end of the equivalence
   3480  1.1  christos 			       class.  */
   3481  1.1  christos 			    PATFETCH (c);
   3482  1.1  christos 
   3483  1.1  christos 			    /* Now we have to go through the whole table
   3484  1.1  christos 			       and find all characters which have the same
   3485  1.1  christos 			       first level weight.
   3486  1.1  christos 
   3487  1.1  christos 			       XXX Note that this is not entirely correct.
   3488  1.1  christos 			       we would have to match multibyte sequences
   3489  1.1  christos 			       but this is not possible with the current
   3490  1.1  christos 			       implementation.  */
   3491  1.1  christos 			    for (ch = 1; ch < 256; ++ch)
   3492  1.1  christos 			      /* XXX This test would have to be changed if we
   3493  1.1  christos 				 would allow matching multibyte sequences.  */
   3494  1.1  christos 			      if (table[ch] > 0)
   3495  1.1  christos 				{
   3496  1.1  christos 				  int32_t idx2 = table[ch];
   3497  1.1  christos 				  size_t len = weights[idx2];
   3498  1.1  christos 
   3499  1.1  christos 				  /* Test whether the lengths match.  */
   3500  1.1  christos 				  if (weights[idx] == len)
   3501  1.1  christos 				    {
   3502  1.1  christos 				      /* They do.  Now compare the bytes of
   3503  1.1  christos 					 the weight.  */
   3504  1.1  christos 				      size_t cnt = 0;
   3505  1.1  christos 
   3506  1.1  christos 				      while (cnt < len
   3507  1.1  christos 					     && (weights[idx + 1 + cnt]
   3508  1.1  christos 						 == weights[idx2 + 1 + cnt]))
   3509  1.1  christos 					++cnt;
   3510  1.1  christos 
   3511  1.1  christos 				      if (cnt == len)
   3512  1.1  christos 					/* They match.  Mark the character as
   3513  1.1  christos 					   acceptable.  */
   3514  1.1  christos 					SET_LIST_BIT (ch);
   3515  1.1  christos 				    }
   3516  1.1  christos 				}
   3517  1.1  christos 			  }
   3518  1.1  christos # endif
   3519  1.1  christos 			had_char_class = true;
   3520  1.1  christos 		      }
   3521  1.1  christos                     else
   3522  1.1  christos                       {
   3523  1.1  christos                         c1++;
   3524  1.1  christos                         while (c1--)
   3525  1.1  christos                           PATUNFETCH;
   3526  1.1  christos                         SET_LIST_BIT ('[');
   3527  1.1  christos                         SET_LIST_BIT ('=');
   3528  1.1  christos 			range_start = '=';
   3529  1.1  christos                         had_char_class = false;
   3530  1.1  christos                       }
   3531  1.1  christos 		  }
   3532  1.1  christos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
   3533  1.1  christos 		  {
   3534  1.1  christos 		    unsigned char str[128];	/* Should be large enough.  */
   3535  1.1  christos # ifdef _LIBC
   3536  1.1  christos 		    uint32_t nrules =
   3537  1.1  christos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3538  1.1  christos # endif
   3539  1.1  christos 
   3540  1.1  christos 		    PATFETCH (c);
   3541  1.1  christos 		    c1 = 0;
   3542  1.1  christos 
   3543  1.1  christos 		    /* If pattern is `[[.'.  */
   3544  1.1  christos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   3545  1.1  christos 
   3546  1.1  christos 		    for (;;)
   3547  1.1  christos 		      {
   3548  1.1  christos 			PATFETCH (c);
   3549  1.1  christos 			if ((c == '.' && *p == ']') || p == pend)
   3550  1.1  christos 			  break;
   3551  1.1  christos 			if (c1 < sizeof (str))
   3552  1.1  christos 			  str[c1++] = c;
   3553  1.1  christos 			else
   3554  1.1  christos 			  /* This is in any case an invalid class name.  */
   3555  1.1  christos 			  str[0] = '\0';
   3556  1.1  christos                       }
   3557  1.1  christos 		    str[c1] = '\0';
   3558  1.1  christos 
   3559  1.1  christos 		    if (c == '.' && *p == ']' && str[0] != '\0')
   3560  1.1  christos 		      {
   3561  1.1  christos 			/* If we have no collation data we use the default
   3562  1.1  christos 			   collation in which each character is the name
   3563  1.1  christos 			   for its own class which contains only the one
   3564  1.1  christos 			   character.  It also means that ASCII is the
   3565  1.1  christos 			   character set and therefore we cannot have characters
   3566  1.1  christos 			   with more than one byte in the multibyte
   3567  1.1  christos 			   representation.  */
   3568  1.1  christos # ifdef _LIBC
   3569  1.1  christos 			if (nrules == 0)
   3570  1.1  christos # endif
   3571  1.1  christos 			  {
   3572  1.1  christos 			    if (c1 != 1)
   3573  1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3574  1.1  christos 
   3575  1.1  christos 			    /* Throw away the ] at the end of the collating
   3576  1.1  christos 			       symbol.  */
   3577  1.1  christos 			    PATFETCH (c);
   3578  1.1  christos 
   3579  1.1  christos 			    /* Set the bit for the character.  */
   3580  1.1  christos 			    SET_LIST_BIT (str[0]);
   3581  1.1  christos 			    range_start = ((const unsigned char *) str)[0];
   3582  1.1  christos 			  }
   3583  1.1  christos # ifdef _LIBC
   3584  1.1  christos 			else
   3585  1.1  christos 			  {
   3586  1.1  christos 			    /* Try to match the byte sequence in `str' against
   3587  1.1  christos 			       those known to the collate implementation.
   3588  1.1  christos 			       First find out whether the bytes in `str' are
   3589  1.1  christos 			       actually from exactly one character.  */
   3590  1.1  christos 			    int32_t table_size;
   3591  1.1  christos 			    const int32_t *symb_table;
   3592  1.1  christos 			    const unsigned char *extra;
   3593  1.1  christos 			    int32_t idx;
   3594  1.1  christos 			    int32_t elem;
   3595  1.1  christos 			    int32_t second;
   3596  1.1  christos 			    int32_t hash;
   3597  1.1  christos 
   3598  1.1  christos 			    table_size =
   3599  1.1  christos 			      _NL_CURRENT_WORD (LC_COLLATE,
   3600  1.1  christos 						_NL_COLLATE_SYMB_HASH_SIZEMB);
   3601  1.1  christos 			    symb_table = (const int32_t *)
   3602  1.1  christos 			      _NL_CURRENT (LC_COLLATE,
   3603  1.1  christos 					   _NL_COLLATE_SYMB_TABLEMB);
   3604  1.1  christos 			    extra = (const unsigned char *)
   3605  1.1  christos 			      _NL_CURRENT (LC_COLLATE,
   3606  1.1  christos 					   _NL_COLLATE_SYMB_EXTRAMB);
   3607  1.1  christos 
   3608  1.1  christos 			    /* Locate the character in the hashing table.  */
   3609  1.1  christos 			    hash = elem_hash (str, c1);
   3610  1.1  christos 
   3611  1.1  christos 			    idx = 0;
   3612  1.1  christos 			    elem = hash % table_size;
   3613  1.1  christos 			    second = hash % (table_size - 2);
   3614  1.1  christos 			    while (symb_table[2 * elem] != 0)
   3615  1.1  christos 			      {
   3616  1.1  christos 				/* First compare the hashing value.  */
   3617  1.1  christos 				if (symb_table[2 * elem] == hash
   3618  1.1  christos 				    && c1 == extra[symb_table[2 * elem + 1]]
   3619  1.1  christos 				    && memcmp (str,
   3620  1.1  christos 					       &extra[symb_table[2 * elem + 1]
   3621  1.1  christos 						     + 1],
   3622  1.1  christos 					       c1) == 0)
   3623  1.1  christos 				  {
   3624  1.1  christos 				    /* Yep, this is the entry.  */
   3625  1.1  christos 				    idx = symb_table[2 * elem + 1];
   3626  1.1  christos 				    idx += 1 + extra[idx];
   3627  1.1  christos 				    break;
   3628  1.1  christos 				  }
   3629  1.1  christos 
   3630  1.1  christos 				/* Next entry.  */
   3631  1.1  christos 				elem += second;
   3632  1.1  christos 			      }
   3633  1.1  christos 
   3634  1.1  christos 			    if (symb_table[2 * elem] == 0)
   3635  1.1  christos 			      /* This is no valid character.  */
   3636  1.1  christos 			      FREE_STACK_RETURN (REG_ECOLLATE);
   3637  1.1  christos 
   3638  1.1  christos 			    /* Throw away the ] at the end of the collating
   3639  1.1  christos 			       symbol.  */
   3640  1.1  christos 			    PATFETCH (c);
   3641  1.1  christos 
   3642  1.1  christos 			    /* Now add the multibyte character(s) we found
   3643  1.1  christos 			       to the accept list.
   3644  1.1  christos 
   3645  1.1  christos 			       XXX Note that this is not entirely correct.
   3646  1.1  christos 			       we would have to match multibyte sequences
   3647  1.1  christos 			       but this is not possible with the current
   3648  1.1  christos 			       implementation.  Also, we have to match
   3649  1.1  christos 			       collating symbols, which expand to more than
   3650  1.1  christos 			       one byte, as a whole and not allow the
   3651  1.1  christos 			       individual bytes.  */
   3652  1.1  christos 			    c1 = extra[idx++];
   3653  1.1  christos 			    if (c1 == 1)
   3654  1.1  christos 			      range_start = extra[idx];
   3655  1.1  christos 			    while (c1-- > 0)
   3656  1.1  christos 			      {
   3657  1.1  christos 				SET_LIST_BIT (extra[idx]);
   3658  1.1  christos 				++idx;
   3659  1.1  christos 			      }
   3660  1.1  christos 			  }
   3661  1.1  christos # endif
   3662  1.1  christos 			had_char_class = false;
   3663  1.1  christos 		      }
   3664  1.1  christos                     else
   3665  1.1  christos                       {
   3666  1.1  christos                         c1++;
   3667  1.1  christos                         while (c1--)
   3668  1.1  christos                           PATUNFETCH;
   3669  1.1  christos                         SET_LIST_BIT ('[');
   3670  1.1  christos                         SET_LIST_BIT ('.');
   3671  1.1  christos 			range_start = '.';
   3672  1.1  christos                         had_char_class = false;
   3673  1.1  christos                       }
   3674  1.1  christos 		  }
   3675  1.1  christos                 else
   3676  1.1  christos                   {
   3677  1.1  christos                     had_char_class = false;
   3678  1.1  christos                     SET_LIST_BIT (c);
   3679  1.1  christos 		    range_start = c;
   3680  1.1  christos                   }
   3681  1.1  christos               }
   3682  1.1  christos 
   3683  1.1  christos             /* Discard any (non)matching list bytes that are all 0 at the
   3684  1.1  christos                end of the map.  Decrease the map-length byte too.  */
   3685  1.1  christos             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
   3686  1.1  christos               b[-1]--;
   3687  1.1  christos             b += b[-1];
   3688  1.1  christos #endif /* WCHAR */
   3689  1.1  christos           }
   3690  1.1  christos           break;
   3691  1.1  christos 
   3692  1.1  christos 
   3693  1.1  christos 	case '(':
   3694  1.1  christos           if (syntax & RE_NO_BK_PARENS)
   3695  1.1  christos             goto handle_open;
   3696  1.1  christos           else
   3697  1.1  christos             goto normal_char;
   3698  1.1  christos 
   3699  1.1  christos 
   3700  1.1  christos         case ')':
   3701  1.1  christos           if (syntax & RE_NO_BK_PARENS)
   3702  1.1  christos             goto handle_close;
   3703  1.1  christos           else
   3704  1.1  christos             goto normal_char;
   3705  1.1  christos 
   3706  1.1  christos 
   3707  1.1  christos         case '\n':
   3708  1.1  christos           if (syntax & RE_NEWLINE_ALT)
   3709  1.1  christos             goto handle_alt;
   3710  1.1  christos           else
   3711  1.1  christos             goto normal_char;
   3712  1.1  christos 
   3713  1.1  christos 
   3714  1.1  christos 	case '|':
   3715  1.1  christos           if (syntax & RE_NO_BK_VBAR)
   3716  1.1  christos             goto handle_alt;
   3717  1.1  christos           else
   3718  1.1  christos             goto normal_char;
   3719  1.1  christos 
   3720  1.1  christos 
   3721  1.1  christos         case '{':
   3722  1.1  christos            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
   3723  1.1  christos              goto handle_interval;
   3724  1.1  christos            else
   3725  1.1  christos              goto normal_char;
   3726  1.1  christos 
   3727  1.1  christos 
   3728  1.1  christos         case '\\':
   3729  1.1  christos           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   3730  1.1  christos 
   3731  1.1  christos           /* Do not translate the character after the \, so that we can
   3732  1.1  christos              distinguish, e.g., \B from \b, even if we normally would
   3733  1.1  christos              translate, e.g., B to b.  */
   3734  1.1  christos           PATFETCH_RAW (c);
   3735  1.1  christos 
   3736  1.1  christos           switch (c)
   3737  1.1  christos             {
   3738  1.1  christos             case '(':
   3739  1.1  christos               if (syntax & RE_NO_BK_PARENS)
   3740  1.1  christos                 goto normal_backslash;
   3741  1.1  christos 
   3742  1.1  christos             handle_open:
   3743  1.1  christos               bufp->re_nsub++;
   3744  1.1  christos               regnum++;
   3745  1.1  christos 
   3746  1.1  christos               if (COMPILE_STACK_FULL)
   3747  1.1  christos                 {
   3748  1.1  christos                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
   3749  1.1  christos                             compile_stack_elt_t);
   3750  1.1  christos                   if (compile_stack.stack == NULL) return REG_ESPACE;
   3751  1.1  christos 
   3752  1.1  christos                   compile_stack.size <<= 1;
   3753  1.1  christos                 }
   3754  1.1  christos 
   3755  1.1  christos               /* These are the values to restore when we hit end of this
   3756  1.1  christos                  group.  They are all relative offsets, so that if the
   3757  1.1  christos                  whole pattern moves because of realloc, they will still
   3758  1.1  christos                  be valid.  */
   3759  1.1  christos               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
   3760  1.1  christos               COMPILE_STACK_TOP.fixup_alt_jump
   3761  1.1  christos                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
   3762  1.1  christos               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
   3763  1.1  christos               COMPILE_STACK_TOP.regnum = regnum;
   3764  1.1  christos 
   3765  1.1  christos               /* We will eventually replace the 0 with the number of
   3766  1.1  christos                  groups inner to this one.  But do not push a
   3767  1.1  christos                  start_memory for groups beyond the last one we can
   3768  1.1  christos                  represent in the compiled pattern.  */
   3769  1.1  christos               if (regnum <= MAX_REGNUM)
   3770  1.1  christos                 {
   3771  1.1  christos                   COMPILE_STACK_TOP.inner_group_offset = b
   3772  1.1  christos 		    - COMPILED_BUFFER_VAR + 2;
   3773  1.1  christos                   BUF_PUSH_3 (start_memory, regnum, 0);
   3774  1.1  christos                 }
   3775  1.1  christos 
   3776  1.1  christos               compile_stack.avail++;
   3777  1.1  christos 
   3778  1.1  christos               fixup_alt_jump = 0;
   3779  1.1  christos               laststart = 0;
   3780  1.1  christos               begalt = b;
   3781  1.1  christos 	      /* If we've reached MAX_REGNUM groups, then this open
   3782  1.1  christos 		 won't actually generate any code, so we'll have to
   3783  1.1  christos 		 clear pending_exact explicitly.  */
   3784  1.1  christos 	      pending_exact = 0;
   3785  1.1  christos               break;
   3786  1.1  christos 
   3787  1.1  christos 
   3788  1.1  christos             case ')':
   3789  1.1  christos               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
   3790  1.1  christos 
   3791  1.1  christos               if (COMPILE_STACK_EMPTY)
   3792  1.1  christos 		{
   3793  1.1  christos 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3794  1.1  christos 		    goto normal_backslash;
   3795  1.1  christos 		  else
   3796  1.1  christos 		    FREE_STACK_RETURN (REG_ERPAREN);
   3797  1.1  christos 		}
   3798  1.1  christos 
   3799  1.1  christos             handle_close:
   3800  1.1  christos               if (fixup_alt_jump)
   3801  1.1  christos                 { /* Push a dummy failure point at the end of the
   3802  1.1  christos                      alternative for a possible future
   3803  1.1  christos                      `pop_failure_jump' to pop.  See comments at
   3804  1.1  christos                      `push_dummy_failure' in `re_match_2'.  */
   3805  1.1  christos                   BUF_PUSH (push_dummy_failure);
   3806  1.1  christos 
   3807  1.1  christos                   /* We allocated space for this jump when we assigned
   3808  1.1  christos                      to `fixup_alt_jump', in the `handle_alt' case below.  */
   3809  1.1  christos                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
   3810  1.1  christos                 }
   3811  1.1  christos 
   3812  1.1  christos               /* See similar code for backslashed right paren above.  */
   3813  1.1  christos               if (COMPILE_STACK_EMPTY)
   3814  1.1  christos 		{
   3815  1.1  christos 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   3816  1.1  christos 		    goto normal_char;
   3817  1.1  christos 		  else
   3818  1.1  christos 		    FREE_STACK_RETURN (REG_ERPAREN);
   3819  1.1  christos 		}
   3820  1.1  christos 
   3821  1.1  christos               /* Since we just checked for an empty stack above, this
   3822  1.1  christos                  ``can't happen''.  */
   3823  1.1  christos               assert (compile_stack.avail != 0);
   3824  1.1  christos               {
   3825  1.1  christos                 /* We don't just want to restore into `regnum', because
   3826  1.1  christos                    later groups should continue to be numbered higher,
   3827  1.1  christos                    as in `(ab)c(de)' -- the second group is #2.  */
   3828  1.1  christos                 regnum_t this_group_regnum;
   3829  1.1  christos 
   3830  1.1  christos                 compile_stack.avail--;
   3831  1.1  christos                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
   3832  1.1  christos                 fixup_alt_jump
   3833  1.1  christos                   = COMPILE_STACK_TOP.fixup_alt_jump
   3834  1.1  christos                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
   3835  1.1  christos                     : 0;
   3836  1.1  christos                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
   3837  1.1  christos                 this_group_regnum = COMPILE_STACK_TOP.regnum;
   3838  1.1  christos 		/* If we've reached MAX_REGNUM groups, then this close
   3839  1.1  christos 		   won't actually generate any code, so we'll have to
   3840  1.1  christos 		   clear pending_exact explicitly.  */
   3841  1.1  christos 		pending_exact = 0;
   3842  1.1  christos 
   3843  1.1  christos                 /* We're at the end of the group, so now we know how many
   3844  1.1  christos                    groups were inside this one.  */
   3845  1.1  christos                 if (this_group_regnum <= MAX_REGNUM)
   3846  1.1  christos                   {
   3847  1.1  christos 		    UCHAR_T *inner_group_loc
   3848  1.1  christos                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
   3849  1.1  christos 
   3850  1.1  christos                     *inner_group_loc = regnum - this_group_regnum;
   3851  1.1  christos                     BUF_PUSH_3 (stop_memory, this_group_regnum,
   3852  1.1  christos                                 regnum - this_group_regnum);
   3853  1.1  christos                   }
   3854  1.1  christos               }
   3855  1.1  christos               break;
   3856  1.1  christos 
   3857  1.1  christos 
   3858  1.1  christos             case '|':					/* `\|'.  */
   3859  1.1  christos               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
   3860  1.1  christos                 goto normal_backslash;
   3861  1.1  christos             handle_alt:
   3862  1.1  christos               if (syntax & RE_LIMITED_OPS)
   3863  1.1  christos                 goto normal_char;
   3864  1.1  christos 
   3865  1.1  christos               /* Insert before the previous alternative a jump which
   3866  1.1  christos                  jumps to this alternative if the former fails.  */
   3867  1.1  christos               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3868  1.1  christos               INSERT_JUMP (on_failure_jump, begalt,
   3869  1.1  christos 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
   3870  1.1  christos               pending_exact = 0;
   3871  1.1  christos               b += 1 + OFFSET_ADDRESS_SIZE;
   3872  1.1  christos 
   3873  1.1  christos               /* The alternative before this one has a jump after it
   3874  1.1  christos                  which gets executed if it gets matched.  Adjust that
   3875  1.1  christos                  jump so it will jump to this alternative's analogous
   3876  1.1  christos                  jump (put in below, which in turn will jump to the next
   3877  1.1  christos                  (if any) alternative's such jump, etc.).  The last such
   3878  1.1  christos                  jump jumps to the correct final destination.  A picture:
   3879  1.1  christos                           _____ _____
   3880  1.1  christos                           |   | |   |
   3881  1.1  christos                           |   v |   v
   3882  1.1  christos                          a | b   | c
   3883  1.1  christos 
   3884  1.1  christos                  If we are at `b', then fixup_alt_jump right now points to a
   3885  1.1  christos                  three-byte space after `a'.  We'll put in the jump, set
   3886  1.1  christos                  fixup_alt_jump to right after `b', and leave behind three
   3887  1.1  christos                  bytes which we'll fill in when we get to after `c'.  */
   3888  1.1  christos 
   3889  1.1  christos               if (fixup_alt_jump)
   3890  1.1  christos                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   3891  1.1  christos 
   3892  1.1  christos               /* Mark and leave space for a jump after this alternative,
   3893  1.1  christos                  to be filled in later either by next alternative or
   3894  1.1  christos                  when we know we're at the end of a series of alternatives.  */
   3895  1.1  christos               fixup_alt_jump = b;
   3896  1.1  christos               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3897  1.1  christos               b += 1 + OFFSET_ADDRESS_SIZE;
   3898  1.1  christos 
   3899  1.1  christos               laststart = 0;
   3900  1.1  christos               begalt = b;
   3901  1.1  christos               break;
   3902  1.1  christos 
   3903  1.1  christos 
   3904  1.1  christos             case '{':
   3905  1.1  christos               /* If \{ is a literal.  */
   3906  1.1  christos               if (!(syntax & RE_INTERVALS)
   3907  1.1  christos                      /* If we're at `\{' and it's not the open-interval
   3908  1.1  christos                         operator.  */
   3909  1.1  christos 		  || (syntax & RE_NO_BK_BRACES))
   3910  1.1  christos                 goto normal_backslash;
   3911  1.1  christos 
   3912  1.1  christos             handle_interval:
   3913  1.1  christos               {
   3914  1.1  christos                 /* If got here, then the syntax allows intervals.  */
   3915  1.1  christos 
   3916  1.1  christos                 /* At least (most) this many matches must be made.  */
   3917  1.1  christos                 int lower_bound = -1, upper_bound = -1;
   3918  1.1  christos 
   3919  1.1  christos 		/* Place in the uncompiled pattern (i.e., just after
   3920  1.1  christos 		   the '{') to go back to if the interval is invalid.  */
   3921  1.1  christos 		const CHAR_T *beg_interval = p;
   3922  1.1  christos 
   3923  1.1  christos                 if (p == pend)
   3924  1.1  christos 		  goto invalid_interval;
   3925  1.1  christos 
   3926  1.1  christos                 GET_UNSIGNED_NUMBER (lower_bound);
   3927  1.1  christos 
   3928  1.1  christos                 if (c == ',')
   3929  1.1  christos                   {
   3930  1.1  christos                     GET_UNSIGNED_NUMBER (upper_bound);
   3931  1.1  christos 		    if (upper_bound < 0)
   3932  1.1  christos 		      upper_bound = RE_DUP_MAX;
   3933  1.1  christos                   }
   3934  1.1  christos                 else
   3935  1.1  christos                   /* Interval such as `{1}' => match exactly once. */
   3936  1.1  christos                   upper_bound = lower_bound;
   3937  1.1  christos 
   3938  1.1  christos                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
   3939  1.1  christos 		  goto invalid_interval;
   3940  1.1  christos 
   3941  1.1  christos                 if (!(syntax & RE_NO_BK_BRACES))
   3942  1.1  christos                   {
   3943  1.1  christos 		    if (c != '\\' || p == pend)
   3944  1.1  christos 		      goto invalid_interval;
   3945  1.1  christos                     PATFETCH (c);
   3946  1.1  christos                   }
   3947  1.1  christos 
   3948  1.1  christos                 if (c != '}')
   3949  1.1  christos 		  goto invalid_interval;
   3950  1.1  christos 
   3951  1.1  christos                 /* If it's invalid to have no preceding re.  */
   3952  1.1  christos                 if (!laststart)
   3953  1.1  christos                   {
   3954  1.1  christos 		    if (syntax & RE_CONTEXT_INVALID_OPS
   3955  1.1  christos 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
   3956  1.1  christos                       FREE_STACK_RETURN (REG_BADRPT);
   3957  1.1  christos                     else if (syntax & RE_CONTEXT_INDEP_OPS)
   3958  1.1  christos                       laststart = b;
   3959  1.1  christos                     else
   3960  1.1  christos                       goto unfetch_interval;
   3961  1.1  christos                   }
   3962  1.1  christos 
   3963  1.1  christos                 /* We just parsed a valid interval.  */
   3964  1.1  christos 
   3965  1.1  christos                 if (RE_DUP_MAX < upper_bound)
   3966  1.1  christos 		  FREE_STACK_RETURN (REG_BADBR);
   3967  1.1  christos 
   3968  1.1  christos                 /* If the upper bound is zero, don't want to succeed at
   3969  1.1  christos                    all; jump from `laststart' to `b + 3', which will be
   3970  1.1  christos 		   the end of the buffer after we insert the jump.  */
   3971  1.1  christos 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
   3972  1.1  christos 		   instead of 'b + 3'.  */
   3973  1.1  christos                  if (upper_bound == 0)
   3974  1.1  christos                    {
   3975  1.1  christos                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
   3976  1.1  christos                      INSERT_JUMP (jump, laststart, b + 1
   3977  1.1  christos 				  + OFFSET_ADDRESS_SIZE);
   3978  1.1  christos                      b += 1 + OFFSET_ADDRESS_SIZE;
   3979  1.1  christos                    }
   3980  1.1  christos 
   3981  1.1  christos                  /* Otherwise, we have a nontrivial interval.  When
   3982  1.1  christos                     we're all done, the pattern will look like:
   3983  1.1  christos                       set_number_at <jump count> <upper bound>
   3984  1.1  christos                       set_number_at <succeed_n count> <lower bound>
   3985  1.1  christos                       succeed_n <after jump addr> <succeed_n count>
   3986  1.1  christos                       <body of loop>
   3987  1.1  christos                       jump_n <succeed_n addr> <jump count>
   3988  1.1  christos                     (The upper bound and `jump_n' are omitted if
   3989  1.1  christos                     `upper_bound' is 1, though.)  */
   3990  1.1  christos                  else
   3991  1.1  christos                    { /* If the upper bound is > 1, we need to insert
   3992  1.1  christos                         more at the end of the loop.  */
   3993  1.1  christos                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
   3994  1.1  christos 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
   3995  1.1  christos 
   3996  1.1  christos                      GET_BUFFER_SPACE (nbytes);
   3997  1.1  christos 
   3998  1.1  christos                      /* Initialize lower bound of the `succeed_n', even
   3999  1.1  christos                         though it will be set during matching by its
   4000  1.1  christos                         attendant `set_number_at' (inserted next),
   4001  1.1  christos                         because `re_compile_fastmap' needs to know.
   4002  1.1  christos                         Jump to the `jump_n' we might insert below.  */
   4003  1.1  christos                      INSERT_JUMP2 (succeed_n, laststart,
   4004  1.1  christos                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
   4005  1.1  christos 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
   4006  1.1  christos 				   , lower_bound);
   4007  1.1  christos                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4008  1.1  christos 
   4009  1.1  christos                      /* Code to initialize the lower bound.  Insert
   4010  1.1  christos                         before the `succeed_n'.  The `5' is the last two
   4011  1.1  christos                         bytes of this `set_number_at', plus 3 bytes of
   4012  1.1  christos                         the following `succeed_n'.  */
   4013  1.1  christos 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
   4014  1.1  christos 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
   4015  1.1  christos 			of the following `succeed_n'.  */
   4016  1.1  christos                      PREFIX(insert_op2) (set_number_at, laststart, 1
   4017  1.1  christos 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
   4018  1.1  christos                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4019  1.1  christos 
   4020  1.1  christos                      if (upper_bound > 1)
   4021  1.1  christos                        { /* More than one repetition is allowed, so
   4022  1.1  christos                             append a backward jump to the `succeed_n'
   4023  1.1  christos                             that starts this interval.
   4024  1.1  christos 
   4025  1.1  christos                             When we've reached this during matching,
   4026  1.1  christos                             we'll have matched the interval once, so
   4027  1.1  christos                             jump back only `upper_bound - 1' times.  */
   4028  1.1  christos                          STORE_JUMP2 (jump_n, b, laststart
   4029  1.1  christos 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
   4030  1.1  christos                                       upper_bound - 1);
   4031  1.1  christos                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4032  1.1  christos 
   4033  1.1  christos                          /* The location we want to set is the second
   4034  1.1  christos                             parameter of the `jump_n'; that is `b-2' as
   4035  1.1  christos                             an absolute address.  `laststart' will be
   4036  1.1  christos                             the `set_number_at' we're about to insert;
   4037  1.1  christos                             `laststart+3' the number to set, the source
   4038  1.1  christos                             for the relative address.  But we are
   4039  1.1  christos                             inserting into the middle of the pattern --
   4040  1.1  christos                             so everything is getting moved up by 5.
   4041  1.1  christos                             Conclusion: (b - 2) - (laststart + 3) + 5,
   4042  1.1  christos                             i.e., b - laststart.
   4043  1.1  christos 
   4044  1.1  christos                             We insert this at the beginning of the loop
   4045  1.1  christos                             so that if we fail during matching, we'll
   4046  1.1  christos                             reinitialize the bounds.  */
   4047  1.1  christos                          PREFIX(insert_op2) (set_number_at, laststart,
   4048  1.1  christos 					     b - laststart,
   4049  1.1  christos 					     upper_bound - 1, b);
   4050  1.1  christos                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
   4051  1.1  christos                        }
   4052  1.1  christos                    }
   4053  1.1  christos                 pending_exact = 0;
   4054  1.1  christos 		break;
   4055  1.1  christos 
   4056  1.1  christos 	      invalid_interval:
   4057  1.1  christos 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
   4058  1.1  christos 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
   4059  1.1  christos 	      unfetch_interval:
   4060  1.1  christos 		/* Match the characters as literals.  */
   4061  1.1  christos 		p = beg_interval;
   4062  1.1  christos 		c = '{';
   4063  1.1  christos 		if (syntax & RE_NO_BK_BRACES)
   4064  1.1  christos 		  goto normal_char;
   4065  1.1  christos 		else
   4066  1.1  christos 		  goto normal_backslash;
   4067  1.1  christos 	      }
   4068  1.1  christos 
   4069  1.1  christos #ifdef emacs
   4070  1.1  christos             /* There is no way to specify the before_dot and after_dot
   4071  1.1  christos                operators.  rms says this is ok.  --karl  */
   4072  1.1  christos             case '=':
   4073  1.1  christos               BUF_PUSH (at_dot);
   4074  1.1  christos               break;
   4075  1.1  christos 
   4076  1.1  christos             case 's':
   4077  1.1  christos               laststart = b;
   4078  1.1  christos               PATFETCH (c);
   4079  1.1  christos               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
   4080  1.1  christos               break;
   4081  1.1  christos 
   4082  1.1  christos             case 'S':
   4083  1.1  christos               laststart = b;
   4084  1.1  christos               PATFETCH (c);
   4085  1.1  christos               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
   4086  1.1  christos               break;
   4087  1.1  christos #endif /* emacs */
   4088  1.1  christos 
   4089  1.1  christos 
   4090  1.1  christos             case 'w':
   4091  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4092  1.1  christos 		goto normal_char;
   4093  1.1  christos               laststart = b;
   4094  1.1  christos               BUF_PUSH (wordchar);
   4095  1.1  christos               break;
   4096  1.1  christos 
   4097  1.1  christos 
   4098  1.1  christos             case 'W':
   4099  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4100  1.1  christos 		goto normal_char;
   4101  1.1  christos               laststart = b;
   4102  1.1  christos               BUF_PUSH (notwordchar);
   4103  1.1  christos               break;
   4104  1.1  christos 
   4105  1.1  christos 
   4106  1.1  christos             case '<':
   4107  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4108  1.1  christos 		goto normal_char;
   4109  1.1  christos               BUF_PUSH (wordbeg);
   4110  1.1  christos               break;
   4111  1.1  christos 
   4112  1.1  christos             case '>':
   4113  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4114  1.1  christos 		goto normal_char;
   4115  1.1  christos               BUF_PUSH (wordend);
   4116  1.1  christos               break;
   4117  1.1  christos 
   4118  1.1  christos             case 'b':
   4119  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4120  1.1  christos 		goto normal_char;
   4121  1.1  christos               BUF_PUSH (wordbound);
   4122  1.1  christos               break;
   4123  1.1  christos 
   4124  1.1  christos             case 'B':
   4125  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4126  1.1  christos 		goto normal_char;
   4127  1.1  christos               BUF_PUSH (notwordbound);
   4128  1.1  christos               break;
   4129  1.1  christos 
   4130  1.1  christos             case '`':
   4131  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4132  1.1  christos 		goto normal_char;
   4133  1.1  christos               BUF_PUSH (begbuf);
   4134  1.1  christos               break;
   4135  1.1  christos 
   4136  1.1  christos             case '\'':
   4137  1.1  christos 	      if (syntax & RE_NO_GNU_OPS)
   4138  1.1  christos 		goto normal_char;
   4139  1.1  christos               BUF_PUSH (endbuf);
   4140  1.1  christos               break;
   4141  1.1  christos 
   4142  1.1  christos             case '1': case '2': case '3': case '4': case '5':
   4143  1.1  christos             case '6': case '7': case '8': case '9':
   4144  1.1  christos               if (syntax & RE_NO_BK_REFS)
   4145  1.1  christos                 goto normal_char;
   4146  1.1  christos 
   4147  1.1  christos               c1 = c - '0';
   4148  1.1  christos 
   4149  1.1  christos               if (c1 > regnum)
   4150  1.1  christos                 FREE_STACK_RETURN (REG_ESUBREG);
   4151  1.1  christos 
   4152  1.1  christos               /* Can't back reference to a subexpression if inside of it.  */
   4153  1.1  christos               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
   4154  1.1  christos                 goto normal_char;
   4155  1.1  christos 
   4156  1.1  christos               laststart = b;
   4157  1.1  christos               BUF_PUSH_2 (duplicate, c1);
   4158  1.1  christos               break;
   4159  1.1  christos 
   4160  1.1  christos 
   4161  1.1  christos             case '+':
   4162  1.1  christos             case '?':
   4163  1.1  christos               if (syntax & RE_BK_PLUS_QM)
   4164  1.1  christos                 goto handle_plus;
   4165  1.1  christos               else
   4166  1.1  christos                 goto normal_backslash;
   4167  1.1  christos 
   4168  1.1  christos             default:
   4169  1.1  christos             normal_backslash:
   4170  1.1  christos               /* You might think it would be useful for \ to mean
   4171  1.1  christos                  not to translate; but if we don't translate it
   4172  1.1  christos                  it will never match anything.  */
   4173  1.1  christos               c = TRANSLATE (c);
   4174  1.1  christos               goto normal_char;
   4175  1.1  christos             }
   4176  1.1  christos           break;
   4177  1.1  christos 
   4178  1.1  christos 
   4179  1.1  christos 	default:
   4180  1.1  christos         /* Expects the character in `c'.  */
   4181  1.1  christos 	normal_char:
   4182  1.1  christos 	      /* If no exactn currently being built.  */
   4183  1.1  christos           if (!pending_exact
   4184  1.1  christos #ifdef WCHAR
   4185  1.1  christos 	      /* If last exactn handle binary(or character) and
   4186  1.1  christos 		 new exactn handle character(or binary).  */
   4187  1.1  christos 	      || is_exactn_bin != is_binary[p - 1 - pattern]
   4188  1.1  christos #endif /* WCHAR */
   4189  1.1  christos 
   4190  1.1  christos               /* If last exactn not at current position.  */
   4191  1.1  christos               || pending_exact + *pending_exact + 1 != b
   4192  1.1  christos 
   4193  1.1  christos               /* We have only one byte following the exactn for the count.  */
   4194  1.1  christos 	      || *pending_exact == (1 << BYTEWIDTH) - 1
   4195  1.1  christos 
   4196  1.1  christos               /* If followed by a repetition operator.  */
   4197  1.1  christos               || *p == '*' || *p == '^'
   4198  1.1  christos 	      || ((syntax & RE_BK_PLUS_QM)
   4199  1.1  christos 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
   4200  1.1  christos 		  : (*p == '+' || *p == '?'))
   4201  1.1  christos 	      || ((syntax & RE_INTERVALS)
   4202  1.1  christos                   && ((syntax & RE_NO_BK_BRACES)
   4203  1.1  christos 		      ? *p == '{'
   4204  1.1  christos                       : (p[0] == '\\' && p[1] == '{'))))
   4205  1.1  christos 	    {
   4206  1.1  christos 	      /* Start building a new exactn.  */
   4207  1.1  christos 
   4208  1.1  christos               laststart = b;
   4209  1.1  christos 
   4210  1.1  christos #ifdef WCHAR
   4211  1.1  christos 	      /* Is this exactn binary data or character? */
   4212  1.1  christos 	      is_exactn_bin = is_binary[p - 1 - pattern];
   4213  1.1  christos 	      if (is_exactn_bin)
   4214  1.1  christos 		  BUF_PUSH_2 (exactn_bin, 0);
   4215  1.1  christos 	      else
   4216  1.1  christos 		  BUF_PUSH_2 (exactn, 0);
   4217  1.1  christos #else
   4218  1.1  christos 	      BUF_PUSH_2 (exactn, 0);
   4219  1.1  christos #endif /* WCHAR */
   4220  1.1  christos 	      pending_exact = b - 1;
   4221  1.1  christos             }
   4222  1.1  christos 
   4223  1.1  christos 	  BUF_PUSH (c);
   4224  1.1  christos           (*pending_exact)++;
   4225  1.1  christos 	  break;
   4226  1.1  christos         } /* switch (c) */
   4227  1.1  christos     } /* while p != pend */
   4228  1.1  christos 
   4229  1.1  christos 
   4230  1.1  christos   /* Through the pattern now.  */
   4231  1.1  christos 
   4232  1.1  christos   if (fixup_alt_jump)
   4233  1.1  christos     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   4234  1.1  christos 
   4235  1.1  christos   if (!COMPILE_STACK_EMPTY)
   4236  1.1  christos     FREE_STACK_RETURN (REG_EPAREN);
   4237  1.1  christos 
   4238  1.1  christos   /* If we don't want backtracking, force success
   4239  1.1  christos      the first time we reach the end of the compiled pattern.  */
   4240  1.1  christos   if (syntax & RE_NO_POSIX_BACKTRACKING)
   4241  1.1  christos     BUF_PUSH (succeed);
   4242  1.1  christos 
   4243  1.1  christos #ifdef WCHAR
   4244  1.1  christos   free (pattern);
   4245  1.1  christos   free (mbs_offset);
   4246  1.1  christos   free (is_binary);
   4247  1.1  christos #endif
   4248  1.1  christos   free (compile_stack.stack);
   4249  1.1  christos 
   4250  1.1  christos   /* We have succeeded; set the length of the buffer.  */
   4251  1.1  christos #ifdef WCHAR
   4252  1.1  christos   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
   4253  1.1  christos #else
   4254  1.1  christos   bufp->used = b - bufp->buffer;
   4255  1.1  christos #endif
   4256  1.1  christos 
   4257  1.1  christos #ifdef DEBUG
   4258  1.1  christos   if (debug)
   4259  1.1  christos     {
   4260  1.1  christos       DEBUG_PRINT1 ("\nCompiled pattern: \n");
   4261  1.1  christos       PREFIX(print_compiled_pattern) (bufp);
   4262  1.1  christos     }
   4263  1.1  christos #endif /* DEBUG */
   4264  1.1  christos 
   4265  1.1  christos #ifndef MATCH_MAY_ALLOCATE
   4266  1.1  christos   /* Initialize the failure stack to the largest possible stack.  This
   4267  1.1  christos      isn't necessary unless we're trying to avoid calling alloca in
   4268  1.1  christos      the search and match routines.  */
   4269  1.1  christos   {
   4270  1.1  christos     int num_regs = bufp->re_nsub + 1;
   4271  1.1  christos 
   4272  1.1  christos     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
   4273  1.1  christos        is strictly greater than re_max_failures, the largest possible stack
   4274  1.1  christos        is 2 * re_max_failures failure points.  */
   4275  1.1  christos     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
   4276  1.1  christos       {
   4277  1.1  christos 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
   4278  1.1  christos 
   4279  1.1  christos # ifdef emacs
   4280  1.1  christos 	if (! fail_stack.stack)
   4281  1.1  christos 	  fail_stack.stack
   4282  1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
   4283  1.1  christos 				    * sizeof (PREFIX(fail_stack_elt_t)));
   4284  1.1  christos 	else
   4285  1.1  christos 	  fail_stack.stack
   4286  1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
   4287  1.1  christos 				     (fail_stack.size
   4288  1.1  christos 				      * sizeof (PREFIX(fail_stack_elt_t))));
   4289  1.1  christos # else /* not emacs */
   4290  1.1  christos 	if (! fail_stack.stack)
   4291  1.1  christos 	  fail_stack.stack
   4292  1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
   4293  1.1  christos 				   * sizeof (PREFIX(fail_stack_elt_t)));
   4294  1.1  christos 	else
   4295  1.1  christos 	  fail_stack.stack
   4296  1.1  christos 	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
   4297  1.1  christos 					    (fail_stack.size
   4298  1.1  christos 				     * sizeof (PREFIX(fail_stack_elt_t))));
   4299  1.1  christos # endif /* not emacs */
   4300  1.1  christos       }
   4301  1.1  christos 
   4302  1.1  christos    PREFIX(regex_grow_registers) (num_regs);
   4303  1.1  christos   }
   4304  1.1  christos #endif /* not MATCH_MAY_ALLOCATE */
   4305  1.1  christos 
   4306  1.1  christos   return REG_NOERROR;
   4307  1.1  christos } /* regex_compile */
   4308  1.1  christos 
   4309  1.1  christos /* Subroutines for `regex_compile'.  */
   4310  1.1  christos 
   4311  1.1  christos /* Store OP at LOC followed by two-byte integer parameter ARG.  */
   4312  1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4313  1.1  christos 
   4314  1.1  christos static void
   4315  1.1  christos PREFIX(store_op1) (op, loc, arg)
   4316  1.1  christos     re_opcode_t op;
   4317  1.1  christos     UCHAR_T *loc;
   4318  1.1  christos     int arg;
   4319  1.1  christos {
   4320  1.1  christos   *loc = (UCHAR_T) op;
   4321  1.1  christos   STORE_NUMBER (loc + 1, arg);
   4322  1.1  christos }
   4323  1.1  christos 
   4324  1.1  christos 
   4325  1.1  christos /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4326  1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4327  1.1  christos 
   4328  1.1  christos static void
   4329  1.1  christos PREFIX(store_op2) (op, loc, arg1, arg2)
   4330  1.1  christos     re_opcode_t op;
   4331  1.1  christos     UCHAR_T *loc;
   4332  1.1  christos     int arg1, arg2;
   4333  1.1  christos {
   4334  1.1  christos   *loc = (UCHAR_T) op;
   4335  1.1  christos   STORE_NUMBER (loc + 1, arg1);
   4336  1.1  christos   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
   4337  1.1  christos }
   4338  1.1  christos 
   4339  1.1  christos 
   4340  1.1  christos /* Copy the bytes from LOC to END to open up three bytes of space at LOC
   4341  1.1  christos    for OP followed by two-byte integer parameter ARG.  */
   4342  1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4343  1.1  christos 
   4344  1.1  christos static void
   4345  1.1  christos PREFIX(insert_op1) (op, loc, arg, end)
   4346  1.1  christos     re_opcode_t op;
   4347  1.1  christos     UCHAR_T *loc;
   4348  1.1  christos     int arg;
   4349  1.1  christos     UCHAR_T *end;
   4350  1.1  christos {
   4351  1.1  christos   register UCHAR_T *pfrom = end;
   4352  1.1  christos   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
   4353  1.1  christos 
   4354  1.1  christos   while (pfrom != loc)
   4355  1.1  christos     *--pto = *--pfrom;
   4356  1.1  christos 
   4357  1.1  christos   PREFIX(store_op1) (op, loc, arg);
   4358  1.1  christos }
   4359  1.1  christos 
   4360  1.1  christos 
   4361  1.1  christos /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
   4362  1.1  christos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
   4363  1.1  christos 
   4364  1.1  christos static void
   4365  1.1  christos PREFIX(insert_op2) (op, loc, arg1, arg2, end)
   4366  1.1  christos     re_opcode_t op;
   4367  1.1  christos     UCHAR_T *loc;
   4368  1.1  christos     int arg1, arg2;
   4369  1.1  christos     UCHAR_T *end;
   4370  1.1  christos {
   4371  1.1  christos   register UCHAR_T *pfrom = end;
   4372  1.1  christos   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
   4373  1.1  christos 
   4374  1.1  christos   while (pfrom != loc)
   4375  1.1  christos     *--pto = *--pfrom;
   4376  1.1  christos 
   4377  1.1  christos   PREFIX(store_op2) (op, loc, arg1, arg2);
   4378  1.1  christos }
   4379  1.1  christos 
   4380  1.1  christos 
   4381  1.1  christos /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
   4382  1.1  christos    after an alternative or a begin-subexpression.  We assume there is at
   4383  1.1  christos    least one character before the ^.  */
   4384  1.1  christos 
   4385  1.1  christos static boolean
   4386  1.1  christos PREFIX(at_begline_loc_p) (pattern, p, syntax)
   4387  1.1  christos     const CHAR_T *pattern, *p;
   4388  1.1  christos     reg_syntax_t syntax;
   4389  1.1  christos {
   4390  1.1  christos   const CHAR_T *prev = p - 2;
   4391  1.1  christos   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
   4392  1.1  christos 
   4393  1.1  christos   return
   4394  1.1  christos        /* After a subexpression?  */
   4395  1.1  christos        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
   4396  1.1  christos        /* After an alternative?  */
   4397  1.1  christos     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
   4398  1.1  christos }
   4399  1.1  christos 
   4400  1.1  christos 
   4401  1.1  christos /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
   4402  1.1  christos    at least one character after the $, i.e., `P < PEND'.  */
   4403  1.1  christos 
   4404  1.1  christos static boolean
   4405  1.1  christos PREFIX(at_endline_loc_p) (p, pend, syntax)
   4406  1.1  christos     const CHAR_T *p, *pend;
   4407  1.1  christos     reg_syntax_t syntax;
   4408  1.1  christos {
   4409  1.1  christos   const CHAR_T *next = p;
   4410  1.1  christos   boolean next_backslash = *next == '\\';
   4411  1.1  christos   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
   4412  1.1  christos 
   4413  1.1  christos   return
   4414  1.1  christos        /* Before a subexpression?  */
   4415  1.1  christos        (syntax & RE_NO_BK_PARENS ? *next == ')'
   4416  1.1  christos         : next_backslash && next_next && *next_next == ')')
   4417  1.1  christos        /* Before an alternative?  */
   4418  1.1  christos     || (syntax & RE_NO_BK_VBAR ? *next == '|'
   4419  1.1  christos         : next_backslash && next_next && *next_next == '|');
   4420  1.1  christos }
   4421  1.1  christos 
   4422  1.1  christos #else /* not INSIDE_RECURSION */
   4423  1.1  christos 
   4424  1.1  christos /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
   4425  1.1  christos    false if it's not.  */
   4426  1.1  christos 
   4427  1.1  christos static boolean
   4428  1.1  christos group_in_compile_stack (compile_stack, regnum)
   4429  1.1  christos     compile_stack_type compile_stack;
   4430  1.1  christos     regnum_t regnum;
   4431  1.1  christos {
   4432  1.1  christos   int this_element;
   4433  1.1  christos 
   4434  1.1  christos   for (this_element = compile_stack.avail - 1;
   4435  1.1  christos        this_element >= 0;
   4436  1.1  christos        this_element--)
   4437  1.1  christos     if (compile_stack.stack[this_element].regnum == regnum)
   4438  1.1  christos       return true;
   4439  1.1  christos 
   4440  1.1  christos   return false;
   4441  1.1  christos }
   4442  1.1  christos #endif /* not INSIDE_RECURSION */
   4443  1.1  christos 
   4444  1.1  christos #ifdef INSIDE_RECURSION
   4445  1.1  christos 
   4446  1.1  christos #ifdef WCHAR
   4447  1.1  christos /* This insert space, which size is "num", into the pattern at "loc".
   4448  1.1  christos    "end" must point the end of the allocated buffer.  */
   4449  1.1  christos static void
   4450  1.1  christos insert_space (num, loc, end)
   4451  1.1  christos      int num;
   4452  1.1  christos      CHAR_T *loc;
   4453  1.1  christos      CHAR_T *end;
   4454  1.1  christos {
   4455  1.1  christos   register CHAR_T *pto = end;
   4456  1.1  christos   register CHAR_T *pfrom = end - num;
   4457  1.1  christos 
   4458  1.1  christos   while (pfrom >= loc)
   4459  1.1  christos     *pto-- = *pfrom--;
   4460  1.1  christos }
   4461  1.1  christos #endif /* WCHAR */
   4462  1.1  christos 
   4463  1.1  christos #ifdef WCHAR
   4464  1.1  christos static reg_errcode_t
   4465  1.1  christos wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
   4466  1.1  christos 		   char_set)
   4467  1.1  christos      CHAR_T range_start_char;
   4468  1.1  christos      const CHAR_T **p_ptr, *pend;
   4469  1.1  christos      CHAR_T *char_set, *b;
   4470  1.1  christos      RE_TRANSLATE_TYPE translate;
   4471  1.1  christos      reg_syntax_t syntax;
   4472  1.1  christos {
   4473  1.1  christos   const CHAR_T *p = *p_ptr;
   4474  1.1  christos   CHAR_T range_start, range_end;
   4475  1.1  christos   reg_errcode_t ret;
   4476  1.1  christos # ifdef _LIBC
   4477  1.1  christos   uint32_t nrules;
   4478  1.1  christos   uint32_t start_val, end_val;
   4479  1.1  christos # endif
   4480  1.1  christos   if (p == pend)
   4481  1.1  christos     return REG_ERANGE;
   4482  1.1  christos 
   4483  1.1  christos # ifdef _LIBC
   4484  1.1  christos   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   4485  1.1  christos   if (nrules != 0)
   4486  1.1  christos     {
   4487  1.1  christos       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
   4488  1.1  christos 						       _NL_COLLATE_COLLSEQWC);
   4489  1.1  christos       const unsigned char *extra = (const unsigned char *)
   4490  1.1  christos 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   4491  1.1  christos 
   4492  1.1  christos       if (range_start_char < -1)
   4493  1.1  christos 	{
   4494  1.1  christos 	  /* range_start is a collating symbol.  */
   4495  1.1  christos 	  int32_t *wextra;
   4496  1.1  christos 	  /* Retreive the index and get collation sequence value.  */
   4497  1.1  christos 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
   4498  1.1  christos 	  start_val = wextra[1 + *wextra];
   4499  1.1  christos 	}
   4500  1.1  christos       else
   4501  1.1  christos 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
   4502  1.1  christos 
   4503  1.1  christos       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
   4504  1.1  christos 
   4505  1.1  christos       /* Report an error if the range is empty and the syntax prohibits
   4506  1.1  christos 	 this.  */
   4507  1.1  christos       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4508  1.1  christos 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
   4509  1.1  christos 
   4510  1.1  christos       /* Insert space to the end of the char_ranges.  */
   4511  1.1  christos       insert_space(2, b - char_set[5] - 2, b - 1);
   4512  1.1  christos       *(b - char_set[5] - 2) = (wchar_t)start_val;
   4513  1.1  christos       *(b - char_set[5] - 1) = (wchar_t)end_val;
   4514  1.1  christos       char_set[4]++; /* ranges_index */
   4515  1.1  christos     }
   4516  1.1  christos   else
   4517  1.1  christos # endif
   4518  1.1  christos     {
   4519  1.1  christos       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
   4520  1.1  christos 	range_start_char;
   4521  1.1  christos       range_end = TRANSLATE (p[0]);
   4522  1.1  christos       /* Report an error if the range is empty and the syntax prohibits
   4523  1.1  christos 	 this.  */
   4524  1.1  christos       ret = ((syntax & RE_NO_EMPTY_RANGES)
   4525  1.1  christos 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
   4526  1.1  christos 
   4527  1.1  christos       /* Insert space to the end of the char_ranges.  */
   4528  1.1  christos       insert_space(2, b - char_set[5] - 2, b - 1);
   4529  1.1  christos       *(b - char_set[5] - 2) = range_start;
   4530  1.1  christos       *(b - char_set[5] - 1) = range_end;
   4531  1.1  christos       char_set[4]++; /* ranges_index */
   4532  1.1  christos     }
   4533  1.1  christos   /* Have to increment the pointer into the pattern string, so the
   4534  1.1  christos      caller isn't still at the ending character.  */
   4535  1.1  christos   (*p_ptr)++;
   4536  1.1  christos 
   4537  1.1  christos   return ret;
   4538  1.1  christos }
   4539  1.1  christos #else /* BYTE */
   4540  1.1  christos /* Read the ending character of a range (in a bracket expression) from the
   4541  1.1  christos    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
   4542  1.1  christos    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
   4543  1.1  christos    Then we set the translation of all bits between the starting and
   4544  1.1  christos    ending characters (inclusive) in the compiled pattern B.
   4545  1.1  christos 
   4546  1.1  christos    Return an error code.
   4547  1.1  christos 
   4548  1.1  christos    We use these short variable names so we can use the same macros as
   4549  1.1  christos    `regex_compile' itself.  */
   4550  1.1  christos 
   4551  1.1  christos static reg_errcode_t
   4552  1.1  christos byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
   4553  1.1  christos      unsigned int range_start_char;
   4554  1.1  christos      const char **p_ptr, *pend;
   4555  1.1  christos      RE_TRANSLATE_TYPE translate;
   4556  1.1  christos      reg_syntax_t syntax;
   4557  1.1  christos      unsigned char *b;
   4558  1.1  christos {
   4559  1.1  christos   unsigned this_char;
   4560  1.1  christos   const char *p = *p_ptr;
   4561  1.1  christos   reg_errcode_t ret;
   4562  1.1  christos # if _LIBC
   4563  1.1  christos   const unsigned char *collseq;
   4564  1.1  christos   unsigned int start_colseq;
   4565  1.1  christos   unsigned int end_colseq;
   4566  1.1  christos # else
   4567  1.1  christos   unsigned end_char;
   4568  1.1  christos # endif
   4569  1.1  christos 
   4570  1.1  christos   if (p == pend)
   4571  1.1  christos     return REG_ERANGE;
   4572  1.1  christos 
   4573  1.1  christos   /* Have to increment the pointer into the pattern string, so the
   4574  1.1  christos      caller isn't still at the ending character.  */
   4575  1.1  christos   (*p_ptr)++;
   4576  1.1  christos 
   4577  1.1  christos   /* Report an error if the range is empty and the syntax prohibits this.  */
   4578  1.1  christos   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
   4579  1.1  christos 
   4580  1.1  christos # if _LIBC
   4581  1.1  christos   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
   4582  1.1  christos 						 _NL_COLLATE_COLLSEQMB);
   4583  1.1  christos 
   4584  1.1  christos   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
   4585  1.1  christos   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
   4586  1.1  christos   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
   4587  1.1  christos     {
   4588  1.1  christos       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
   4589  1.1  christos 
   4590  1.1  christos       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
   4591  1.1  christos 	{
   4592  1.1  christos 	  SET_LIST_BIT (TRANSLATE (this_char));
   4593  1.1  christos 	  ret = REG_NOERROR;
   4594  1.1  christos 	}
   4595  1.1  christos     }
   4596  1.1  christos # else
   4597  1.1  christos   /* Here we see why `this_char' has to be larger than an `unsigned
   4598  1.1  christos      char' -- we would otherwise go into an infinite loop, since all
   4599  1.1  christos      characters <= 0xff.  */
   4600  1.1  christos   range_start_char = TRANSLATE (range_start_char);
   4601  1.1  christos   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
   4602  1.1  christos      and some compilers cast it to int implicitly, so following for_loop
   4603  1.1  christos      may fall to (almost) infinite loop.
   4604  1.1  christos      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
   4605  1.1  christos      To avoid this, we cast p[0] to unsigned int and truncate it.  */
   4606  1.1  christos   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
   4607  1.1  christos 
   4608  1.1  christos   for (this_char = range_start_char; this_char <= end_char; ++this_char)
   4609  1.1  christos     {
   4610  1.1  christos       SET_LIST_BIT (TRANSLATE (this_char));
   4611  1.1  christos       ret = REG_NOERROR;
   4612  1.1  christos     }
   4613  1.1  christos # endif
   4614  1.1  christos 
   4615  1.1  christos   return ret;
   4616  1.1  christos }
   4617  1.1  christos #endif /* WCHAR */
   4618  1.1  christos 
   4619  1.1  christos /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   4621  1.1  christos    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   4622  1.1  christos    characters can start a string that matches the pattern.  This fastmap
   4623  1.1  christos    is used by re_search to skip quickly over impossible starting points.
   4624  1.1  christos 
   4625  1.1  christos    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   4626  1.1  christos    area as BUFP->fastmap.
   4627  1.1  christos 
   4628  1.1  christos    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   4629  1.1  christos    the pattern buffer.
   4630  1.1  christos 
   4631  1.1  christos    Returns 0 if we succeed, -2 if an internal error.   */
   4632  1.1  christos 
   4633  1.1  christos #ifdef WCHAR
   4634  1.1  christos /* local function for re_compile_fastmap.
   4635  1.1  christos    truncate wchar_t character to char.  */
   4636  1.1  christos static unsigned char truncate_wchar (CHAR_T c);
   4637  1.1  christos 
   4638  1.1  christos static unsigned char
   4639  1.1  christos truncate_wchar (c)
   4640  1.1  christos      CHAR_T c;
   4641  1.1  christos {
   4642  1.1  christos   unsigned char buf[MB_CUR_MAX];
   4643  1.1  christos   mbstate_t state;
   4644  1.1  christos   int retval;
   4645  1.1  christos   memset (&state, '\0', sizeof (state));
   4646  1.1  christos   retval = wcrtomb (buf, c, &state);
   4647  1.1  christos   return retval > 0 ? buf[0] : (unsigned char) c;
   4648  1.1  christos }
   4649  1.1  christos #endif /* WCHAR */
   4650  1.1  christos 
                       /* Walk the compiled pattern in BUFP->buffer and mark in BUFP->fastmap
                          every byte value that can begin a match.  Alternatives introduced
                          by the on_failure_jump family of opcodes are remembered on a local
                          failure stack and explored one after another.
                       
                          On return BUFP->fastmap_accurate is 1, and BUFP->can_be_null is
                          nonzero if some path can reach the end of the pattern without
                          matching a character.  It is also forced to 1 for constructs this
                          routine does not describe in the fastmap (back references, and in
                          the WCHAR build the charset and word-character opcodes).
                       
                          Returns 0 on success, or -2 if a pattern position could not be
                          pushed on the failure stack.  */
   4651  1.1  christos static int
   4652  1.1  christos PREFIX(re_compile_fastmap) (bufp)
   4653  1.1  christos      struct re_pattern_buffer *bufp;
   4654  1.1  christos {
   4655  1.1  christos   int j, k;
   4656  1.1  christos #ifdef MATCH_MAY_ALLOCATE
   4657  1.1  christos   PREFIX(fail_stack_type) fail_stack;
   4658  1.1  christos #endif
   4659  1.1  christos #ifndef REGEX_MALLOC
                         /* NOTE(review): not referenced by name in this function;
                            presumably consumed by the alloca-based stack-growing macros --
                            confirm before removing.  */
   4660  1.1  christos   char *destination;
   4661  1.1  christos #endif
   4662  1.1  christos 
   4663  1.1  christos   register char *fastmap = bufp->fastmap;
   4664  1.1  christos 
   4665  1.1  christos #ifdef WCHAR
   4666  1.1  christos   /* We need to cast pattern to (wchar_t*), because we casted this compiled
   4667  1.1  christos      pattern to (char*) in regex_compile.  */
   4668  1.1  christos   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
   4669  1.1  christos   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
   4670  1.1  christos #else /* BYTE */
   4671  1.1  christos   UCHAR_T *pattern = bufp->buffer;
   4672  1.1  christos   register UCHAR_T *pend = pattern + bufp->used;
   4673  1.1  christos #endif /* WCHAR */
   4674  1.1  christos   UCHAR_T *p = pattern;
   4675  1.1  christos 
   4676  1.1  christos #ifdef REL_ALLOC
   4677  1.1  christos   /* This holds the pointer to the failure stack, when
   4678  1.1  christos      it is allocated relocatably.  */
   4679  1.1  christos   fail_stack_elt_t *failure_stack_ptr;
   4680  1.1  christos #endif
   4681  1.1  christos 
   4682  1.1  christos   /* Assume that each path through the pattern can be null until
   4683  1.1  christos      proven otherwise.  We set this false at the bottom of switch
   4684  1.1  christos      statement, to which we get only if a particular path doesn't
   4685  1.1  christos      match the empty string.  */
   4686  1.1  christos   boolean path_can_be_null = true;
   4687  1.1  christos 
   4688  1.1  christos   /* We aren't doing a `succeed_n' to begin with.  */
   4689  1.1  christos   boolean succeed_n_p = false;
   4690  1.1  christos 
   4691  1.1  christos   assert (fastmap != NULL && p != NULL);
   4692  1.1  christos 
   4693  1.1  christos   INIT_FAIL_STACK ();
   4694  1.1  christos   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
   4695  1.1  christos   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
   4696  1.1  christos   bufp->can_be_null = 0;
   4697  1.1  christos 
   4698  1.1  christos   while (1)
   4699  1.1  christos     {
   4700  1.1  christos       if (p == pend || *p == succeed)
   4701  1.1  christos 	{
   4702  1.1  christos 	  /* We have reached the (effective) end of pattern.  */
   4703  1.1  christos 	  if (!FAIL_STACK_EMPTY ())
   4704  1.1  christos 	    {
   4705  1.1  christos 	      bufp->can_be_null |= path_can_be_null;
   4706  1.1  christos 
   4707  1.1  christos 	      /* Reset for next path.  */
   4708  1.1  christos 	      path_can_be_null = true;
   4709  1.1  christos 
   4710  1.1  christos 	      p = fail_stack.stack[--fail_stack.avail].pointer;
   4711  1.1  christos 
   4712  1.1  christos 	      continue;
   4713  1.1  christos 	    }
   4714  1.1  christos 	  else
   4715  1.1  christos 	    break;
   4716  1.1  christos 	}
   4717  1.1  christos 
   4718  1.1  christos       /* We should never be about to go beyond the end of the pattern.  */
   4719  1.1  christos       assert (p < pend);
   4720  1.1  christos 
   4721  1.1  christos       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   4722  1.1  christos 	{
   4723  1.1  christos 
   4724  1.1  christos         /* I guess the idea here is to simply not bother with a fastmap
   4725  1.1  christos            if a backreference is used, since it's too hard to figure out
   4726  1.1  christos            the fastmap for the corresponding group.  Setting
   4727  1.1  christos            `can_be_null' stops `re_search_2' from using the fastmap, so
   4728  1.1  christos            that is all we do.  */
   4729  1.1  christos 	case duplicate:
   4730  1.1  christos 	  bufp->can_be_null = 1;
   4731  1.1  christos           goto done;
   4732  1.1  christos 
   4733  1.1  christos 
   4734  1.1  christos       /* Following are the cases which match a character.  These end
   4735  1.1  christos          with `break'.  */
   4736  1.1  christos 
                             /* NOTE(review): p already points past the opcode here, so p[1]
                                is the second element after it; presumably p[0] holds the
                                literal count and p[1] the first literal character -- confirm
                                against regex_compile.  */
   4737  1.1  christos #ifdef WCHAR
   4738  1.1  christos 	case exactn:
   4739  1.1  christos           fastmap[truncate_wchar(p[1])] = 1;
   4740  1.1  christos 	  break;
   4741  1.1  christos #else /* BYTE */
   4742  1.1  christos 	case exactn:
   4743  1.1  christos           fastmap[p[1]] = 1;
   4744  1.1  christos 	  break;
   4745  1.1  christos #endif /* WCHAR */
   4746  1.1  christos #ifdef MBS_SUPPORT
   4747  1.1  christos 	case exactn_bin:
   4748  1.1  christos 	  fastmap[p[1]] = 1;
   4749  1.1  christos 	  break;
   4750  1.1  christos #endif
   4751  1.1  christos 
   4752  1.1  christos #ifdef WCHAR
   4753  1.1  christos         /* It is hard to distinguish fastmap from (multi byte) characters
   4754  1.1  christos            which depends on current locale.  */
   4755  1.1  christos         case charset:
   4756  1.1  christos 	case charset_not:
   4757  1.1  christos 	case wordchar:
   4758  1.1  christos 	case notwordchar:
   4759  1.1  christos           bufp->can_be_null = 1;
   4760  1.1  christos           goto done;
   4761  1.1  christos #else /* BYTE */
   4762  1.1  christos         case charset:
                       	  /* *p is the bitmap length in bytes and the bitmap itself
                       	     follows it.  Every set bit J marks byte value J as a
                       	     possible first character.  */
   4763  1.1  christos           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4764  1.1  christos 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
   4765  1.1  christos               fastmap[j] = 1;
   4766  1.1  christos 	  break;
   4767  1.1  christos 
   4768  1.1  christos 
   4769  1.1  christos 	case charset_not:
   4770  1.1  christos 	  /* Chars beyond end of map must be allowed.  */
   4771  1.1  christos 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
   4772  1.1  christos             fastmap[j] = 1;
   4773  1.1  christos 
                       	  /* Within the bitmap, every clear bit J marks byte value J.  */
   4774  1.1  christos 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   4775  1.1  christos 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
   4776  1.1  christos               fastmap[j] = 1;
   4777  1.1  christos           break;
   4778  1.1  christos 
   4779  1.1  christos 
   4780  1.1  christos 	case wordchar:
   4781  1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4782  1.1  christos 	    if (SYNTAX (j) == Sword)
   4783  1.1  christos 	      fastmap[j] = 1;
   4784  1.1  christos 	  break;
   4785  1.1  christos 
   4786  1.1  christos 
   4787  1.1  christos 	case notwordchar:
   4788  1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4789  1.1  christos 	    if (SYNTAX (j) != Sword)
   4790  1.1  christos 	      fastmap[j] = 1;
   4791  1.1  christos 	  break;
   4792  1.1  christos #endif /* WCHAR */
   4793  1.1  christos 
   4794  1.1  christos         case anychar:
   4795  1.1  christos 	  {
   4796  1.1  christos 	    int fastmap_newline = fastmap['\n'];
   4797  1.1  christos 
   4798  1.1  christos 	    /* `.' matches anything ...  */
   4799  1.1  christos 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
   4800  1.1  christos 	      fastmap[j] = 1;
   4801  1.1  christos 
   4802  1.1  christos 	    /* ... except perhaps newline.  */
   4803  1.1  christos 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
   4804  1.1  christos 	      fastmap['\n'] = fastmap_newline;
   4805  1.1  christos 
   4806  1.1  christos 	    /* Return if we have already set `can_be_null'; if we have,
   4807  1.1  christos 	       then the fastmap is irrelevant.  Something's wrong here.  */
   4808  1.1  christos 	    else if (bufp->can_be_null)
   4809  1.1  christos 	      goto done;
   4810  1.1  christos 
   4811  1.1  christos 	    /* Otherwise, have to check alternative paths.  */
   4812  1.1  christos 	    break;
   4813  1.1  christos 	  }
   4814  1.1  christos 
   4815  1.1  christos #ifdef emacs
   4816  1.1  christos         case syntaxspec:
   4817  1.1  christos 	  k = *p++;
   4818  1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4819  1.1  christos 	    if (SYNTAX (j) == (enum syntaxcode) k)
   4820  1.1  christos 	      fastmap[j] = 1;
   4821  1.1  christos 	  break;
   4822  1.1  christos 
   4823  1.1  christos 
   4824  1.1  christos 	case notsyntaxspec:
   4825  1.1  christos 	  k = *p++;
   4826  1.1  christos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   4827  1.1  christos 	    if (SYNTAX (j) != (enum syntaxcode) k)
   4828  1.1  christos 	      fastmap[j] = 1;
   4829  1.1  christos 	  break;
   4830  1.1  christos 
   4831  1.1  christos 
   4832  1.1  christos       /* All cases after this match the empty string.  These end with
   4833  1.1  christos          `continue'.  */
   4834  1.1  christos 
   4835  1.1  christos 
   4836  1.1  christos 	case before_dot:
   4837  1.1  christos 	case at_dot:
   4838  1.1  christos 	case after_dot:
   4839  1.1  christos           continue;
   4840  1.1  christos #endif /* emacs */
   4841  1.1  christos 
   4842  1.1  christos 
   4843  1.1  christos         case no_op:
   4844  1.1  christos         case begline:
   4845  1.1  christos         case endline:
   4846  1.1  christos 	case begbuf:
   4847  1.1  christos 	case endbuf:
   4848  1.1  christos 	case wordbound:
   4849  1.1  christos 	case notwordbound:
   4850  1.1  christos 	case wordbeg:
   4851  1.1  christos 	case wordend:
   4852  1.1  christos         case push_dummy_failure:
   4853  1.1  christos           continue;
   4854  1.1  christos 
   4855  1.1  christos 
   4856  1.1  christos 	case jump_n:
   4857  1.1  christos         case pop_failure_jump:
   4858  1.1  christos 	case maybe_pop_jump:
   4859  1.1  christos 	case jump:
   4860  1.1  christos         case jump_past_alt:
   4861  1.1  christos 	case dummy_failure_jump:
   4862  1.1  christos           EXTRACT_NUMBER_AND_INCR (j, p);
   4863  1.1  christos 	  p += j;
   4864  1.1  christos 	  if (j > 0)
   4865  1.1  christos 	    continue;
   4866  1.1  christos 
   4867  1.1  christos           /* Jump backward implies we just went through the body of a
   4868  1.1  christos              loop and matched nothing.  Opcode jumped to should be
   4869  1.1  christos              `on_failure_jump' or `succeed_n'.  Just treat it like an
   4870  1.1  christos              ordinary jump.  For a * loop, it has pushed its failure
   4871  1.1  christos              point already; if so, discard that as redundant.  */
   4872  1.1  christos           if ((re_opcode_t) *p != on_failure_jump
   4873  1.1  christos 	      && (re_opcode_t) *p != succeed_n)
   4874  1.1  christos 	    continue;
   4875  1.1  christos 
   4876  1.1  christos           p++;
   4877  1.1  christos           EXTRACT_NUMBER_AND_INCR (j, p);
   4878  1.1  christos           p += j;
   4879  1.1  christos 
   4880  1.1  christos           /* If what's on the stack is where we are now, pop it.  */
   4881  1.1  christos           if (!FAIL_STACK_EMPTY ()
   4882  1.1  christos 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
   4883  1.1  christos             fail_stack.avail--;
   4884  1.1  christos 
   4885  1.1  christos           continue;
   4886  1.1  christos 
   4887  1.1  christos 
   4888  1.1  christos         case on_failure_jump:
   4889  1.1  christos         case on_failure_keep_string_jump:
   4890  1.1  christos 	handle_on_failure_jump:
   4891  1.1  christos           EXTRACT_NUMBER_AND_INCR (j, p);
   4892  1.1  christos 
   4893  1.1  christos           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
   4894  1.1  christos              end of the pattern.  We don't want to push such a point,
   4895  1.1  christos              since when we restore it above, entering the switch will
   4896  1.1  christos              increment `p' past the end of the pattern.  We don't need
   4897  1.1  christos              to push such a point since we obviously won't find any more
   4898  1.1  christos              fastmap entries beyond `pend'.  Such a pattern can match
   4899  1.1  christos              the null string, though.  */
   4900  1.1  christos           if (p + j < pend)
   4901  1.1  christos             {
   4902  1.1  christos               if (!PUSH_PATTERN_OP (p + j, fail_stack))
   4903  1.1  christos 		{
   4904  1.1  christos 		  RESET_FAIL_STACK ();
   4905  1.1  christos 		  return -2;
   4906  1.1  christos 		}
   4907  1.1  christos             }
   4908  1.1  christos           else
   4909  1.1  christos             bufp->can_be_null = 1;
   4910  1.1  christos 
                                 /* Only set when we arrived here from the `succeed_n' case
                                    below via the goto.  */
   4911  1.1  christos           if (succeed_n_p)
   4912  1.1  christos             {
   4913  1.1  christos               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
   4914  1.1  christos               succeed_n_p = false;
   4915  1.1  christos 	    }
   4916  1.1  christos 
   4917  1.1  christos           continue;
   4918  1.1  christos 
   4919  1.1  christos 
   4920  1.1  christos 	case succeed_n:
   4921  1.1  christos           /* Get to the number of times to succeed.  */
   4922  1.1  christos           p += OFFSET_ADDRESS_SIZE;
   4923  1.1  christos 
   4924  1.1  christos           /* Increment p past the n for when k != 0.  */
   4925  1.1  christos           EXTRACT_NUMBER_AND_INCR (k, p);
   4926  1.1  christos           if (k == 0)
   4927  1.1  christos 	    {
                       	      /* Rewind so the shared on_failure_jump code can read the
                       	         jump offset (assumes EXTRACT_NUMBER_AND_INCR advanced p
                       	         by OFFSET_ADDRESS_SIZE -- TODO confirm).  */
   4928  1.1  christos               p -= 2 * OFFSET_ADDRESS_SIZE;
   4929  1.1  christos   	      succeed_n_p = true;  /* Spaghetti code alert.  */
   4930  1.1  christos               goto handle_on_failure_jump;
   4931  1.1  christos             }
   4932  1.1  christos           continue;
   4933  1.1  christos 
   4934  1.1  christos 
   4935  1.1  christos 	case set_number_at:
                       	  /* Step over 2 * OFFSET_ADDRESS_SIZE elements of operands;
                       	     nothing is added to the fastmap for this opcode.  */
   4936  1.1  christos           p += 2 * OFFSET_ADDRESS_SIZE;
   4937  1.1  christos           continue;
   4938  1.1  christos 
   4939  1.1  christos 
   4940  1.1  christos 	case start_memory:
   4941  1.1  christos         case stop_memory:
                       	  /* Skip the two operand elements that follow the opcode.
                       	     NOTE(review): presumably the register number and an inner
                       	     group count -- confirm against regex_compile.  */
   4942  1.1  christos 	  p += 2;
   4943  1.1  christos 	  continue;
   4944  1.1  christos 
   4945  1.1  christos 
   4946  1.1  christos 	default:
   4947  1.1  christos           abort (); /* We have listed all the cases.  */
   4948  1.1  christos         } /* switch *p++ */
   4949  1.1  christos 
   4950  1.1  christos       /* Getting here means we have found the possible starting
   4951  1.1  christos          characters for one path of the pattern -- and that the empty
   4952  1.1  christos          string does not match.  We need not follow this path further.
   4953  1.1  christos          Instead, look at the next alternative (remembered on the
   4954  1.1  christos          stack), or quit if no more.  The test at the top of the loop
   4955  1.1  christos          does these things.  */
   4956  1.1  christos       path_can_be_null = false;
   4957  1.1  christos       p = pend;
   4958  1.1  christos     } /* while p */
   4959  1.1  christos 
   4960  1.1  christos   /* Set `can_be_null' for the last path (also the first path, if the
   4961  1.1  christos      pattern is empty).  */
   4962  1.1  christos   bufp->can_be_null |= path_can_be_null;
   4963  1.1  christos 
   4964  1.1  christos  done:
   4965  1.1  christos   RESET_FAIL_STACK ();
   4966  1.1  christos   return 0;
   4967  1.1  christos }
   4968  1.1  christos 
   4969  1.1  christos #else /* not INSIDE_RECURSION */
   4970  1.1  christos 
                       /* Public entry point: compute the fastmap for BUFP.
                       
                          When MBS_SUPPORT is compiled in and MB_CUR_MAX is not 1, the
                          wide-character implementation is called; in every other case the
                          byte implementation is.  The selected implementation's return
                          value is passed back unchanged.  */
   4971  1.1  christos int
   4972  1.1  christos re_compile_fastmap (bufp)
   4973  1.1  christos      struct re_pattern_buffer *bufp;
   4974  1.1  christos {
   4975  1.1  christos # ifdef MBS_SUPPORT
   4976  1.1  christos   if (MB_CUR_MAX != 1)
   4977  1.1  christos     return wcs_re_compile_fastmap(bufp);
   4978  1.1  christos   else
   4979  1.1  christos # endif
                           /* Reached as the `else' arm above, or unconditionally when
                              MBS_SUPPORT is not defined.  */
   4980  1.1  christos     return byte_re_compile_fastmap(bufp);
   4981  1.1  christos } /* re_compile_fastmap */
   4982  1.1  christos #ifdef _LIBC
   4983  1.1  christos weak_alias (__re_compile_fastmap, re_compile_fastmap)
   4984  1.1  christos #endif
   4985  1.1  christos 
   4986  1.1  christos 
   4988  1.1  christos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   4989  1.1  christos    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   4990  1.1  christos    this memory for recording register information.  STARTS and ENDS
   4991  1.1  christos    must be allocated using the malloc library routine, and must each
   4992  1.1  christos    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   4993  1.1  christos 
   4994  1.1  christos    If NUM_REGS == 0, then subsequent matches should allocate their own
   4995  1.1  christos    register data.
   4996  1.1  christos 
   4997  1.1  christos    Unless this function is called, the first search or match using
   4998  1.1  christos    PATTERN_BUFFER will allocate its own register data, without
   4999  1.1  christos    freeing the old data.
                       
                          Concretely: a nonzero NUM_REGS marks BUFP as REGS_REALLOCATE and
                          stores NUM_REGS, STARTS and ENDS in REGS; a zero NUM_REGS marks
                          BUFP as REGS_UNALLOCATED, zeroes REGS->num_regs and sets both
                          REGS->start and REGS->end to null (STARTS and ENDS are ignored).
                          Nothing is allocated or freed here.  */
   5000  1.1  christos 
   5001  1.1  christos void
   5002  1.1  christos re_set_registers (bufp, regs, num_regs, starts, ends)
   5003  1.1  christos     struct re_pattern_buffer *bufp;
   5004  1.1  christos     struct re_registers *regs;
   5005  1.1  christos     unsigned num_regs;
   5006  1.1  christos     regoff_t *starts, *ends;
   5007  1.1  christos {
   5008  1.1  christos   if (num_regs)
   5009  1.1  christos     {
                             /* Adopt the caller-supplied arrays.  */
   5010  1.1  christos       bufp->regs_allocated = REGS_REALLOCATE;
   5011  1.1  christos       regs->num_regs = num_regs;
   5012  1.1  christos       regs->start = starts;
   5013  1.1  christos       regs->end = ends;
   5014  1.1  christos     }
   5015  1.1  christos   else
   5016  1.1  christos     {
                             /* No registers supplied: leave REGS empty.  */
   5017  1.1  christos       bufp->regs_allocated = REGS_UNALLOCATED;
   5018  1.1  christos       regs->num_regs = 0;
   5019  1.1  christos       regs->start = regs->end = (regoff_t *) 0;
   5020  1.1  christos     }
   5021  1.1  christos }
   5022  1.1  christos #ifdef _LIBC
   5023  1.1  christos weak_alias (__re_set_registers, re_set_registers)
   5024  1.1  christos #endif
   5025  1.1  christos 
   5026  1.1  christos /* Searching routines.  */
   5028  1.1  christos 
   5029  1.1  christos /* Like re_search_2, below, but only one string is specified, and
   5030  1.1  christos    doesn't let you say where to stop matching.
                       
                          STRING (of length SIZE) is handed to re_search_2 as the second
                          string, with a null first string of length 0, and SIZE is also
                          passed as the STOP index.  The result of re_search_2 is returned
                          unchanged.  */
   5031  1.1  christos 
   5032  1.1  christos int
   5033  1.1  christos re_search (bufp, string, size, startpos, range, regs)
   5034  1.1  christos      struct re_pattern_buffer *bufp;
   5035  1.1  christos      const char *string;
   5036  1.1  christos      int size, startpos, range;
   5037  1.1  christos      struct re_registers *regs;
   5038  1.1  christos {
   5039  1.1  christos   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
   5040  1.1  christos 		      regs, size);
   5041  1.1  christos }
   5042  1.1  christos #ifdef _LIBC
   5043  1.1  christos weak_alias (__re_search, re_search)
   5044  1.1  christos #endif
   5045  1.1  christos 
   5046  1.1  christos 
   5047  1.1  christos /* Using the compiled pattern in BUFP->buffer, first tries to match the
   5048  1.1  christos    virtual concatenation of STRING1 and STRING2, starting first at index
   5049  1.1  christos    STARTPOS, then at STARTPOS + 1, and so on.
   5050  1.1  christos 
   5051  1.1  christos    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
   5052  1.1  christos 
   5053  1.1  christos    RANGE is how far to scan while trying to match.  RANGE = 0 means try
   5054  1.1  christos    only at STARTPOS; in general, the last start tried is STARTPOS +
   5055  1.1  christos    RANGE.
   5056  1.1  christos 
   5057  1.1  christos    In REGS, return the indices of the virtual concatenation of STRING1
   5058  1.1  christos    and STRING2 that matched the entire BUFP->buffer and its contained
   5059  1.1  christos    subexpressions.
   5060  1.1  christos 
   5061  1.1  christos    Do not consider matching one past the index STOP in the virtual
   5062  1.1  christos    concatenation of STRING1 and STRING2.
   5063  1.1  christos 
   5064  1.1  christos    We return either the position in the strings at which the match was
   5065  1.1  christos    found, -1 if no match, or -2 if error (such as failure
   5066  1.1  christos    stack overflow).
                       
                          This public function only dispatches: when MBS_SUPPORT is compiled
                          in and MB_CUR_MAX is not 1 it calls wcs_re_search_2, otherwise
                          byte_re_search_2, forwarding every argument and returning the
                          callee's result unchanged.  */
   5067  1.1  christos 
   5068  1.1  christos int
   5069  1.1  christos re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
   5070  1.1  christos      struct re_pattern_buffer *bufp;
   5071  1.1  christos      const char *string1, *string2;
   5072  1.1  christos      int size1, size2;
   5073  1.1  christos      int startpos;
   5074  1.1  christos      int range;
   5075  1.1  christos      struct re_registers *regs;
   5076  1.1  christos      int stop;
   5077  1.1  christos {
   5078  1.1  christos # ifdef MBS_SUPPORT
   5079  1.1  christos   if (MB_CUR_MAX != 1)
   5080  1.1  christos     return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
   5081  1.1  christos 			    range, regs, stop);
   5082  1.1  christos   else
   5083  1.1  christos # endif
                           /* Reached as the `else' arm above, or unconditionally when
                              MBS_SUPPORT is not defined.  */
   5084  1.1  christos     return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
   5085  1.1  christos 			     range, regs, stop);
   5086  1.1  christos } /* re_search_2 */
   5087  1.1  christos #ifdef _LIBC
   5088  1.1  christos weak_alias (__re_search_2, re_search_2)
   5089  1.1  christos #endif
   5090  1.1  christos 
   5091  1.1  christos #endif /* not INSIDE_RECURSION */
   5092  1.1  christos 
   5093  1.1  christos #ifdef INSIDE_RECURSION
   5094  1.1  christos 
   5095  1.1  christos #ifdef MATCH_MAY_ALLOCATE
   5096  1.1  christos # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
   5097  1.1  christos #else
   5098  1.1  christos # define FREE_VAR(var) if (var) free (var); var = NULL
   5099  1.1  christos #endif
   5100  1.1  christos 
   5101  1.1  christos #ifdef WCHAR
                       /* Size threshold used by the wide-character search code: buffers
                          for an input string longer than this are obtained with TALLOC and
                          released with free; buffers for shorter strings are obtained with
                          REGEX_TALLOC and released with FREE_VAR.  */
   5102  1.1  christos # define MAX_ALLOCA_SIZE	2000
   5103  1.1  christos 
                       /* Release the converted wide-character string and the offset table
                          for each of the two input strings, choosing the release routine by
                          comparing the string's size against MAX_ALLOCA_SIZE.
                       
                          The macro refers to size1, size2, wcs_string1, wcs_string2,
                          mbs_offset1 and mbs_offset2 by name, so those must be in scope
                          where it is expanded.  In the free branches the pointers are not
                          reset to NULL by this macro.  */
   5104  1.1  christos # define FREE_WCS_BUFFERS() \
   5105  1.1  christos   do {									      \
   5106  1.1  christos     if (size1 > MAX_ALLOCA_SIZE)					      \
   5107  1.1  christos       {									      \
   5108  1.1  christos 	free (wcs_string1);						      \
   5109  1.1  christos 	free (mbs_offset1);						      \
   5110  1.1  christos       }									      \
   5111  1.1  christos     else								      \
   5112  1.1  christos       {									      \
   5113  1.1  christos 	FREE_VAR (wcs_string1);						      \
   5114  1.1  christos 	FREE_VAR (mbs_offset1);						      \
   5115  1.1  christos       }									      \
   5116  1.1  christos     if (size2 > MAX_ALLOCA_SIZE) 					      \
   5117  1.1  christos       {									      \
   5118  1.1  christos 	free (wcs_string2);						      \
   5119  1.1  christos 	free (mbs_offset2);						      \
   5120  1.1  christos       }									      \
   5121  1.1  christos     else								      \
   5122  1.1  christos       {									      \
   5123  1.1  christos 	FREE_VAR (wcs_string2);						      \
   5124  1.1  christos 	FREE_VAR (mbs_offset2);						      \
   5125  1.1  christos       }									      \
   5126  1.1  christos   } while (0)
   5127  1.1  christos 
   5128  1.1  christos #endif
   5129  1.1  christos 
   5130  1.1  christos 
   5131  1.1  christos static int
   5132  1.1  christos PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
   5133  1.1  christos 		     regs, stop)
   5134  1.1  christos      struct re_pattern_buffer *bufp;
   5135  1.1  christos      const char *string1, *string2;
   5136  1.1  christos      int size1, size2;
   5137  1.1  christos      int startpos;
   5138  1.1  christos      int range;
   5139  1.1  christos      struct re_registers *regs;
   5140  1.1  christos      int stop;
   5141  1.1  christos {
   5142  1.1  christos   int val;
   5143  1.1  christos   register char *fastmap = bufp->fastmap;
   5144  1.1  christos   register RE_TRANSLATE_TYPE translate = bufp->translate;
   5145  1.1  christos   int total_size = size1 + size2;
   5146  1.1  christos   int endpos = startpos + range;
   5147  1.1  christos #ifdef WCHAR
   5148  1.1  christos   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
   5149  1.1  christos   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
   5150  1.1  christos   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
   5151  1.1  christos   int wcs_size1 = 0, wcs_size2 = 0;
   5152  1.1  christos   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
   5153  1.1  christos   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
   5154  1.1  christos   /* They hold whether each wchar_t is binary data or not.  */
   5155  1.1  christos   char *is_binary = NULL;
   5156  1.1  christos #endif /* WCHAR */
   5157  1.1  christos 
   5158  1.1  christos   /* Check for out-of-range STARTPOS.  */
   5159  1.1  christos   if (startpos < 0 || startpos > total_size)
   5160  1.1  christos     return -1;
   5161  1.1  christos 
   5162  1.1  christos   /* Fix up RANGE if it might eventually take us outside
   5163  1.1  christos      the virtual concatenation of STRING1 and STRING2.
   5164  1.1  christos      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
   5165  1.1  christos   if (endpos < 0)
   5166  1.1  christos     range = 0 - startpos;
   5167  1.1  christos   else if (endpos > total_size)
   5168  1.1  christos     range = total_size - startpos;
   5169  1.1  christos 
   5170  1.1  christos   /* If the search isn't to be a backwards one, don't waste time in a
   5171  1.1  christos      search for a pattern that must be anchored.  */
   5172  1.1  christos   if (bufp->used > 0 && range > 0
   5173  1.1  christos       && ((re_opcode_t) bufp->buffer[0] == begbuf
   5174  1.1  christos 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
   5175  1.1  christos 	  || ((re_opcode_t) bufp->buffer[0] == begline
   5176  1.1  christos 	      && !bufp->newline_anchor)))
   5177  1.1  christos     {
   5178  1.1  christos       if (startpos > 0)
   5179  1.1  christos 	return -1;
   5180  1.1  christos       else
   5181  1.1  christos 	range = 1;
   5182  1.1  christos     }
   5183  1.1  christos 
   5184  1.1  christos #ifdef emacs
   5185  1.1  christos   /* In a forward search for something that starts with \=.
   5186  1.1  christos      don't keep searching past point.  */
   5187  1.1  christos   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
   5188  1.1  christos     {
   5189  1.1  christos       range = PT - startpos;
   5190  1.1  christos       if (range <= 0)
   5191  1.1  christos 	return -1;
   5192  1.1  christos     }
   5193  1.1  christos #endif /* emacs */
   5194  1.1  christos 
   5195  1.1  christos   /* Update the fastmap now if not correct already.  */
   5196  1.1  christos   if (fastmap && !bufp->fastmap_accurate)
   5197  1.1  christos     if (re_compile_fastmap (bufp) == -2)
   5198  1.1  christos       return -2;
   5199  1.1  christos 
   5200  1.1  christos #ifdef WCHAR
   5201  1.1  christos   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
   5202  1.1  christos      fill them with converted string.  */
   5203  1.1  christos   if (size1 != 0)
   5204  1.1  christos     {
   5205  1.1  christos       if (size1 > MAX_ALLOCA_SIZE)
   5206  1.1  christos 	{
   5207  1.1  christos 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
   5208  1.1  christos 	  mbs_offset1 = TALLOC (size1 + 1, int);
   5209  1.1  christos 	  is_binary = TALLOC (size1 + 1, char);
   5210  1.1  christos 	}
   5211  1.1  christos       else
   5212  1.1  christos 	{
   5213  1.1  christos 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
   5214  1.1  christos 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
   5215  1.1  christos 	  is_binary = REGEX_TALLOC (size1 + 1, char);
   5216  1.1  christos 	}
   5217  1.1  christos       if (!wcs_string1 || !mbs_offset1 || !is_binary)
   5218  1.1  christos 	{
   5219  1.1  christos 	  if (size1 > MAX_ALLOCA_SIZE)
   5220  1.1  christos 	    {
   5221  1.1  christos 	      free (wcs_string1);
   5222  1.1  christos 	      free (mbs_offset1);
   5223  1.1  christos 	      free (is_binary);
   5224  1.1  christos 	    }
   5225  1.1  christos 	  else
   5226  1.1  christos 	    {
   5227  1.1  christos 	      FREE_VAR (wcs_string1);
   5228  1.1  christos 	      FREE_VAR (mbs_offset1);
   5229  1.1  christos 	      FREE_VAR (is_binary);
   5230  1.1  christos 	    }
   5231  1.1  christos 	  return -2;
   5232  1.1  christos 	}
   5233  1.1  christos       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
   5234  1.1  christos 				     mbs_offset1, is_binary);
   5235  1.1  christos       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
   5236  1.1  christos       if (size1 > MAX_ALLOCA_SIZE)
   5237  1.1  christos 	free (is_binary);
   5238  1.1  christos       else
   5239  1.1  christos 	FREE_VAR (is_binary);
   5240  1.1  christos     }
   5241  1.1  christos   if (size2 != 0)
   5242  1.1  christos     {
   5243  1.1  christos       if (size2 > MAX_ALLOCA_SIZE)
   5244  1.1  christos 	{
   5245  1.1  christos 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
   5246  1.1  christos 	  mbs_offset2 = TALLOC (size2 + 1, int);
   5247  1.1  christos 	  is_binary = TALLOC (size2 + 1, char);
   5248  1.1  christos 	}
   5249  1.1  christos       else
   5250  1.1  christos 	{
   5251  1.1  christos 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
   5252  1.1  christos 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
   5253  1.1  christos 	  is_binary = REGEX_TALLOC (size2 + 1, char);
   5254  1.1  christos 	}
   5255  1.1  christos       if (!wcs_string2 || !mbs_offset2 || !is_binary)
   5256  1.1  christos 	{
   5257  1.1  christos 	  FREE_WCS_BUFFERS ();
   5258  1.1  christos 	  if (size2 > MAX_ALLOCA_SIZE)
   5259  1.1  christos 	    free (is_binary);
   5260  1.1  christos 	  else
   5261  1.1  christos 	    FREE_VAR (is_binary);
   5262  1.1  christos 	  return -2;
   5263  1.1  christos 	}
   5264  1.1  christos       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
   5265  1.1  christos 				     mbs_offset2, is_binary);
   5266  1.1  christos       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
   5267  1.1  christos       if (size2 > MAX_ALLOCA_SIZE)
   5268  1.1  christos 	free (is_binary);
   5269  1.1  christos       else
   5270  1.1  christos 	FREE_VAR (is_binary);
   5271  1.1  christos     }
   5272  1.1  christos #endif /* WCHAR */
   5273  1.1  christos 
   5274  1.1  christos 
   5275  1.1  christos   /* Loop through the string, looking for a place to start matching.  */
   5276  1.1  christos   for (;;)
   5277  1.1  christos     {
   5278  1.1  christos       /* If a fastmap is supplied, skip quickly over characters that
   5279  1.1  christos          cannot be the start of a match.  If the pattern can match the
   5280  1.1  christos          null string, however, we don't need to skip characters; we want
   5281  1.1  christos          the first null string.  */
   5282  1.1  christos       if (fastmap && startpos < total_size && !bufp->can_be_null)
   5283  1.1  christos 	{
   5284  1.1  christos 	  if (range > 0)	/* Searching forwards.  */
   5285  1.1  christos 	    {
   5286  1.1  christos 	      register const char *d;
   5287  1.1  christos 	      register int lim = 0;
   5288  1.1  christos 	      int irange = range;
   5289  1.1  christos 
   5290  1.1  christos               if (startpos < size1 && startpos + range >= size1)
   5291  1.1  christos                 lim = range - (size1 - startpos);
   5292  1.1  christos 
   5293  1.1  christos 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
   5294  1.1  christos 
   5295  1.1  christos               /* Written out as an if-else to avoid testing `translate'
   5296  1.1  christos                  inside the loop.  */
   5297  1.1  christos 	      if (translate)
   5298  1.1  christos                 while (range > lim
   5299  1.1  christos                        && !fastmap[(unsigned char)
   5300  1.1  christos 				   translate[(unsigned char) *d++]])
   5301  1.1  christos                   range--;
   5302  1.1  christos 	      else
   5303  1.1  christos                 while (range > lim && !fastmap[(unsigned char) *d++])
   5304  1.1  christos                   range--;
   5305  1.1  christos 
   5306  1.1  christos 	      startpos += irange - range;
   5307  1.1  christos 	    }
   5308  1.1  christos 	  else				/* Searching backwards.  */
   5309  1.1  christos 	    {
   5310  1.1  christos 	      register CHAR_T c = (size1 == 0 || startpos >= size1
   5311  1.1  christos 				      ? string2[startpos - size1]
   5312  1.1  christos 				      : string1[startpos]);
   5313  1.1  christos 
   5314  1.1  christos 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
   5315  1.1  christos 		goto advance;
   5316  1.1  christos 	    }
   5317  1.1  christos 	}
   5318  1.1  christos 
   5319  1.1  christos       /* If can't match the null string, and that's all we have left, fail.  */
   5320  1.1  christos       if (range >= 0 && startpos == total_size && fastmap
   5321  1.1  christos           && !bufp->can_be_null)
   5322  1.1  christos        {
   5323  1.1  christos #ifdef WCHAR
   5324  1.1  christos          FREE_WCS_BUFFERS ();
   5325  1.1  christos #endif
   5326  1.1  christos          return -1;
   5327  1.1  christos        }
   5328  1.1  christos 
   5329  1.1  christos #ifdef WCHAR
   5330  1.1  christos       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
   5331  1.1  christos 				     size2, startpos, regs, stop,
   5332  1.1  christos 				     wcs_string1, wcs_size1,
   5333  1.1  christos 				     wcs_string2, wcs_size2,
   5334  1.1  christos 				     mbs_offset1, mbs_offset2);
   5335  1.1  christos #else /* BYTE */
   5336  1.1  christos       val = byte_re_match_2_internal (bufp, string1, size1, string2,
   5337  1.1  christos 				      size2, startpos, regs, stop);
   5338  1.1  christos #endif /* BYTE */
   5339  1.1  christos 
   5340  1.1  christos #ifndef REGEX_MALLOC
   5341  1.1  christos # ifdef C_ALLOCA
   5342  1.1  christos       alloca (0);
   5343  1.1  christos # endif
   5344  1.1  christos #endif
   5345  1.1  christos 
   5346  1.1  christos       if (val >= 0)
   5347  1.1  christos 	{
   5348  1.1  christos #ifdef WCHAR
   5349  1.1  christos 	  FREE_WCS_BUFFERS ();
   5350  1.1  christos #endif
   5351  1.1  christos 	  return startpos;
   5352  1.1  christos 	}
   5353  1.1  christos 
   5354  1.1  christos       if (val == -2)
   5355  1.1  christos 	{
   5356  1.1  christos #ifdef WCHAR
   5357  1.1  christos 	  FREE_WCS_BUFFERS ();
   5358  1.1  christos #endif
   5359  1.1  christos 	  return -2;
   5360  1.1  christos 	}
   5361  1.1  christos 
   5362  1.1  christos     advance:
   5363  1.1  christos       if (!range)
   5364  1.1  christos         break;
   5365  1.1  christos       else if (range > 0)
   5366  1.1  christos         {
   5367  1.1  christos           range--;
   5368  1.1  christos           startpos++;
   5369  1.1  christos         }
   5370  1.1  christos       else
   5371  1.1  christos         {
   5372  1.1  christos           range++;
   5373  1.1  christos           startpos--;
   5374  1.1  christos         }
   5375  1.1  christos     }
   5376  1.1  christos #ifdef WCHAR
   5377  1.1  christos   FREE_WCS_BUFFERS ();
   5378  1.1  christos #endif
   5379  1.1  christos   return -1;
   5380  1.1  christos }
   5381  1.1  christos 
   5382  1.1  christos #ifdef WCHAR
   5383  1.1  christos /* Convert PTR, a pointer into one of the wchar_t search strings
   5384  1.1  christos    `string1' or `string2', into a multibyte string offset looked up in
   5385  1.1  christos    mbs_offset1 or mbs_offset2 (0 is used when that table is NULL).  When
   5386  1.1  christos    PTR is not in `string1', csize1 is added.  See convert_mbs_to_wcs.  */
   5387  1.1  christos # define POINTER_TO_OFFSET(ptr)						\
   5388  1.1  christos   (FIRST_STRING_P (ptr)							\
   5389  1.1  christos    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
   5390  1.1  christos    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
   5391  1.1  christos 		 + csize1)))
   5392  1.1  christos #else /* BYTE */
   5393  1.1  christos /* Convert PTR, a pointer into one of the search strings `string1' or
   5394  1.1  christos    `string2', into an offset; an offset into `string2' has size1 added.  */
   5395  1.1  christos # define POINTER_TO_OFFSET(ptr)			\
   5396  1.1  christos   (FIRST_STRING_P (ptr)				\
   5397  1.1  christos    ? ((regoff_t) ((ptr) - string1))		\
   5398  1.1  christos    : ((regoff_t) ((ptr) - string2 + size1)))
   5399  1.1  christos #endif /* WCHAR */
   5400  1.1  christos 
   5401  1.1  christos /* Macros for dealing with the split strings in re_match_2.  */
   5402  1.1  christos 
   5403  1.1  christos #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)	/* True while dend is still end_match_1.  */
   5404  1.1  christos 
   5405  1.1  christos /* Call before fetching a character with *d.  While d == dend, this jumps to `fail' if dend is already
   5406  1.1  christos    end_match_2, else sets d = string2 and dend = end_match_2 and re-tests.  Expands to a bare `while' statement.  */
   5407  1.1  christos #define PREFETCH()							\
   5408  1.1  christos   while (d == dend)						    	\
   5409  1.1  christos     {									\
   5410  1.1  christos       /* End of string2 => fail.  */					\
   5411  1.1  christos       if (dend == end_match_2) 						\
   5412  1.1  christos         goto fail;							\
   5413  1.1  christos       /* End of string1 => advance to string2.  */ 			\
   5414  1.1  christos       d = string2;						        \
   5415  1.1  christos       dend = end_match_2;						\
   5416  1.1  christos     }
   5417  1.1  christos 
   5418  1.1  christos /* Test if at very beginning or at very end of the virtual concatenation
   5419  1.1  christos    of `string1' and `string2'.  If only one string (size1 == 0), it's `string2'.  AT_STRINGS_BEG is also true whenever size2 is 0.  */
   5420  1.1  christos #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
   5421  1.1  christos #define AT_STRINGS_END(d) ((d) == end2)
   5422  1.1  christos 
   5423  1.1  christos 
   5424  1.1  christos /* Test if D points to a character which is word-constituent.  We have
   5425  1.1  christos    two special cases to check for: if past the end of string1, look at
   5426  1.1  christos    the first character in string2; and if before the beginning of
   5427  1.1  christos    string2, look at the last character in string1.  D is evaluated more than once, so it must have no side effects.  */
   5428  1.1  christos #ifdef WCHAR
   5429  1.1  christos /* Use internationalized API instead of SYNTAX: iswalnum, plus an explicit test for L'_'.  */
   5430  1.1  christos # define WORDCHAR_P(d)							\
   5431  1.1  christos   (iswalnum ((wint_t)((d) == end1 ? *string2				\
   5432  1.1  christos            : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
   5433  1.1  christos    || ((d) == end1 ? *string2						\
   5434  1.1  christos        : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
   5435  1.1  christos #else /* BYTE: word-constituent means SYNTAX of the character equals Sword.  */
   5436  1.1  christos # define WORDCHAR_P(d)							\
   5437  1.1  christos   (SYNTAX ((d) == end1 ? *string2					\
   5438  1.1  christos            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   5439  1.1  christos    == Sword)
   5440  1.1  christos #endif /* WCHAR */
   5441  1.1  christos 
   5442  1.1  christos /* Disabled due to a compiler bug -- see comment at case wordbound.  Kept under `#if 0' for reference only.  */
   5443  1.1  christos #if 0
   5444  1.1  christos /* True at either end of the strings, or when the character before D and
   5445  1.1  christos    the one at D differ with respect to being word-constituent.  */
   5446  1.1  christos #define AT_WORD_BOUNDARY(d)						\
   5447  1.1  christos   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   5448  1.1  christos    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
   5449  1.1  christos #endif
   5450  1.1  christos 
   5451  1.1  christos /* Free everything we allocate: the failure stack and the register vectors (only when MATCH_MAY_ALLOCATE), and, for WCHAR, string1/2 and mbs_offset1/2 unless cant_free_wcs_buf is set.  */
   5452  1.1  christos #ifdef MATCH_MAY_ALLOCATE
   5453  1.1  christos # ifdef WCHAR /* Register vectors plus the wide-character buffers.  */
   5454  1.1  christos #  define FREE_VARIABLES()						\
   5455  1.1  christos   do {									\
   5456  1.1  christos     REGEX_FREE_STACK (fail_stack.stack);				\
   5457  1.1  christos     FREE_VAR (regstart);						\
   5458  1.1  christos     FREE_VAR (regend);							\
   5459  1.1  christos     FREE_VAR (old_regstart);						\
   5460  1.1  christos     FREE_VAR (old_regend);						\
   5461  1.1  christos     FREE_VAR (best_regstart);						\
   5462  1.1  christos     FREE_VAR (best_regend);						\
   5463  1.1  christos     FREE_VAR (reg_info);						\
   5464  1.1  christos     FREE_VAR (reg_dummy);						\
   5465  1.1  christos     FREE_VAR (reg_info_dummy);						\
   5466  1.1  christos     if (!cant_free_wcs_buf)						\
   5467  1.1  christos       {									\
   5468  1.1  christos         FREE_VAR (string1);						\
   5469  1.1  christos         FREE_VAR (string2);						\
   5470  1.1  christos         FREE_VAR (mbs_offset1);						\
   5471  1.1  christos         FREE_VAR (mbs_offset2);						\
   5472  1.1  christos       }									\
   5473  1.1  christos   } while (0)
   5474  1.1  christos # else /* BYTE: failure stack and register vectors only.  */
   5475  1.1  christos #  define FREE_VARIABLES()						\
   5476  1.1  christos   do {									\
   5477  1.1  christos     REGEX_FREE_STACK (fail_stack.stack);				\
   5478  1.1  christos     FREE_VAR (regstart);						\
   5479  1.1  christos     FREE_VAR (regend);							\
   5480  1.1  christos     FREE_VAR (old_regstart);						\
   5481  1.1  christos     FREE_VAR (old_regend);						\
   5482  1.1  christos     FREE_VAR (best_regstart);						\
   5483  1.1  christos     FREE_VAR (best_regend);						\
   5484  1.1  christos     FREE_VAR (reg_info);						\
   5485  1.1  christos     FREE_VAR (reg_dummy);						\
   5486  1.1  christos     FREE_VAR (reg_info_dummy);						\
   5487  1.1  christos   } while (0)
   5488  1.1  christos # endif /* WCHAR */
   5489  1.1  christos #else
   5490  1.1  christos # ifdef WCHAR /* Only the wide-character buffers are released here.  */
   5491  1.1  christos #  define FREE_VARIABLES()						\
   5492  1.1  christos   do {									\
   5493  1.1  christos     if (!cant_free_wcs_buf)						\
   5494  1.1  christos       {									\
   5495  1.1  christos         FREE_VAR (string1);						\
   5496  1.1  christos         FREE_VAR (string2);						\
   5497  1.1  christos         FREE_VAR (mbs_offset1);						\
   5498  1.1  christos         FREE_VAR (mbs_offset2);						\
   5499  1.1  christos       }									\
   5500  1.1  christos   } while (0)
   5501  1.1  christos # else /* BYTE */
   5502  1.1  christos #  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
   5503  1.1  christos # endif /* WCHAR */
   5504  1.1  christos #endif /* not MATCH_MAY_ALLOCATE */
   5505  1.1  christos 
   5506  1.1  christos /* These values must meet several constraints.  They must not be valid
   5507  1.1  christos    register values; since we have a limit of 255 registers (because
   5508  1.1  christos    we use only one byte in the pattern for the register number), we can
   5509  1.1  christos    use numbers larger than 255.  They must differ by 1, because of
   5510  1.1  christos    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   5511  1.1  christos    be larger than the value for the highest register, so we do not try
   5512  1.1  christos    to actually save any registers when none are active.  */
   5513  1.1  christos #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)	/* Smallest value that does not fit in BYTEWIDTH bits.  */
   5514  1.1  christos #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)	/* One more, so lowest > highest when none are active.  */
   5515  1.1  christos 
   5516  1.1  christos #else /* not INSIDE_RECURSION */
   5518  1.1  christos /* Matching routines.  */
   5519  1.1  christos 
   5520  1.1  christos #ifndef emacs   /* Emacs never uses this.  */
   5521  1.1  christos /* re_match is like re_match_2 except it takes only a single string: STRING/SIZE are passed as the second string, the first is (NULL, 0), and the stop position is SIZE.  Returns the internal matcher's result unchanged.  */
   5522  1.1  christos 
   5523  1.1  christos int
   5524  1.1  christos re_match (bufp, string, size, pos, regs)
   5525  1.1  christos      struct re_pattern_buffer *bufp;	/* Pattern buffer, passed through to the internal matcher.  */
   5526  1.1  christos      const char *string;	/* The single string to match against.  */
   5527  1.1  christos      int size, pos;	/* Length of STRING (also used as the stop position); position to start at.  */
   5528  1.1  christos      struct re_registers *regs;	/* Passed through to the internal matcher.  */
   5529  1.1  christos {
   5530  1.1  christos   int result;
   5531  1.1  christos # ifdef MBS_SUPPORT /* Use the wide-character matcher when MB_CUR_MAX != 1.  */
   5532  1.1  christos   if (MB_CUR_MAX != 1)
   5533  1.1  christos     result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
   5534  1.1  christos 				      pos, regs, size,
   5535  1.1  christos 				      NULL, 0, NULL, 0, NULL, NULL);	/* No wchar_t buffers/offset tables supplied.  */
   5536  1.1  christos   else
   5537  1.1  christos # endif
   5538  1.1  christos     result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
   5539  1.1  christos 				  pos, regs, size);
   5540  1.1  christos # ifndef REGEX_MALLOC
   5541  1.1  christos #  ifdef C_ALLOCA
   5542  1.1  christos   alloca (0);	/* Presumably reclaims storage alloca'd by the internal matcher -- TODO confirm.  */
   5543  1.1  christos #  endif
   5544  1.1  christos # endif
   5545  1.1  christos   return result;
   5546  1.1  christos }
   5547  1.1  christos # ifdef _LIBC
   5548  1.1  christos weak_alias (__re_match, re_match)
   5549  1.1  christos # endif
   5550  1.1  christos #endif /* not emacs */
   5551  1.1  christos 
   5552  1.1  christos #endif /* not INSIDE_RECURSION */
   5553  1.1  christos 
   5554  1.1  christos #ifdef INSIDE_RECURSION /* Forward declarations of static helpers; each name is wrapped in PREFIX().  */
   5555  1.1  christos static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
   5556  1.1  christos 						    UCHAR_T *end,
   5557  1.1  christos 					PREFIX(register_info_type) *reg_info));
   5558  1.1  christos static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
   5559  1.1  christos 						  UCHAR_T *end,
   5560  1.1  christos 					PREFIX(register_info_type) *reg_info));
   5561  1.1  christos static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
   5562  1.1  christos 							UCHAR_T *end,
   5563  1.1  christos 					PREFIX(register_info_type) *reg_info));
   5564  1.1  christos static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2,
   5565  1.1  christos 				     int len, char *translate));
   5566  1.1  christos #else /* not INSIDE_RECURSION */
   5567  1.1  christos 
   5568  1.1  christos /* re_match_2 matches the compiled pattern in BUFP against the
   5569  1.1  christos    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   5570  1.1  christos    and SIZE2, respectively).  We start matching at POS, and stop
   5571  1.1  christos    matching at STOP.
   5572  1.1  christos 
   5573  1.1  christos    If REGS is non-null and the `no_sub' field of BUFP is zero, we
   5574  1.1  christos    store offsets for the substring each group matched in REGS.  See the
   5575  1.1  christos    documentation for exactly how many groups we fill.  (NOTE(review): this comment used to say `nonzero'; confirm against the internal matcher.)
   5576  1.1  christos 
   5577  1.1  christos    We return -1 if no match, -2 if an internal error (such as the
   5578  1.1  christos    failure stack overflowing).  Otherwise, we return the length of the
   5579  1.1  christos    matched substring.  */
   5580  1.1  christos 
   5581  1.1  christos int
   5582  1.1  christos re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   5583  1.1  christos      struct re_pattern_buffer *bufp;	/* Compiled pattern.  */
   5584  1.1  christos      const char *string1, *string2;	/* The two halves of the text.  */
   5585  1.1  christos      int size1, size2;	/* Lengths of STRING1 and STRING2.  */
   5586  1.1  christos      int pos;	/* Where to start matching.  */
   5587  1.1  christos      struct re_registers *regs;	/* Where group offsets are stored; may be null.  */
   5588  1.1  christos      int stop;	/* Where to stop matching.  */
   5589  1.1  christos {
   5590  1.1  christos   int result;
   5591  1.1  christos # ifdef MBS_SUPPORT /* Use the wide-character matcher when MB_CUR_MAX != 1.  */
   5592  1.1  christos   if (MB_CUR_MAX != 1)
   5593  1.1  christos     result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
   5594  1.1  christos 				      pos, regs, stop,
   5595  1.1  christos 				      NULL, 0, NULL, 0, NULL, NULL);	/* No wchar_t buffers/offset tables supplied.  */
   5596  1.1  christos   else
   5597  1.1  christos # endif
   5598  1.1  christos     result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
   5599  1.1  christos 				  pos, regs, stop);
   5600  1.1  christos 
   5601  1.1  christos #ifndef REGEX_MALLOC
   5602  1.1  christos # ifdef C_ALLOCA
   5603  1.1  christos   alloca (0);	/* Presumably reclaims storage alloca'd by the internal matcher -- TODO confirm.  */
   5604  1.1  christos # endif
   5605  1.1  christos #endif
   5606  1.1  christos   return result;
   5607  1.1  christos }
   5608  1.1  christos #ifdef _LIBC
   5609  1.1  christos weak_alias (__re_match_2, re_match_2)
   5610  1.1  christos #endif
   5611  1.1  christos 
   5612  1.1  christos #endif /* not INSIDE_RECURSION */
   5613  1.1  christos 
   5614  1.1  christos #ifdef INSIDE_RECURSION
   5615  1.1  christos 
   5616  1.1  christos #ifdef WCHAR
   5617  1.1  christos static int count_mbs_length PARAMS ((int *, int));
   5618  1.1  christos 
   5619  1.1  christos /* Look at the substring (from 0 to LENGTH) of the multibyte string to
   5620  1.1  christos    which OFFSET_BUFFER corresponds, and count how many wchar_t characters
   5621  1.1  christos    that substring occupies, i.e. find I with OFFSET_BUFFER[I] == LENGTH.
   5622  1.1  christos    Returns -1 if LENGTH < 0 or no such I is found, and 0 if OFFSET_BUFFER is NULL.  See convert_mbs_to_wcs.  */
   5623  1.1  christos 
   5624  1.1  christos static int
   5625  1.1  christos count_mbs_length(offset_buffer, length)
   5626  1.1  christos      int *offset_buffer;	/* Offset table; entry LENGTH is read, so it must exist.  */
   5627  1.1  christos      int length;	/* Offset to look for in the table.  */
   5628  1.1  christos {
   5629  1.1  christos   int upper, lower;
   5630  1.1  christos 
   5631  1.1  christos   /* Check whether the size is valid.  */
   5632  1.1  christos   if (length < 0)
   5633  1.1  christos     return -1;
   5634  1.1  christos 
   5635  1.1  christos   if (offset_buffer == NULL)
   5636  1.1  christos     return 0;
   5637  1.1  christos 
   5638  1.1  christos   /* If there are no multibyte characters, offset_buffer[i] == i.
   5639  1.1  christos    Optimize for this case.  */
   5640  1.1  christos   if (offset_buffer[length] == length)
   5641  1.1  christos     return length;
   5642  1.1  christos 
   5643  1.1  christos   /* Set up upper with length. (because for all i, offset_buffer[i] >= i)  */
   5644  1.1  christos   upper = length;
   5645  1.1  christos   lower = 0;
   5646  1.1  christos 
   5647  1.1  christos   while (true)	/* Bisect between lower and upper; assumes offset_buffer is nondecreasing -- TODO confirm.  */
   5648  1.1  christos     {
   5649  1.1  christos       int middle = (lower + upper) / 2;
   5650  1.1  christos       if (middle == lower || middle == upper)	/* Interval cannot shrink further: no exact entry.  */
   5651  1.1  christos 	break;
   5652  1.1  christos       if (offset_buffer[middle] > length)
   5653  1.1  christos 	upper = middle;
   5654  1.1  christos       else if (offset_buffer[middle] < length)
   5655  1.1  christos 	lower = middle;
   5656  1.1  christos       else
   5657  1.1  christos 	return middle;
   5658  1.1  christos     }
   5659  1.1  christos 
   5660  1.1  christos   return -1;
   5661  1.1  christos }
   5662  1.1  christos #endif /* WCHAR */
   5663  1.1  christos 
   5664  1.1  christos /* This is a separate function so that we can force an alloca cleanup
   5665  1.1  christos    afterwards.  */
   5666  1.1  christos #ifdef WCHAR
   5667  1.1  christos static int
   5668  1.1  christos wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
   5669  1.1  christos 			 regs, stop, string1, size1, string2, size2,
   5670  1.1  christos 			 mbs_offset1, mbs_offset2)
   5671  1.1  christos      struct re_pattern_buffer *bufp;
   5672  1.1  christos      const char *cstring1, *cstring2;
   5673  1.1  christos      int csize1, csize2;
   5674  1.1  christos      int pos;
   5675  1.1  christos      struct re_registers *regs;
   5676  1.1  christos      int stop;
   5677  1.1  christos      /* string1 == string2 == NULL means string1/2, size1/2 and
   5678  1.1  christos 	mbs_offset1/2 need setting up in this function.  */
   5679  1.1  christos      /* We need wchar_t* buffers corresponding to cstring1, cstring2.  */
   5680  1.1  christos      wchar_t *string1, *string2;
   5681  1.1  christos      /* We need the sizes of the wchar_t buffers corresponding to csize1, csize2.  */
   5682  1.1  christos      int size1, size2;
   5683  1.1  christos      /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
   5684  1.1  christos      int *mbs_offset1, *mbs_offset2;
   5685  1.1  christos #else /* BYTE */
   5686  1.1  christos static int
   5687  1.1  christos byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
   5688  1.1  christos 			  regs, stop)
   5689  1.1  christos      struct re_pattern_buffer *bufp;
   5690  1.1  christos      const char *string1, *string2;
   5691  1.1  christos      int size1, size2;
   5692  1.1  christos      int pos;
   5693  1.1  christos      struct re_registers *regs;
   5694  1.1  christos      int stop;
   5695  1.1  christos #endif /* BYTE */
   5696  1.1  christos {
   5697  1.1  christos   /* General temporaries.  */
   5698  1.1  christos   int mcnt;
   5699  1.1  christos   UCHAR_T *p1;
   5700  1.1  christos #ifdef WCHAR
   5701  1.1  christos   /* They hold whether each wchar_t is binary data or not.  */
   5702  1.1  christos   char *is_binary = NULL;
   5703  1.1  christos   /* If true, we can't free string1/2, mbs_offset1/2.  */
   5704  1.1  christos   int cant_free_wcs_buf = 1;
   5705  1.1  christos #endif /* WCHAR */
   5706  1.1  christos 
   5707  1.1  christos   /* Just past the end of the corresponding string.  */
   5708  1.1  christos   const CHAR_T *end1, *end2;
   5709  1.1  christos 
   5710  1.1  christos   /* Pointers into string1 and string2, just past the last characters in
   5711  1.1  christos      each to consider matching.  */
   5712  1.1  christos   const CHAR_T *end_match_1, *end_match_2;
   5713  1.1  christos 
   5714  1.1  christos   /* Where we are in the data, and the end of the current string.  */
   5715  1.1  christos   const CHAR_T *d, *dend;
   5716  1.1  christos 
   5717  1.1  christos   /* Where we are in the pattern, and the end of the pattern.  */
   5718  1.1  christos #ifdef WCHAR
   5719  1.1  christos   UCHAR_T *pattern, *p;
   5720  1.1  christos   register UCHAR_T *pend;
   5721  1.1  christos #else /* BYTE */
   5722  1.1  christos   UCHAR_T *p = bufp->buffer;
   5723  1.1  christos   register UCHAR_T *pend = p + bufp->used;
   5724  1.1  christos #endif /* WCHAR */
   5725  1.1  christos 
   5726  1.1  christos   /* Mark the opcode just after a start_memory, so we can test for an
   5727  1.1  christos      empty subpattern when we get to the stop_memory.  */
   5728  1.1  christos   UCHAR_T *just_past_start_mem = 0;
   5729  1.1  christos 
   5730  1.1  christos   /* We use this to map every character in the string.  */
   5731  1.1  christos   RE_TRANSLATE_TYPE translate = bufp->translate;
   5732  1.1  christos 
   5733  1.1  christos   /* Failure point stack.  Each place that can handle a failure further
   5734  1.1  christos      down the line pushes a failure point on this stack.  It consists of
   5735  1.1  christos      regstart, regend, and reg_info for all registers corresponding to
   5736  1.1  christos      the subexpressions we're currently inside, plus the number of such
   5737  1.1  christos      registers, and, finally, two char *'s.  The first char * is where
   5738  1.1  christos      to resume scanning the pattern; the second one is where to resume
   5739  1.1  christos      scanning the strings.  If the latter is zero, the failure point is
   5740  1.1  christos      a ``dummy''; if a failure happens and the failure point is a dummy,
   5741  1.1  christos      it gets discarded and the next one is tried.  */
   5742  1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5743  1.1  christos   PREFIX(fail_stack_type) fail_stack;
   5744  1.1  christos #endif
   5745  1.1  christos #ifdef DEBUG
   5746  1.1  christos   static unsigned failure_id;
   5747  1.1  christos   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
   5748  1.1  christos #endif
   5749  1.1  christos 
   5750  1.1  christos #ifdef REL_ALLOC
   5751  1.1  christos   /* This holds the pointer to the failure stack, when
   5752  1.1  christos      it is allocated relocatably.  */
   5753  1.1  christos   fail_stack_elt_t *failure_stack_ptr;
   5754  1.1  christos #endif
   5755  1.1  christos 
   5756  1.1  christos   /* We fill all the registers internally, independent of what we
   5757  1.1  christos      return, for use in backreferences.  The number here includes
   5758  1.1  christos      an element for register zero.  */
   5759  1.1  christos   size_t num_regs = bufp->re_nsub + 1;
   5760  1.1  christos 
   5761  1.1  christos   /* The currently active registers.  */
   5762  1.1  christos   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   5763  1.1  christos   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   5764  1.1  christos 
   5765  1.1  christos   /* Information on the contents of registers. These are pointers into
   5766  1.1  christos      the input strings; they record just what was matched (on this
   5767  1.1  christos      attempt) by a subexpression part of the pattern, that is, the
   5768  1.1  christos      regnum-th regstart pointer points to where in the string we began
   5769  1.1  christos      matching and the regnum-th regend points to right after where we
   5770  1.1  christos      stopped matching the regnum-th subexpression.  (The zeroth register
   5771  1.1  christos      keeps track of what the whole pattern matches.)  */
   5772  1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5773  1.1  christos   const CHAR_T **regstart, **regend;
   5774  1.1  christos #endif
   5775  1.1  christos 
   5776  1.1  christos   /* If a group that's operated upon by a repetition operator fails to
   5777  1.1  christos      match anything, then the register for its start will need to be
   5778  1.1  christos      restored because it will have been set to wherever in the string we
   5779  1.1  christos      are when we last see its open-group operator.  Similarly for a
   5780  1.1  christos      register's end.  */
   5781  1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5782  1.1  christos   const CHAR_T **old_regstart, **old_regend;
   5783  1.1  christos #endif
   5784  1.1  christos 
   5785  1.1  christos   /* The is_active field of reg_info helps us keep track of which (possibly
   5786  1.1  christos      nested) subexpressions we are currently in. The matched_something
   5787  1.1  christos      field of reg_info[reg_num] helps us tell whether or not we have
   5788  1.1  christos      matched any of the pattern so far this time through the reg_num-th
   5789  1.1  christos      subexpression.  These two fields get reset each time through any
   5790  1.1  christos      loop their register is in.  */
   5791  1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   5792  1.1  christos   PREFIX(register_info_type) *reg_info;
   5793  1.1  christos #endif
   5794  1.1  christos 
   5795  1.1  christos   /* The following record the register info as found in the above
   5796  1.1  christos      variables when we find a match better than any we've seen before.
   5797  1.1  christos      This happens as we backtrack through the failure points, which in
   5798  1.1  christos      turn happens only if we have not yet matched the entire string. */
   5799  1.1  christos   unsigned best_regs_set = false;
   5800  1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5801  1.1  christos   const CHAR_T **best_regstart, **best_regend;
   5802  1.1  christos #endif
   5803  1.1  christos 
   5804  1.1  christos   /* Logically, this is `best_regend[0]'.  But we don't want to have to
   5805  1.1  christos      allocate space for that if we're not allocating space for anything
   5806  1.1  christos      else (see below).  Also, we never need info about register 0 for
   5807  1.1  christos      any of the other register vectors, and it seems rather a kludge to
   5808  1.1  christos      treat `best_regend' differently than the rest.  So we keep track of
   5809  1.1  christos      the end of the best match so far in a separate variable.  We
   5810  1.1  christos      initialize this to NULL so that when we backtrack the first time
   5811  1.1  christos      and need to test it, it's not garbage.  */
   5812  1.1  christos   const CHAR_T *match_end = NULL;
   5813  1.1  christos 
   5814  1.1  christos   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   5815  1.1  christos   int set_regs_matched_done = 0;
   5816  1.1  christos 
   5817  1.1  christos   /* Used when we pop values we don't care about.  */
   5818  1.1  christos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   5819  1.1  christos   const CHAR_T **reg_dummy;
   5820  1.1  christos   PREFIX(register_info_type) *reg_info_dummy;
   5821  1.1  christos #endif
   5822  1.1  christos 
   5823  1.1  christos #ifdef DEBUG
   5824  1.1  christos   /* Counts the total number of registers pushed.  */
   5825  1.1  christos   unsigned num_regs_pushed = 0;
   5826  1.1  christos #endif
   5827  1.1  christos 
   5828  1.1  christos   /* Definitions for state transitions.  More efficient with gcc (jump table of label addresses).  */
   5829  1.1  christos #ifdef __GNUC__
   5830  1.1  christos # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED
   5831  1.1  christos #  define NEXT \
   5832  1.1  christos       do								      \
   5833  1.1  christos 	{								      \
   5834  1.1  christos 	  int offset;							      \
   5835  1.1  christos 	  const void *__unbounded ptr;					      \
   5836  1.1  christos 	  offset = (p == pend						      \
   5837  1.1  christos 		    ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);   \
   5838  1.1  christos 	  ptr = &&end_of_pattern + offset;				      \
   5839  1.1  christos 	  goto *ptr;							      \
   5840  1.1  christos 	}								      \
   5841  1.1  christos       while (0)
   5842  1.1  christos #  define REF(x) \
   5843  1.1  christos   &&label_##x - &&end_of_pattern
   5844  1.1  christos #  define JUMP_TABLE_TYPE const int
   5845  1.1  christos # else
   5846  1.1  christos #  define NEXT \
   5847  1.1  christos       do								      \
   5848  1.1  christos 	{								      \
   5849  1.1  christos 	  const void *__unbounded ptr;					      \
   5850  1.1  christos 	  ptr = (p == pend ? &&end_of_pattern				      \
   5851  1.1  christos 		 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);	      \
   5852  1.1  christos 	  goto *ptr;							      \
   5853  1.1  christos 	}								      \
   5854  1.1  christos       while (0)
   5855  1.1  christos #  define REF(x) \
   5856  1.1  christos   &&label_##x
   5857  1.1  christos #  define JUMP_TABLE_TYPE const void *const
   5858  1.1  christos # endif
   5859  1.1  christos # define CASE(x) label_##x
   5860  1.1  christos   static JUMP_TABLE_TYPE jmptable[] =
   5861  1.1  christos     {
   5862  1.1  christos     REF (no_op),
   5863  1.1  christos     REF (succeed),
   5864  1.1  christos     REF (exactn),
   5865  1.1  christos # ifdef MBS_SUPPORT
   5866  1.1  christos     REF (exactn_bin),
   5867  1.1  christos # endif
   5868  1.1  christos     REF (anychar),
   5869  1.1  christos     REF (charset),
   5870  1.1  christos     REF (charset_not),
   5871  1.1  christos     REF (start_memory),
   5872  1.1  christos     REF (stop_memory),
   5873  1.1  christos     REF (duplicate),
   5874  1.1  christos     REF (begline),
   5875  1.1  christos     REF (endline),
   5876  1.1  christos     REF (begbuf),
   5877  1.1  christos     REF (endbuf),
   5878  1.1  christos     REF (jump),
   5879  1.1  christos     REF (jump_past_alt),
   5880  1.1  christos     REF (on_failure_jump),
   5881  1.1  christos     REF (on_failure_keep_string_jump),
   5882  1.1  christos     REF (pop_failure_jump),
   5883  1.1  christos     REF (maybe_pop_jump),
   5884  1.1  christos     REF (dummy_failure_jump),
   5885  1.1  christos     REF (push_dummy_failure),
   5886  1.1  christos     REF (succeed_n),
   5887  1.1  christos     REF (jump_n),
   5888  1.1  christos     REF (set_number_at),
   5889  1.1  christos     REF (wordchar),
   5890  1.1  christos     REF (notwordchar),
   5891  1.1  christos     REF (wordbeg),
   5892  1.1  christos     REF (wordend),
   5893  1.1  christos     REF (wordbound),
   5894  1.1  christos     REF (notwordbound)
   5895  1.1  christos # ifdef emacs
   5896  1.1  christos     ,REF (before_dot),
   5897  1.1  christos     REF (at_dot),
   5898  1.1  christos     REF (after_dot),
   5899  1.1  christos     REF (syntaxspec),
   5900  1.1  christos     REF (notsyntaxspec)
   5901  1.1  christos # endif
   5902  1.1  christos     };
   5903  1.1  christos #else
   5904  1.1  christos # define NEXT \
   5905  1.1  christos   break
   5906  1.1  christos # define CASE(x) \
   5907  1.1  christos   case x
   5908  1.1  christos #endif
   5909  1.1  christos 
   5910  1.1  christos   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
   5911  1.1  christos 
   5912  1.1  christos   INIT_FAIL_STACK ();
   5913  1.1  christos 
   5914  1.1  christos #ifdef MATCH_MAY_ALLOCATE
   5915  1.1  christos   /* Do not bother to initialize all the register variables if there are
   5916  1.1  christos      no groups in the pattern, as it takes a fair amount of time.  If
   5917  1.1  christos      there are groups, we include space for register 0 (the whole
   5918  1.1  christos      pattern), even though we never use it, since it simplifies the
   5919  1.1  christos      array indexing.  We should fix this.  */
   5920  1.1  christos   if (bufp->re_nsub)
   5921  1.1  christos     {
   5922  1.1  christos       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5923  1.1  christos       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5924  1.1  christos       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5925  1.1  christos       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5926  1.1  christos       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
   5927  1.1  christos       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
   5928  1.1  christos       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5929  1.1  christos       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
   5930  1.1  christos       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
   5931  1.1  christos 
   5932  1.1  christos       if (!(regstart && regend && old_regstart && old_regend && reg_info
   5933  1.1  christos             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
   5934  1.1  christos         {
   5935  1.1  christos           FREE_VARIABLES ();
   5936  1.1  christos           return -2;
   5937  1.1  christos         }
   5938  1.1  christos     }
   5939  1.1  christos   else
   5940  1.1  christos     {
   5941  1.1  christos       /* We must initialize all our variables to NULL, so that
   5942  1.1  christos          `FREE_VARIABLES' doesn't try to free them.  */
   5943  1.1  christos       regstart = regend = old_regstart = old_regend = best_regstart
   5944  1.1  christos         = best_regend = reg_dummy = NULL;
   5945  1.1  christos       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
   5946  1.1  christos     }
   5947  1.1  christos #endif /* MATCH_MAY_ALLOCATE */
   5948  1.1  christos 
   5949  1.1  christos   /* The starting position is bogus.  */
   5950  1.1  christos #ifdef WCHAR
   5951  1.1  christos   if (pos < 0 || pos > csize1 + csize2)
   5952  1.1  christos #else /* BYTE */
   5953  1.1  christos   if (pos < 0 || pos > size1 + size2)
   5954  1.1  christos #endif
   5955  1.1  christos     {
   5956  1.1  christos       FREE_VARIABLES ();
   5957  1.1  christos       return -1;
   5958  1.1  christos     }
   5959  1.1  christos 
   5960  1.1  christos #ifdef WCHAR
   5961  1.1  christos   /* Allocate wchar_t array for string1 and string2 and
   5962  1.1  christos      fill them with converted string.  */
   5963  1.1  christos   if (string1 == NULL && string2 == NULL)
   5964  1.1  christos     {
   5965  1.1  christos       /* We need to set up the buffers here.  */
   5966  1.1  christos 
   5967  1.1  christos       /* We must free wcs buffers in this function.  */
   5968  1.1  christos       cant_free_wcs_buf = 0;
   5969  1.1  christos 
   5970  1.1  christos       if (csize1 != 0)
   5971  1.1  christos 	{
   5972  1.1  christos 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
   5973  1.1  christos 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
   5974  1.1  christos 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
   5975  1.1  christos 	  if (!string1 || !mbs_offset1 || !is_binary)
   5976  1.1  christos 	    {
   5977  1.1  christos 	      FREE_VAR (string1);
   5978  1.1  christos 	      FREE_VAR (mbs_offset1);
   5979  1.1  christos 	      FREE_VAR (is_binary);
   5980  1.1  christos 	      return -2;
   5981  1.1  christos 	    }
   5982  1.1  christos 	}
   5983  1.1  christos       if (csize2 != 0)
   5984  1.1  christos 	{
   5985  1.1  christos 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
   5986  1.1  christos 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
   5987  1.1  christos 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
   5988  1.1  christos 	  if (!string2 || !mbs_offset2 || !is_binary)
   5989  1.1  christos 	    {
   5990  1.1  christos 	      FREE_VAR (string1);
   5991  1.1  christos 	      FREE_VAR (mbs_offset1);
   5992  1.1  christos 	      FREE_VAR (string2);
   5993  1.1  christos 	      FREE_VAR (mbs_offset2);
   5994  1.1  christos 	      FREE_VAR (is_binary);
   5995  1.1  christos 	      return -2;
   5996  1.1  christos 	    }
   5997  1.1  christos 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
   5998  1.1  christos 				     mbs_offset2, is_binary);
   5999  1.1  christos 	  string2[size2] = L'\0'; /* for a sentinel  */
   6000  1.1  christos 	  FREE_VAR (is_binary);
   6001  1.1  christos 	}
   6002  1.1  christos     }
   6003  1.1  christos 
   6004  1.1  christos   /* We need to cast pattern to (wchar_t*), because we cast this compiled
   6005  1.1  christos      pattern to (char*) in regex_compile.  */
   6006  1.1  christos   p = pattern = (CHAR_T*)bufp->buffer;
   6007  1.1  christos   pend = (CHAR_T*)(bufp->buffer + bufp->used);
   6008  1.1  christos 
   6009  1.1  christos #endif /* WCHAR */
   6010  1.1  christos 
   6011  1.1  christos   /* Initialize subexpression text positions to -1 to mark ones that no
   6012  1.1  christos      start_memory/stop_memory has been seen for. Also initialize the
   6013  1.1  christos      register information struct.  */
   6014  1.1  christos   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   6015  1.1  christos     {
   6016  1.1  christos       regstart[mcnt] = regend[mcnt]
   6017  1.1  christos         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
   6018  1.1  christos 
   6019  1.1  christos       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
   6020  1.1  christos       IS_ACTIVE (reg_info[mcnt]) = 0;
   6021  1.1  christos       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   6022  1.1  christos       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   6023  1.1  christos     }
   6024  1.1  christos 
   6025  1.1  christos   /* We move `string1' into `string2' if the latter's empty -- but not if
   6026  1.1  christos      `string1' is null.  */
   6027  1.1  christos   if (size2 == 0 && string1 != NULL)
   6028  1.1  christos     {
   6029  1.1  christos       string2 = string1;
   6030  1.1  christos       size2 = size1;
   6031  1.1  christos       string1 = 0;
   6032  1.1  christos       size1 = 0;
   6033  1.1  christos #ifdef WCHAR
   6034  1.1  christos       mbs_offset2 = mbs_offset1;
   6035  1.1  christos       csize2 = csize1;
   6036  1.1  christos       mbs_offset1 = NULL;
   6037  1.1  christos       csize1 = 0;
   6038  1.1  christos #endif
   6039  1.1  christos     }
   6040  1.1  christos   end1 = string1 + size1;
   6041  1.1  christos   end2 = string2 + size2;
   6042  1.1  christos 
   6043  1.1  christos   /* Compute where to stop matching, within the two strings.  */
   6044  1.1  christos #ifdef WCHAR
   6045  1.1  christos   if (stop <= csize1)
   6046  1.1  christos     {
   6047  1.1  christos       mcnt = count_mbs_length(mbs_offset1, stop);
   6048  1.1  christos       end_match_1 = string1 + mcnt;
   6049  1.1  christos       end_match_2 = string2;
   6050  1.1  christos     }
   6051  1.1  christos   else
   6052  1.1  christos     {
   6053  1.1  christos       if (stop > csize1 + csize2)
   6054  1.1  christos 	stop = csize1 + csize2;
   6055  1.1  christos       end_match_1 = end1;
   6056  1.1  christos       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
   6057  1.1  christos       end_match_2 = string2 + mcnt;
   6058  1.1  christos     }
   6059  1.1  christos   if (mcnt < 0)
   6060  1.1  christos     { /* count_mbs_length return error.  */
   6061  1.1  christos       FREE_VARIABLES ();
   6062  1.1  christos       return -1;
   6063  1.1  christos     }
   6064  1.1  christos #else
   6065  1.1  christos   if (stop <= size1)
   6066  1.1  christos     {
   6067  1.1  christos       end_match_1 = string1 + stop;
   6068  1.1  christos       end_match_2 = string2;
   6069  1.1  christos     }
   6070  1.1  christos   else
   6071  1.1  christos     {
   6072  1.1  christos       end_match_1 = end1;
   6073  1.1  christos       end_match_2 = string2 + stop - size1;
   6074  1.1  christos     }
   6075  1.1  christos #endif /* WCHAR */
   6076  1.1  christos 
   6077  1.1  christos   /* `p' scans through the pattern as `d' scans through the data.
   6078  1.1  christos      `dend' is the end of the input string that `d' points within.  `d'
   6079  1.1  christos      is advanced into the following input string whenever necessary, but
   6080  1.1  christos      this happens before fetching; therefore, at the beginning of the
   6081  1.1  christos      loop, `d' can be pointing at the end of a string, but it cannot
   6082  1.1  christos      equal `string2'.  */
   6083  1.1  christos #ifdef WCHAR
   6084  1.1  christos   if (size1 > 0 && pos <= csize1)
   6085  1.1  christos     {
   6086  1.1  christos       mcnt = count_mbs_length(mbs_offset1, pos);
   6087  1.1  christos       d = string1 + mcnt;
   6088  1.1  christos       dend = end_match_1;
   6089  1.1  christos     }
   6090  1.1  christos   else
   6091  1.1  christos     {
   6092  1.1  christos       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
   6093  1.1  christos       d = string2 + mcnt;
   6094  1.1  christos       dend = end_match_2;
   6095  1.1  christos     }
   6096  1.1  christos 
   6097  1.1  christos   if (mcnt < 0)
   6098  1.1  christos     { /* count_mbs_length return error.  */
   6099  1.1  christos       FREE_VARIABLES ();
   6100  1.1  christos       return -1;
   6101  1.1  christos     }
   6102  1.1  christos #else
   6103  1.1  christos   if (size1 > 0 && pos <= size1)
   6104  1.1  christos     {
   6105  1.1  christos       d = string1 + pos;
   6106  1.1  christos       dend = end_match_1;
   6107  1.1  christos     }
   6108  1.1  christos   else
   6109  1.1  christos     {
   6110  1.1  christos       d = string2 + pos - size1;
   6111  1.1  christos       dend = end_match_2;
   6112  1.1  christos     }
   6113  1.1  christos #endif /* WCHAR */
   6114  1.1  christos 
   6115  1.1  christos   DEBUG_PRINT1 ("The compiled pattern is:\n");
   6116  1.1  christos   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   6117  1.1  christos   DEBUG_PRINT1 ("The string to match is: `");
   6118  1.1  christos   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
   6119  1.1  christos   DEBUG_PRINT1 ("'\n");
   6120  1.1  christos 
   6121  1.1  christos   /* This loops over pattern commands.  It exits by returning from the
   6122  1.1  christos      function if the match is complete, or it drops through if the match
   6123  1.1  christos      fails at this starting point in the input data.  */
   6124  1.1  christos   for (;;)
   6125  1.1  christos     {
   6126  1.1  christos #ifdef _LIBC
   6127  1.1  christos       DEBUG_PRINT2 ("\n%p: ", p);
   6128  1.1  christos #else
   6129  1.1  christos       DEBUG_PRINT2 ("\n0x%x: ", p);
   6130  1.1  christos #endif
   6131  1.1  christos 
   6132  1.1  christos #ifdef __GNUC__
   6133  1.1  christos       NEXT;
   6134  1.1  christos #else
   6135  1.1  christos       if (p == pend)
   6136  1.1  christos #endif
   6137  1.1  christos 	{
   6138  1.1  christos #ifdef __GNUC__
   6139  1.1  christos 	end_of_pattern:
   6140  1.1  christos #endif
   6141  1.1  christos 	  /* End of pattern means we might have succeeded.  */
   6142  1.1  christos 	  DEBUG_PRINT1 ("end of pattern ... ");
   6143  1.1  christos 
   6144  1.1  christos 	  /* If we haven't matched the entire string, and we want the
   6145  1.1  christos 	     longest match, try backtracking.  */
   6146  1.1  christos 	  if (d != end_match_2)
   6147  1.1  christos 	    {
   6148  1.1  christos 	      /* 1 if this match ends in the same string (string1 or string2)
   6149  1.1  christos 		 as the best previous match.  */
   6150  1.1  christos 	      boolean same_str_p = (FIRST_STRING_P (match_end)
   6151  1.1  christos 				    == MATCHING_IN_FIRST_STRING);
   6152  1.1  christos 	      /* 1 if this match is the best seen so far.  */
   6153  1.1  christos 	      boolean best_match_p;
   6154  1.1  christos 
   6155  1.1  christos 	      /* AIX compiler got confused when this was combined
   6156  1.1  christos 		 with the previous declaration.  */
   6157  1.1  christos 	      if (same_str_p)
   6158  1.1  christos 		best_match_p = d > match_end;
   6159  1.1  christos 	      else
   6160  1.1  christos 		best_match_p = !MATCHING_IN_FIRST_STRING;
   6161  1.1  christos 
   6162  1.1  christos 	      DEBUG_PRINT1 ("backtracking.\n");
   6163  1.1  christos 
   6164  1.1  christos 	      if (!FAIL_STACK_EMPTY ())
   6165  1.1  christos 		{ /* More failure points to try.  */
   6166  1.1  christos 
   6167  1.1  christos 		  /* If exceeds best match so far, save it.  */
   6168  1.1  christos 		  if (!best_regs_set || best_match_p)
   6169  1.1  christos 		    {
   6170  1.1  christos 		      best_regs_set = true;
   6171  1.1  christos 		      match_end = d;
   6172  1.1  christos 
   6173  1.1  christos 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
   6174  1.1  christos 
   6175  1.1  christos 		      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   6176  1.1  christos 			{
   6177  1.1  christos 			  best_regstart[mcnt] = regstart[mcnt];
   6178  1.1  christos 			  best_regend[mcnt] = regend[mcnt];
   6179  1.1  christos 			}
   6180  1.1  christos 		    }
   6181  1.1  christos 		  goto fail;
   6182  1.1  christos 		}
   6183  1.1  christos 
   6184  1.1  christos 	      /* If no failure points, don't restore garbage.  And if
   6185  1.1  christos 		 last match is real best match, don't restore second
   6186  1.1  christos 		 best one. */
   6187  1.1  christos 	      else if (best_regs_set && !best_match_p)
   6188  1.1  christos 		{
   6189  1.1  christos 		restore_best_regs:
   6190  1.1  christos 		  /* Restore best match.  It may happen that `dend ==
   6191  1.1  christos 		     end_match_1' while the restored d is in string2.
   6192  1.1  christos 		     For example, the pattern `x.*y.*z' against the
   6193  1.1  christos 		     strings `x-' and `y-z-', if the two strings are
   6194  1.1  christos 		     not consecutive in memory.  */
   6195  1.1  christos 		  DEBUG_PRINT1 ("Restoring best registers.\n");
   6196  1.1  christos 
   6197  1.1  christos 		  d = match_end;
   6198  1.1  christos 		  dend = ((d >= string1 && d <= end1)
   6199  1.1  christos 			  ? end_match_1 : end_match_2);
   6200  1.1  christos 
   6201  1.1  christos 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
   6202  1.1  christos 		    {
   6203  1.1  christos 		      regstart[mcnt] = best_regstart[mcnt];
   6204  1.1  christos 		      regend[mcnt] = best_regend[mcnt];
   6205  1.1  christos 		    }
   6206  1.1  christos 		}
   6207  1.1  christos 	    } /* d != end_match_2 */
   6208  1.1  christos 
   6209  1.1  christos 	succeed_label:
   6210  1.1  christos 	  DEBUG_PRINT1 ("Accepting match.\n");
   6211  1.1  christos 	  /* If caller wants register contents data back, do it.  */
   6212  1.1  christos 	  if (regs && !bufp->no_sub)
   6213  1.1  christos 	    {
   6214  1.1  christos 	      /* Have the register data arrays been allocated?  */
   6215  1.1  christos 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
   6216  1.1  christos 		{ /* No.  So allocate them with malloc.  We need one
   6217  1.1  christos 		     extra element beyond `num_regs' for the `-1' marker
   6218  1.1  christos 		     GNU code uses.  */
   6219  1.1  christos 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
   6220  1.1  christos 		  regs->start = TALLOC (regs->num_regs, regoff_t);
   6221  1.1  christos 		  regs->end = TALLOC (regs->num_regs, regoff_t);
   6222  1.1  christos 		  if (regs->start == NULL || regs->end == NULL)
   6223  1.1  christos 		    {
   6224  1.1  christos 		      FREE_VARIABLES ();
   6225  1.1  christos 		      return -2;
   6226  1.1  christos 		    }
   6227  1.1  christos 		  bufp->regs_allocated = REGS_REALLOCATE;
   6228  1.1  christos 		}
   6229  1.1  christos 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
   6230  1.1  christos 		{ /* Yes.  If we need more elements than were already
   6231  1.1  christos 		     allocated, reallocate them.  If we need fewer, just
   6232  1.1  christos 		     leave it alone.  */
   6233  1.1  christos 		  if (regs->num_regs < num_regs + 1)
   6234  1.1  christos 		    {
   6235  1.1  christos 		      regs->num_regs = num_regs + 1;
   6236  1.1  christos 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
   6237  1.1  christos 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
   6238  1.1  christos 		      if (regs->start == NULL || regs->end == NULL)
   6239  1.1  christos 			{
   6240  1.1  christos 			  FREE_VARIABLES ();
   6241  1.1  christos 			  return -2;
   6242  1.1  christos 			}
   6243  1.1  christos 		    }
   6244  1.1  christos 		}
   6245  1.1  christos 	      else
   6246  1.1  christos 		{
   6247  1.1  christos 		  /* These braces fend off an "empty body in an else-statement"
   6248  1.1  christos 		     warning under GCC when assert expands to nothing.  */
   6249  1.1  christos 		  assert (bufp->regs_allocated == REGS_FIXED);
   6250  1.1  christos 		}
   6251  1.1  christos 
   6252  1.1  christos 	      /* Convert the pointer data in `regstart' and `regend' to
   6253  1.1  christos 		 indices.  Register zero has to be set differently,
   6254  1.1  christos 		 since we haven't kept track of any info for it.  */
   6255  1.1  christos 	      if (regs->num_regs > 0)
   6256  1.1  christos 		{
   6257  1.1  christos 		  regs->start[0] = pos;
   6258  1.1  christos #ifdef WCHAR
   6259  1.1  christos 		  if (MATCHING_IN_FIRST_STRING)
   6260  1.1  christos 		    regs->end[0] = (mbs_offset1 != NULL ?
   6261  1.1  christos 				    mbs_offset1[d-string1] : 0);
   6262  1.1  christos 		  else
   6263  1.1  christos 		    regs->end[0] = csize1 + (mbs_offset2 != NULL
   6264  1.1  christos 					     ? mbs_offset2[d-string2] : 0);
   6265  1.1  christos #else
   6266  1.1  christos 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
   6267  1.1  christos 				  ? ((regoff_t) (d - string1))
   6268  1.1  christos 				  : ((regoff_t) (d - string2 + size1)));
   6269  1.1  christos #endif /* WCHAR */
   6270  1.1  christos 		}
   6271  1.1  christos 
   6272  1.1  christos 	      /* Go through the first `min (num_regs, regs->num_regs)'
   6273  1.1  christos 		 registers, since that is all we initialized.  */
   6274  1.1  christos 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
   6275  1.1  christos 		   mcnt++)
   6276  1.1  christos 		{
   6277  1.1  christos 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
   6278  1.1  christos 		    regs->start[mcnt] = regs->end[mcnt] = -1;
   6279  1.1  christos 		  else
   6280  1.1  christos 		    {
   6281  1.1  christos 		      regs->start[mcnt]
   6282  1.1  christos 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
   6283  1.1  christos 		      regs->end[mcnt]
   6284  1.1  christos 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
   6285  1.1  christos 		    }
   6286  1.1  christos 		}
   6287  1.1  christos 
   6288  1.1  christos 	      /* If the regs structure we return has more elements than
   6289  1.1  christos 		 were in the pattern, set the extra elements to -1.  If
   6290  1.1  christos 		 we (re)allocated the registers, this is the case,
   6291  1.1  christos 		 because we always allocate enough to have at least one
   6292  1.1  christos 		 -1 at the end.  */
   6293  1.1  christos 	      for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
   6294  1.1  christos 		regs->start[mcnt] = regs->end[mcnt] = -1;
   6295  1.1  christos 	    } /* regs && !bufp->no_sub */
   6296  1.1  christos 
   6297  1.1  christos 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
   6298  1.1  christos 			nfailure_points_pushed, nfailure_points_popped,
   6299  1.1  christos 			nfailure_points_pushed - nfailure_points_popped);
   6300  1.1  christos 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
   6301  1.1  christos 
   6302  1.1  christos #ifdef WCHAR
   6303  1.1  christos 	  if (MATCHING_IN_FIRST_STRING)
   6304  1.1  christos 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
   6305  1.1  christos 	  else
   6306  1.1  christos 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
   6307  1.1  christos 	      csize1;
   6308  1.1  christos 	  mcnt -= pos;
   6309  1.1  christos #else
   6310  1.1  christos 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
   6311  1.1  christos 			    ? string1 : string2 - size1);
   6312  1.1  christos #endif /* WCHAR */
   6313  1.1  christos 
   6314  1.1  christos 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
   6315  1.1  christos 
   6316  1.1  christos 	  FREE_VARIABLES ();
   6317  1.1  christos 	  return mcnt;
   6318  1.1  christos 	}
   6319  1.1  christos 
   6320  1.1  christos #ifndef __GNUC__
   6321  1.1  christos       /* Otherwise match next pattern command.  */
   6322  1.1  christos       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   6323  1.1  christos 	{
   6324  1.1  christos #endif
   6325  1.1  christos         /* Ignore these.  Used to ignore the n of succeed_n's which
   6326  1.1  christos            currently have n == 0.  */
   6327  1.1  christos         CASE (no_op):
   6328  1.1  christos           DEBUG_PRINT1 ("EXECUTING no_op.\n");
   6329  1.1  christos           NEXT;
   6330  1.1  christos 
   6331  1.1  christos 	CASE (succeed):
   6332  1.1  christos           DEBUG_PRINT1 ("EXECUTING succeed.\n");
   6333  1.1  christos 	  goto succeed_label;
   6334  1.1  christos 
   6335  1.1  christos         /* Match the next n pattern characters exactly.  The following
   6336  1.1  christos            byte in the pattern defines n, and the n bytes after that
   6337  1.1  christos            are the characters to match.  */
   6338  1.1  christos 	CASE (exactn):
   6339  1.1  christos #ifdef MBS_SUPPORT
   6340  1.1  christos 	CASE (exactn_bin):
   6341  1.1  christos #endif
   6342  1.1  christos 	  mcnt = *p++;
   6343  1.1  christos           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
   6344  1.1  christos 
   6345  1.1  christos           /* This is written out as an if-else so we don't waste time
   6346  1.1  christos              testing `translate' inside the loop.  */
   6347  1.1  christos           if (translate)
   6348  1.1  christos 	    {
   6349  1.1  christos 	      do
   6350  1.1  christos 		{
   6351  1.1  christos 		  PREFETCH ();
   6352  1.1  christos #ifdef WCHAR
   6353  1.1  christos 		  if (*d <= 0xff)
   6354  1.1  christos 		    {
   6355  1.1  christos 		      if ((UCHAR_T) translate[(unsigned char) *d++]
   6356  1.1  christos 			  != (UCHAR_T) *p++)
   6357  1.1  christos 			goto fail;
   6358  1.1  christos 		    }
   6359  1.1  christos 		  else
   6360  1.1  christos 		    {
   6361  1.1  christos 		      if (*d++ != (CHAR_T) *p++)
   6362  1.1  christos 			goto fail;
   6363  1.1  christos 		    }
   6364  1.1  christos #else
   6365  1.1  christos 		  if ((UCHAR_T) translate[(unsigned char) *d++]
   6366  1.1  christos 		      != (UCHAR_T) *p++)
   6367  1.1  christos                     goto fail;
   6368  1.1  christos #endif /* WCHAR */
   6369  1.1  christos 		}
   6370  1.1  christos 	      while (--mcnt);
   6371  1.1  christos 	    }
   6372  1.1  christos 	  else
   6373  1.1  christos 	    {
   6374  1.1  christos 	      do
   6375  1.1  christos 		{
   6376  1.1  christos 		  PREFETCH ();
   6377  1.1  christos 		  if (*d++ != (CHAR_T) *p++) goto fail;
   6378  1.1  christos 		}
   6379  1.1  christos 	      while (--mcnt);
   6380  1.1  christos 	    }
   6381  1.1  christos 	  SET_REGS_MATCHED ();
   6382  1.1  christos           NEXT;
   6383  1.1  christos 
   6384  1.1  christos 
   6385  1.1  christos         /* Match any character except possibly a newline or a null.  */
   6386  1.1  christos 	CASE (anychar):
   6387  1.1  christos           DEBUG_PRINT1 ("EXECUTING anychar.\n");
   6388  1.1  christos 
   6389  1.1  christos           PREFETCH ();
   6390  1.1  christos 
   6391  1.1  christos           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
   6392  1.1  christos               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
   6393  1.1  christos 	    goto fail;
   6394  1.1  christos 
   6395  1.1  christos           SET_REGS_MATCHED ();
   6396  1.1  christos           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
   6397  1.1  christos           d++;
   6398  1.1  christos 	  NEXT;
   6399  1.1  christos 
   6400  1.1  christos 
   6401  1.1  christos 	CASE (charset):
   6402  1.1  christos 	CASE (charset_not):
   6403  1.1  christos 	  {
   6404  1.1  christos 	    register UCHAR_T c;
   6405  1.1  christos #ifdef WCHAR
   6406  1.1  christos 	    unsigned int i, char_class_length, coll_symbol_length,
   6407  1.1  christos               equiv_class_length, ranges_length, chars_length, length;
   6408  1.1  christos 	    CHAR_T *workp, *workp2, *charset_top;
   6409  1.1  christos #define WORK_BUFFER_SIZE 128
   6410  1.1  christos             CHAR_T str_buf[WORK_BUFFER_SIZE];
   6411  1.1  christos # ifdef _LIBC
   6412  1.1  christos 	    uint32_t nrules;
   6413  1.1  christos # endif /* _LIBC */
   6414  1.1  christos #endif /* WCHAR */
   6415  1.1  christos 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
   6416  1.1  christos 
   6417  1.1  christos             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
   6418  1.1  christos 	    PREFETCH ();
   6419  1.1  christos 	    c = TRANSLATE (*d); /* The character to match.  */
   6420  1.1  christos #ifdef WCHAR
   6421  1.1  christos # ifdef _LIBC
   6422  1.1  christos 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   6423  1.1  christos # endif /* _LIBC */
   6424  1.1  christos 	    charset_top = p - 1;
   6425  1.1  christos 	    char_class_length = *p++;
   6426  1.1  christos 	    coll_symbol_length = *p++;
   6427  1.1  christos 	    equiv_class_length = *p++;
   6428  1.1  christos 	    ranges_length = *p++;
   6429  1.1  christos 	    chars_length = *p++;
   6430  1.1  christos 	    /* p points charset[6], so the address of the next instruction
   6431  1.1  christos 	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
   6432  1.1  christos 	       where l=length of char_classes, m=length of collating_symbol,
   6433  1.1  christos 	       n=equivalence_class, o=length of char_range,
   6434  1.1  christos 	       p'=length of character.  */
   6435  1.1  christos 	    workp = p;
   6436  1.1  christos 	    /* Update p to indicate the next instruction.  */
   6437  1.1  christos 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
   6438  1.1  christos               2*ranges_length + chars_length;
   6439  1.1  christos 
   6440  1.1  christos             /* match with char_class?  */
   6441  1.1  christos 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
   6442  1.1  christos 	      {
   6443  1.1  christos 		wctype_t wctype;
   6444  1.1  christos 		uintptr_t alignedp = ((uintptr_t)workp
   6445  1.1  christos 				      + __alignof__(wctype_t) - 1)
   6446  1.1  christos 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
   6447  1.1  christos 		wctype = *((wctype_t*)alignedp);
   6448  1.1  christos 		workp += CHAR_CLASS_SIZE;
   6449  1.1  christos 		if (iswctype((wint_t)c, wctype))
   6450  1.1  christos 		  goto char_set_matched;
   6451  1.1  christos 	      }
   6452  1.1  christos 
   6453  1.1  christos             /* match with collating_symbol?  */
   6454  1.1  christos # ifdef _LIBC
   6455  1.1  christos 	    if (nrules != 0)
   6456  1.1  christos 	      {
   6457  1.1  christos 		const unsigned char *extra = (const unsigned char *)
   6458  1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   6459  1.1  christos 
   6460  1.1  christos 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
   6461  1.1  christos 		     workp++)
   6462  1.1  christos 		  {
   6463  1.1  christos 		    int32_t *wextra;
   6464  1.1  christos 		    wextra = (int32_t*)(extra + *workp++);
   6465  1.1  christos 		    for (i = 0; i < *wextra; ++i)
   6466  1.1  christos 		      if (TRANSLATE(d[i]) != wextra[1 + i])
   6467  1.1  christos 			break;
   6468  1.1  christos 
   6469  1.1  christos 		    if (i == *wextra)
   6470  1.1  christos 		      {
   6471  1.1  christos 			/* Update d; since d will be incremented again at
   6472  1.1  christos 			   char_set_matched:, we subtract one here.  */
   6473  1.1  christos 			d += i - 1;
   6474  1.1  christos 			goto char_set_matched;
   6475  1.1  christos 		      }
   6476  1.1  christos 		  }
   6477  1.1  christos 	      }
   6478  1.1  christos 	    else /* (nrules == 0) */
   6479  1.1  christos # endif
   6480  1.1  christos 	      /* If we can't look up collation data, we use wcscoll
   6481  1.1  christos 		 instead.  */
   6482  1.1  christos 	      {
   6483  1.1  christos 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
   6484  1.1  christos 		  {
   6485  1.1  christos 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6486  1.1  christos 		    length = wcslen (workp);
   6487  1.1  christos 
   6488  1.1  christos 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6489  1.1  christos 		       no substring of the string can ever match the
   6490  1.1  christos 		       collating symbol.  */
   6491  1.1  christos 		    if (wcscoll (workp, d) > 0)
   6492  1.1  christos 		      {
   6493  1.1  christos 			workp += length + 1;
   6494  1.1  christos 			continue;
   6495  1.1  christos 		      }
   6496  1.1  christos 
   6497  1.1  christos 		    /* First, we compare the collating symbol with
   6498  1.1  christos 		       the first character of the string.
   6499  1.1  christos 		       If it doesn't match, we add the next character to
   6500  1.1  christos 		       the compare buffer in turn.  */
   6501  1.1  christos 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
   6502  1.1  christos 		      {
   6503  1.1  christos 			int match;
   6504  1.1  christos 			if (d == dend)
   6505  1.1  christos 			  {
   6506  1.1  christos 			    if (dend == end_match_2)
   6507  1.1  christos 			      break;
   6508  1.1  christos 			    d = string2;
   6509  1.1  christos 			    dend = end_match_2;
   6510  1.1  christos 			  }
   6511  1.1  christos 
   6512  1.1  christos 			/* add next character to the compare buffer.  */
   6513  1.1  christos 			str_buf[i] = TRANSLATE(*d);
   6514  1.1  christos 			str_buf[i+1] = '\0';
   6515  1.1  christos 
   6516  1.1  christos 			match = wcscoll (workp, str_buf);
   6517  1.1  christos 			if (match == 0)
   6518  1.1  christos 			  goto char_set_matched;
   6519  1.1  christos 
   6520  1.1  christos 			if (match < 0)
   6521  1.1  christos 			  /* (str_buf > workp) indicate (str_buf + X > workp),
   6522  1.1  christos 			     because for all X (str_buf + X > str_buf).
   6523  1.1  christos 			     So we don't need continue this loop.  */
   6524  1.1  christos 			  break;
   6525  1.1  christos 
   6526  1.1  christos 			/* Otherwise(str_buf < workp),
   6527  1.1  christos 			   (str_buf+next_character) may equals (workp).
   6528  1.1  christos 			   So we continue this loop.  */
   6529  1.1  christos 		      }
   6530  1.1  christos 		    /* not matched */
   6531  1.1  christos 		    d = backup_d;
   6532  1.1  christos 		    dend = backup_dend;
   6533  1.1  christos 		    workp += length + 1;
   6534  1.1  christos 		  }
   6535  1.1  christos               }
   6536  1.1  christos             /* match with equivalence_class?  */
   6537  1.1  christos # ifdef _LIBC
   6538  1.1  christos 	    if (nrules != 0)
   6539  1.1  christos 	      {
   6540  1.1  christos                 const CHAR_T *backup_d = d, *backup_dend = dend;
   6541  1.1  christos 		/* Try to match the equivalence class against
   6542  1.1  christos 		   those known to the collate implementation.  */
   6543  1.1  christos 		const int32_t *table;
   6544  1.1  christos 		const int32_t *weights;
   6545  1.1  christos 		const int32_t *extra;
   6546  1.1  christos 		const int32_t *indirect;
   6547  1.1  christos 		int32_t idx, idx2;
   6548  1.1  christos 		wint_t *cp;
   6549  1.1  christos 		size_t len;
   6550  1.1  christos 
   6551  1.1  christos 		/* This #include defines a local function!  */
   6552  1.1  christos #  include <locale/weightwc.h>
   6553  1.1  christos 
   6554  1.1  christos 		table = (const int32_t *)
   6555  1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
   6556  1.1  christos 		weights = (const wint_t *)
   6557  1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
   6558  1.1  christos 		extra = (const wint_t *)
   6559  1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
   6560  1.1  christos 		indirect = (const int32_t *)
   6561  1.1  christos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
   6562  1.1  christos 
   6563  1.1  christos 		/* Write 1 collating element to str_buf, and
   6564  1.1  christos 		   get its index.  */
   6565  1.1  christos 		idx2 = 0;
   6566  1.1  christos 
   6567  1.1  christos 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
   6568  1.1  christos 		  {
   6569  1.1  christos 		    cp = (wint_t*)str_buf;
   6570  1.1  christos 		    if (d == dend)
   6571  1.1  christos 		      {
   6572  1.1  christos 			if (dend == end_match_2)
   6573  1.1  christos 			  break;
   6574  1.1  christos 			d = string2;
   6575  1.1  christos 			dend = end_match_2;
   6576  1.1  christos 		      }
   6577  1.1  christos 		    str_buf[i] = TRANSLATE(*(d+i));
   6578  1.1  christos 		    str_buf[i+1] = '\0'; /* sentinel */
   6579  1.1  christos 		    idx2 = findidx ((const wint_t**)&cp);
   6580  1.1  christos 		  }
   6581  1.1  christos 
   6582  1.1  christos 		/* Update d, however d will be incremented at
   6583  1.1  christos 		   char_set_matched:, we decrement d here.  */
   6584  1.1  christos 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
   6585  1.1  christos 		if (d >= dend)
   6586  1.1  christos 		  {
   6587  1.1  christos 		    if (dend == end_match_2)
   6588  1.1  christos 			d = dend;
   6589  1.1  christos 		    else
   6590  1.1  christos 		      {
   6591  1.1  christos 			d = string2;
   6592  1.1  christos 			dend = end_match_2;
   6593  1.1  christos 		      }
   6594  1.1  christos 		  }
   6595  1.1  christos 
   6596  1.1  christos 		len = weights[idx2];
   6597  1.1  christos 
   6598  1.1  christos 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
   6599  1.1  christos 		     workp++)
   6600  1.1  christos 		  {
   6601  1.1  christos 		    idx = (int32_t)*workp;
   6602  1.1  christos 		    /* We already checked idx != 0 in regex_compile. */
   6603  1.1  christos 
   6604  1.1  christos 		    if (idx2 != 0 && len == weights[idx])
   6605  1.1  christos 		      {
   6606  1.1  christos 			int cnt = 0;
   6607  1.1  christos 			while (cnt < len && (weights[idx + 1 + cnt]
   6608  1.1  christos 					     == weights[idx2 + 1 + cnt]))
   6609  1.1  christos 			  ++cnt;
   6610  1.1  christos 
   6611  1.1  christos 			if (cnt == len)
   6612  1.1  christos 			  goto char_set_matched;
   6613  1.1  christos 		      }
   6614  1.1  christos 		  }
   6615  1.1  christos 		/* not matched */
   6616  1.1  christos                 d = backup_d;
   6617  1.1  christos                 dend = backup_dend;
   6618  1.1  christos 	      }
   6619  1.1  christos 	    else /* (nrules == 0) */
   6620  1.1  christos # endif
   6621  1.1  christos 	      /* If we can't look up collation data, we use wcscoll
   6622  1.1  christos 		 instead.  */
   6623  1.1  christos 	      {
   6624  1.1  christos 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
   6625  1.1  christos 		  {
   6626  1.1  christos 		    const CHAR_T *backup_d = d, *backup_dend = dend;
   6627  1.1  christos 		    length = wcslen (workp);
   6628  1.1  christos 
   6629  1.1  christos 		    /* If wcscoll(the collating symbol, whole string) > 0,
   6630  1.1  christos 		       any substring of the string never match with the
   6631  1.1  christos 		       collating symbol.  */
   6632  1.1  christos 		    if (wcscoll (workp, d) > 0)
   6633  1.1  christos 		      {
   6634  1.1  christos 			workp += length + 1;
   6635  1.1  christos 			break;
   6636  1.1  christos 		      }
   6637  1.1  christos 
   6638  1.1  christos 		    /* First, we compare the equivalence class with
   6639  1.1  christos 		       the first character of the string.
   6640  1.1  christos 		       If it don't match, we add the next character to
   6641  1.1  christos 		       the compare buffer in turn.  */
   6642  1.1  christos 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
   6643  1.1  christos 		      {
   6644  1.1  christos 			int match;
   6645  1.1  christos 			if (d == dend)
   6646  1.1  christos 			  {
   6647  1.1  christos 			    if (dend == end_match_2)
   6648  1.1  christos 			      break;
   6649  1.1  christos 			    d = string2;
   6650  1.1  christos 			    dend = end_match_2;
   6651  1.1  christos 			  }
   6652  1.1  christos 
   6653  1.1  christos 			/* add next character to the compare buffer.  */
   6654  1.1  christos 			str_buf[i] = TRANSLATE(*d);
   6655  1.1  christos 			str_buf[i+1] = '\0';
   6656  1.1  christos 
   6657  1.1  christos 			match = wcscoll (workp, str_buf);
   6658  1.1  christos 
   6659  1.1  christos 			if (match == 0)
   6660  1.1  christos 			  goto char_set_matched;
   6661  1.1  christos 
   6662  1.1  christos 			if (match < 0)
   6663  1.1  christos 			/* (str_buf > workp) indicate (str_buf + X > workp),
   6664  1.1  christos 			   because for all X (str_buf + X > str_buf).
   6665  1.1  christos 			   So we don't need continue this loop.  */
   6666  1.1  christos 			  break;
   6667  1.1  christos 
   6668  1.1  christos 			/* Otherwise(str_buf < workp),
   6669  1.1  christos 			   (str_buf+next_character) may equals (workp).
   6670  1.1  christos 			   So we continue this loop.  */
   6671  1.1  christos 		      }
   6672  1.1  christos 		    /* not matched */
   6673  1.1  christos 		    d = backup_d;
   6674  1.1  christos 		    dend = backup_dend;
   6675  1.1  christos 		    workp += length + 1;
   6676  1.1  christos 		  }
   6677  1.1  christos 	      }
   6678  1.1  christos 
   6679  1.1  christos             /* match with char_range?  */
   6680  1.1  christos # ifdef _LIBC
   6681  1.1  christos 	    if (nrules != 0)
   6682  1.1  christos 	      {
   6683  1.1  christos 		uint32_t collseqval;
   6684  1.1  christos 		const char *collseq = (const char *)
   6685  1.1  christos 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   6686  1.1  christos 
   6687  1.1  christos 		collseqval = collseq_table_lookup (collseq, c);
   6688  1.1  christos 
   6689  1.1  christos 		for (; workp < p - chars_length ;)
   6690  1.1  christos 		  {
   6691  1.1  christos 		    uint32_t start_val, end_val;
   6692  1.1  christos 
   6693  1.1  christos 		    /* We already compute the collation sequence value
   6694  1.1  christos 		       of the characters (or collating symbols).  */
   6695  1.1  christos 		    start_val = (uint32_t) *workp++; /* range_start */
   6696  1.1  christos 		    end_val = (uint32_t) *workp++; /* range_end */
   6697  1.1  christos 
   6698  1.1  christos 		    if (start_val <= collseqval && collseqval <= end_val)
   6699  1.1  christos 		      goto char_set_matched;
   6700  1.1  christos 		  }
   6701  1.1  christos 	      }
   6702  1.1  christos 	    else
   6703  1.1  christos # endif
   6704  1.1  christos 	      {
   6705  1.1  christos 		/* We set range_start_char at str_buf[0], range_end_char
   6706  1.1  christos 		   at str_buf[4], and compared char at str_buf[2].  */
   6707  1.1  christos 		str_buf[1] = 0;
   6708  1.1  christos 		str_buf[2] = c;
   6709  1.1  christos 		str_buf[3] = 0;
   6710  1.1  christos 		str_buf[5] = 0;
   6711  1.1  christos 		for (; workp < p - chars_length ;)
   6712  1.1  christos 		  {
   6713  1.1  christos 		    wchar_t *range_start_char, *range_end_char;
   6714  1.1  christos 
   6715  1.1  christos 		    /* match if (range_start_char <= c <= range_end_char).  */
   6716  1.1  christos 
   6717  1.1  christos 		    /* If range_start(or end) < 0, we assume -range_start(end)
   6718  1.1  christos 		       is the offset of the collating symbol which is specified
   6719  1.1  christos 		       as the character of the range start(end).  */
   6720  1.1  christos 
   6721  1.1  christos 		    /* range_start */
   6722  1.1  christos 		    if (*workp < 0)
   6723  1.1  christos 		      range_start_char = charset_top - (*workp++);
   6724  1.1  christos 		    else
   6725  1.1  christos 		      {
   6726  1.1  christos 			str_buf[0] = *workp++;
   6727  1.1  christos 			range_start_char = str_buf;
   6728  1.1  christos 		      }
   6729  1.1  christos 
   6730  1.1  christos 		    /* range_end */
   6731  1.1  christos 		    if (*workp < 0)
   6732  1.1  christos 		      range_end_char = charset_top - (*workp++);
   6733  1.1  christos 		    else
   6734  1.1  christos 		      {
   6735  1.1  christos 			str_buf[4] = *workp++;
   6736  1.1  christos 			range_end_char = str_buf + 4;
   6737  1.1  christos 		      }
   6738  1.1  christos 
   6739  1.1  christos 		    if (wcscoll (range_start_char, str_buf+2) <= 0
   6740  1.1  christos 			&& wcscoll (str_buf+2, range_end_char) <= 0)
   6741  1.1  christos 		      goto char_set_matched;
   6742  1.1  christos 		  }
   6743  1.1  christos 	      }
   6744  1.1  christos 
   6745  1.1  christos             /* match with char?  */
   6746  1.1  christos 	    for (; workp < p ; workp++)
   6747  1.1  christos 	      if (c == *workp)
   6748  1.1  christos 		goto char_set_matched;
   6749  1.1  christos 
   6750  1.1  christos 	    not = !not;
   6751  1.1  christos 
   6752  1.1  christos 	  char_set_matched:
   6753  1.1  christos 	    if (not) goto fail;
   6754  1.1  christos #else
   6755  1.1  christos             /* Cast to `unsigned' instead of `unsigned char' in case the
   6756  1.1  christos                bit list is a full 32 bytes long.  */
   6757  1.1  christos 	    if (c < (unsigned) (*p * BYTEWIDTH)
   6758  1.1  christos 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   6759  1.1  christos 	      not = !not;
   6760  1.1  christos 
   6761  1.1  christos 	    p += 1 + *p;
   6762  1.1  christos 
   6763  1.1  christos 	    if (!not) goto fail;
   6764  1.1  christos #undef WORK_BUFFER_SIZE
   6765  1.1  christos #endif /* WCHAR */
   6766  1.1  christos 	    SET_REGS_MATCHED ();
   6767  1.1  christos             d++;
   6768  1.1  christos 	    NEXT;
   6769  1.1  christos 	  }
   6770  1.1  christos 
   6771  1.1  christos 
   6772  1.1  christos         /* The beginning of a group is represented by start_memory.
   6773  1.1  christos            The arguments are the register number in the next byte, and the
   6774  1.1  christos            number of groups inner to this one in the next.  The text
   6775  1.1  christos            matched within the group is recorded (in the internal
   6776  1.1  christos            registers data structure) under the register number.  */
   6777  1.1  christos         CASE (start_memory):
   6778  1.1  christos 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
   6779  1.1  christos 			(long int) *p, (long int) p[1]);
   6780  1.1  christos 
   6781  1.1  christos           /* Find out if this group can match the empty string.  */
   6782  1.1  christos 	  p1 = p;		/* To send to group_match_null_string_p.  */
   6783  1.1  christos 
   6784  1.1  christos           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
   6785  1.1  christos             REG_MATCH_NULL_STRING_P (reg_info[*p])
   6786  1.1  christos               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
   6787  1.1  christos 
   6788  1.1  christos           /* Save the position in the string where we were the last time
   6789  1.1  christos              we were at this open-group operator in case the group is
   6790  1.1  christos              operated upon by a repetition operator, e.g., with `(a*)*b'
   6791  1.1  christos              against `ab'; then we want to ignore where we are now in
   6792  1.1  christos              the string in case this attempt to match fails.  */
   6793  1.1  christos           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6794  1.1  christos                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
   6795  1.1  christos                              : regstart[*p];
   6796  1.1  christos 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
   6797  1.1  christos 			 POINTER_TO_OFFSET (old_regstart[*p]));
   6798  1.1  christos 
   6799  1.1  christos           regstart[*p] = d;
   6800  1.1  christos 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
   6801  1.1  christos 
   6802  1.1  christos           IS_ACTIVE (reg_info[*p]) = 1;
   6803  1.1  christos           MATCHED_SOMETHING (reg_info[*p]) = 0;
   6804  1.1  christos 
   6805  1.1  christos 	  /* Clear this whenever we change the register activity status.  */
   6806  1.1  christos 	  set_regs_matched_done = 0;
   6807  1.1  christos 
   6808  1.1  christos           /* This is the new highest active register.  */
   6809  1.1  christos           highest_active_reg = *p;
   6810  1.1  christos 
   6811  1.1  christos           /* If nothing was active before, this is the new lowest active
   6812  1.1  christos              register.  */
   6813  1.1  christos           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   6814  1.1  christos             lowest_active_reg = *p;
   6815  1.1  christos 
   6816  1.1  christos           /* Move past the register number and inner group count.  */
   6817  1.1  christos           p += 2;
   6818  1.1  christos 	  just_past_start_mem = p;
   6819  1.1  christos 
   6820  1.1  christos           NEXT;
   6821  1.1  christos 
   6822  1.1  christos 
   6823  1.1  christos         /* The stop_memory opcode represents the end of a group.  Its
   6824  1.1  christos            arguments are the same as start_memory's: the register
   6825  1.1  christos            number, and the number of inner groups.  */
   6826  1.1  christos 	CASE (stop_memory):
   6827  1.1  christos 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
   6828  1.1  christos 			(long int) *p, (long int) p[1]);
   6829  1.1  christos 
   6830  1.1  christos           /* We need to save the string position the last time we were at
   6831  1.1  christos              this close-group operator in case the group is operated
   6832  1.1  christos              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
   6833  1.1  christos              against `aba'; then we want to ignore where we are now in
   6834  1.1  christos              the string in case this attempt to match fails.  */
   6835  1.1  christos           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   6836  1.1  christos                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
   6837  1.1  christos 			   : regend[*p];
   6838  1.1  christos 	  DEBUG_PRINT2 ("      old_regend: %d\n",
   6839  1.1  christos 			 POINTER_TO_OFFSET (old_regend[*p]));
   6840  1.1  christos 
   6841  1.1  christos           regend[*p] = d;
   6842  1.1  christos 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
   6843  1.1  christos 
   6844  1.1  christos           /* This register isn't active anymore.  */
   6845  1.1  christos           IS_ACTIVE (reg_info[*p]) = 0;
   6846  1.1  christos 
   6847  1.1  christos 	  /* Clear this whenever we change the register activity status.  */
   6848  1.1  christos 	  set_regs_matched_done = 0;
   6849  1.1  christos 
   6850  1.1  christos           /* If this was the only register active, nothing is active
   6851  1.1  christos              anymore.  */
   6852  1.1  christos           if (lowest_active_reg == highest_active_reg)
   6853  1.1  christos             {
   6854  1.1  christos               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6855  1.1  christos               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6856  1.1  christos             }
   6857  1.1  christos           else
   6858  1.1  christos             { /* We must scan for the new highest active register, since
   6859  1.1  christos                  it isn't necessarily one less than now: consider
   6860  1.1  christos                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
   6861  1.1  christos                  new highest active register is 1.  */
   6862  1.1  christos               UCHAR_T r = *p - 1;
   6863  1.1  christos               while (r > 0 && !IS_ACTIVE (reg_info[r]))
   6864  1.1  christos                 r--;
   6865  1.1  christos 
   6866  1.1  christos               /* If we end up at register zero, that means that we saved
   6867  1.1  christos                  the registers as the result of an `on_failure_jump', not
   6868  1.1  christos                  a `start_memory', and we jumped to past the innermost
   6869  1.1  christos                  `stop_memory'.  For example, in ((.)*) we save
   6870  1.1  christos                  registers 1 and 2 as a result of the *, but when we pop
   6871  1.1  christos                  back to the second ), we are at the stop_memory 1.
   6872  1.1  christos                  Thus, nothing is active.  */
   6873  1.1  christos 	      if (r == 0)
   6874  1.1  christos                 {
   6875  1.1  christos                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   6876  1.1  christos                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   6877  1.1  christos                 }
   6878  1.1  christos               else
   6879  1.1  christos                 highest_active_reg = r;
   6880  1.1  christos             }
   6881  1.1  christos 
   6882  1.1  christos           /* If just failed to match something this time around with a
   6883  1.1  christos              group that's operated on by a repetition operator, try to
   6884  1.1  christos              force exit from the ``loop'', and restore the register
   6885  1.1  christos              information for this group that we had before trying this
   6886  1.1  christos              last match.  */
   6887  1.1  christos           if ((!MATCHED_SOMETHING (reg_info[*p])
   6888  1.1  christos                || just_past_start_mem == p - 1)
   6889  1.1  christos 	      && (p + 2) < pend)
   6890  1.1  christos             {
   6891  1.1  christos               boolean is_a_jump_n = false;
   6892  1.1  christos 
   6893  1.1  christos               p1 = p + 2;
   6894  1.1  christos               mcnt = 0;
   6895  1.1  christos               switch ((re_opcode_t) *p1++)
   6896  1.1  christos                 {
   6897  1.1  christos                   case jump_n:
   6898  1.1  christos 		    is_a_jump_n = true;
   6899  1.1  christos                   case pop_failure_jump:
   6900  1.1  christos 		  case maybe_pop_jump:
   6901  1.1  christos 		  case jump:
   6902  1.1  christos 		  case dummy_failure_jump:
   6903  1.1  christos                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6904  1.1  christos 		    if (is_a_jump_n)
   6905  1.1  christos 		      p1 += OFFSET_ADDRESS_SIZE;
   6906  1.1  christos                     break;
   6907  1.1  christos 
   6908  1.1  christos                   default:
   6909  1.1  christos                     /* do nothing */ ;
   6910  1.1  christos                 }
   6911  1.1  christos 	      p1 += mcnt;
   6912  1.1  christos 
   6913  1.1  christos               /* If the next operation is a jump backwards in the pattern
   6914  1.1  christos 	         to an on_failure_jump right before the start_memory
   6915  1.1  christos                  corresponding to this stop_memory, exit from the loop
   6916  1.1  christos                  by forcing a failure after pushing on the stack the
   6917  1.1  christos                  on_failure_jump's jump in the pattern, and d.  */
   6918  1.1  christos               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
   6919  1.1  christos                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
   6920  1.1  christos 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
   6921  1.1  christos 		{
   6922  1.1  christos                   /* If this group ever matched anything, then restore
   6923  1.1  christos                      what its registers were before trying this last
   6924  1.1  christos                      failed match, e.g., with `(a*)*b' against `ab' for
   6925  1.1  christos                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
   6926  1.1  christos                      against `aba' for regend[3].
   6927  1.1  christos 
   6928  1.1  christos                      Also restore the registers for inner groups for,
   6929  1.1  christos                      e.g., `((a*)(b*))*' against `aba' (register 3 would
   6930  1.1  christos                      otherwise get trashed).  */
   6931  1.1  christos 
   6932  1.1  christos                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
   6933  1.1  christos 		    {
   6934  1.1  christos 		      unsigned r;
   6935  1.1  christos 
   6936  1.1  christos                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
   6937  1.1  christos 
   6938  1.1  christos 		      /* Restore this and inner groups' (if any) registers.  */
   6939  1.1  christos                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
   6940  1.1  christos 			   r++)
   6941  1.1  christos                         {
   6942  1.1  christos                           regstart[r] = old_regstart[r];
   6943  1.1  christos 
   6944  1.1  christos                           /* xx why this test?  */
   6945  1.1  christos                           if (old_regend[r] >= regstart[r])
   6946  1.1  christos                             regend[r] = old_regend[r];
   6947  1.1  christos                         }
   6948  1.1  christos                     }
   6949  1.1  christos 		  p1++;
   6950  1.1  christos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   6951  1.1  christos                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
   6952  1.1  christos 
   6953  1.1  christos                   goto fail;
   6954  1.1  christos                 }
   6955  1.1  christos             }
   6956  1.1  christos 
   6957  1.1  christos           /* Move past the register number and the inner group count.  */
   6958  1.1  christos           p += 2;
   6959  1.1  christos           NEXT;
   6960  1.1  christos 
   6961  1.1  christos 
   6962  1.1  christos 	/* \<digit> has been turned into a `duplicate' command which is
   6963  1.1  christos            followed by the numeric value of <digit> as the register number.
   6964  1.1  christos            The input at d must repeat the text recorded for that register.  */
   6964  1.1  christos         CASE (duplicate):
   6965  1.1  christos 	  {
   6966  1.1  christos 	    register const CHAR_T *d2, *dend2;
   6967  1.1  christos 	    int regno = *p++;   /* Get which register to match against.  */
   6968  1.1  christos 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
   6969  1.1  christos 
   6970  1.1  christos 	    /* Can't back reference a group which we've never matched.  */
   6971  1.1  christos             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
   6972  1.1  christos               goto fail;
   6973  1.1  christos 
   6974  1.1  christos             /* Where in input to try to start matching.  */
   6975  1.1  christos             d2 = regstart[regno];
   6976  1.1  christos 
   6977  1.1  christos             /* Where to stop matching; if both the place to start and
   6978  1.1  christos                the place to stop matching are in the same string, then
   6979  1.1  christos                set to the place to stop, otherwise, for now have to use
   6980  1.1  christos                the end of the first string.  */
   6981  1.1  christos 
   6982  1.1  christos             dend2 = ((FIRST_STRING_P (regstart[regno])
   6983  1.1  christos 		      == FIRST_STRING_P (regend[regno]))
   6984  1.1  christos 		     ? regend[regno] : end_match_1);
   6985  1.1  christos 	    for (;;)
   6986  1.1  christos 	      {
   6987  1.1  christos 		/* If necessary, advance to next segment in register
   6988  1.1  christos                    contents.  */
   6989  1.1  christos 		while (d2 == dend2)
   6990  1.1  christos 		  {
   6991  1.1  christos 		    if (dend2 == end_match_2) break; /* No further segment.  */
   6992  1.1  christos 		    if (dend2 == regend[regno]) break; /* Register contents used up.  */
   6993  1.1  christos 
   6994  1.1  christos                     /* End of string1 => advance to string2. */
   6995  1.1  christos                     d2 = string2;
   6996  1.1  christos                     dend2 = regend[regno];
   6997  1.1  christos 		  }
   6998  1.1  christos 		/* At end of register contents => success */
   6999  1.1  christos 		if (d2 == dend2) break;
   7000  1.1  christos 
   7001  1.1  christos 		/* If necessary, advance to next segment in data.  */
   7002  1.1  christos 		PREFETCH ();
   7003  1.1  christos 
   7004  1.1  christos 		/* How many characters left in this segment to match.  */
   7005  1.1  christos 		mcnt = dend - d;
   7006  1.1  christos 
   7007  1.1  christos 		/* Want how many consecutive characters we can match in
   7008  1.1  christos                    one shot, so, if necessary, adjust the count.  */
   7009  1.1  christos                 if (mcnt > dend2 - d2)
   7010  1.1  christos 		  mcnt = dend2 - d2;
   7011  1.1  christos 
   7012  1.1  christos 		/* Compare that many; failure if mismatch, else move
   7013  1.1  christos                    past them.  With a translate table the comparison goes
   7013  1.1  christos                    through bcmp_translate, otherwise it is a raw memcmp.  */
   7014  1.1  christos 		if (translate
   7015  1.1  christos                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
   7016  1.1  christos                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
   7017  1.1  christos 		  goto fail;
   7018  1.1  christos 		d += mcnt, d2 += mcnt;
   7019  1.1  christos 
   7020  1.1  christos 		/* Do this because we've matched some characters.  */
   7021  1.1  christos 		SET_REGS_MATCHED ();
   7022  1.1  christos 	      }
   7023  1.1  christos 	  }
   7024  1.1  christos 	  NEXT;
   7025  1.1  christos 
   7026  1.1  christos 
   7027  1.1  christos         /* begline matches the empty string at the beginning of the string
   7028  1.1  christos            (unless `not_bol' is set in `bufp'), and, if
   7029  1.1  christos            `newline_anchor' is set, right after a newline character.  */
   7030  1.1  christos 	CASE (begline):
   7031  1.1  christos           DEBUG_PRINT1 ("EXECUTING begline.\n");
   7032  1.1  christos 
   7033  1.1  christos           if (AT_STRINGS_BEG (d))
   7034  1.1  christos             {
   7035  1.1  christos               if (!bufp->not_bol) /* With not_bol set we fall through to fail.  */
   7036  1.1  christos 		{
   7037  1.1  christos 		  NEXT;
   7038  1.1  christos 		}
   7039  1.1  christos             }
   7040  1.1  christos           else if (d[-1] == '\n' && bufp->newline_anchor) /* Previous character is a newline.  */
   7041  1.1  christos             {
   7042  1.1  christos               NEXT;
   7043  1.1  christos             }
   7044  1.1  christos           /* In all other cases, we fail.  */
   7045  1.1  christos           goto fail;
   7046  1.1  christos 
   7047  1.1  christos 
   7048  1.1  christos         /* endline is the dual of begline: it matches at the end of the data (unless `not_eol' is set in `bufp'), and, if `newline_anchor' is set, right before a newline.  */
   7049  1.1  christos 	CASE (endline):
   7050  1.1  christos           DEBUG_PRINT1 ("EXECUTING endline.\n");
   7051  1.1  christos 
   7052  1.1  christos           if (AT_STRINGS_END (d))
   7053  1.1  christos             {
   7054  1.1  christos               if (!bufp->not_eol) /* With not_eol set we fall through to fail.  */
   7055  1.1  christos 		{
   7056  1.1  christos 		  NEXT;
   7057  1.1  christos 		}
   7058  1.1  christos             }
   7059  1.1  christos 
   7060  1.1  christos           /* We have to ``prefetch'' the next character: when d sits at end1
   7060  1.1  christos              the next character is the first one of string2.  */
   7061  1.1  christos           else if ((d == end1 ? *string2 : *d) == '\n'
   7062  1.1  christos                    && bufp->newline_anchor)
   7063  1.1  christos             {
   7064  1.1  christos               NEXT;
   7065  1.1  christos             }
   7066  1.1  christos           goto fail; /* In all other cases, we fail.  */
   7067  1.1  christos 
   7068  1.1  christos 
   7069  1.1  christos 	/* Match at the very beginning of the data.  */
   7070  1.1  christos         CASE (begbuf):
   7071  1.1  christos           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
   7072  1.1  christos           if (AT_STRINGS_BEG (d))
   7073  1.1  christos 	    {
   7074  1.1  christos 	      NEXT;
   7075  1.1  christos 	    }
   7076  1.1  christos           goto fail;
   7077  1.1  christos 
   7078  1.1  christos 
   7079  1.1  christos 	/* Match at the very end of the data.  */
   7080  1.1  christos         CASE (endbuf):
   7081  1.1  christos           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
   7082  1.1  christos 	  if (AT_STRINGS_END (d))
   7083  1.1  christos 	    {
   7084  1.1  christos 	      NEXT;
   7085  1.1  christos 	    }
   7086  1.1  christos           goto fail;
   7087  1.1  christos 
   7088  1.1  christos 
   7089  1.1  christos         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
   7090  1.1  christos            pushes NULL as the value for the string on the stack.  Then
   7091  1.1  christos            `pop_failure_point' will keep the current value for the
   7092  1.1  christos            string, instead of restoring it.  To see why, consider
   7093  1.1  christos            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
   7094  1.1  christos            then the . fails against the \n.  But the next thing we want
   7095  1.1  christos            to do is match the \n against the \n; if we restored the
   7096  1.1  christos            string value, we would be back at the foo.
   7097  1.1  christos 
   7098  1.1  christos            Because this is used only in specific cases, we don't need to
   7099  1.1  christos            check all the things that `on_failure_jump' does, to make
   7100  1.1  christos            sure the right things get saved on the stack.  Hence we don't
   7101  1.1  christos            share its code.  The only reason to push anything on the
   7102  1.1  christos            stack at all is that otherwise we would have to change
   7103  1.1  christos            `anychar's code to do something besides goto fail in this
   7104  1.1  christos            case; that seems worse than this.  */
   7105  1.1  christos         CASE (on_failure_keep_string_jump):
   7106  1.1  christos           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
   7107  1.1  christos 
   7108  1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7109  1.1  christos #ifdef _LIBC
   7110  1.1  christos           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
   7111  1.1  christos #else
   7112  1.1  christos           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
   7113  1.1  christos #endif
   7114  1.1  christos 
   7115  1.1  christos           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
   7116  1.1  christos           NEXT;
   7117  1.1  christos 
   7118  1.1  christos 
   7119  1.1  christos 	/* Uses of on_failure_jump:
   7120  1.1  christos 
   7121  1.1  christos            Each alternative starts with an on_failure_jump that points
   7122  1.1  christos            to the beginning of the next alternative.  Each alternative
   7123  1.1  christos            except the last ends with a jump that in effect jumps past
   7124  1.1  christos            the rest of the alternatives.  (They really jump to the
   7125  1.1  christos            ending jump of the following alternative, because tensioning
   7126  1.1  christos            these jumps is a hassle.)
   7127  1.1  christos 
   7128  1.1  christos            Repeats start with an on_failure_jump that points past both
   7129  1.1  christos            the repetition text and either the following jump or
   7130  1.1  christos            pop_failure_jump back to this on_failure_jump.  */
   7131  1.1  christos 	CASE (on_failure_jump):
   7132  1.1  christos         on_failure:
   7133  1.1  christos           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
   7134  1.1  christos 
   7135  1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7136  1.1  christos #ifdef _LIBC
   7137  1.1  christos           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
   7138  1.1  christos #else
   7139  1.1  christos           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
   7140  1.1  christos #endif
   7141  1.1  christos 
   7142  1.1  christos           /* If this on_failure_jump comes right before a group (i.e.,
   7143  1.1  christos              the original * applied to a group), save the information
   7144  1.1  christos              for that group and all inner ones, so that if we fail back
   7145  1.1  christos              to this point, the group's information will be correct.
   7146  1.1  christos              For example, in \(a*\)*\1, we need the preceding group,
   7147  1.1  christos              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
   7148  1.1  christos 
   7149  1.1  christos           /* We can't use `p' to check ahead because we push
   7150  1.1  christos              a failure point to `p + mcnt' after we do this.  */
   7151  1.1  christos           p1 = p;
   7152  1.1  christos 
   7153  1.1  christos           /* We need to skip no_op's before we look for the
   7154  1.1  christos              start_memory in case this on_failure_jump is happening as
   7155  1.1  christos              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
   7156  1.1  christos              against aba.  */
   7157  1.1  christos           while (p1 < pend && (re_opcode_t) *p1 == no_op)
   7158  1.1  christos             p1++;
   7159  1.1  christos 
   7160  1.1  christos           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
   7161  1.1  christos             {
   7162  1.1  christos               /* We have a new highest active register now.  This will
   7163  1.1  christos                  get reset at the start_memory we are about to get to,
   7164  1.1  christos                  but we will have saved all the registers relevant to
   7165  1.1  christos                  this repetition op, as described above.  */
   7166  1.1  christos               highest_active_reg = *(p1 + 1) + *(p1 + 2);
   7167  1.1  christos               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   7168  1.1  christos                 lowest_active_reg = *(p1 + 1);
   7169  1.1  christos             }
   7170  1.1  christos 
   7171  1.1  christos           DEBUG_PRINT1 (":\n");
   7172  1.1  christos           PUSH_FAILURE_POINT (p + mcnt, d, -2);
   7173  1.1  christos           NEXT;
   7174  1.1  christos 
   7175  1.1  christos 
   7176  1.1  christos         /* A smart repeat ends with `maybe_pop_jump'.
   7177  1.1  christos 	   We change it to either `pop_failure_jump' or `jump'.  */
   7178  1.1  christos         CASE (maybe_pop_jump):
   7179  1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7180  1.1  christos           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
   7181  1.1  christos           {
   7182  1.1  christos 	    register UCHAR_T *p2 = p;
   7183  1.1  christos 
   7184  1.1  christos             /* Compare the beginning of the repeat with what follows its
   7185  1.1  christos                end in the pattern.  If we can establish that there is
   7186  1.1  christos                nothing that they would both match, i.e., nothing that
   7187  1.1  christos                would force us to backtrack (as there is in, e.g., `a*a'),
   7188  1.1  christos                then we can change to pop_failure_jump, because we'll
   7189  1.1  christos                never have to backtrack.
   7190  1.1  christos 
   7191  1.1  christos                This is not true in the case of alternatives: in
   7192  1.1  christos                `(a|ab)*' we do need to backtrack to the `ab' alternative
   7193  1.1  christos                (e.g., if the string was `ab').  But instead of trying to
   7194  1.1  christos                detect that here, the alternative has put on a dummy
   7195  1.1  christos                failure point which is what we will end up popping.  */
   7196  1.1  christos 
   7197  1.1  christos 	    /* Skip over open/close-group commands.
   7198  1.1  christos 	       If what follows this loop is a ...+ construct,
   7199  1.1  christos 	       look at what begins its body, since we will have to
   7200  1.1  christos 	       match at least one of that.  */
   7201  1.1  christos 	    while (1)
   7202  1.1  christos 	      {
   7203  1.1  christos 		if (p2 + 2 < pend
   7204  1.1  christos 		    && ((re_opcode_t) *p2 == stop_memory
   7205  1.1  christos 			|| (re_opcode_t) *p2 == start_memory))
   7206  1.1  christos 		  p2 += 3;
   7207  1.1  christos 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
   7208  1.1  christos 			 && (re_opcode_t) *p2 == dummy_failure_jump)
   7209  1.1  christos 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
   7210  1.1  christos 		else
   7211  1.1  christos 		  break;
   7212  1.1  christos 	      }
   7213  1.1  christos 
   7214  1.1  christos 	    p1 = p + mcnt;
   7215  1.1  christos 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
   7216  1.1  christos 	       to the `maybe_pop_jump' of this case.  Examine what
   7217  1.1  christos 	       follows.  */
   7218  1.1  christos 
   7219  1.1  christos             /* If we're at the end of the pattern, we can change.  */
   7220  1.1  christos             if (p2 == pend)
   7221  1.1  christos 	      {
   7222  1.1  christos 		/* Consider what happens when matching ":\(.*\)"
   7223  1.1  christos 		   against ":/".  I don't really understand this code
   7224  1.1  christos 		   yet.  */
   7225  1.1  christos   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7226  1.1  christos 		  pop_failure_jump;
   7227  1.1  christos                 DEBUG_PRINT1
   7228  1.1  christos                   ("  End of pattern: change to `pop_failure_jump'.\n");
   7229  1.1  christos               }
   7230  1.1  christos 
   7231  1.1  christos             else if ((re_opcode_t) *p2 == exactn
   7232  1.1  christos #ifdef MBS_SUPPORT
   7233  1.1  christos 		     || (re_opcode_t) *p2 == exactn_bin
   7234  1.1  christos #endif
   7235  1.1  christos 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
   7236  1.1  christos 	      {
   7237  1.1  christos 		register UCHAR_T c
   7238  1.1  christos                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
   7239  1.1  christos 
   7240  1.1  christos                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
   7241  1.1  christos #ifdef MBS_SUPPORT
   7242  1.1  christos 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
   7243  1.1  christos #endif
   7244  1.1  christos 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
   7245  1.1  christos                   {
   7246  1.1  christos   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
   7247  1.1  christos 		      pop_failure_jump;
   7248  1.1  christos #ifdef WCHAR
   7249  1.1  christos 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
   7250  1.1  christos 				    (wint_t) c,
   7251  1.1  christos 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
   7252  1.1  christos #else
   7253  1.1  christos 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   7254  1.1  christos 				    (char) c,
   7255  1.1  christos 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
   7256  1.1  christos #endif
   7257  1.1  christos                   }
   7258  1.1  christos 
   7259  1.1  christos #ifndef WCHAR
   7260  1.1  christos 		else if ((re_opcode_t) p1[3] == charset
   7261  1.1  christos 			 || (re_opcode_t) p1[3] == charset_not)
   7262  1.1  christos 		  {
   7263  1.1  christos 		    int not = (re_opcode_t) p1[3] == charset_not;
   7264  1.1  christos 
   7265  1.1  christos 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
   7266  1.1  christos 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   7267  1.1  christos 		      not = !not;
   7268  1.1  christos 
   7269  1.1  christos                     /* `not' is equal to 1 if c would match, which means
   7270  1.1  christos                         that we can't change to pop_failure_jump.  */
   7271  1.1  christos 		    if (!not)
   7272  1.1  christos                       {
   7273  1.1  christos   		        p[-3] = (unsigned char) pop_failure_jump;
   7274  1.1  christos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7275  1.1  christos                       }
   7276  1.1  christos 		  }
   7277  1.1  christos #endif /* not WCHAR */
   7278  1.1  christos 	      }
   7279  1.1  christos #ifndef WCHAR
   7280  1.1  christos             else if ((re_opcode_t) *p2 == charset)
   7281  1.1  christos 	      {
   7282  1.1  christos 		/* We win if the first character of the loop is not part
   7283  1.1  christos                    of the charset.  */
   7284  1.1  christos                 if ((re_opcode_t) p1[3] == exactn
   7285  1.1  christos  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
   7286  1.1  christos  			  && (p2[2 + p1[5] / BYTEWIDTH]
   7287  1.1  christos  			      & (1 << (p1[5] % BYTEWIDTH)))))
   7288  1.1  christos 		  {
   7289  1.1  christos 		    p[-3] = (unsigned char) pop_failure_jump;
   7290  1.1  christos 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7291  1.1  christos                   }
   7292  1.1  christos 
   7293  1.1  christos 		else if ((re_opcode_t) p1[3] == charset_not)
   7294  1.1  christos 		  {
   7295  1.1  christos 		    int idx;
   7296  1.1  christos 		    /* We win if the charset_not inside the loop
   7297  1.1  christos 		       lists every character listed in the charset after.  */
   7298  1.1  christos 		    for (idx = 0; idx < (int) p2[1]; idx++)
   7299  1.1  christos 		      if (! (p2[2 + idx] == 0
   7300  1.1  christos 			     || (idx < (int) p1[4]
   7301  1.1  christos 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
   7302  1.1  christos 			break;
   7303  1.1  christos 
   7304  1.1  christos 		    if (idx == p2[1])
   7305  1.1  christos                       {
   7306  1.1  christos   		        p[-3] = (unsigned char) pop_failure_jump;
   7307  1.1  christos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7308  1.1  christos                       }
   7309  1.1  christos 		  }
   7310  1.1  christos 		else if ((re_opcode_t) p1[3] == charset)
   7311  1.1  christos 		  {
   7312  1.1  christos 		    int idx;
   7313  1.1  christos 		    /* We win if the charset inside the loop
   7314  1.1  christos 		       has no overlap with the one after the loop.  */
   7315  1.1  christos 		    for (idx = 0;
   7316  1.1  christos 			 idx < (int) p2[1] && idx < (int) p1[4];
   7317  1.1  christos 			 idx++)
   7318  1.1  christos 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
   7319  1.1  christos 			break;
   7320  1.1  christos 
   7321  1.1  christos 		    if (idx == p2[1] || idx == p1[4])
   7322  1.1  christos                       {
   7323  1.1  christos   		        p[-3] = (unsigned char) pop_failure_jump;
   7324  1.1  christos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
   7325  1.1  christos                       }
   7326  1.1  christos 		  }
   7327  1.1  christos 	      }
   7328  1.1  christos #endif /* not WCHAR */
   7329  1.1  christos 	  }
   7330  1.1  christos 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
   7331  1.1  christos 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
   7332  1.1  christos 	    {
   7333  1.1  christos 	      p[-1] = (UCHAR_T) jump;
   7334  1.1  christos               DEBUG_PRINT1 ("  Match => jump.\n");
   7335  1.1  christos 	      goto unconditional_jump;
   7336  1.1  christos 	    }
   7337  1.1  christos         /* Note fall through.  */
   7338  1.1  christos 
   7339  1.1  christos 
   7340  1.1  christos 	/* The end of a simple repeat has a pop_failure_jump back to
   7341  1.1  christos            its matching on_failure_jump, where the latter will push a
   7342  1.1  christos            failure point.  The pop_failure_jump takes off failure
   7343  1.1  christos            points put on by this pop_failure_jump's matching
   7344  1.1  christos            on_failure_jump; we got through the pattern to here from the
   7345  1.1  christos            matching on_failure_jump, so didn't fail.  */
   7346  1.1  christos         CASE (pop_failure_jump):
   7347  1.1  christos           {
   7348  1.1  christos             /* We need to pass separate storage for the lowest and
   7349  1.1  christos                highest registers, even though we don't care about the
   7350  1.1  christos                actual values.  Otherwise, we will restore only one
   7351  1.1  christos                register from the stack, since lowest will == highest in
   7352  1.1  christos                `pop_failure_point'.  */
   7353  1.1  christos             active_reg_t dummy_low_reg, dummy_high_reg;
   7354  1.1  christos             UCHAR_T *pdummy = NULL;
   7355  1.1  christos             const CHAR_T *sdummy = NULL;
   7356  1.1  christos 
   7357  1.1  christos             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
   7358  1.1  christos             POP_FAILURE_POINT (sdummy, pdummy,
   7359  1.1  christos                                dummy_low_reg, dummy_high_reg,
   7360  1.1  christos                                reg_dummy, reg_dummy, reg_info_dummy);
   7361  1.1  christos           }
   7362  1.1  christos 	  /* Note fall through.  */
   7363  1.1  christos 
   7364  1.1  christos 	unconditional_jump:
   7365  1.1  christos #ifdef _LIBC
   7366  1.1  christos 	  DEBUG_PRINT2 ("\n%p: ", p);
   7367  1.1  christos #else
   7368  1.1  christos 	  DEBUG_PRINT2 ("\n0x%x: ", p);
   7369  1.1  christos #endif
   7370  1.1  christos           /* Note fall through.  */
   7371  1.1  christos 
   7372  1.1  christos         /* Unconditionally jump (without popping any failure points).  */
   7373  1.1  christos         CASE (jump):
   7374  1.1  christos 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
   7375  1.1  christos           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
   7376  1.1  christos 	  p += mcnt;				/* Do the jump.  */
   7377  1.1  christos #ifdef _LIBC
   7378  1.1  christos           DEBUG_PRINT2 ("(to %p).\n", p);
   7379  1.1  christos #else
   7380  1.1  christos           DEBUG_PRINT2 ("(to 0x%x).\n", p);
   7381  1.1  christos #endif
   7382  1.1  christos 	  NEXT;
   7383  1.1  christos 
   7384  1.1  christos 
   7385  1.1  christos         /* We need this opcode so we can detect where alternatives end
   7386  1.1  christos            in `group_match_null_string_p' et al.  */
   7387  1.1  christos         CASE (jump_past_alt):
   7388  1.1  christos           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
   7389  1.1  christos           goto unconditional_jump;
   7390  1.1  christos 
   7391  1.1  christos 
   7392  1.1  christos         /* Normally, the on_failure_jump pushes a failure point, which
   7393  1.1  christos            then gets popped at pop_failure_jump.  We will end up at
   7394  1.1  christos            pop_failure_jump, also, and with a pattern of, say, `a+', we
   7395  1.1  christos            are skipping over the on_failure_jump, so we have to push
   7396  1.1  christos            something meaningless for pop_failure_jump to pop.  */
   7397  1.1  christos         CASE (dummy_failure_jump):
   7398  1.1  christos           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
   7399  1.1  christos           /* It doesn't matter what we push for the string here.  What
   7400  1.1  christos              the code at `fail' tests is the value for the pattern.  */
   7401  1.1  christos           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7402  1.1  christos           goto unconditional_jump;
   7403  1.1  christos 
   7404  1.1  christos 
   7405  1.1  christos         /* At the end of an alternative, we need to push a dummy failure
   7406  1.1  christos            point in case we are followed by a `pop_failure_jump', because
   7407  1.1  christos            we don't want the failure point for the alternative to be
   7408  1.1  christos            popped.  For example, matching `(a|ab)*' against `aab'
   7409  1.1  christos            requires that we match the `ab' alternative.  */
   7410  1.1  christos         CASE (push_dummy_failure):
   7411  1.1  christos           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
   7412  1.1  christos           /* See comments just above at `dummy_failure_jump' about the
   7413  1.1  christos              two NULL arguments.  */
   7414  1.1  christos           PUSH_FAILURE_POINT (NULL, NULL, -2);
   7415  1.1  christos           NEXT;
   7416  1.1  christos 
   7417  1.1  christos         /* Have to succeed matching what follows at least n times.
   7418  1.1  christos            After that, handle like `on_failure_jump'.  */
   7419  1.1  christos         CASE (succeed_n):
   7420  1.1  christos           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7421  1.1  christos           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
   7422  1.1  christos 
   7423  1.1  christos           assert (mcnt >= 0);
   7424  1.1  christos           /* Originally, this is how many times we HAVE to succeed.  */
   7425  1.1  christos           if (mcnt > 0)
   7426  1.1  christos             {
   7427  1.1  christos                mcnt--;
   7428  1.1  christos 	       p += OFFSET_ADDRESS_SIZE;
   7429  1.1  christos                STORE_NUMBER_AND_INCR (p, mcnt);
   7430  1.1  christos #ifdef _LIBC
   7431  1.1  christos                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
   7432  1.1  christos 			     , mcnt);
   7433  1.1  christos #else
   7434  1.1  christos                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
   7435  1.1  christos 			     , mcnt);
   7436  1.1  christos #endif
   7437  1.1  christos             }
   7438  1.1  christos 	  else if (mcnt == 0)
   7439  1.1  christos             {
   7440  1.1  christos #ifdef _LIBC
   7441  1.1  christos               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
   7442  1.1  christos 			    p + OFFSET_ADDRESS_SIZE);
   7443  1.1  christos #else
   7444  1.1  christos               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
   7445  1.1  christos 			    p + OFFSET_ADDRESS_SIZE);
   7446  1.1  christos #endif /* _LIBC */
   7447  1.1  christos 
   7448  1.1  christos #ifdef WCHAR
   7449  1.1  christos 	      p[1] = (UCHAR_T) no_op;
   7450  1.1  christos #else
   7451  1.1  christos 	      p[2] = (UCHAR_T) no_op;
   7452  1.1  christos               p[3] = (UCHAR_T) no_op;
   7453  1.1  christos #endif /* WCHAR */
   7454  1.1  christos               goto on_failure;
   7455  1.1  christos             }
   7456  1.1  christos           NEXT;
   7457  1.1  christos 
   7458  1.1  christos         CASE (jump_n):
   7459  1.1  christos           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
   7460  1.1  christos           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
   7461  1.1  christos 
   7462  1.1  christos           /* Originally, this is how many times we CAN jump.  */
   7463  1.1  christos           if (mcnt)
   7464  1.1  christos             {
   7465  1.1  christos                mcnt--;
   7466  1.1  christos                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
   7467  1.1  christos 
   7468  1.1  christos #ifdef _LIBC
   7469  1.1  christos                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7470  1.1  christos 			     mcnt);
   7471  1.1  christos #else
   7472  1.1  christos                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
   7473  1.1  christos 			     mcnt);
   7474  1.1  christos #endif /* _LIBC */
   7475  1.1  christos 	       goto unconditional_jump;
   7476  1.1  christos             }
   7477  1.1  christos           /* If we don't have to jump any more, skip over the rest of the command.  */
   7478  1.1  christos 	  else
   7479  1.1  christos 	    p += 2 * OFFSET_ADDRESS_SIZE;
   7480  1.1  christos           NEXT;
   7481  1.1  christos 
   7482  1.1  christos 	CASE (set_number_at):
   7483  1.1  christos 	  {
   7484  1.1  christos             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
   7485  1.1  christos 
   7486  1.1  christos             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7487  1.1  christos             p1 = p + mcnt;
   7488  1.1  christos             EXTRACT_NUMBER_AND_INCR (mcnt, p);
   7489  1.1  christos #ifdef _LIBC
   7490  1.1  christos             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
   7491  1.1  christos #else
   7492  1.1  christos             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
   7493  1.1  christos #endif
   7494  1.1  christos 	    STORE_NUMBER (p1, mcnt);
   7495  1.1  christos             NEXT;
   7496  1.1  christos           }
   7497  1.1  christos 
   7498  1.1  christos #if 0
   7499  1.1  christos 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
   7500  1.1  christos 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   7501  1.1  christos 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
   7502  1.1  christos 	   macro and introducing temporary variables works around the bug.  */
   7503  1.1  christos 
   7504  1.1  christos 	CASE (wordbound):
   7505  1.1  christos 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7506  1.1  christos 	  if (AT_WORD_BOUNDARY (d))
   7507  1.1  christos 	    {
   7508  1.1  christos 	      NEXT;
   7509  1.1  christos 	    }
   7510  1.1  christos 	  goto fail;
   7511  1.1  christos 
   7512  1.1  christos 	CASE (notwordbound):
   7513  1.1  christos 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7514  1.1  christos 	  if (AT_WORD_BOUNDARY (d))
   7515  1.1  christos 	    goto fail;
   7516  1.1  christos 	  NEXT;
   7517  1.1  christos #else
   7518  1.1  christos 	CASE (wordbound):
   7519  1.1  christos 	{
   7520  1.1  christos 	  boolean prevchar, thischar;
   7521  1.1  christos 
   7522  1.1  christos 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   7523  1.1  christos 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7524  1.1  christos 	    {
   7525  1.1  christos 	      NEXT;
   7526  1.1  christos 	    }
   7527  1.1  christos 
   7528  1.1  christos 	  prevchar = WORDCHAR_P (d - 1);
   7529  1.1  christos 	  thischar = WORDCHAR_P (d);
   7530  1.1  christos 	  if (prevchar != thischar)
   7531  1.1  christos 	    {
   7532  1.1  christos 	      NEXT;
   7533  1.1  christos 	    }
   7534  1.1  christos 	  goto fail;
   7535  1.1  christos 	}
   7536  1.1  christos 
   7537  1.1  christos       CASE (notwordbound):
   7538  1.1  christos 	{
   7539  1.1  christos 	  boolean prevchar, thischar;
   7540  1.1  christos 
   7541  1.1  christos 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   7542  1.1  christos 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   7543  1.1  christos 	    goto fail;
   7544  1.1  christos 
   7545  1.1  christos 	  prevchar = WORDCHAR_P (d - 1);
   7546  1.1  christos 	  thischar = WORDCHAR_P (d);
   7547  1.1  christos 	  if (prevchar != thischar)
   7548  1.1  christos 	    goto fail;
   7549  1.1  christos 	  NEXT;
   7550  1.1  christos 	}
   7551  1.1  christos #endif
   7552  1.1  christos 
   7553  1.1  christos 	CASE (wordbeg):
   7554  1.1  christos           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
   7555  1.1  christos 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
   7556  1.1  christos 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
   7557  1.1  christos 	    {
   7558  1.1  christos 	      NEXT;
   7559  1.1  christos 	    }
   7560  1.1  christos           goto fail;
   7561  1.1  christos 
   7562  1.1  christos 	CASE (wordend):
   7563  1.1  christos           DEBUG_PRINT1 ("EXECUTING wordend.\n");
   7564  1.1  christos 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
   7565  1.1  christos               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
   7566  1.1  christos 	    {
   7567  1.1  christos 	      NEXT;
   7568  1.1  christos 	    }
   7569  1.1  christos           goto fail;
   7570  1.1  christos 
   7571  1.1  christos #ifdef emacs
   7572  1.1  christos   	CASE (before_dot):
   7573  1.1  christos           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
   7574  1.1  christos  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
   7575  1.1  christos   	    goto fail;
   7576  1.1  christos   	  NEXT;
   7577  1.1  christos 
   7578  1.1  christos   	CASE (at_dot):
   7579  1.1  christos           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
   7580  1.1  christos  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
   7581  1.1  christos   	    goto fail;
   7582  1.1  christos   	  NEXT;
   7583  1.1  christos 
   7584  1.1  christos   	CASE (after_dot):
   7585  1.1  christos           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
   7586  1.1  christos           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
   7587  1.1  christos   	    goto fail;
   7588  1.1  christos   	  NEXT;
   7589  1.1  christos 
   7590  1.1  christos 	CASE (syntaxspec):
   7591  1.1  christos           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
   7592  1.1  christos 	  mcnt = *p++;
   7593  1.1  christos 	  goto matchsyntax;
   7594  1.1  christos 
   7595  1.1  christos         CASE (wordchar):
   7596  1.1  christos           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
   7597  1.1  christos 	  mcnt = (int) Sword;
   7598  1.1  christos         matchsyntax:
   7599  1.1  christos 	  PREFETCH ();
   7600  1.1  christos 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7601  1.1  christos 	  d++;
   7602  1.1  christos 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
   7603  1.1  christos 	    goto fail;
   7604  1.1  christos           SET_REGS_MATCHED ();
   7605  1.1  christos 	  NEXT;
   7606  1.1  christos 
   7607  1.1  christos 	CASE (notsyntaxspec):
   7608  1.1  christos           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
   7609  1.1  christos 	  mcnt = *p++;
   7610  1.1  christos 	  goto matchnotsyntax;
   7611  1.1  christos 
   7612  1.1  christos         CASE (notwordchar):
   7613  1.1  christos           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
   7614  1.1  christos 	  mcnt = (int) Sword;
   7615  1.1  christos         matchnotsyntax:
   7616  1.1  christos 	  PREFETCH ();
   7617  1.1  christos 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   7618  1.1  christos 	  d++;
   7619  1.1  christos 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
   7620  1.1  christos 	    goto fail;
   7621  1.1  christos 	  SET_REGS_MATCHED ();
   7622  1.1  christos           NEXT;
   7623  1.1  christos 
   7624  1.1  christos #else /* not emacs */
   7625  1.1  christos 	CASE (wordchar):
   7626  1.1  christos           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
   7627  1.1  christos 	  PREFETCH ();
   7628  1.1  christos           if (!WORDCHAR_P (d))
   7629  1.1  christos             goto fail;
   7630  1.1  christos 	  SET_REGS_MATCHED ();
   7631  1.1  christos           d++;
   7632  1.1  christos 	  NEXT;
   7633  1.1  christos 
   7634  1.1  christos 	CASE (notwordchar):
   7635  1.1  christos           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
   7636  1.1  christos 	  PREFETCH ();
   7637  1.1  christos 	  if (WORDCHAR_P (d))
   7638  1.1  christos             goto fail;
   7639  1.1  christos           SET_REGS_MATCHED ();
   7640  1.1  christos           d++;
   7641  1.1  christos 	  NEXT;
   7642  1.1  christos #endif /* not emacs */
   7643  1.1  christos 
   7644  1.1  christos #ifndef __GNUC__
   7645  1.1  christos         default:
   7646  1.1  christos           abort ();
   7647  1.1  christos 	}
   7648  1.1  christos       continue;  /* Successfully executed one pattern command; keep going.  */
   7649  1.1  christos #endif
   7650  1.1  christos 
   7651  1.1  christos 
   7652  1.1  christos     /* We goto here if a matching operation fails. */
   7653  1.1  christos     fail:
   7654  1.1  christos       if (!FAIL_STACK_EMPTY ())
   7655  1.1  christos 	{ /* A restart point is known.  Restore to that state.  */
   7656  1.1  christos           DEBUG_PRINT1 ("\nFAIL:\n");
   7657  1.1  christos           POP_FAILURE_POINT (d, p,
   7658  1.1  christos                              lowest_active_reg, highest_active_reg,
   7659  1.1  christos                              regstart, regend, reg_info);
   7660  1.1  christos 
   7661  1.1  christos           /* If this failure point is a dummy, try the next one.  */
   7662  1.1  christos           if (!p)
   7663  1.1  christos 	    goto fail;
   7664  1.1  christos 
   7665  1.1  christos           /* If we failed to the end of the pattern, don't examine *p.  */
   7666  1.1  christos 	  assert (p <= pend);
   7667  1.1  christos           if (p < pend)
   7668  1.1  christos             {
   7669  1.1  christos               boolean is_a_jump_n = false;
   7670  1.1  christos 
   7671  1.1  christos               /* If failed to a backwards jump that's part of a repetition
   7672  1.1  christos                  loop, need to pop this failure point and use the next one.  */
   7673  1.1  christos               switch ((re_opcode_t) *p)
   7674  1.1  christos                 {
   7675  1.1  christos                 case jump_n:
   7676  1.1  christos                   is_a_jump_n = true;
   7677  1.1  christos                 case maybe_pop_jump:
   7678  1.1  christos                 case pop_failure_jump:
   7679  1.1  christos                 case jump:
   7680  1.1  christos                   p1 = p + 1;
   7681  1.1  christos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7682  1.1  christos                   p1 += mcnt;
   7683  1.1  christos 
   7684  1.1  christos                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
   7685  1.1  christos                       || (!is_a_jump_n
   7686  1.1  christos                           && (re_opcode_t) *p1 == on_failure_jump))
   7687  1.1  christos                     goto fail;
   7688  1.1  christos                   break;
   7689  1.1  christos                 default:
   7690  1.1  christos                   /* do nothing */ ;
   7691  1.1  christos                 }
   7692  1.1  christos             }
   7693  1.1  christos 
   7694  1.1  christos           if (d >= string1 && d <= end1)
   7695  1.1  christos 	    dend = end_match_1;
   7696  1.1  christos         }
   7697  1.1  christos       else
   7698  1.1  christos         break;   /* Matching at this starting point really fails.  */
   7699  1.1  christos     } /* for (;;) */
   7700  1.1  christos 
   7701  1.1  christos   if (best_regs_set)
   7702  1.1  christos     goto restore_best_regs;
   7703  1.1  christos 
   7704  1.1  christos   FREE_VARIABLES ();
   7705  1.1  christos 
   7706  1.1  christos   return -1;         			/* Failure to match.  */
   7707  1.1  christos } /* re_match_2 */
   7708  1.1  christos 
   7709  1.1  christos /* Subroutine definitions for re_match_2.  */
   7711  1.1  christos 
   7712  1.1  christos 
   7713  1.1  christos /* We are passed P pointing to a register number after a start_memory.
   7714  1.1  christos 
   7715  1.1  christos    Return true if the pattern up to the corresponding stop_memory can
   7716  1.1  christos    match the empty string, and false otherwise.
   7717  1.1  christos 
   7718  1.1  christos    If we find the matching stop_memory, sets P to point to one past its number.
   7719  1.1  christos    Otherwise (every `return false' path below), *P is left unmodified.
   7720  1.1  christos 
   7721  1.1  christos    We don't handle duplicates properly (yet).  */
   7722  1.1  christos 
   7723  1.1  christos static boolean
   7724  1.1  christos PREFIX(group_match_null_string_p) (p, end, reg_info)
   7725  1.1  christos     UCHAR_T **p, *end;  /* *P: in/out pattern position; END: scan limit.  */
   7726  1.1  christos     PREFIX(register_info_type) *reg_info;  /* Handed on to the helper predicates.  */
   7727  1.1  christos {
   7728  1.1  christos   int mcnt;  /* Jump offset / alternative length read from the pattern.  */
   7729  1.1  christos   /* Point to after the args to the start_memory.  */
   7730  1.1  christos   UCHAR_T *p1 = *p + 2;
   7731  1.1  christos 
   7732  1.1  christos   while (p1 < end)
   7733  1.1  christos     {
   7734  1.1  christos       /* Skip over opcodes that can match nothing, and return true or
   7735  1.1  christos 	 false, as appropriate, when we get to one that can't, or to the
   7736  1.1  christos          matching stop_memory.  */
   7737  1.1  christos 
   7738  1.1  christos       switch ((re_opcode_t) *p1)
   7739  1.1  christos         {
   7740  1.1  christos         /* Could be either a loop or a series of alternatives.  */
   7741  1.1  christos         case on_failure_jump:
   7742  1.1  christos           p1++;
   7743  1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7744  1.1  christos 
   7745  1.1  christos           /* A non-negative offset is not a jump backwards in the
   7746  1.1  christos 	     pattern, so handle it as a series of alternatives.  For a
   7747  1.1  christos 	     negative offset only the operand is skipped (done above).  */
   7748  1.1  christos 	  if (mcnt >= 0)
   7749  1.1  christos 	    {
   7750  1.1  christos               /* Go through the on_failure_jumps of the alternatives,
   7751  1.1  christos                  seeing if any of the alternatives cannot match nothing.
   7752  1.1  christos                  The last alternative starts with only a jump,
   7753  1.1  christos                  whereas the rest start with on_failure_jump and end
   7754  1.1  christos                  with a jump, e.g., here is the pattern for `a|b|c':
   7755  1.1  christos 
   7756  1.1  christos                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
   7757  1.1  christos                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
   7758  1.1  christos                  /exactn/1/c
   7759  1.1  christos 
   7760  1.1  christos                  So, we have to first go through the first (n-1)
   7761  1.1  christos                  alternatives and then deal with the last one separately.  */
   7762  1.1  christos 
   7763  1.1  christos 
   7764  1.1  christos               /* Deal with the first (n-1) alternatives, which start
   7765  1.1  christos                  with an on_failure_jump (see above) that jumps to right
   7766  1.1  christos                  past a jump_past_alt.  */
   7767  1.1  christos 
   7768  1.1  christos               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
   7769  1.1  christos 		     jump_past_alt)
   7770  1.1  christos                 {
   7771  1.1  christos                   /* `mcnt' holds how many bytes long the alternative
   7772  1.1  christos                      is, including the ending `jump_past_alt' and
   7773  1.1  christos                      its number.  */
   7774  1.1  christos 
   7775  1.1  christos                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
   7776  1.1  christos 						(1 + OFFSET_ADDRESS_SIZE),
   7777  1.1  christos 						reg_info))
   7778  1.1  christos                     return false;  /* One alternative needs input, so the group does.  */
   7779  1.1  christos 
   7780  1.1  christos                   /* Move to right after this alternative, including the
   7781  1.1  christos 		     jump_past_alt.  */
   7782  1.1  christos                   p1 += mcnt;
   7783  1.1  christos 
   7784  1.1  christos                   /* Break if it's the beginning of an n-th alternative
   7785  1.1  christos                      that doesn't begin with an on_failure_jump.  */
   7786  1.1  christos                   if ((re_opcode_t) *p1 != on_failure_jump)
   7787  1.1  christos                     break;
   7788  1.1  christos 
   7789  1.1  christos 		  /* Still have to check that it's not an n-th
   7790  1.1  christos 		     alternative that starts with an on_failure_jump.  */
   7791  1.1  christos 		  p1++;
   7792  1.1  christos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7793  1.1  christos                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
   7794  1.1  christos 		      jump_past_alt)
   7795  1.1  christos                     {
   7796  1.1  christos 		      /* Get to the beginning of the n-th alternative.  */
   7797  1.1  christos                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
   7798  1.1  christos                       break;
   7799  1.1  christos                     }
   7800  1.1  christos                 }
   7801  1.1  christos 
   7802  1.1  christos               /* Deal with the last alternative: go back and get number
   7803  1.1  christos                  of the `jump_past_alt' just before it.  `mcnt' contains
   7804  1.1  christos                  the length of the alternative.  */
   7805  1.1  christos               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
   7806  1.1  christos 
   7807  1.1  christos               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
   7808  1.1  christos                 return false;
   7809  1.1  christos 
   7810  1.1  christos               p1 += mcnt;	/* Get past the n-th alternative.  */
   7811  1.1  christos             } /* if mcnt >= 0 */
   7812  1.1  christos           break;
   7813  1.1  christos 
   7814  1.1  christos 
   7815  1.1  christos         case stop_memory:
   7816  1.1  christos 	  assert (p1[1] == **p);  /* Must close the register *P points at.  */
   7817  1.1  christos           *p = p1 + 2;  /* The only place *P is written.  */
   7818  1.1  christos           return true;
   7819  1.1  christos 
   7820  1.1  christos 
   7821  1.1  christos         default:
   7822  1.1  christos           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7823  1.1  christos             return false;
   7824  1.1  christos         }
   7825  1.1  christos     } /* while p1 < end */
   7826  1.1  christos 
   7827  1.1  christos   return false;  /* Reached END without meeting a stop_memory.  */
   7828  1.1  christos } /* group_match_null_string_p */
   7829  1.1  christos 
   7830  1.1  christos 
   7831  1.1  christos /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   7832  1.1  christos    It expects P to be the first byte of a single alternative and END one
   7833  1.1  christos    byte past the last. The alternative can contain groups.  */
   7834  1.1  christos 
   7835  1.1  christos static boolean
   7836  1.1  christos PREFIX(alt_match_null_string_p) (p, end, reg_info)
   7837  1.1  christos     UCHAR_T *p, *end;
   7838  1.1  christos     PREFIX(register_info_type) *reg_info;
   7839  1.1  christos {
   7840  1.1  christos   int mcnt;
   7841  1.1  christos   UCHAR_T *p1 = p;
   7842  1.1  christos 
   7843  1.1  christos   while (p1 < end)
   7844  1.1  christos     {
   7845  1.1  christos       /* Skip over opcodes that can match nothing, and break when we get
   7846  1.1  christos          to one that can't.  */
   7847  1.1  christos 
   7848  1.1  christos       switch ((re_opcode_t) *p1)
   7849  1.1  christos         {
   7850  1.1  christos 	/* It's a loop.  */
   7851  1.1  christos         case on_failure_jump:
   7852  1.1  christos           p1++;
   7853  1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7854  1.1  christos           p1 += mcnt;
   7855  1.1  christos           break;
   7856  1.1  christos 
   7857  1.1  christos 	default:
   7858  1.1  christos           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
   7859  1.1  christos             return false;
   7860  1.1  christos         }
   7861  1.1  christos     }  /* while p1 < end */
   7862  1.1  christos 
   7863  1.1  christos   return true;
   7864  1.1  christos } /* alt_match_null_string_p */
   7865  1.1  christos 
   7866  1.1  christos 
   7867  1.1  christos /* Deals with the ops common to group_match_null_string_p and
   7868  1.1  christos    alt_match_null_string_p.  Returns true if the op at *P can match the
   7869  1.1  christos    empty string, false otherwise.
   7870  1.1  christos    On a true return, stores the advanced position in *P (normally one after
   7871  1.1  christos    the op and its arguments, if any); on a false return *P is not written.  */
   7872  1.1  christos static boolean
   7873  1.1  christos PREFIX(common_op_match_null_string_p) (p, end, reg_info)
   7874  1.1  christos     UCHAR_T **p, *end;  /* *P: in/out pattern position; END: scan limit for nested groups.  */
   7875  1.1  christos     PREFIX(register_info_type) *reg_info;  /* Per-register info, indexed by register number.  */
   7876  1.1  christos {
   7877  1.1  christos   int mcnt;
   7878  1.1  christos   boolean ret;
   7879  1.1  christos   int reg_no;
   7880  1.1  christos   UCHAR_T *p1 = *p;
   7881  1.1  christos 
   7882  1.1  christos   switch ((re_opcode_t) *p1++)  /* P1 now points just past the opcode.  */
   7883  1.1  christos     {
   7884  1.1  christos     case no_op:  /* These ops take no arguments here and always pass.  */
   7885  1.1  christos     case begline:
   7886  1.1  christos     case endline:
   7887  1.1  christos     case begbuf:
   7888  1.1  christos     case endbuf:
   7889  1.1  christos     case wordbeg:
   7890  1.1  christos     case wordend:
   7891  1.1  christos     case wordbound:
   7892  1.1  christos     case notwordbound:
   7893  1.1  christos #ifdef emacs
   7894  1.1  christos     case before_dot:
   7895  1.1  christos     case at_dot:
   7896  1.1  christos     case after_dot:
   7897  1.1  christos #endif
   7898  1.1  christos       break;
   7899  1.1  christos 
   7900  1.1  christos     case start_memory:
   7901  1.1  christos       reg_no = *p1;  /* Register number operand of the start_memory.  */
   7902  1.1  christos       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
   7903  1.1  christos       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);  /* On true, P1 is past the matching stop_memory.  */
   7904  1.1  christos 
   7905  1.1  christos       /* Have to set this here in case we're checking a group which
   7906  1.1  christos          contains a group and a back reference to it.  */
   7907  1.1  christos 
   7908  1.1  christos       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
   7909  1.1  christos         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
   7910  1.1  christos 
   7911  1.1  christos       if (!ret)
   7912  1.1  christos         return false;
   7913  1.1  christos       break;
   7914  1.1  christos 
   7915  1.1  christos     /* If this is an optimized succeed_n for zero times, make the jump.  */
   7916  1.1  christos     case jump:
   7917  1.1  christos       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7918  1.1  christos       if (mcnt >= 0)
   7919  1.1  christos         p1 += mcnt;
   7920  1.1  christos       else
   7921  1.1  christos         return false;  /* A backward jump is treated as unable to match nothing.  */
   7922  1.1  christos       break;
   7923  1.1  christos 
   7924  1.1  christos     case succeed_n:
   7925  1.1  christos       /* Get to the number of times to succeed.  */
   7926  1.1  christos       p1 += OFFSET_ADDRESS_SIZE;
   7927  1.1  christos       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7928  1.1  christos 
   7929  1.1  christos       if (mcnt == 0)
   7930  1.1  christos         {
   7931  1.1  christos           p1 -= 2 * OFFSET_ADDRESS_SIZE;  /* Presumably rewinds to the first operand, the jump offset.  */
   7932  1.1  christos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   7933  1.1  christos           p1 += mcnt;
   7934  1.1  christos         }
   7935  1.1  christos       else
   7936  1.1  christos         return false;  /* A nonzero required count means input is needed.  */
   7937  1.1  christos       break;
   7938  1.1  christos 
   7939  1.1  christos     case duplicate:
   7940  1.1  christos       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))  /* NOTE(review): P1 is not moved past the register operand in this case -- confirm intended.  */
   7941  1.1  christos         return false;
   7942  1.1  christos       break;
   7943  1.1  christos 
   7944  1.1  christos     case set_number_at:
   7945  1.1  christos       p1 += 2 * OFFSET_ADDRESS_SIZE;
   7946  1.1  christos       /* NOTE(review): falls through to `default', so the bump above is never stored into *P -- confirm intended.  */
   7947  1.1  christos     default:
   7948  1.1  christos       /* All other opcodes mean we cannot match the empty string.  */
   7949  1.1  christos       return false;
   7950  1.1  christos   }
   7951  1.1  christos 
   7952  1.1  christos   *p = p1;  /* Only reached via `break', i.e. when the op can match nothing.  */
   7953  1.1  christos   return true;
   7954  1.1  christos } /* common_op_match_null_string_p */
   7955  1.1  christos 
   7956  1.1  christos 
   7957  1.1  christos /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   7958  1.1  christos    bytes; nonzero otherwise.  */
   7959  1.1  christos 
   7960  1.1  christos static int
   7961  1.1  christos PREFIX(bcmp_translate) (s1, s2, len, translate)
   7962  1.1  christos      const CHAR_T *s1, *s2;
   7963  1.1  christos      register int len;
   7964  1.1  christos      RE_TRANSLATE_TYPE translate;
   7965  1.1  christos {
   7966  1.1  christos   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
   7967  1.1  christos   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
   7968  1.1  christos   while (len)
   7969  1.1  christos     {
   7970  1.1  christos #ifdef WCHAR
   7971  1.1  christos       if (((*p1<=0xff)?translate[*p1++]:*p1++)
   7972  1.1  christos 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
   7973  1.1  christos 	return 1;
   7974  1.1  christos #else /* BYTE */
   7975  1.1  christos       if (translate[*p1++] != translate[*p2++]) return 1;
   7976  1.1  christos #endif /* WCHAR */
   7977  1.1  christos       len--;
   7978  1.1  christos     }
   7979  1.1  christos   return 0;
   7980  1.1  christos }
   7981  1.1  christos 
   7982  1.1  christos 
   7984  1.1  christos #else /* not INSIDE_RECURSION */
   7985  1.1  christos 
   7986  1.1  christos /* Entry points for GNU code.  */
   7987  1.1  christos 
   7988  1.1  christos /* re_compile_pattern is the GNU regular expression compiler: it
   7989  1.1  christos    compiles PATTERN (of length SIZE) and puts the result in BUFP.
   7990  1.1  christos    Returns 0 if the pattern was valid, otherwise an error string.
   7991  1.1  christos 
   7992  1.1  christos    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
   7993  1.1  christos    are set in BUFP on entry.
   7994  1.1  christos 
   7995  1.1  christos    We call regex_compile to do the actual compilation.  */
   7996  1.1  christos 
   7997  1.1  christos const char *
   7998  1.1  christos re_compile_pattern (pattern, length, bufp)
   7999  1.1  christos      const char *pattern;
   8000  1.1  christos      size_t length;
   8001  1.1  christos      struct re_pattern_buffer *bufp;
   8002  1.1  christos {
   8003  1.1  christos   reg_errcode_t ret;
   8004  1.1  christos 
   8005  1.1  christos   /* GNU code is written to assume at least RE_NREGS registers will be set
   8006  1.1  christos      (and at least one extra will be -1).  */
   8007  1.1  christos   bufp->regs_allocated = REGS_UNALLOCATED;
   8008  1.1  christos 
   8009  1.1  christos   /* And GNU code determines whether or not to get register information
   8010  1.1  christos      by passing null for the REGS argument to re_match, etc., not by
   8011  1.1  christos      setting no_sub.  */
   8012  1.1  christos   bufp->no_sub = 0;
   8013  1.1  christos 
   8014  1.1  christos   /* Match anchors at newline.  */
   8015  1.1  christos   bufp->newline_anchor = 1;
   8016  1.1  christos 
   8017  1.1  christos # ifdef MBS_SUPPORT
   8018  1.1  christos   if (MB_CUR_MAX != 1)
   8019  1.1  christos     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
   8020  1.1  christos   else
   8021  1.1  christos # endif
   8022  1.1  christos     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
   8023  1.1  christos 
   8024  1.1  christos   if (!ret)
   8025  1.1  christos     return NULL;
   8026  1.1  christos   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
   8027  1.1  christos }
   8028  1.1  christos #ifdef _LIBC
   8029  1.1  christos weak_alias (__re_compile_pattern, re_compile_pattern)
   8030  1.1  christos #endif
   8031  1.1  christos 
   8032  1.1  christos /* Entry points compatible with 4.2 BSD regex library.  We don't define
   8034  1.1  christos    them unless specifically requested.  */
   8035  1.1  christos 
   8036  1.1  christos #if defined _REGEX_RE_COMP || defined _LIBC
   8037  1.1  christos 
   8038  1.1  christos /* BSD has one and only one pattern buffer.  */
   8039  1.1  christos static struct re_pattern_buffer re_comp_buf;
   8040  1.1  christos 
   8041  1.1  christos char *
   8042  1.1  christos #ifdef _LIBC
   8043  1.1  christos /* Make these definitions weak in libc, so POSIX programs can redefine
   8044  1.1  christos    these names if they don't use our functions, and still use
   8045  1.1  christos    regcomp/regexec below without link errors.  */
   8046  1.1  christos weak_function
   8047  1.1  christos #endif
   8048  1.1  christos re_comp (s)
   8049  1.1  christos     const char *s;
   8050  1.1  christos {
   8051  1.1  christos   reg_errcode_t ret;
   8052  1.1  christos 
   8053  1.1  christos   if (!s)
   8054  1.1  christos     {
   8055  1.1  christos       if (!re_comp_buf.buffer)
   8056  1.1  christos 	return gettext ("No previous regular expression");
   8057  1.1  christos       return 0;
   8058  1.1  christos     }
   8059  1.1  christos 
   8060  1.1  christos   if (!re_comp_buf.buffer)
   8061  1.1  christos     {
   8062  1.1  christos       re_comp_buf.buffer = (unsigned char *) malloc (200);
   8063  1.1  christos       if (re_comp_buf.buffer == NULL)
   8064  1.1  christos         return (char *) gettext (re_error_msgid
   8065  1.1  christos 				 + re_error_msgid_idx[(int) REG_ESPACE]);
   8066  1.1  christos       re_comp_buf.allocated = 200;
   8067  1.1  christos 
   8068  1.1  christos       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
   8069  1.1  christos       if (re_comp_buf.fastmap == NULL)
   8070  1.1  christos 	return (char *) gettext (re_error_msgid
   8071  1.1  christos 				 + re_error_msgid_idx[(int) REG_ESPACE]);
   8072  1.1  christos     }
   8073  1.1  christos 
   8074  1.1  christos   /* Since `re_exec' always passes NULL for the `regs' argument, we
   8075  1.1  christos      don't need to initialize the pattern buffer fields which affect it.  */
   8076  1.1  christos 
   8077  1.1  christos   /* Match anchors at newlines.  */
   8078  1.1  christos   re_comp_buf.newline_anchor = 1;
   8079  1.1  christos 
   8080  1.1  christos # ifdef MBS_SUPPORT
   8081  1.1  christos   if (MB_CUR_MAX != 1)
   8082  1.1  christos     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   8083  1.1  christos   else
   8084  1.1  christos # endif
   8085  1.1  christos     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   8086  1.1  christos 
   8087  1.1  christos   if (!ret)
   8088  1.1  christos     return NULL;
   8089  1.1  christos 
   8090  1.1  christos   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
   8091  1.1  christos   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
   8092  1.1  christos }
   8093  1.1  christos 
   8094  1.1  christos 
   8095  1.1  christos int
   8096  1.1  christos #ifdef _LIBC
   8097  1.1  christos weak_function
   8098  1.1  christos #endif
   8099  1.1  christos re_exec (s)
   8100  1.1  christos     const char *s;
   8101  1.1  christos {
   8102  1.1  christos   const int len = strlen (s);
   8103  1.1  christos   return
   8104  1.1  christos     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
   8105  1.1  christos }
   8106  1.1  christos 
   8107  1.1  christos #endif /* _REGEX_RE_COMP */
   8108  1.1  christos 
   8109  1.1  christos /* POSIX.2 functions.  Don't define these for Emacs.  */
   8111  1.1  christos 
   8112  1.1  christos #ifndef emacs
   8113  1.1  christos 
   8114  1.1  christos /* regcomp takes a regular expression as a string and compiles it.
   8115  1.1  christos 
   8116  1.1  christos    PREG is a regex_t *.  We do not expect any fields to be initialized,
   8117  1.1  christos    since POSIX says we shouldn't.  Thus, we set
   8118  1.1  christos 
   8119  1.1  christos      `buffer' to the compiled pattern;
   8120  1.1  christos      `used' to the length of the compiled pattern;
   8121  1.1  christos      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
   8122  1.1  christos        REG_EXTENDED bit in CFLAGS is set; otherwise, to
   8123  1.1  christos        RE_SYNTAX_POSIX_BASIC;
   8124  1.1  christos      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
   8125  1.1  christos      `fastmap' to an allocated space for the fastmap;
   8126  1.1  christos      `fastmap_accurate' to zero;
   8127  1.1  christos      `re_nsub' to the number of subexpressions in PATTERN.
   8128  1.1  christos 
   8129  1.1  christos    PATTERN is the address of the pattern string.
   8130  1.1  christos 
   8131  1.1  christos    CFLAGS is a series of bits which affect compilation.
   8132  1.1  christos 
   8133  1.1  christos      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
   8134  1.1  christos      use POSIX basic syntax.
   8135  1.1  christos 
   8136  1.1  christos      If REG_NEWLINE is set, then . and [^...] don't match newline.
   8137  1.1  christos      Also, regexec will try a match beginning after every newline.
   8138  1.1  christos 
   8139  1.1  christos      If REG_ICASE is set, then we considers upper- and lowercase
   8140  1.1  christos      versions of letters to be equivalent when matching.
   8141  1.1  christos 
   8142  1.1  christos      If REG_NOSUB is set, then when PREG is passed to regexec, that
   8143  1.1  christos      routine will report only success or failure, and nothing about the
   8144  1.1  christos      registers.
   8145  1.1  christos 
   8146  1.1  christos    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
   8147  1.1  christos    the return codes and their meanings.)  */
   8148  1.1  christos 
   8149  1.1  christos int
   8150  1.1  christos regcomp (preg, pattern, cflags)
   8151  1.1  christos     regex_t *preg;
   8152  1.1  christos     const char *pattern;
   8153  1.1  christos     int cflags;
   8154  1.1  christos {
   8155  1.1  christos   reg_errcode_t ret;
   8156  1.1  christos   reg_syntax_t syntax
   8157  1.1  christos     = (cflags & REG_EXTENDED) ?
   8158  1.1  christos       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
   8159  1.1  christos 
   8160  1.1  christos   /* regex_compile will allocate the space for the compiled pattern.  */
   8161  1.1  christos   preg->buffer = 0;
   8162  1.1  christos   preg->allocated = 0;
   8163  1.1  christos   preg->used = 0;
   8164  1.1  christos 
   8165  1.1  christos   /* Try to allocate space for the fastmap.  */
   8166  1.1  christos   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
   8167  1.1  christos 
   8168  1.1  christos   if (cflags & REG_ICASE)
   8169  1.1  christos     {
   8170  1.1  christos       unsigned i;
   8171  1.1  christos 
   8172  1.1  christos       preg->translate
   8173  1.1  christos 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
   8174  1.1  christos 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
   8175  1.1  christos       if (preg->translate == NULL)
   8176  1.1  christos         return (int) REG_ESPACE;
   8177  1.1  christos 
   8178  1.1  christos       /* Map uppercase characters to corresponding lowercase ones.  */
   8179  1.1  christos       for (i = 0; i < CHAR_SET_SIZE; i++)
   8180  1.1  christos         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
   8181  1.1  christos     }
   8182  1.1  christos   else
   8183  1.1  christos     preg->translate = NULL;
   8184  1.1  christos 
   8185  1.1  christos   /* If REG_NEWLINE is set, newlines are treated differently.  */
   8186  1.1  christos   if (cflags & REG_NEWLINE)
   8187  1.1  christos     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
   8188  1.1  christos       syntax &= ~RE_DOT_NEWLINE;
   8189  1.1  christos       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
   8190  1.1  christos       /* It also changes the matching behavior.  */
   8191  1.1  christos       preg->newline_anchor = 1;
   8192  1.1  christos     }
   8193  1.1  christos   else
   8194  1.1  christos     preg->newline_anchor = 0;
   8195  1.1  christos 
   8196  1.1  christos   preg->no_sub = !!(cflags & REG_NOSUB);
   8197  1.1  christos 
   8198  1.1  christos   /* POSIX says a null character in the pattern terminates it, so we
   8199  1.1  christos      can use strlen here in compiling the pattern.  */
   8200  1.1  christos # ifdef MBS_SUPPORT
   8201  1.1  christos   if (MB_CUR_MAX != 1)
   8202  1.1  christos     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
   8203  1.1  christos   else
   8204  1.1  christos # endif
   8205  1.1  christos     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
   8206  1.1  christos 
   8207  1.1  christos   /* POSIX doesn't distinguish between an unmatched open-group and an
   8208  1.1  christos      unmatched close-group: both are REG_EPAREN.  */
   8209  1.1  christos   if (ret == REG_ERPAREN) ret = REG_EPAREN;
   8210  1.1  christos 
   8211  1.1  christos   if (ret == REG_NOERROR && preg->fastmap)
   8212  1.1  christos     {
   8213  1.1  christos       /* Compute the fastmap now, since regexec cannot modify the pattern
   8214  1.1  christos 	 buffer.  */
   8215  1.1  christos       if (re_compile_fastmap (preg) == -2)
   8216  1.1  christos 	{
   8217  1.1  christos 	  /* Some error occurred while computing the fastmap, just forget
   8218  1.1  christos 	     about it.  */
   8219  1.1  christos 	  free (preg->fastmap);
   8220  1.1  christos 	  preg->fastmap = NULL;
   8221  1.1  christos 	}
   8222  1.1  christos     }
   8223  1.1  christos 
   8224  1.1  christos   return (int) ret;
   8225  1.1  christos }
   8226  1.1  christos #ifdef _LIBC
   8227  1.1  christos weak_alias (__regcomp, regcomp)
   8228  1.1  christos #endif
   8229  1.1  christos 
   8230  1.1  christos 
   8231  1.1  christos /* regexec searches for a given pattern, specified by PREG, in the
   8232  1.1  christos    string STRING.
   8233  1.1  christos 
   8234  1.1  christos    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   8235  1.1  christos    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   8236  1.1  christos    least NMATCH elements, and we set them to the offsets of the
   8237  1.1  christos    corresponding matched substrings.
   8238  1.1  christos 
   8239  1.1  christos    EFLAGS specifies `execution flags' which affect matching: if
   8240  1.1  christos    REG_NOTBOL is set, then ^ does not match at the beginning of the
   8241  1.1  christos    string; if REG_NOTEOL is set, then $ does not match at the end.
   8242  1.1  christos 
   8243  1.1  christos    We return 0 if we find a match and REG_NOMATCH if not.  */
   8244  1.1  christos 
   8245  1.1  christos int
   8246  1.1  christos regexec (preg, string, nmatch, pmatch, eflags)
   8247  1.1  christos     const regex_t *preg;
   8248  1.1  christos     const char *string;
   8249  1.1  christos     size_t nmatch;
   8250  1.1  christos     regmatch_t pmatch[];
   8251  1.1  christos     int eflags;
   8252  1.1  christos {
   8253  1.1  christos   int ret;
   8254  1.1  christos   struct re_registers regs;
   8255  1.1  christos   regex_t private_preg;
   8256  1.1  christos   int len = strlen (string);
   8257  1.1  christos   boolean want_reg_info = !preg->no_sub && nmatch > 0;
   8258  1.1  christos 
   8259  1.1  christos   private_preg = *preg;
   8260  1.1  christos 
   8261  1.1  christos   private_preg.not_bol = !!(eflags & REG_NOTBOL);
   8262  1.1  christos   private_preg.not_eol = !!(eflags & REG_NOTEOL);
   8263  1.1  christos 
   8264  1.1  christos   /* The user has told us exactly how many registers to return
   8265  1.1  christos      information about, via `nmatch'.  We have to pass that on to the
   8266  1.1  christos      matching routines.  */
   8267  1.1  christos   private_preg.regs_allocated = REGS_FIXED;
   8268  1.1  christos 
   8269  1.1  christos   if (want_reg_info)
   8270  1.1  christos     {
   8271  1.1  christos       regs.num_regs = nmatch;
   8272  1.1  christos       regs.start = TALLOC (nmatch * 2, regoff_t);
   8273  1.1  christos       if (regs.start == NULL)
   8274  1.1  christos         return (int) REG_NOMATCH;
   8275  1.1  christos       regs.end = regs.start + nmatch;
   8276  1.1  christos     }
   8277  1.1  christos 
   8278  1.1  christos   /* Perform the searching operation.  */
   8279  1.1  christos   ret = re_search (&private_preg, string, len,
   8280  1.1  christos                    /* start: */ 0, /* range: */ len,
   8281  1.1  christos                    want_reg_info ? &regs : (struct re_registers *) 0);
   8282  1.1  christos 
   8283  1.1  christos   /* Copy the register information to the POSIX structure.  */
   8284  1.1  christos   if (want_reg_info)
   8285  1.1  christos     {
   8286  1.1  christos       if (ret >= 0)
   8287  1.1  christos         {
   8288  1.1  christos           unsigned r;
   8289  1.1  christos 
   8290  1.1  christos           for (r = 0; r < nmatch; r++)
   8291  1.1  christos             {
   8292  1.1  christos               pmatch[r].rm_so = regs.start[r];
   8293  1.1  christos               pmatch[r].rm_eo = regs.end[r];
   8294  1.1  christos             }
   8295  1.1  christos         }
   8296  1.1  christos 
   8297  1.1  christos       /* If we needed the temporary register info, free the space now.  */
   8298  1.1  christos       free (regs.start);
   8299  1.1  christos     }
   8300  1.1  christos 
   8301  1.1  christos   /* We want zero return to mean success, unlike `re_search'.  */
   8302  1.1  christos   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
   8303  1.1  christos }
   8304  1.1  christos #ifdef _LIBC
   8305  1.1  christos weak_alias (__regexec, regexec)
   8306  1.1  christos #endif
   8307  1.1  christos 
   8308  1.1  christos 
   8309  1.1  christos /* Returns a message corresponding to an error code, ERRCODE, returned
   8310  1.1  christos    from either regcomp or regexec.   We don't use PREG here.  */
   8311  1.1  christos 
   8312  1.1  christos size_t
   8313  1.1  christos regerror (errcode, preg, errbuf, errbuf_size)
   8314  1.1  christos     int errcode;
   8315  1.1  christos     const regex_t *preg;
   8316  1.1  christos     char *errbuf;
   8317  1.1  christos     size_t errbuf_size;
   8318  1.1  christos {
   8319  1.1  christos   const char *msg;
   8320  1.1  christos   size_t msg_size;
   8321  1.1  christos 
   8322  1.1  christos   if (errcode < 0
   8323  1.1  christos       || errcode >= (int) (sizeof (re_error_msgid_idx)
   8324  1.1  christos 			   / sizeof (re_error_msgid_idx[0])))
   8325  1.1  christos     /* Only error codes returned by the rest of the code should be passed
   8326  1.1  christos        to this routine.  If we are given anything else, or if other regex
   8327  1.1  christos        code generates an invalid error code, then the program has a bug.
   8328  1.1  christos        Dump core so we can fix it.  */
   8329  1.1  christos     abort ();
   8330  1.1  christos 
   8331  1.1  christos   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
   8332  1.1  christos 
   8333  1.1  christos   msg_size = strlen (msg) + 1; /* Includes the null.  */
   8334  1.1  christos 
   8335  1.1  christos   if (errbuf_size != 0)
   8336  1.1  christos     {
   8337  1.1  christos       if (msg_size > errbuf_size)
   8338  1.1  christos         {
   8339  1.1  christos #if defined HAVE_MEMPCPY || defined _LIBC
   8340  1.1  christos 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
   8341  1.1  christos #else
   8342  1.1  christos           memcpy (errbuf, msg, errbuf_size - 1);
   8343  1.1  christos           errbuf[errbuf_size - 1] = 0;
   8344  1.1  christos #endif
   8345  1.1  christos         }
   8346  1.1  christos       else
   8347  1.1  christos         memcpy (errbuf, msg, msg_size);
   8348  1.1  christos     }
   8349  1.1  christos 
   8350  1.1  christos   return msg_size;
   8351  1.1  christos }
   8352  1.1  christos #ifdef _LIBC
   8353  1.1  christos weak_alias (__regerror, regerror)
   8354  1.1  christos #endif
   8355  1.1  christos 
   8356  1.1  christos 
   8357  1.1  christos /* Free dynamically allocated space used by PREG.  */
   8358  1.1  christos 
   8359  1.1  christos void
   8360  1.1  christos regfree (preg)
   8361  1.1  christos     regex_t *preg;
   8362  1.1  christos {
   8363  1.1  christos   if (preg->buffer != NULL)
   8364  1.1  christos     free (preg->buffer);
   8365  1.1  christos   preg->buffer = NULL;
   8366  1.1  christos 
   8367  1.1  christos   preg->allocated = 0;
   8368  1.1  christos   preg->used = 0;
   8369  1.1  christos 
   8370  1.1  christos   if (preg->fastmap != NULL)
   8371  1.1  christos     free (preg->fastmap);
   8372  1.1  christos   preg->fastmap = NULL;
   8373  1.1  christos   preg->fastmap_accurate = 0;
   8374  1.1  christos 
   8375  1.1  christos   if (preg->translate != NULL)
   8376  1.1  christos     free (preg->translate);
   8377  1.1  christos   preg->translate = NULL;
   8378  1.1  christos }
   8379  1.1  christos #ifdef _LIBC
   8380  1.1  christos weak_alias (__regfree, regfree)
   8381  1.1  christos #endif
   8382  1.1  christos 
   8383  1.1  christos #endif /* not emacs  */
   8384  1.1  christos 
   8385  1.1  christos #endif /* not INSIDE_RECURSION */
   8386  1.1  christos 
   8387  1.1  christos 
/* End-of-file cleanup: drop the helper macros used above so that they
   do not leak past this point.  NOTE(review): presumably this lets the
   file be processed more than once with different character-type
   settings (see INSIDE_RECURSION above) -- confirm against the part of
   the file that performs the re-inclusion.  */

/* Number storage/extraction helpers.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

/* Debugging output helpers.  */
#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

/* Failure-stack helpers.  */
#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

/* Register "unset" markers.  */
#undef REG_UNSET_VALUE
#undef REG_UNSET

/* Pattern fetching and translation helpers.  */
#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

/* Pattern-buffer construction helpers.  */
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

/* Matcher helpers.  The name below must be spelled exactly like the
   macro's definition; undefining a misspelled name is silently
   accepted by the preprocessor and would leave the real macro
   defined.  */
# undef POINTER_TO_OFFSET
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-type configuration macros.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Record that this point of the file has been reached once.
   NOTE(review): presumably tested earlier in the file to avoid
   emitting one-time definitions again -- confirm at the use sites.  */
# define DEFINED_ONCE
   8456