Home | History | Annotate | Line # | Download | only in lib
      1  1.1       agc /*
      2  1.1       agc   tre-match-backtrack.c - TRE backtracking regex matching engine
      3  1.1       agc 
      4  1.1       agc   This software is released under a BSD-style license.
      5  1.1       agc   See the file LICENSE for details and copyright.
      6  1.1       agc 
      7  1.1       agc */
      8  1.1       agc 
      9  1.1       agc /*
     10  1.1       agc   This matcher is for regexps that use back referencing.  Regexp matching
     11  1.1       agc   with back referencing is an NP-complete problem on the number of back
     12  1.1       agc   references.  The easiest way to match them is to use a backtracking
     13  1.1       agc   routine which basically goes through all possible paths in the TNFA
     14  1.1       agc   and chooses the one which results in the best (leftmost and longest)
     15  1.1       agc   match.  This can be spectacularly expensive and may run out of stack
     16  1.1       agc   space, but there really is no better known generic algorithm.	 Quoting
     17  1.1       agc   Henry Spencer from comp.compilers:
     18  1.1       agc   <URL: http://compilers.iecc.com/comparch/article/93-03-102>
     19  1.1       agc 
     20  1.1       agc     POSIX.2 REs require longest match, which is really exciting to
     21  1.1       agc     implement since the obsolete ("basic") variant also includes
     22  1.1       agc     \<digit>.  I haven't found a better way of tackling this than doing
     23  1.1       agc     a preliminary match using a DFA (or simulation) on a modified RE
     24  1.1       agc     that just replicates subREs for \<digit>, and then doing a
     25  1.1       agc     backtracking match to determine whether the subRE matches were
     26  1.1       agc     right.  This can be rather slow, but I console myself with the
     27  1.1       agc     thought that people who use \<digit> deserve very slow execution.
     28  1.1       agc     (Pun unintentional but very appropriate.)
     29  1.1       agc 
     30  1.1       agc */
     31  1.1       agc 
     32  1.1       agc 
     33  1.1       agc #ifdef HAVE_CONFIG_H
     34  1.1       agc #include <config.h>
     35  1.1       agc #endif /* HAVE_CONFIG_H */
     36  1.1       agc 
     37  1.3  christos #include "tre-alloca.h"
     38  1.1       agc 
     39  1.1       agc #include <assert.h>
     40  1.1       agc #include <stdlib.h>
     41  1.1       agc #include <string.h>
     42  1.1       agc #ifdef HAVE_WCHAR_H
     43  1.1       agc #include <wchar.h>
     44  1.1       agc #endif /* HAVE_WCHAR_H */
     45  1.1       agc #ifdef HAVE_WCTYPE_H
     46  1.1       agc #include <wctype.h>
     47  1.1       agc #endif /* HAVE_WCTYPE_H */
     48  1.1       agc #ifndef TRE_WCHAR
     49  1.1       agc #include <ctype.h>
     50  1.1       agc #endif /* !TRE_WCHAR */
     51  1.1       agc #ifdef HAVE_MALLOC_H
     52  1.1       agc #include <malloc.h>
     53  1.1       agc #endif /* HAVE_MALLOC_H */
     54  1.1       agc 
     55  1.1       agc #include "tre-internal.h"
     56  1.1       agc #include "tre-mem.h"
     57  1.1       agc #include "tre-match-utils.h"
     58  1.1       agc #include "tre.h"
     59  1.1       agc #include "xmalloc.h"
     60  1.1       agc 
     61  1.1       agc typedef struct {
     62  1.1       agc   int pos;
     63  1.1       agc   const char *str_byte;
     64  1.1       agc #ifdef TRE_WCHAR
     65  1.1       agc   const wchar_t *str_wide;
     66  1.1       agc #endif /* TRE_WCHAR */
     67  1.1       agc   tre_tnfa_transition_t *state;
     68  1.1       agc   int state_id;
     69  1.1       agc   int next_c;
     70  1.1       agc   int *tags;
     71  1.1       agc #ifdef TRE_MBSTATE
     72  1.1       agc   mbstate_t mbstate;
     73  1.1       agc #endif /* TRE_MBSTATE */
     74  1.1       agc } tre_backtrack_item_t;
     75  1.1       agc 
     76  1.1       agc typedef struct tre_backtrack_struct {
     77  1.1       agc   tre_backtrack_item_t item;
     78  1.1       agc   struct tre_backtrack_struct *prev;
     79  1.1       agc   struct tre_backtrack_struct *next;
     80  1.1       agc } *tre_backtrack_t;
     81  1.1       agc 
     82  1.1       agc #ifdef TRE_WCHAR
     83  1.1       agc #define BT_STACK_WIDE_IN(_str_wide)	stack->item.str_wide = (_str_wide)
     84  1.1       agc #define BT_STACK_WIDE_OUT		(str_wide) = stack->item.str_wide
     85  1.1       agc #else /* !TRE_WCHAR */
     86  1.1       agc #define BT_STACK_WIDE_IN(_str_wide)
     87  1.1       agc #define BT_STACK_WIDE_OUT
     88  1.1       agc #endif /* !TRE_WCHAR */
     89  1.1       agc 
     90  1.1       agc #ifdef TRE_MBSTATE
     91  1.1       agc #define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
     92  1.1       agc #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
     93  1.1       agc #else /* !TRE_MBSTATE */
     94  1.1       agc #define BT_STACK_MBSTATE_IN
     95  1.1       agc #define BT_STACK_MBSTATE_OUT
     96  1.1       agc #endif /* !TRE_MBSTATE */
     97  1.1       agc 
     98  1.1       agc 
     99  1.1       agc #ifdef TRE_USE_ALLOCA
    100  1.1       agc #define tre_bt_mem_new		  tre_mem_newa
    101  1.1       agc #define tre_bt_mem_alloc	  tre_mem_alloca
    102  1.5       rin #define tre_bt_mem_destroy(obj)	  do { } while (/*CONSTCOND*/(void)0,0)
    103  1.1       agc #else /* !TRE_USE_ALLOCA */
    104  1.1       agc #define tre_bt_mem_new		  tre_mem_new
    105  1.1       agc #define tre_bt_mem_alloc	  tre_mem_alloc
    106  1.1       agc #define tre_bt_mem_destroy	  tre_mem_destroy
    107  1.1       agc #endif /* !TRE_USE_ALLOCA */
    108  1.1       agc 
    109  1.1       agc 
    110  1.1       agc #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
    111  1.1       agc   do									      \
    112  1.1       agc     {									      \
    113  1.3  christos       size_t i;								      \
    114  1.1       agc       if (!stack->next)							      \
    115  1.1       agc 	{								      \
    116  1.1       agc 	  tre_backtrack_t s;						      \
    117  1.1       agc 	  s = tre_bt_mem_alloc(mem, sizeof(*s));			      \
    118  1.1       agc 	  if (!s)							      \
    119  1.1       agc 	    {								      \
    120  1.1       agc 	      tre_bt_mem_destroy(mem);					      \
    121  1.1       agc 	      if (tags)							      \
    122  1.1       agc 		xfree(tags);						      \
    123  1.1       agc 	      if (pmatch)						      \
    124  1.1       agc 		xfree(pmatch);						      \
    125  1.1       agc 	      if (states_seen)						      \
    126  1.1       agc 		xfree(states_seen);					      \
    127  1.1       agc 	      return REG_ESPACE;					      \
    128  1.1       agc 	    }								      \
    129  1.1       agc 	  s->prev = stack;						      \
    130  1.1       agc 	  s->next = NULL;						      \
    131  1.1       agc 	  s->item.tags = tre_bt_mem_alloc(mem,				      \
    132  1.1       agc 					  sizeof(*tags) * tnfa->num_tags);    \
    133  1.1       agc 	  if (!s->item.tags)						      \
    134  1.1       agc 	    {								      \
    135  1.1       agc 	      tre_bt_mem_destroy(mem);					      \
    136  1.1       agc 	      if (tags)							      \
    137  1.1       agc 		xfree(tags);						      \
    138  1.1       agc 	      if (pmatch)						      \
    139  1.1       agc 		xfree(pmatch);						      \
    140  1.1       agc 	      if (states_seen)						      \
    141  1.1       agc 		xfree(states_seen);					      \
    142  1.1       agc 	      return REG_ESPACE;					      \
    143  1.1       agc 	    }								      \
    144  1.1       agc 	  stack->next = s;						      \
    145  1.1       agc 	  stack = s;							      \
    146  1.1       agc 	}								      \
    147  1.1       agc       else								      \
    148  1.1       agc 	stack = stack->next;						      \
    149  1.1       agc       stack->item.pos = (_pos);						      \
    150  1.1       agc       stack->item.str_byte = (_str_byte);				      \
    151  1.1       agc       BT_STACK_WIDE_IN(_str_wide);					      \
    152  1.1       agc       stack->item.state = (_state);					      \
    153  1.1       agc       stack->item.state_id = (_state_id);				      \
    154  1.1       agc       stack->item.next_c = (_next_c);					      \
    155  1.1       agc       for (i = 0; i < tnfa->num_tags; i++)				      \
    156  1.1       agc 	stack->item.tags[i] = (_tags)[i];				      \
    157  1.1       agc       BT_STACK_MBSTATE_IN;						      \
    158  1.1       agc     }									      \
    159  1.5       rin   while (/*CONSTCOND*/(void)0,0)
    160  1.1       agc 
    161  1.1       agc #define BT_STACK_POP()							      \
    162  1.1       agc   do									      \
    163  1.1       agc     {									      \
    164  1.3  christos       size_t i;								      \
    165  1.1       agc       assert(stack->prev);						      \
    166  1.1       agc       pos = stack->item.pos;						      \
    167  1.1       agc       if (type == STR_USER)                                                   \
    168  1.1       agc         str_source->rewind(pos + pos_add_next, str_source->context);          \
    169  1.1       agc       str_byte = stack->item.str_byte;					      \
    170  1.1       agc       BT_STACK_WIDE_OUT;						      \
    171  1.1       agc       state = stack->item.state;					      \
    172  1.5       rin       next_c = (tre_char_t) stack->item.next_c;					      \
    173  1.1       agc       for (i = 0; i < tnfa->num_tags; i++)				      \
    174  1.1       agc 	tags[i] = stack->item.tags[i];					      \
    175  1.1       agc       BT_STACK_MBSTATE_OUT;						      \
    176  1.1       agc       stack = stack->prev;						      \
    177  1.1       agc     }									      \
    178  1.5       rin   while (/*CONSTCOND*/(void)0,0)
    179  1.1       agc 
    180  1.1       agc #undef MIN
    181  1.1       agc #define MIN(a, b) ((a) <= (b) ? (a) : (b))
    182  1.1       agc 
    183  1.1       agc reg_errcode_t
    184  1.1       agc tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
    185  1.1       agc 		       int len, tre_str_type_t type, int *match_tags,
    186  1.1       agc 		       int eflags, int *match_end_ofs)
    187  1.1       agc {
    188  1.1       agc   /* State variables required by GET_NEXT_WCHAR. */
    189  1.1       agc   tre_char_t prev_c = 0, next_c = 0;
    190  1.1       agc   const char *str_byte = string;
    191  1.1       agc   int pos = 0;
    192  1.1       agc   unsigned int pos_add_next = 1;
    193  1.1       agc #ifdef TRE_WCHAR
    194  1.1       agc   const wchar_t *str_wide = string;
    195  1.1       agc #ifdef TRE_MBSTATE
    196  1.1       agc   mbstate_t mbstate;
    197  1.1       agc #endif /* TRE_MBSTATE */
    198  1.1       agc #endif /* TRE_WCHAR */
    199  1.1       agc   int reg_notbol = eflags & REG_NOTBOL;
    200  1.1       agc   int reg_noteol = eflags & REG_NOTEOL;
    201  1.1       agc   int reg_newline = tnfa->cflags & REG_NEWLINE;
    202  1.3  christos   size_t str_user_end = 0;
    203  1.1       agc 
    204  1.1       agc   /* These are used to remember the necessary values of the above
    205  1.1       agc      variables to return to the position where the current search
    206  1.1       agc      started from. */
    207  1.1       agc   int next_c_start;
    208  1.1       agc   const char *str_byte_start;
    209  1.1       agc   int pos_start = -1;
    210  1.1       agc #ifdef TRE_WCHAR
    211  1.1       agc   const wchar_t *str_wide_start;
    212  1.1       agc #endif /* TRE_WCHAR */
    213  1.1       agc #ifdef TRE_MBSTATE
    214  1.1       agc   mbstate_t mbstate_start;
    215  1.1       agc #endif /* TRE_MBSTATE */
    216  1.1       agc 
    217  1.1       agc   /* End offset of best match so far, or -1 if no match found yet. */
    218  1.1       agc   int match_eo = -1;
    219  1.1       agc   /* Tag arrays. */
    220  1.1       agc   int *next_tags, *tags = NULL;
    221  1.1       agc   /* Current TNFA state. */
    222  1.1       agc   tre_tnfa_transition_t *state;
    223  1.1       agc   int *states_seen = NULL;
    224  1.1       agc 
    225  1.1       agc   /* Memory allocator to for allocating the backtracking stack. */
    226  1.1       agc   tre_mem_t mem = tre_bt_mem_new();
    227  1.1       agc 
    228  1.1       agc   /* The backtracking stack. */
    229  1.1       agc   tre_backtrack_t stack;
    230  1.1       agc 
    231  1.1       agc   tre_tnfa_transition_t *trans_i;
    232  1.1       agc   regmatch_t *pmatch = NULL;
    233  1.6       rin   reg_errcode_t ret;
    234  1.1       agc 
    235  1.1       agc #ifdef TRE_MBSTATE
    236  1.1       agc   memset(&mbstate, '\0', sizeof(mbstate));
    237  1.1       agc #endif /* TRE_MBSTATE */
    238  1.1       agc 
    239  1.1       agc   if (!mem)
    240  1.1       agc     return REG_ESPACE;
    241  1.1       agc   stack = tre_bt_mem_alloc(mem, sizeof(*stack));
    242  1.1       agc   if (!stack)
    243  1.1       agc     {
    244  1.1       agc       ret = REG_ESPACE;
    245  1.1       agc       goto error_exit;
    246  1.1       agc     }
    247  1.1       agc   stack->prev = NULL;
    248  1.1       agc   stack->next = NULL;
    249  1.1       agc 
    250  1.1       agc   DPRINT(("tnfa_execute_backtrack, input type %d\n", type));
    251  1.1       agc   DPRINT(("len = %d\n", len));
    252  1.1       agc 
    253  1.1       agc #ifdef TRE_USE_ALLOCA
    254  1.1       agc   tags = alloca(sizeof(*tags) * tnfa->num_tags);
    255  1.1       agc   pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches);
    256  1.1       agc   states_seen = alloca(sizeof(*states_seen) * tnfa->num_states);
    257  1.1       agc #else /* !TRE_USE_ALLOCA */
    258  1.1       agc   if (tnfa->num_tags)
    259  1.1       agc     {
    260  1.1       agc       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
    261  1.1       agc       if (!tags)
    262  1.1       agc 	{
    263  1.1       agc 	  ret = REG_ESPACE;
    264  1.1       agc 	  goto error_exit;
    265  1.1       agc 	}
    266  1.1       agc     }
    267  1.1       agc   if (tnfa->num_submatches)
    268  1.1       agc     {
    269  1.1       agc       pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
    270  1.1       agc       if (!pmatch)
    271  1.1       agc 	{
    272  1.1       agc 	  ret = REG_ESPACE;
    273  1.1       agc 	  goto error_exit;
    274  1.1       agc 	}
    275  1.1       agc     }
    276  1.1       agc   if (tnfa->num_states)
    277  1.1       agc     {
    278  1.1       agc       states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
    279  1.1       agc       if (!states_seen)
    280  1.1       agc 	{
    281  1.1       agc 	  ret = REG_ESPACE;
    282  1.1       agc 	  goto error_exit;
    283  1.1       agc 	}
    284  1.1       agc     }
    285  1.1       agc #endif /* !TRE_USE_ALLOCA */
    286  1.1       agc 
    287  1.1       agc  retry:
    288  1.1       agc   {
    289  1.3  christos     size_t i;
    290  1.1       agc     for (i = 0; i < tnfa->num_tags; i++)
    291  1.1       agc       {
    292  1.1       agc 	tags[i] = -1;
    293  1.1       agc 	if (match_tags)
    294  1.1       agc 	  match_tags[i] = -1;
    295  1.1       agc       }
    296  1.1       agc     for (i = 0; i < tnfa->num_states; i++)
    297  1.1       agc       states_seen[i] = 0;
    298  1.1       agc   }
    299  1.1       agc 
    300  1.1       agc   state = NULL;
    301  1.1       agc   pos = pos_start;
    302  1.1       agc   if (type == STR_USER)
    303  1.1       agc     str_source->rewind(pos + pos_add_next, str_source->context);
    304  1.1       agc   GET_NEXT_WCHAR();
    305  1.1       agc   pos_start = pos;
    306  1.1       agc   next_c_start = next_c;
    307  1.1       agc   str_byte_start = str_byte;
    308  1.1       agc #ifdef TRE_WCHAR
    309  1.1       agc   str_wide_start = str_wide;
    310  1.1       agc #endif /* TRE_WCHAR */
    311  1.1       agc #ifdef TRE_MBSTATE
    312  1.1       agc   mbstate_start = mbstate;
    313  1.1       agc #endif /* TRE_MBSTATE */
    314  1.1       agc 
    315  1.1       agc   /* Handle initial states. */
    316  1.1       agc   next_tags = NULL;
    317  1.1       agc   for (trans_i = tnfa->initial; trans_i->state; trans_i++)
    318  1.1       agc     {
    319  1.1       agc       DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c));
    320  1.1       agc       if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
    321  1.1       agc 	{
    322  1.1       agc 	  DPRINT(("assert failed\n"));
    323  1.1       agc 	  continue;
    324  1.1       agc 	}
    325  1.1       agc       if (state == NULL)
    326  1.1       agc 	{
    327  1.1       agc 	  /* Start from this state. */
    328  1.1       agc 	  state = trans_i->state;
    329  1.1       agc 	  next_tags = trans_i->tags;
    330  1.1       agc 	}
    331  1.1       agc       else
    332  1.1       agc 	{
    333  1.1       agc 	  /* Backtrack to this state. */
    334  1.1       agc 	  DPRINT(("saving state %d for backtracking\n", trans_i->state_id));
    335  1.1       agc 	  BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
    336  1.1       agc 			trans_i->state_id, next_c, tags, mbstate);
    337  1.1       agc 	  {
    338  1.1       agc 	    int *tmp = trans_i->tags;
    339  1.1       agc 	    if (tmp)
    340  1.1       agc 	      while (*tmp >= 0)
    341  1.1       agc 		stack->item.tags[*tmp++] = pos;
    342  1.1       agc 	  }
    343  1.1       agc 	}
    344  1.1       agc     }
    345  1.1       agc 
    346  1.1       agc   if (next_tags)
    347  1.1       agc     for (; *next_tags >= 0; next_tags++)
    348  1.1       agc       tags[*next_tags] = pos;
    349  1.1       agc 
    350  1.1       agc 
    351  1.1       agc   DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte));
    352  1.1       agc   DPRINT(("pos:chr/code | state and tags\n"));
    353  1.1       agc   DPRINT(("-------------+------------------------------------------------\n"));
    354  1.1       agc 
    355  1.1       agc   if (state == NULL)
    356  1.1       agc     goto backtrack;
    357  1.1       agc 
    358  1.5       rin   while (/*CONSTCOND*/(void)1,1)
    359  1.1       agc     {
    360  1.1       agc       tre_tnfa_transition_t *next_state;
    361  1.1       agc       int empty_br_match;
    362  1.1       agc 
    363  1.1       agc       DPRINT(("start loop\n"));
    364  1.1       agc       if (state == tnfa->final)
    365  1.1       agc 	{
    366  1.1       agc 	  DPRINT(("  match found, %d %d\n", match_eo, pos));
    367  1.1       agc 	  if (match_eo < pos
    368  1.1       agc 	      || (match_eo == pos
    369  1.1       agc 		  && match_tags
    370  1.1       agc 		  && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
    371  1.1       agc 				   tags, match_tags)))
    372  1.1       agc 	    {
    373  1.3  christos 	      size_t i;
    374  1.1       agc 	      /* This match wins the previous match. */
    375  1.1       agc 	      DPRINT(("	 win previous\n"));
    376  1.1       agc 	      match_eo = pos;
    377  1.1       agc 	      if (match_tags)
    378  1.1       agc 		for (i = 0; i < tnfa->num_tags; i++)
    379  1.1       agc 		  match_tags[i] = tags[i];
    380  1.1       agc 	    }
    381  1.1       agc 	  /* Our TNFAs never have transitions leaving from the final state,
    382  1.1       agc 	     so we jump right to backtracking. */
    383  1.1       agc 	  goto backtrack;
    384  1.1       agc 	}
    385  1.1       agc 
    386  1.1       agc #ifdef TRE_DEBUG
    387  1.1       agc       DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c,
    388  1.1       agc 	      state));
    389  1.1       agc       {
    390  1.1       agc 	int i;
    391  1.1       agc 	for (i = 0; i < tnfa->num_tags; i++)
    392  1.1       agc 	  DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? ", " : ""));
    393  1.1       agc 	DPRINT(("\n"));
    394  1.1       agc       }
    395  1.1       agc #endif /* TRE_DEBUG */
    396  1.1       agc 
    397  1.1       agc       /* Go to the next character in the input string. */
    398  1.1       agc       empty_br_match = 0;
    399  1.1       agc       trans_i = state;
    400  1.1       agc       if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
    401  1.1       agc 	{
    402  1.1       agc 	  /* This is a back reference state.  All transitions leaving from
    403  1.1       agc 	     this state have the same back reference "assertion".  Instead
    404  1.1       agc 	     of reading the next character, we match the back reference. */
    405  1.2     ahoka 	  regoff_t so, eo, bt = trans_i->u.backref;
    406  1.2     ahoka 	  regoff_t bt_len;
    407  1.2     ahoka 	  regoff_t result;
    408  1.1       agc 
    409  1.1       agc 	  DPRINT(("  should match back reference %d\n", bt));
    410  1.1       agc 	  /* Get the substring we need to match against.  Remember to
    411  1.1       agc 	     turn off REG_NOSUB temporarily. */
    412  1.7       rin 	  tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
    413  1.1       agc 			  tnfa, tags, pos);
    414  1.2     ahoka 	  /* LINTED */so = pmatch[bt].rm_so;
    415  1.2     ahoka 	  /* LINTED */eo = pmatch[bt].rm_eo;
    416  1.1       agc 	  bt_len = eo - so;
    417  1.1       agc 
    418  1.1       agc #ifdef TRE_DEBUG
    419  1.1       agc 	  {
    420  1.1       agc 	    int slen;
    421  1.1       agc 	    if (len < 0)
    422  1.1       agc 	      slen = bt_len;
    423  1.1       agc 	    else
    424  1.1       agc 	      slen = MIN(bt_len, len - pos);
    425  1.1       agc 
    426  1.1       agc 	    if (type == STR_BYTE)
    427  1.1       agc 	      {
    428  1.1       agc 		DPRINT(("  substring (len %d) is [%d, %d[: '%.*s'\n",
    429  1.1       agc 			bt_len, so, eo, bt_len, (char*)string + so));
    430  1.1       agc 		DPRINT(("  current string is '%.*s'\n", slen, str_byte - 1));
    431  1.1       agc 	      }
    432  1.1       agc #ifdef TRE_WCHAR
    433  1.1       agc 	    else if (type == STR_WIDE)
    434  1.1       agc 	      {
    435  1.1       agc 		DPRINT(("  substring (len %d) is [%d, %d[: '%.*" STRF "'\n",
    436  1.1       agc 			bt_len, so, eo, bt_len, (wchar_t*)string + so));
    437  1.1       agc 		DPRINT(("  current string is '%.*" STRF "'\n",
    438  1.1       agc 			slen, str_wide - 1));
    439  1.1       agc 	      }
    440  1.1       agc #endif /* TRE_WCHAR */
    441  1.1       agc 	  }
    442  1.1       agc #endif
    443  1.1       agc 
    444  1.1       agc 	  if (len < 0)
    445  1.1       agc 	    {
    446  1.1       agc 	      if (type == STR_USER)
    447  1.1       agc 		result = str_source->compare((unsigned)so, (unsigned)pos,
    448  1.1       agc 					     (unsigned)bt_len,
    449  1.1       agc 					     str_source->context);
    450  1.1       agc #ifdef TRE_WCHAR
    451  1.1       agc 	      else if (type == STR_WIDE)
    452  1.1       agc 		result = wcsncmp((const wchar_t*)string + so, str_wide - 1,
    453  1.1       agc 				 (size_t)bt_len);
    454  1.1       agc #endif /* TRE_WCHAR */
    455  1.1       agc 	      else
    456  1.2     ahoka 		/* LINTED */result = strncmp((const char*)string + so, str_byte - 1,
    457  1.1       agc 				 (size_t)bt_len);
    458  1.1       agc 	    }
    459  1.1       agc 	  else if (len - pos < bt_len)
    460  1.1       agc 	    result = 1;
    461  1.1       agc #ifdef TRE_WCHAR
    462  1.1       agc 	  else if (type == STR_WIDE)
    463  1.1       agc 	    result = wmemcmp((const wchar_t*)string + so, str_wide - 1,
    464  1.1       agc 			     (size_t)bt_len);
    465  1.1       agc #endif /* TRE_WCHAR */
    466  1.1       agc 	  else
    467  1.2     ahoka 	    /* LINTED */result = memcmp((const char*)string + so, str_byte - 1,
    468  1.1       agc 			    (size_t)bt_len);
    469  1.1       agc 
    470  1.1       agc 	  if (result == 0)
    471  1.1       agc 	    {
    472  1.1       agc 	      /* Back reference matched.  Check for infinite loop. */
    473  1.1       agc 	      if (bt_len == 0)
    474  1.1       agc 		empty_br_match = 1;
    475  1.1       agc 	      if (empty_br_match && states_seen[trans_i->state_id])
    476  1.1       agc 		{
    477  1.1       agc 		  DPRINT(("  avoid loop\n"));
    478  1.1       agc 		  goto backtrack;
    479  1.1       agc 		}
    480  1.1       agc 
    481  1.1       agc 	      states_seen[trans_i->state_id] = empty_br_match;
    482  1.1       agc 
    483  1.1       agc 	      /* Advance in input string and resync `prev_c', `next_c'
    484  1.1       agc 		 and pos. */
    485  1.1       agc 	      DPRINT(("	 back reference matched\n"));
    486  1.2     ahoka 	      /* LINTED */str_byte += bt_len - 1;
    487  1.1       agc #ifdef TRE_WCHAR
    488  1.2     ahoka 	      /* LINTED */str_wide += bt_len - 1;
    489  1.1       agc #endif /* TRE_WCHAR */
    490  1.2     ahoka 	      /* LINTED */pos += bt_len - 1;
    491  1.1       agc 	      GET_NEXT_WCHAR();
    492  1.1       agc 	      DPRINT(("	 pos now %d\n", pos));
    493  1.1       agc 	    }
    494  1.1       agc 	  else
    495  1.1       agc 	    {
    496  1.1       agc 	      DPRINT(("	 back reference did not match\n"));
    497  1.1       agc 	      goto backtrack;
    498  1.1       agc 	    }
    499  1.1       agc 	}
    500  1.1       agc       else
    501  1.1       agc 	{
    502  1.1       agc 	  /* Check for end of string. */
    503  1.1       agc 	  if (len < 0)
    504  1.1       agc 	    {
    505  1.1       agc 	      if (type == STR_USER)
    506  1.1       agc 		{
    507  1.1       agc 		  if (str_user_end)
    508  1.1       agc 		    goto backtrack;
    509  1.1       agc 		}
    510  1.1       agc 	      else if (next_c == L'\0')
    511  1.1       agc 		goto backtrack;
    512  1.1       agc 	    }
    513  1.1       agc 	  else
    514  1.1       agc 	    {
    515  1.1       agc 	      if (pos >= len)
    516  1.1       agc 		goto backtrack;
    517  1.1       agc 	    }
    518  1.1       agc 
    519  1.1       agc 	  /* Read the next character. */
    520  1.1       agc 	  GET_NEXT_WCHAR();
    521  1.1       agc 	}
    522  1.1       agc 
    523  1.1       agc       next_state = NULL;
    524  1.1       agc       for (trans_i = state; trans_i->state; trans_i++)
    525  1.1       agc 	{
    526  1.1       agc 	  DPRINT(("  transition %d-%d (%c-%c) %d to %d\n",
    527  1.1       agc 		  trans_i->code_min, trans_i->code_max,
    528  1.1       agc 		  trans_i->code_min, trans_i->code_max,
    529  1.1       agc 		  trans_i->assertions, trans_i->state_id));
    530  1.1       agc 	  if (trans_i->code_min <= (tre_cint_t)prev_c
    531  1.1       agc 	      && trans_i->code_max >= (tre_cint_t)prev_c)
    532  1.1       agc 	    {
    533  1.1       agc 	      if (trans_i->assertions
    534  1.1       agc 		  && (CHECK_ASSERTIONS(trans_i->assertions)
    535  1.1       agc 		      || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
    536  1.1       agc 		{
    537  1.1       agc 		  DPRINT(("  assertion failed\n"));
    538  1.1       agc 		  continue;
    539  1.1       agc 		}
    540  1.1       agc 
    541  1.1       agc 	      if (next_state == NULL)
    542  1.1       agc 		{
    543  1.1       agc 		  /* First matching transition. */
    544  1.1       agc 		  DPRINT(("  Next state is %d\n", trans_i->state_id));
    545  1.1       agc 		  next_state = trans_i->state;
    546  1.1       agc 		  next_tags = trans_i->tags;
    547  1.1       agc 		}
    548  1.1       agc 	      else
    549  1.1       agc 		{
    550  1.1       agc 		  /* Second matching transition.  We may need to backtrack here
    551  1.1       agc 		     to take this transition instead of the first one, so we
    552  1.1       agc 		     push this transition in the backtracking stack so we can
    553  1.1       agc 		     jump back here if needed. */
    554  1.1       agc 		  DPRINT(("  saving state %d for backtracking\n",
    555  1.1       agc 			  trans_i->state_id));
    556  1.1       agc 		  BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
    557  1.1       agc 				trans_i->state_id, next_c, tags, mbstate);
    558  1.1       agc 		  {
    559  1.1       agc 		    int *tmp;
    560  1.1       agc 		    for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
    561  1.1       agc 		      stack->item.tags[*tmp] = pos;
    562  1.1       agc 		  }
    563  1.1       agc #if 0 /* XXX - it's important not to look at all transitions here to keep
    564  1.1       agc 	 the stack small! */
    565  1.1       agc 		  break;
    566  1.1       agc #endif
    567  1.1       agc 		}
    568  1.1       agc 	    }
    569  1.1       agc 	}
    570  1.1       agc 
    571  1.1       agc       if (next_state != NULL)
    572  1.1       agc 	{
    573  1.1       agc 	  /* Matching transitions were found.  Take the first one. */
    574  1.1       agc 	  state = next_state;
    575  1.1       agc 
    576  1.1       agc 	  /* Update the tag values. */
    577  1.1       agc 	  if (next_tags)
    578  1.1       agc 	    while (*next_tags >= 0)
    579  1.1       agc 	      tags[*next_tags++] = pos;
    580  1.1       agc 	}
    581  1.1       agc       else
    582  1.1       agc 	{
    583  1.1       agc 	backtrack:
    584  1.1       agc 	  /* A matching transition was not found.  Try to backtrack. */
    585  1.1       agc 	  if (stack->prev)
    586  1.1       agc 	    {
    587  1.1       agc 	      DPRINT(("	 backtracking\n"));
    588  1.4     joerg #if ASSERT_BACKREF
    589  1.4     joerg 	      if (stack->item.state->assertions)
    590  1.1       agc 		{
    591  1.1       agc 		  DPRINT(("  states_seen[%d] = 0\n",
    592  1.1       agc 			  stack->item.state_id));
    593  1.1       agc 		  states_seen[stack->item.state_id] = 0;
    594  1.1       agc 		}
    595  1.4     joerg #endif
    596  1.1       agc 
    597  1.1       agc 	      BT_STACK_POP();
    598  1.1       agc 	    }
    599  1.1       agc 	  else if (match_eo < 0)
    600  1.1       agc 	    {
    601  1.1       agc 	      /* Try starting from a later position in the input string. */
    602  1.1       agc 	      /* Check for end of string. */
    603  1.1       agc 	      if (len < 0)
    604  1.1       agc 		{
    605  1.1       agc 		  if (next_c == L'\0')
    606  1.1       agc 		    {
    607  1.1       agc 		      DPRINT(("end of string.\n"));
    608  1.1       agc 		      break;
    609  1.1       agc 		    }
    610  1.1       agc 		}
    611  1.1       agc 	      else
    612  1.1       agc 		{
    613  1.1       agc 		  if (pos >= len)
    614  1.1       agc 		    {
    615  1.1       agc 		      DPRINT(("end of string.\n"));
    616  1.1       agc 		      break;
    617  1.1       agc 		    }
    618  1.1       agc 		}
    619  1.1       agc 	      DPRINT(("restarting from next start position\n"));
    620  1.5       rin 	      next_c = (tre_char_t) next_c_start;
    621  1.1       agc #ifdef TRE_MBSTATE
    622  1.1       agc 	      mbstate = mbstate_start;
    623  1.1       agc #endif /* TRE_MBSTATE */
    624  1.1       agc 	      str_byte = str_byte_start;
    625  1.1       agc #ifdef TRE_WCHAR
    626  1.1       agc 	      str_wide = str_wide_start;
    627  1.1       agc #endif /* TRE_WCHAR */
    628  1.1       agc 	      goto retry;
    629  1.1       agc 	    }
    630  1.1       agc 	  else
    631  1.1       agc 	    {
    632  1.1       agc 	      DPRINT(("finished\n"));
    633  1.1       agc 	      break;
    634  1.1       agc 	    }
    635  1.1       agc 	}
    636  1.1       agc     }
    637  1.1       agc 
    638  1.1       agc   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
    639  1.1       agc   *match_end_ofs = match_eo;
    640  1.1       agc 
    641  1.1       agc  error_exit:
    642  1.1       agc   tre_bt_mem_destroy(mem);
    643  1.1       agc #ifndef TRE_USE_ALLOCA
    644  1.1       agc   if (tags)
    645  1.1       agc     xfree(tags);
    646  1.1       agc   if (pmatch)
    647  1.1       agc     xfree(pmatch);
    648  1.1       agc   if (states_seen)
    649  1.1       agc     xfree(states_seen);
    650  1.1       agc #endif /* !TRE_USE_ALLOCA */
    651  1.1       agc 
    652  1.1       agc   return ret;
    653  1.1       agc }
    654