Home | History | Annotate | Line # | Download | only in lib
tre-match-approx.c revision 1.6.4.1
      1 /*
      2   tre-match-approx.c - TRE approximate regex matching engine
      3 
      4   This software is released under a BSD-style license.
      5   See the file LICENSE for details and copyright.
      6 
      7 */
      8 #ifdef HAVE_CONFIG_H
      9 #include <config.h>
     10 #endif /* HAVE_CONFIG_H */
     11 
     12 #include "tre-alloca.h"
     13 
     14 #define __USE_STRING_INLINES
     15 #undef __NO_INLINE__
     16 
     17 #include <assert.h>
     18 #include <stdlib.h>
     19 #include <string.h>
     20 #include <limits.h>
     21 #ifdef HAVE_WCHAR_H
     22 #include <wchar.h>
     23 #endif /* HAVE_WCHAR_H */
     24 #ifdef HAVE_WCTYPE_H
     25 #include <wctype.h>
     26 #endif /* HAVE_WCTYPE_H */
     27 #ifndef TRE_WCHAR
     28 #include <ctype.h>
     29 #endif /* !TRE_WCHAR */
     30 #ifdef HAVE_MALLOC_H
     31 #include <malloc.h>
     32 #endif /* HAVE_MALLOC_H */
     33 #include <stdint.h>
     34 
     35 #include "tre-internal.h"
     36 #include "tre-match-utils.h"
     37 #include "tre.h"
     38 #include "xmalloc.h"
     39 
     40 #define TRE_M_COST	0
     41 #define TRE_M_NUM_INS	1
     42 #define TRE_M_NUM_DEL	2
     43 #define TRE_M_NUM_SUBST 3
     44 #define TRE_M_NUM_ERR	4
     45 #define TRE_M_LAST	5
     46 
     47 #define TRE_M_MAX_DEPTH 3
     48 
     49 typedef struct {
     50   /* State in the TNFA transition table. */
     51   tre_tnfa_transition_t *state;
     52   /* Position in input string. */
     53   int pos;
     54   /* Tag values. */
     55   int *tags;
     56   /* Matching parameters. */
     57   regaparams_t params;
     58   /* Nesting depth of parameters.  This is used as an index in
     59      the `costs' array. */
     60   int depth;
     61   /* Costs and counter values for different parameter nesting depths. */
     62   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
     63 } tre_tnfa_approx_reach_t;
     64 
     65 
     66 #ifdef TRE_DEBUG
     67 /* Prints the `reach' array in a readable fashion with DPRINT. */
     68 static void
     69 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
     70 		int pos, size_t num_tags)
     71 {
     72   int id;
     73 
     74   /* Print each state on one line. */
     75   DPRINT(("  reach:\n"));
     76   for (id = 0; id < tnfa->num_states; id++)
     77     {
     78       int i, j;
     79       if (reach[id].pos < pos)
     80 	continue;  /* Not reached. */
     81       DPRINT(("	 %03d, costs ", id));
     82       for (i = 0; i <= reach[id].depth; i++)
     83 	{
     84 	  DPRINT(("["));
     85 	  for (j = 0; j < TRE_M_LAST; j++)
     86 	    {
     87 	      DPRINT(("%2d", reach[id].costs[i][j]));
     88 	      if (j + 1 < TRE_M_LAST)
     89 		DPRINT((","));
     90 	    }
     91 	  DPRINT(("]"));
     92 	  if (i + 1 <= reach[id].depth)
     93 	    DPRINT((", "));
     94 	}
     95       DPRINT(("\n	tags "));
     96       for (i = 0; i < num_tags; i++)
     97 	{
     98 	  DPRINT(("%02d", reach[id].tags[i]));
     99 	  if (i + 1 < num_tags)
    100 	    DPRINT((","));
    101 	}
    102       DPRINT(("\n"));
    103     }
    104   DPRINT(("\n"));
    105 }
    106 #endif /* TRE_DEBUG */
    107 
    108 
    109 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
    110    array.  If `pa' specifies default values, they are taken from
    111    `default_params'. */
    112 inline static void
    113 tre_set_params(tre_tnfa_approx_reach_t *reach,
    114 	       int *pa, regaparams_t default_params)
    115 {
    116   int value;
    117 
    118   /* If depth is increased reset costs and counters to zero for the
    119      new levels. */
    120   value = pa[TRE_PARAM_DEPTH];
    121   assert(value <= TRE_M_MAX_DEPTH);
    122   if (value > reach->depth)
    123     {
    124       int i, j;
    125       for (i = reach->depth + 1; i <= value; i++)
    126 	for (j = 0; j < TRE_M_LAST; j++)
    127 	  reach->costs[i][j] = 0;
    128     }
    129   reach->depth = value;
    130 
    131   /* Set insert cost. */
    132   value = pa[TRE_PARAM_COST_INS];
    133   if (value == TRE_PARAM_DEFAULT)
    134     reach->params.cost_ins = default_params.cost_ins;
    135   else if (value != TRE_PARAM_UNSET)
    136     reach->params.cost_ins = value;
    137 
    138   /* Set delete cost. */
    139   value = pa[TRE_PARAM_COST_DEL];
    140   if (value == TRE_PARAM_DEFAULT)
    141     reach->params.cost_del = default_params.cost_del;
    142   else if (value != TRE_PARAM_UNSET)
    143     reach->params.cost_del = value;
    144 
    145   /* Set substitute cost. */
    146   value = pa[TRE_PARAM_COST_SUBST];
    147   if (value == TRE_PARAM_DEFAULT)
    148     reach->params.cost_subst = default_params.cost_subst;
    149   else
    150     reach->params.cost_subst = value;
    151 
    152   /* Set maximum cost. */
    153   value = pa[TRE_PARAM_COST_MAX];
    154   if (value == TRE_PARAM_DEFAULT)
    155     reach->params.max_cost = default_params.max_cost;
    156   else if (value != TRE_PARAM_UNSET)
    157     reach->params.max_cost = value;
    158 
    159   /* Set maximum inserts. */
    160   value = pa[TRE_PARAM_MAX_INS];
    161   if (value == TRE_PARAM_DEFAULT)
    162     reach->params.max_ins = default_params.max_ins;
    163   else if (value != TRE_PARAM_UNSET)
    164     reach->params.max_ins = value;
    165 
    166   /* Set maximum deletes. */
    167   value = pa[TRE_PARAM_MAX_DEL];
    168   if (value == TRE_PARAM_DEFAULT)
    169     reach->params.max_del = default_params.max_del;
    170   else if (value != TRE_PARAM_UNSET)
    171     reach->params.max_del = value;
    172 
    173   /* Set maximum substitutes. */
    174   value = pa[TRE_PARAM_MAX_SUBST];
    175   if (value == TRE_PARAM_DEFAULT)
    176     reach->params.max_subst = default_params.max_subst;
    177   else if (value != TRE_PARAM_UNSET)
    178     reach->params.max_subst = value;
    179 
    180   /* Set maximum number of errors. */
    181   value = pa[TRE_PARAM_MAX_ERR];
    182   if (value == TRE_PARAM_DEFAULT)
    183     reach->params.max_err = default_params.max_err;
    184   else if (value != TRE_PARAM_UNSET)
    185     reach->params.max_err = value;
    186 }
    187 
    188 reg_errcode_t
    189 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
    190 		    tre_str_type_t type, int *match_tags,
    191 		    regamatch_t *match, regaparams_t default_params,
    192 		    int eflags, int *match_end_ofs)
    193 {
    194   /* State variables required by GET_NEXT_WCHAR. */
    195   tre_char_t prev_c = 0, next_c = 0;
    196   const char *str_byte = string;
    197   int pos = -1;
    198   unsigned int pos_add_next = 1;
    199 #ifdef TRE_WCHAR
    200   const wchar_t *str_wide = string;
    201 #ifdef TRE_MBSTATE
    202   mbstate_t mbstate;
    203 #endif /* !TRE_WCHAR */
    204 #endif /* TRE_WCHAR */
    205   int reg_notbol = eflags & REG_NOTBOL;
    206   int reg_noteol = eflags & REG_NOTEOL;
    207   int reg_newline = tnfa->cflags & REG_NEWLINE;
    208   size_t str_user_end = 0;
    209 
    210   int prev_pos;
    211 
    212   /* Number of tags. */
    213   size_t num_tags;
    214   /* The reach tables. */
    215   tre_tnfa_approx_reach_t *reach, *reach_next;
    216   /* Tag array for temporary use. */
    217   int *tmp_tags;
    218 
    219   /* End offset of best match so far, or -1 if no match found yet. */
    220   int match_eo = -1;
    221   /* Costs of the match. */
    222   int match_costs[TRE_M_LAST];
    223 
    224   /* Space for temporary data required for matching. */
    225   unsigned char *buf;
    226 
    227   size_t i, id;
    228 
    229   reg_errcode_t ret;
    230 
    231   if (!match_tags)
    232     num_tags = 0;
    233   else
    234     num_tags = tnfa->num_tags;
    235 
    236 #ifdef TRE_MBSTATE
    237   memset(&mbstate, '\0', sizeof(mbstate));
    238 #endif /* TRE_MBSTATE */
    239 
    240   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
    241 	  "match_tags %p\n",
    242 	  type, len, eflags,
    243 	  match_tags));
    244   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
    245 	  default_params.max_cost,
    246 	  default_params.cost_ins,
    247 	  default_params.cost_del,
    248 	  default_params.cost_subst));
    249 
    250   /* Allocate memory for temporary data required for matching.	This needs to
    251      be done for every matching operation to be thread safe.  This allocates
    252      everything in a single large block from the stack frame using alloca()
    253      or with malloc() if alloca is unavailable. */
    254   {
    255     unsigned char *buf_cursor;
    256 
    257     /* Ensure that tag_bytes*num_states cannot overflow, and that it don't
    258      * contribute more than 1/8 of SIZE_MAX to total_bytes. */
    259     if (num_tags > SIZE_MAX/(8 * sizeof(*tmp_tags) * tnfa->num_states))
    260       return REG_ESPACE;
    261 
    262     /* Likewise check reach_bytes. */
    263     if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_next)))
    264       return REG_ESPACE;
    265 
    266     /* Space needed for one array of tags. */
    267     size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
    268     /* Space needed for one reach table. */
    269     size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
    270     /* Total space needed. */
    271     size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
    272     /* Add some extra to make sure we can align the pointers.  The multiplier
    273        used here must be equal to the number of ALIGN calls below. */
    274     total_bytes += (sizeof(long) - 1) * 3;
    275 
    276     /* Allocate the memory. */
    277 #ifdef TRE_USE_ALLOCA
    278     buf = alloca(total_bytes);
    279 #else /* !TRE_USE_ALLOCA */
    280     buf = xmalloc((unsigned)total_bytes);
    281 #endif /* !TRE_USE_ALLOCA */
    282     if (!buf)
    283       return REG_ESPACE;
    284     memset(buf, 0, (size_t)total_bytes);
    285 
    286     /* Allocate `tmp_tags' from `buf'. */
    287     tmp_tags = (void *)buf;
    288     buf_cursor = buf + tag_bytes;
    289     buf_cursor += ALIGN(buf_cursor, long);
    290 
    291     /* Allocate `reach' from `buf'. */
    292     reach = (void *)buf_cursor;
    293     buf_cursor += reach_bytes;
    294     buf_cursor += ALIGN(buf_cursor, long);
    295 
    296     /* Allocate `reach_next' from `buf'. */
    297     reach_next = (void *)buf_cursor;
    298     buf_cursor += reach_bytes;
    299     buf_cursor += ALIGN(buf_cursor, long);
    300 
    301     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
    302     for (i = 0; i < tnfa->num_states; i++)
    303       {
    304 	reach[i].tags = (void *)buf_cursor;
    305 	buf_cursor += tag_bytes;
    306 	reach_next[i].tags = (void *)buf_cursor;
    307 	buf_cursor += tag_bytes;
    308       }
    309     assert(buf_cursor <= buf + total_bytes);
    310   }
    311 
    312   for (i = 0; i < TRE_M_LAST; i++)
    313     match_costs[i] = INT_MAX;
    314 
    315   /* Mark the reach arrays empty. */
    316   for (i = 0; i < tnfa->num_states; i++)
    317     reach[i].pos = reach_next[i].pos = -2;
    318 
    319   prev_pos = pos;
    320   GET_NEXT_WCHAR();
    321   pos = 0;
    322 
    323   while (/*CONSTCOND*/(void)1,1)
    324     {
    325       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
    326 
    327       /* Add initial states to `reach_next' if an exact match has not yet
    328 	 been found. */
    329       if (match_costs[TRE_M_COST] > 0)
    330 	{
    331 	  tre_tnfa_transition_t *trans;
    332 	  DPRINT(("  init"));
    333 	  for (trans = tnfa->initial; trans->state; trans++)
    334 	    {
    335 	      int stateid = trans->state_id;
    336 
    337 	      /* If this state is not currently in `reach_next', add it
    338 		 there. */
    339 	      if (reach_next[stateid].pos < pos)
    340 		{
    341 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
    342 		    {
    343 		      /* Assertions failed, don't add this state. */
    344 		      DPRINT((" !%d (assert)", stateid));
    345 		      continue;
    346 		    }
    347 		  DPRINT((" %d", stateid));
    348 		  reach_next[stateid].state = trans->state;
    349 		  reach_next[stateid].pos = pos;
    350 
    351 		  /* Compute tag values after this transition. */
    352 		  for (i = 0; i < num_tags; i++)
    353 		    reach_next[stateid].tags[i] = -1;
    354 
    355 		  if (trans->tags)
    356 		    for (i = 0; trans->tags[i] >= 0; i++)
    357 		      if ((size_t)trans->tags[i] < num_tags)
    358 			reach_next[stateid].tags[trans->tags[i]] = pos;
    359 
    360 		  /* Set the parameters, depth, and costs. */
    361 		  reach_next[stateid].params = default_params;
    362 		  reach_next[stateid].depth = 0;
    363 		  for (i = 0; i < TRE_M_LAST; i++)
    364 		    reach_next[stateid].costs[0][i] = 0;
    365 		  if (trans->params)
    366 		    tre_set_params(&reach_next[stateid], trans->params,
    367 				   default_params);
    368 
    369 		  /* If this is the final state, mark the exact match. */
    370 		  if (trans->state == tnfa->final)
    371 		    {
    372 		      match_eo = pos;
    373 		      for (i = 0; i < num_tags; i++)
    374 			match_tags[i] = reach_next[stateid].tags[i];
    375 		      for (i = 0; i < TRE_M_LAST; i++)
    376 			match_costs[i] = 0;
    377 		    }
    378 		}
    379 	    }
    380 	    DPRINT(("\n"));
    381 	}
    382 
    383 
    384       /* Handle inserts.  This is done by pretending there's an epsilon
    385 	 transition from each state in `reach' back to the same state.
    386 	 We don't need to worry about the final state here; this will never
    387 	 give a better match than what we already have. */
    388       for (id = 0; id < tnfa->num_states; id++)
    389 	{
    390 	  int depth;
    391 	  int cost, cost0;
    392 
    393 	  if (reach[id].pos != prev_pos)
    394 	    {
    395 	      DPRINT(("	 insert: %d not reached\n", id));
    396 	      continue;	 /* Not reached. */
    397 	    }
    398 
    399 	  depth = reach[id].depth;
    400 
    401 	  /* Compute and check cost at current depth. */
    402 	  cost = reach[id].costs[depth][TRE_M_COST];
    403 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
    404 	    cost += reach[id].params.cost_ins;
    405 	  if (cost > reach[id].params.max_cost)
    406 	    continue;  /* Cost too large. */
    407 
    408 	  /* Check number of inserts at current depth. */
    409 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
    410 	      > reach[id].params.max_ins)
    411 	    continue;  /* Too many inserts. */
    412 
    413 	  /* Check total number of errors at current depth. */
    414 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
    415 	      > reach[id].params.max_err)
    416 	    continue;  /* Too many errors. */
    417 
    418 	  /* Compute overall cost. */
    419 	  cost0 = cost;
    420 	  if (depth > 0)
    421 	    {
    422 	      cost0 = reach[id].costs[0][TRE_M_COST];
    423 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
    424 		cost0 += reach[id].params.cost_ins;
    425 	      else
    426 		cost0 += default_params.cost_ins;
    427 	    }
    428 
    429 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
    430 		  reach[id].costs[depth][TRE_M_COST]));
    431 	  if (reach_next[id].pos == pos
    432 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
    433 	    {
    434 	      DPRINT(("lose\n"));
    435 	      continue;
    436 	    }
    437 	  DPRINT(("win\n"));
    438 
    439 	  /* Copy state, position, tags, parameters, and depth. */
    440 	  reach_next[id].state = reach[id].state;
    441 	  reach_next[id].pos = pos;
    442 	  for (i = 0; i < num_tags; i++)
    443 	    reach_next[id].tags[i] = reach[id].tags[i];
    444 	  reach_next[id].params = reach[id].params;
    445 	  reach_next[id].depth = reach[id].depth;
    446 
    447 	  /* Set the costs after this transition. */
    448 	  memcpy(reach_next[id].costs, reach[id].costs,
    449 		 sizeof(reach_next[id].costs[0][0])
    450 		 * TRE_M_LAST * (depth + 1));
    451 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
    452 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
    453 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
    454 	  if (depth > 0)
    455 	    {
    456 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
    457 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
    458 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
    459 	    }
    460 
    461 	}
    462 
    463 
    464       /* Handle deletes.  This is done by traversing through the whole TNFA
    465 	 pretending that all transitions are epsilon transitions, until
    466 	 no more states can be reached with better costs. */
    467       {
    468 	/* XXX - dynamic ringbuffer size */
    469 	tre_tnfa_approx_reach_t *ringbuffer[512];
    470 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
    471 
    472 	deque_start = deque_end = ringbuffer;
    473 
    474 	/* Add all states in `reach_next' to the deque. */
    475 	for (id = 0; id < tnfa->num_states; id++)
    476 	  {
    477 	    if (reach_next[id].pos != pos)
    478 	      continue;
    479 	    *deque_end = &reach_next[id];
    480 	    deque_end++;
    481 	    assert(deque_end != deque_start);
    482 	  }
    483 
    484 	/* Repeat until the deque is empty. */
    485 	while (deque_end != deque_start)
    486 	  {
    487 	    tre_tnfa_approx_reach_t *reach_p;
    488 	    int depth;
    489 	    int cost, cost0;
    490 	    tre_tnfa_transition_t *trans;
    491 
    492 	    /* Pop the first item off the deque. */
    493 	    reach_p = *deque_start;
    494 	    id = reach_p - reach_next;
    495 	    depth = reach_p->depth;
    496 
    497 	    /* Compute cost at current depth. */
    498 	    cost = reach_p->costs[depth][TRE_M_COST];
    499 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
    500 	      cost += reach_p->params.cost_del;
    501 
    502 	    /* Check cost, number of deletes, and total number of errors
    503 	       at current depth. */
    504 	    if (cost > reach_p->params.max_cost
    505 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
    506 		    > reach_p->params.max_del)
    507 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
    508 		    > reach_p->params.max_err))
    509 	      {
    510 		/* Too many errors or cost too large. */
    511 		DPRINT(("  delete: from %03d: cost too large\n", id));
    512 		deque_start++;
    513 		if (deque_start >= (ringbuffer + 512))
    514 		  deque_start = ringbuffer;
    515 		continue;
    516 	      }
    517 
    518 	    /* Compute overall cost. */
    519 	    cost0 = cost;
    520 	    if (depth > 0)
    521 	      {
    522 		cost0 = reach_p->costs[0][TRE_M_COST];
    523 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
    524 		  cost0 += reach_p->params.cost_del;
    525 		else
    526 		  cost0 += default_params.cost_del;
    527 	      }
    528 
    529 	    for (trans = reach_p->state; trans->state; trans++)
    530 	      {
    531 		int dest_id = trans->state_id;
    532 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
    533 			id, dest_id, cost0, reach_p->params.max_cost));
    534 
    535 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
    536 		  {
    537 		    DPRINT(("assertion failed\n"));
    538 		    continue;
    539 		  }
    540 
    541 		/* Compute tag values after this transition. */
    542 		for (i = 0; i < num_tags; i++)
    543 		  tmp_tags[i] = reach_p->tags[i];
    544 		if (trans->tags)
    545 		  for (i = 0; trans->tags[i] >= 0; i++)
    546 		    if ((size_t)trans->tags[i] < num_tags)
    547 		      tmp_tags[trans->tags[i]] = pos;
    548 
    549 		/* If another path has also reached this state, choose the one
    550 		   with the smallest cost or best tags if costs are equal. */
    551 		if (reach_next[dest_id].pos == pos
    552 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
    553 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
    554 			    && (!match_tags
    555 				|| !tre_tag_order(num_tags,
    556 						  tnfa->tag_directions,
    557 						  tmp_tags,
    558 						  reach_next[dest_id].tags)))))
    559 		  {
    560 		    DPRINT(("lose, cost0 %d, have %d\n",
    561 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
    562 		    continue;
    563 		  }
    564 		DPRINT(("win\n"));
    565 
    566 		/* Set state, position, tags, parameters, depth, and costs. */
    567 		reach_next[dest_id].state = trans->state;
    568 		reach_next[dest_id].pos = pos;
    569 		for (i = 0; i < num_tags; i++)
    570 		  reach_next[dest_id].tags[i] = tmp_tags[i];
    571 
    572 		reach_next[dest_id].params = reach_p->params;
    573 		if (trans->params)
    574 		  tre_set_params(&reach_next[dest_id], trans->params,
    575 				 default_params);
    576 
    577 		reach_next[dest_id].depth = reach_p->depth;
    578 		memcpy(&reach_next[dest_id].costs,
    579 		       reach_p->costs,
    580 		       sizeof(reach_p->costs[0][0])
    581 		       * TRE_M_LAST * (depth + 1));
    582 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
    583 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
    584 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
    585 		if (depth > 0)
    586 		  {
    587 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
    588 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
    589 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
    590 		  }
    591 
    592 		if (trans->state == tnfa->final
    593 		    && (match_eo < 0
    594 			|| match_costs[TRE_M_COST] > cost0
    595 			|| (match_costs[TRE_M_COST] == cost0
    596 			    && (num_tags > 0
    597 				&& tmp_tags[0] <= match_tags[0]))))
    598 		  {
    599 		    DPRINT(("	 setting new match at %d, cost %d\n",
    600 			    pos, cost0));
    601 		    match_eo = pos;
    602 		    memcpy(match_costs, reach_next[dest_id].costs[0],
    603 			   sizeof(match_costs[0]) * TRE_M_LAST);
    604 		    for (i = 0; i < num_tags; i++)
    605 		      match_tags[i] = tmp_tags[i];
    606 		  }
    607 
    608 		/* Add to the end of the deque. */
    609 		*deque_end = &reach_next[dest_id];
    610 		deque_end++;
    611 		if (deque_end >= (ringbuffer + 512))
    612 		  deque_end = ringbuffer;
    613 		assert(deque_end != deque_start);
    614 	      }
    615 	    deque_start++;
    616 	    if (deque_start >= (ringbuffer + 512))
    617 	      deque_start = ringbuffer;
    618 	  }
    619 
    620       }
    621 
    622 #ifdef TRE_DEBUG
    623       tre_print_reach(tnfa, reach_next, pos, num_tags);
    624 #endif /* TRE_DEBUG */
    625 
    626       /* Check for end of string. */
    627       if (len < 0)
    628 	{
    629 	  if (type == STR_USER)
    630 	    {
    631 	      if (str_user_end)
    632 		break;
    633 	    }
    634 	  else if (next_c == L'\0')
    635 	    break;
    636 	}
    637       else
    638 	{
    639 	  if (pos >= len)
    640 	    break;
    641 	}
    642 
    643       prev_pos = pos;
    644       GET_NEXT_WCHAR();
    645 
    646       /* Swap `reach' and `reach_next'. */
    647       {
    648 	tre_tnfa_approx_reach_t *tmp;
    649 	tmp = reach;
    650 	reach = reach_next;
    651 	reach_next = tmp;
    652       }
    653 
    654       /* Handle exact matches and substitutions. */
    655       for (id = 0; id < tnfa->num_states; id++)
    656 	{
    657 	  tre_tnfa_transition_t *trans;
    658 
    659 	  if (reach[id].pos < prev_pos)
    660 	    continue;  /* Not reached. */
    661 	  for (trans = reach[id].state; trans->state; trans++)
    662 	    {
    663 	      int dest_id;
    664 	      int depth;
    665 	      int cost, cost0, err;
    666 
    667 	      if (trans->assertions
    668 		  && (CHECK_ASSERTIONS(trans->assertions)
    669 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
    670 		{
    671 		  DPRINT(("  exact,  from %d: assert failed\n", id));
    672 		  continue;
    673 		}
    674 
    675 	      depth = reach[id].depth;
    676 	      dest_id = trans->state_id;
    677 
    678 	      cost = reach[id].costs[depth][TRE_M_COST];
    679 	      cost0 = reach[id].costs[0][TRE_M_COST];
    680 	      err = 0;
    681 
    682 	      if (trans->code_min > (tre_cint_t)prev_c
    683 		  || trans->code_max < (tre_cint_t)prev_c)
    684 		{
    685 		  /* Handle substitutes.  The required character was not in
    686 		     the string, so match it in place of whatever was supposed
    687 		     to be there and increase costs accordingly. */
    688 		  err = 1;
    689 
    690 		  /* Compute and check cost at current depth. */
    691 		  cost = reach[id].costs[depth][TRE_M_COST];
    692 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
    693 		    cost += reach[id].params.cost_subst;
    694 		  if (cost > reach[id].params.max_cost)
    695 		    continue; /* Cost too large. */
    696 
    697 		  /* Check number of substitutes at current depth. */
    698 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
    699 		      > reach[id].params.max_subst)
    700 		    continue; /* Too many substitutes. */
    701 
    702 		  /* Check total number of errors at current depth. */
    703 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
    704 		      > reach[id].params.max_err)
    705 		    continue; /* Too many errors. */
    706 
    707 		  /* Compute overall cost. */
    708 		  cost0 = cost;
    709 		  if (depth > 0)
    710 		    {
    711 		      cost0 = reach[id].costs[0][TRE_M_COST];
    712 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
    713 			cost0 += reach[id].params.cost_subst;
    714 		      else
    715 			cost0 += default_params.cost_subst;
    716 		    }
    717 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
    718 			  id, dest_id, cost0));
    719 		}
    720 	      else
    721 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
    722 			id, dest_id, cost0));
    723 
    724 	      /* Compute tag values after this transition. */
    725 	      for (i = 0; i < num_tags; i++)
    726 		tmp_tags[i] = reach[id].tags[i];
    727 	      if (trans->tags)
    728 		for (i = 0; trans->tags[i] >= 0; i++)
    729 		  if ((size_t)trans->tags[i] < num_tags)
    730 		    tmp_tags[trans->tags[i]] = pos;
    731 
    732 	      /* If another path has also reached this state, choose the
    733 		 one with the smallest cost or best tags if costs are equal. */
    734 	      if (reach_next[dest_id].pos == pos
    735 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
    736 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
    737 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
    738 					    tmp_tags,
    739 					    reach_next[dest_id].tags))))
    740 		{
    741 		  DPRINT(("lose\n"));
    742 		  continue;
    743 		}
    744 	      DPRINT(("win %d %d\n",
    745 		      reach_next[dest_id].pos,
    746 		      reach_next[dest_id].costs[0][TRE_M_COST]));
    747 
    748 	      /* Set state, position, tags, and depth. */
    749 	      reach_next[dest_id].state = trans->state;
    750 	      reach_next[dest_id].pos = pos;
    751 	      for (i = 0; i < num_tags; i++)
    752 		reach_next[dest_id].tags[i] = tmp_tags[i];
    753 	      reach_next[dest_id].depth = reach[id].depth;
    754 
    755 	      /* Set parameters. */
    756 	      reach_next[dest_id].params = reach[id].params;
    757 	      if (trans->params)
    758 		tre_set_params(&reach_next[dest_id], trans->params,
    759 			       default_params);
    760 
    761 	      /* Set the costs after this transition. */
    762 	      memcpy(&reach_next[dest_id].costs,
    763 		     reach[id].costs,
    764 		     sizeof(reach[id].costs[0][0])
    765 		     * TRE_M_LAST * (depth + 1));
    766 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
    767 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
    768 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
    769 	      if (depth > 0)
    770 		{
    771 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
    772 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
    773 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
    774 		}
    775 
    776 	      if (trans->state == tnfa->final
    777 		  && (match_eo < 0
    778 		      || cost0 < match_costs[TRE_M_COST]
    779 		      || (cost0 == match_costs[TRE_M_COST]
    780 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
    781 		{
    782 		  DPRINT(("    setting new match at %d, cost %d\n",
    783 			  pos, cost0));
    784 		  match_eo = pos;
    785 		  for (i = 0; i < TRE_M_LAST; i++)
    786 		    match_costs[i] = reach_next[dest_id].costs[0][i];
    787 		  for (i = 0; i < num_tags; i++)
    788 		    match_tags[i] = tmp_tags[i];
    789 		}
    790 	    }
    791 	}
    792     }
    793 
    794   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
    795 	  match_costs[TRE_M_COST]));
    796 
    797   match->cost = match_costs[TRE_M_COST];
    798   match->num_ins = match_costs[TRE_M_NUM_INS];
    799   match->num_del = match_costs[TRE_M_NUM_DEL];
    800   match->num_subst = match_costs[TRE_M_NUM_SUBST];
    801   *match_end_ofs = match_eo;
    802 
    803   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
    804 error_exit:
    805 #ifndef TRE_USE_ALLOCA
    806   if (buf)
    807     xfree(buf);
    808 #endif /* !TRE_USE_ALLOCA */
    809   return ret;
    810 }
    811