Home | History | Annotate | Line # | Download | only in lib
tre-match-approx.c revision 1.1.1.1
      1 /*
      2   tre-match-approx.c - TRE approximate regex matching engine
      3 
      4   This software is released under a BSD-style license.
      5   See the file LICENSE for details and copyright.
      6 
      7 */
      8 
      9 #ifdef HAVE_CONFIG_H
     10 #include <config.h>
     11 #endif /* HAVE_CONFIG_H */
     12 
     13 /* AIX requires this to be the first thing in the file.	 */
     14 #ifdef TRE_USE_ALLOCA
     15 #ifndef __GNUC__
     16 # if HAVE_ALLOCA_H
     17 #  include <alloca.h>
     18 # else
     19 #  ifdef _AIX
     20  #pragma alloca
     21 #  else
     22 #   ifndef alloca /* predefined by HP cc +Olibcalls */
     23 char *alloca ();
     24 #   endif
     25 #  endif
     26 # endif
     27 #endif
     28 #endif /* TRE_USE_ALLOCA */
     29 
     30 #define __USE_STRING_INLINES
     31 #undef __NO_INLINE__
     32 
     33 #include <assert.h>
     34 #include <stdlib.h>
     35 #include <string.h>
     36 #include <limits.h>
     37 #ifdef HAVE_WCHAR_H
     38 #include <wchar.h>
     39 #endif /* HAVE_WCHAR_H */
     40 #ifdef HAVE_WCTYPE_H
     41 #include <wctype.h>
     42 #endif /* HAVE_WCTYPE_H */
     43 #ifndef TRE_WCHAR
     44 #include <ctype.h>
     45 #endif /* !TRE_WCHAR */
     46 #ifdef HAVE_MALLOC_H
     47 #include <malloc.h>
     48 #endif /* HAVE_MALLOC_H */
     49 
     50 #include "tre-internal.h"
     51 #include "tre-match-utils.h"
     52 #include "tre.h"
     53 #include "xmalloc.h"
     54 
     55 #define TRE_M_COST	0
     56 #define TRE_M_NUM_INS	1
     57 #define TRE_M_NUM_DEL	2
     58 #define TRE_M_NUM_SUBST 3
     59 #define TRE_M_NUM_ERR	4
     60 #define TRE_M_LAST	5
     61 
     62 #define TRE_M_MAX_DEPTH 3
     63 
     64 typedef struct {
     65   /* State in the TNFA transition table. */
     66   tre_tnfa_transition_t *state;
     67   /* Position in input string. */
     68   int pos;
     69   /* Tag values. */
     70   int *tags;
     71   /* Matching parameters. */
     72   regaparams_t params;
     73   /* Nesting depth of parameters.  This is used as an index in
     74      the `costs' array. */
     75   int depth;
     76   /* Costs and counter values for different parameter nesting depths. */
     77   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
     78 } tre_tnfa_approx_reach_t;
     79 
     80 
     81 #ifdef TRE_DEBUG
     82 /* Prints the `reach' array in a readable fashion with DPRINT. */
     83 static void
     84 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
     85 		int pos, int num_tags)
     86 {
     87   int id;
     88 
     89   /* Print each state on one line. */
     90   DPRINT(("  reach:\n"));
     91   for (id = 0; id < tnfa->num_states; id++)
     92     {
     93       int i, j;
     94       if (reach[id].pos < pos)
     95 	continue;  /* Not reached. */
     96       DPRINT(("	 %03d, costs ", id));
     97       for (i = 0; i <= reach[id].depth; i++)
     98 	{
     99 	  DPRINT(("["));
    100 	  for (j = 0; j < TRE_M_LAST; j++)
    101 	    {
    102 	      DPRINT(("%2d", reach[id].costs[i][j]));
    103 	      if (j + 1 < TRE_M_LAST)
    104 		DPRINT((","));
    105 	    }
    106 	  DPRINT(("]"));
    107 	  if (i + 1 <= reach[id].depth)
    108 	    DPRINT((", "));
    109 	}
    110       DPRINT(("\n	tags "));
    111       for (i = 0; i < num_tags; i++)
    112 	{
    113 	  DPRINT(("%02d", reach[id].tags[i]));
    114 	  if (i + 1 < num_tags)
    115 	    DPRINT((","));
    116 	}
    117       DPRINT(("\n"));
    118     }
    119   DPRINT(("\n"));
    120 }
    121 #endif /* TRE_DEBUG */
    122 
    123 
    124 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
    125    array.  If `pa' specifies default values, they are taken from
    126    `default_params'. */
    127 inline static void
    128 tre_set_params(tre_tnfa_approx_reach_t *reach,
    129 	       int *pa, regaparams_t default_params)
    130 {
    131   int value;
    132 
    133   /* If depth is increased reset costs and counters to zero for the
    134      new levels. */
    135   value = pa[TRE_PARAM_DEPTH];
    136   assert(value <= TRE_M_MAX_DEPTH);
    137   if (value > reach->depth)
    138     {
    139       int i, j;
    140       for (i = reach->depth + 1; i <= value; i++)
    141 	for (j = 0; j < TRE_M_LAST; j++)
    142 	  reach->costs[i][j] = 0;
    143     }
    144   reach->depth = value;
    145 
    146   /* Set insert cost. */
    147   value = pa[TRE_PARAM_COST_INS];
    148   if (value == TRE_PARAM_DEFAULT)
    149     reach->params.cost_ins = default_params.cost_ins;
    150   else if (value != TRE_PARAM_UNSET)
    151     reach->params.cost_ins = value;
    152 
    153   /* Set delete cost. */
    154   value = pa[TRE_PARAM_COST_DEL];
    155   if (value == TRE_PARAM_DEFAULT)
    156     reach->params.cost_del = default_params.cost_del;
    157   else if (value != TRE_PARAM_UNSET)
    158     reach->params.cost_del = value;
    159 
    160   /* Set substitute cost. */
    161   value = pa[TRE_PARAM_COST_SUBST];
    162   if (value == TRE_PARAM_DEFAULT)
    163     reach->params.cost_subst = default_params.cost_subst;
    164   else
    165     reach->params.cost_subst = value;
    166 
    167   /* Set maximum cost. */
    168   value = pa[TRE_PARAM_COST_MAX];
    169   if (value == TRE_PARAM_DEFAULT)
    170     reach->params.max_cost = default_params.max_cost;
    171   else if (value != TRE_PARAM_UNSET)
    172     reach->params.max_cost = value;
    173 
    174   /* Set maximum inserts. */
    175   value = pa[TRE_PARAM_MAX_INS];
    176   if (value == TRE_PARAM_DEFAULT)
    177     reach->params.max_ins = default_params.max_ins;
    178   else if (value != TRE_PARAM_UNSET)
    179     reach->params.max_ins = value;
    180 
    181   /* Set maximum deletes. */
    182   value = pa[TRE_PARAM_MAX_DEL];
    183   if (value == TRE_PARAM_DEFAULT)
    184     reach->params.max_del = default_params.max_del;
    185   else if (value != TRE_PARAM_UNSET)
    186     reach->params.max_del = value;
    187 
    188   /* Set maximum substitutes. */
    189   value = pa[TRE_PARAM_MAX_SUBST];
    190   if (value == TRE_PARAM_DEFAULT)
    191     reach->params.max_subst = default_params.max_subst;
    192   else if (value != TRE_PARAM_UNSET)
    193     reach->params.max_subst = value;
    194 
    195   /* Set maximum number of errors. */
    196   value = pa[TRE_PARAM_MAX_ERR];
    197   if (value == TRE_PARAM_DEFAULT)
    198     reach->params.max_err = default_params.max_err;
    199   else if (value != TRE_PARAM_UNSET)
    200     reach->params.max_err = value;
    201 }
    202 
    203 reg_errcode_t
    204 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
    205 		    tre_str_type_t type, int *match_tags,
    206 		    regamatch_t *match, regaparams_t default_params,
    207 		    int eflags, int *match_end_ofs)
    208 {
    209   /* State variables required by GET_NEXT_WCHAR. */
    210   tre_char_t prev_c = 0, next_c = 0;
    211   const char *str_byte = string;
    212   int pos = -1;
    213   unsigned int pos_add_next = 1;
    214 #ifdef TRE_WCHAR
    215   const wchar_t *str_wide = string;
    216 #ifdef TRE_MBSTATE
    217   mbstate_t mbstate;
    218 #endif /* !TRE_WCHAR */
    219 #endif /* TRE_WCHAR */
    220   int reg_notbol = eflags & REG_NOTBOL;
    221   int reg_noteol = eflags & REG_NOTEOL;
    222   int reg_newline = tnfa->cflags & REG_NEWLINE;
    223   int str_user_end = 0;
    224 
    225   int prev_pos;
    226 
    227   /* Number of tags. */
    228   int num_tags;
    229   /* The reach tables. */
    230   tre_tnfa_approx_reach_t *reach, *reach_next;
    231   /* Tag array for temporary use. */
    232   int *tmp_tags;
    233 
    234   /* End offset of best match so far, or -1 if no match found yet. */
    235   int match_eo = -1;
    236   /* Costs of the match. */
    237   int match_costs[TRE_M_LAST];
    238 
    239   /* Space for temporary data required for matching. */
    240   unsigned char *buf;
    241 
    242   int i, id;
    243 
    244   if (!match_tags)
    245     num_tags = 0;
    246   else
    247     num_tags = tnfa->num_tags;
    248 
    249 #ifdef TRE_MBSTATE
    250   memset(&mbstate, '\0', sizeof(mbstate));
    251 #endif /* TRE_MBSTATE */
    252 
    253   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
    254 	  "match_tags %p\n",
    255 	  type, len, eflags,
    256 	  match_tags));
    257   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
    258 	  default_params.max_cost,
    259 	  default_params.cost_ins,
    260 	  default_params.cost_del,
    261 	  default_params.cost_subst));
    262 
    263   /* Allocate memory for temporary data required for matching.	This needs to
    264      be done for every matching operation to be thread safe.  This allocates
    265      everything in a single large block from the stack frame using alloca()
    266      or with malloc() if alloca is unavailable. */
    267   {
    268     unsigned char *buf_cursor;
    269     /* Space needed for one array of tags. */
    270     int tag_bytes = sizeof(*tmp_tags) * num_tags;
    271     /* Space needed for one reach table. */
    272     int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
    273     /* Total space needed. */
    274     int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
    275     /* Add some extra to make sure we can align the pointers.  The multiplier
    276        used here must be equal to the number of ALIGN calls below. */
    277     total_bytes += (sizeof(long) - 1) * 3;
    278 
    279     /* Allocate the memory. */
    280 #ifdef TRE_USE_ALLOCA
    281     buf = alloca(total_bytes);
    282 #else /* !TRE_USE_ALLOCA */
    283     buf = xmalloc((unsigned)total_bytes);
    284 #endif /* !TRE_USE_ALLOCA */
    285     if (!buf)
    286       return REG_ESPACE;
    287     memset(buf, 0, (size_t)total_bytes);
    288 
    289     /* Allocate `tmp_tags' from `buf'. */
    290     tmp_tags = (void *)buf;
    291     buf_cursor = buf + tag_bytes;
    292     buf_cursor += ALIGN(buf_cursor, long);
    293 
    294     /* Allocate `reach' from `buf'. */
    295     reach = (void *)buf_cursor;
    296     buf_cursor += reach_bytes;
    297     buf_cursor += ALIGN(buf_cursor, long);
    298 
    299     /* Allocate `reach_next' from `buf'. */
    300     reach_next = (void *)buf_cursor;
    301     buf_cursor += reach_bytes;
    302     buf_cursor += ALIGN(buf_cursor, long);
    303 
    304     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
    305     for (i = 0; i < tnfa->num_states; i++)
    306       {
    307 	reach[i].tags = (void *)buf_cursor;
    308 	buf_cursor += tag_bytes;
    309 	reach_next[i].tags = (void *)buf_cursor;
    310 	buf_cursor += tag_bytes;
    311       }
    312     assert(buf_cursor <= buf + total_bytes);
    313   }
    314 
    315   for (i = 0; i < TRE_M_LAST; i++)
    316     match_costs[i] = INT_MAX;
    317 
    318   /* Mark the reach arrays empty. */
    319   for (i = 0; i < tnfa->num_states; i++)
    320     reach[i].pos = reach_next[i].pos = -2;
    321 
    322   prev_pos = pos;
    323   GET_NEXT_WCHAR();
    324   pos = 0;
    325 
    326   while (/*CONSTCOND*/1)
    327     {
    328       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
    329 
    330       /* Add initial states to `reach_next' if an exact match has not yet
    331 	 been found. */
    332       if (match_costs[TRE_M_COST] > 0)
    333 	{
    334 	  tre_tnfa_transition_t *trans;
    335 	  DPRINT(("  init"));
    336 	  for (trans = tnfa->initial; trans->state; trans++)
    337 	    {
    338 	      int stateid = trans->state_id;
    339 
    340 	      /* If this state is not currently in `reach_next', add it
    341 		 there. */
    342 	      if (reach_next[stateid].pos < pos)
    343 		{
    344 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
    345 		    {
    346 		      /* Assertions failed, don't add this state. */
    347 		      DPRINT((" !%d (assert)", stateid));
    348 		      continue;
    349 		    }
    350 		  DPRINT((" %d", stateid));
    351 		  reach_next[stateid].state = trans->state;
    352 		  reach_next[stateid].pos = pos;
    353 
    354 		  /* Compute tag values after this transition. */
    355 		  for (i = 0; i < num_tags; i++)
    356 		    reach_next[stateid].tags[i] = -1;
    357 
    358 		  if (trans->tags)
    359 		    for (i = 0; trans->tags[i] >= 0; i++)
    360 		      if (trans->tags[i] < num_tags)
    361 			reach_next[stateid].tags[trans->tags[i]] = pos;
    362 
    363 		  /* Set the parameters, depth, and costs. */
    364 		  reach_next[stateid].params = default_params;
    365 		  reach_next[stateid].depth = 0;
    366 		  for (i = 0; i < TRE_M_LAST; i++)
    367 		    reach_next[stateid].costs[0][i] = 0;
    368 		  if (trans->params)
    369 		    tre_set_params(&reach_next[stateid], trans->params,
    370 				   default_params);
    371 
    372 		  /* If this is the final state, mark the exact match. */
    373 		  if (trans->state == tnfa->final)
    374 		    {
    375 		      match_eo = pos;
    376 		      for (i = 0; i < num_tags; i++)
    377 			match_tags[i] = reach_next[stateid].tags[i];
    378 		      for (i = 0; i < TRE_M_LAST; i++)
    379 			match_costs[i] = 0;
    380 		    }
    381 		}
    382 	    }
    383 	    DPRINT(("\n"));
    384 	}
    385 
    386 
    387       /* Handle inserts.  This is done by pretending there's an epsilon
    388 	 transition from each state in `reach' back to the same state.
    389 	 We don't need to worry about the final state here; this will never
    390 	 give a better match than what we already have. */
    391       for (id = 0; id < tnfa->num_states; id++)
    392 	{
    393 	  int depth;
    394 	  int cost, cost0;
    395 
    396 	  if (reach[id].pos != prev_pos)
    397 	    {
    398 	      DPRINT(("	 insert: %d not reached\n", id));
    399 	      continue;	 /* Not reached. */
    400 	    }
    401 
    402 	  depth = reach[id].depth;
    403 
    404 	  /* Compute and check cost at current depth. */
    405 	  cost = reach[id].costs[depth][TRE_M_COST];
    406 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
    407 	    cost += reach[id].params.cost_ins;
    408 	  if (cost > reach[id].params.max_cost)
    409 	    continue;  /* Cost too large. */
    410 
    411 	  /* Check number of inserts at current depth. */
    412 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
    413 	      > reach[id].params.max_ins)
    414 	    continue;  /* Too many inserts. */
    415 
    416 	  /* Check total number of errors at current depth. */
    417 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
    418 	      > reach[id].params.max_err)
    419 	    continue;  /* Too many errors. */
    420 
    421 	  /* Compute overall cost. */
    422 	  cost0 = cost;
    423 	  if (depth > 0)
    424 	    {
    425 	      cost0 = reach[id].costs[0][TRE_M_COST];
    426 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
    427 		cost0 += reach[id].params.cost_ins;
    428 	      else
    429 		cost0 += default_params.cost_ins;
    430 	    }
    431 
    432 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
    433 		  reach[id].costs[depth][TRE_M_COST]));
    434 	  if (reach_next[id].pos == pos
    435 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
    436 	    {
    437 	      DPRINT(("lose\n"));
    438 	      continue;
    439 	    }
    440 	  DPRINT(("win\n"));
    441 
    442 	  /* Copy state, position, tags, parameters, and depth. */
    443 	  reach_next[id].state = reach[id].state;
    444 	  reach_next[id].pos = pos;
    445 	  for (i = 0; i < num_tags; i++)
    446 	    reach_next[id].tags[i] = reach[id].tags[i];
    447 	  reach_next[id].params = reach[id].params;
    448 	  reach_next[id].depth = reach[id].depth;
    449 
    450 	  /* Set the costs after this transition. */
    451 	  memcpy(reach_next[id].costs, reach[id].costs,
    452 		 sizeof(reach_next[id].costs[0][0])
    453 		 * TRE_M_LAST * (depth + 1));
    454 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
    455 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
    456 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
    457 	  if (depth > 0)
    458 	    {
    459 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
    460 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
    461 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
    462 	    }
    463 
    464 	}
    465 
    466 
    467       /* Handle deletes.  This is done by traversing through the whole TNFA
    468 	 pretending that all transitions are epsilon transitions, until
    469 	 no more states can be reached with better costs. */
    470       {
    471 	/* XXX - dynamic ringbuffer size */
    472 	tre_tnfa_approx_reach_t *ringbuffer[512];
    473 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
    474 
    475 	deque_start = deque_end = ringbuffer;
    476 
    477 	/* Add all states in `reach_next' to the deque. */
    478 	for (id = 0; id < tnfa->num_states; id++)
    479 	  {
    480 	    if (reach_next[id].pos != pos)
    481 	      continue;
    482 	    *deque_end = &reach_next[id];
    483 	    deque_end++;
    484 	    assert(deque_end != deque_start);
    485 	  }
    486 
    487 	/* Repeat until the deque is empty. */
    488 	while (deque_end != deque_start)
    489 	  {
    490 	    tre_tnfa_approx_reach_t *reach_p;
    491 	    int depth;
    492 	    int cost, cost0;
    493 	    tre_tnfa_transition_t *trans;
    494 
    495 	    /* Pop the first item off the deque. */
    496 	    reach_p = *deque_start;
    497 	    id = reach_p - reach_next;
    498 	    depth = reach_p->depth;
    499 
    500 	    /* Compute cost at current depth. */
    501 	    cost = reach_p->costs[depth][TRE_M_COST];
    502 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
    503 	      cost += reach_p->params.cost_del;
    504 
    505 	    /* Check cost, number of deletes, and total number of errors
    506 	       at current depth. */
    507 	    if (cost > reach_p->params.max_cost
    508 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
    509 		    > reach_p->params.max_del)
    510 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
    511 		    > reach_p->params.max_err))
    512 	      {
    513 		/* Too many errors or cost too large. */
    514 		DPRINT(("  delete: from %03d: cost too large\n", id));
    515 		deque_start++;
    516 		if (deque_start >= (ringbuffer + 512))
    517 		  deque_start = ringbuffer;
    518 		continue;
    519 	      }
    520 
    521 	    /* Compute overall cost. */
    522 	    cost0 = cost;
    523 	    if (depth > 0)
    524 	      {
    525 		cost0 = reach_p->costs[0][TRE_M_COST];
    526 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
    527 		  cost0 += reach_p->params.cost_del;
    528 		else
    529 		  cost0 += default_params.cost_del;
    530 	      }
    531 
    532 	    for (trans = reach_p->state; trans->state; trans++)
    533 	      {
    534 		int dest_id = trans->state_id;
    535 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
    536 			id, dest_id, cost0, reach_p->params.max_cost));
    537 
    538 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
    539 		  {
    540 		    DPRINT(("assertion failed\n"));
    541 		    continue;
    542 		  }
    543 
    544 		/* Compute tag values after this transition. */
    545 		for (i = 0; i < num_tags; i++)
    546 		  tmp_tags[i] = reach_p->tags[i];
    547 		if (trans->tags)
    548 		  for (i = 0; trans->tags[i] >= 0; i++)
    549 		    if (trans->tags[i] < num_tags)
    550 		      tmp_tags[trans->tags[i]] = pos;
    551 
    552 		/* If another path has also reached this state, choose the one
    553 		   with the smallest cost or best tags if costs are equal. */
    554 		if (reach_next[dest_id].pos == pos
    555 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
    556 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
    557 			    && (!match_tags
    558 				|| !tre_tag_order(num_tags,
    559 						  tnfa->tag_directions,
    560 						  tmp_tags,
    561 						  reach_next[dest_id].tags)))))
    562 		  {
    563 		    DPRINT(("lose, cost0 %d, have %d\n",
    564 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
    565 		    continue;
    566 		  }
    567 		DPRINT(("win\n"));
    568 
    569 		/* Set state, position, tags, parameters, depth, and costs. */
    570 		reach_next[dest_id].state = trans->state;
    571 		reach_next[dest_id].pos = pos;
    572 		for (i = 0; i < num_tags; i++)
    573 		  reach_next[dest_id].tags[i] = tmp_tags[i];
    574 
    575 		reach_next[dest_id].params = reach_p->params;
    576 		if (trans->params)
    577 		  tre_set_params(&reach_next[dest_id], trans->params,
    578 				 default_params);
    579 
    580 		reach_next[dest_id].depth = reach_p->depth;
    581 		memcpy(&reach_next[dest_id].costs,
    582 		       reach_p->costs,
    583 		       sizeof(reach_p->costs[0][0])
    584 		       * TRE_M_LAST * (depth + 1));
    585 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
    586 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
    587 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
    588 		if (depth > 0)
    589 		  {
    590 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
    591 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
    592 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
    593 		  }
    594 
    595 		if (trans->state == tnfa->final
    596 		    && (match_eo < 0
    597 			|| match_costs[TRE_M_COST] > cost0
    598 			|| (match_costs[TRE_M_COST] == cost0
    599 			    && (num_tags > 0
    600 				&& tmp_tags[0] <= match_tags[0]))))
    601 		  {
    602 		    DPRINT(("	 setting new match at %d, cost %d\n",
    603 			    pos, cost0));
    604 		    match_eo = pos;
    605 		    memcpy(match_costs, reach_next[dest_id].costs[0],
    606 			   sizeof(match_costs[0]) * TRE_M_LAST);
    607 		    for (i = 0; i < num_tags; i++)
    608 		      match_tags[i] = tmp_tags[i];
    609 		  }
    610 
    611 		/* Add to the end of the deque. */
    612 		*deque_end = &reach_next[dest_id];
    613 		deque_end++;
    614 		if (deque_end >= (ringbuffer + 512))
    615 		  deque_end = ringbuffer;
    616 		assert(deque_end != deque_start);
    617 	      }
    618 	    deque_start++;
    619 	    if (deque_start >= (ringbuffer + 512))
    620 	      deque_start = ringbuffer;
    621 	  }
    622 
    623       }
    624 
    625 #ifdef TRE_DEBUG
    626       tre_print_reach(tnfa, reach_next, pos, num_tags);
    627 #endif /* TRE_DEBUG */
    628 
    629       /* Check for end of string. */
    630       if (len < 0)
    631 	{
    632 	  if (type == STR_USER)
    633 	    {
    634 	      if (str_user_end)
    635 		break;
    636 	    }
    637 	  else if (next_c == L'\0')
    638 	    break;
    639 	}
    640       else
    641 	{
    642 	  if (pos >= len)
    643 	    break;
    644 	}
    645 
    646       prev_pos = pos;
    647       GET_NEXT_WCHAR();
    648 
    649       /* Swap `reach' and `reach_next'. */
    650       {
    651 	tre_tnfa_approx_reach_t *tmp;
    652 	tmp = reach;
    653 	reach = reach_next;
    654 	reach_next = tmp;
    655       }
    656 
    657       /* Handle exact matches and substitutions. */
    658       for (id = 0; id < tnfa->num_states; id++)
    659 	{
    660 	  tre_tnfa_transition_t *trans;
    661 
    662 	  if (reach[id].pos < prev_pos)
    663 	    continue;  /* Not reached. */
    664 	  for (trans = reach[id].state; trans->state; trans++)
    665 	    {
    666 	      int dest_id;
    667 	      int depth;
    668 	      int cost, cost0, err;
    669 
    670 	      if (trans->assertions
    671 		  && (CHECK_ASSERTIONS(trans->assertions)
    672 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
    673 		{
    674 		  DPRINT(("  exact,  from %d: assert failed\n", id));
    675 		  continue;
    676 		}
    677 
    678 	      depth = reach[id].depth;
    679 	      dest_id = trans->state_id;
    680 
    681 	      cost = reach[id].costs[depth][TRE_M_COST];
    682 	      cost0 = reach[id].costs[0][TRE_M_COST];
    683 	      err = 0;
    684 
    685 	      if (trans->code_min > (tre_cint_t)prev_c
    686 		  || trans->code_max < (tre_cint_t)prev_c)
    687 		{
    688 		  /* Handle substitutes.  The required character was not in
    689 		     the string, so match it in place of whatever was supposed
    690 		     to be there and increase costs accordingly. */
    691 		  err = 1;
    692 
    693 		  /* Compute and check cost at current depth. */
    694 		  cost = reach[id].costs[depth][TRE_M_COST];
    695 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
    696 		    cost += reach[id].params.cost_subst;
    697 		  if (cost > reach[id].params.max_cost)
    698 		    continue; /* Cost too large. */
    699 
    700 		  /* Check number of substitutes at current depth. */
    701 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
    702 		      > reach[id].params.max_subst)
    703 		    continue; /* Too many substitutes. */
    704 
    705 		  /* Check total number of errors at current depth. */
    706 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
    707 		      > reach[id].params.max_err)
    708 		    continue; /* Too many errors. */
    709 
    710 		  /* Compute overall cost. */
    711 		  cost0 = cost;
    712 		  if (depth > 0)
    713 		    {
    714 		      cost0 = reach[id].costs[0][TRE_M_COST];
    715 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
    716 			cost0 += reach[id].params.cost_subst;
    717 		      else
    718 			cost0 += default_params.cost_subst;
    719 		    }
    720 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
    721 			  id, dest_id, cost0));
    722 		}
    723 	      else
    724 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
    725 			id, dest_id, cost0));
    726 
    727 	      /* Compute tag values after this transition. */
    728 	      for (i = 0; i < num_tags; i++)
    729 		tmp_tags[i] = reach[id].tags[i];
    730 	      if (trans->tags)
    731 		for (i = 0; trans->tags[i] >= 0; i++)
    732 		  if (trans->tags[i] < num_tags)
    733 		    tmp_tags[trans->tags[i]] = pos;
    734 
    735 	      /* If another path has also reached this state, choose the
    736 		 one with the smallest cost or best tags if costs are equal. */
    737 	      if (reach_next[dest_id].pos == pos
    738 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
    739 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
    740 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
    741 					    tmp_tags,
    742 					    reach_next[dest_id].tags))))
    743 		{
    744 		  DPRINT(("lose\n"));
    745 		  continue;
    746 		}
    747 	      DPRINT(("win %d %d\n",
    748 		      reach_next[dest_id].pos,
    749 		      reach_next[dest_id].costs[0][TRE_M_COST]));
    750 
    751 	      /* Set state, position, tags, and depth. */
    752 	      reach_next[dest_id].state = trans->state;
    753 	      reach_next[dest_id].pos = pos;
    754 	      for (i = 0; i < num_tags; i++)
    755 		reach_next[dest_id].tags[i] = tmp_tags[i];
    756 	      reach_next[dest_id].depth = reach[id].depth;
    757 
    758 	      /* Set parameters. */
    759 	      reach_next[dest_id].params = reach[id].params;
    760 	      if (trans->params)
    761 		tre_set_params(&reach_next[dest_id], trans->params,
    762 			       default_params);
    763 
    764 	      /* Set the costs after this transition. */
    765 		memcpy(&reach_next[dest_id].costs,
    766 		       reach[id].costs,
    767 		       sizeof(reach[id].costs[0][0])
    768 		       * TRE_M_LAST * (depth + 1));
    769 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
    770 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
    771 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
    772 	      if (depth > 0)
    773 		{
    774 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
    775 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
    776 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
    777 		}
    778 
    779 	      if (trans->state == tnfa->final
    780 		  && (match_eo < 0
    781 		      || cost0 < match_costs[TRE_M_COST]
    782 		      || (cost0 == match_costs[TRE_M_COST]
    783 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
    784 		{
    785 		  DPRINT(("    setting new match at %d, cost %d\n",
    786 			  pos, cost0));
    787 		  match_eo = pos;
    788 		  for (i = 0; i < TRE_M_LAST; i++)
    789 		    match_costs[i] = reach_next[dest_id].costs[0][i];
    790 		  for (i = 0; i < num_tags; i++)
    791 		    match_tags[i] = tmp_tags[i];
    792 		}
    793 	    }
    794 	}
    795     }
    796 
    797   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
    798 	  match_costs[TRE_M_COST]));
    799 
    800 #ifndef TRE_USE_ALLOCA
    801   if (buf)
    802     xfree(buf);
    803 #endif /* !TRE_USE_ALLOCA */
    804 
    805   match->cost = match_costs[TRE_M_COST];
    806   match->num_ins = match_costs[TRE_M_NUM_INS];
    807   match->num_del = match_costs[TRE_M_NUM_DEL];
    808   match->num_subst = match_costs[TRE_M_NUM_SUBST];
    809   *match_end_ofs = match_eo;
    810 
    811   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
    812 }
    813