Home | History | Annotate | Line # | Download | only in lib
tre-match-approx.c revision 1.1.1.1.6.1
      1 /*
      2   tre-match-approx.c - TRE approximate regex matching engine
      3 
      4   This software is released under a BSD-style license.
      5   See the file LICENSE for details and copyright.
      6 
      7 */
      8 #ifdef HAVE_CONFIG_H
      9 #include <config.h>
     10 #endif /* HAVE_CONFIG_H */
     11 
     12 #include "tre-alloca.h"
     13 
     14 #define __USE_STRING_INLINES
     15 #undef __NO_INLINE__
     16 
     17 #include <assert.h>
     18 #include <stdlib.h>
     19 #include <string.h>
     20 #include <limits.h>
     21 #ifdef HAVE_WCHAR_H
     22 #include <wchar.h>
     23 #endif /* HAVE_WCHAR_H */
     24 #ifdef HAVE_WCTYPE_H
     25 #include <wctype.h>
     26 #endif /* HAVE_WCTYPE_H */
     27 #ifndef TRE_WCHAR
     28 #include <ctype.h>
     29 #endif /* !TRE_WCHAR */
     30 #ifdef HAVE_MALLOC_H
     31 #include <malloc.h>
     32 #endif /* HAVE_MALLOC_H */
     33 
     34 #include "tre-internal.h"
     35 #include "tre-match-utils.h"
     36 #include "tre.h"
     37 #include "xmalloc.h"
     38 
     39 #define TRE_M_COST	0
     40 #define TRE_M_NUM_INS	1
     41 #define TRE_M_NUM_DEL	2
     42 #define TRE_M_NUM_SUBST 3
     43 #define TRE_M_NUM_ERR	4
     44 #define TRE_M_LAST	5
     45 
     46 #define TRE_M_MAX_DEPTH 3
     47 
     48 typedef struct {
     49   /* State in the TNFA transition table. */
     50   tre_tnfa_transition_t *state;
     51   /* Position in input string. */
     52   int pos;
     53   /* Tag values. */
     54   int *tags;
     55   /* Matching parameters. */
     56   regaparams_t params;
     57   /* Nesting depth of parameters.  This is used as an index in
     58      the `costs' array. */
     59   int depth;
     60   /* Costs and counter values for different parameter nesting depths. */
     61   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
     62 } tre_tnfa_approx_reach_t;
     63 
     64 
     65 #ifdef TRE_DEBUG
     66 /* Prints the `reach' array in a readable fashion with DPRINT. */
     67 static void
     68 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
     69 		int pos, size_t num_tags)
     70 {
     71   int id;
     72 
     73   /* Print each state on one line. */
     74   DPRINT(("  reach:\n"));
     75   for (id = 0; id < tnfa->num_states; id++)
     76     {
     77       int i, j;
     78       if (reach[id].pos < pos)
     79 	continue;  /* Not reached. */
     80       DPRINT(("	 %03d, costs ", id));
     81       for (i = 0; i <= reach[id].depth; i++)
     82 	{
     83 	  DPRINT(("["));
     84 	  for (j = 0; j < TRE_M_LAST; j++)
     85 	    {
     86 	      DPRINT(("%2d", reach[id].costs[i][j]));
     87 	      if (j + 1 < TRE_M_LAST)
     88 		DPRINT((","));
     89 	    }
     90 	  DPRINT(("]"));
     91 	  if (i + 1 <= reach[id].depth)
     92 	    DPRINT((", "));
     93 	}
     94       DPRINT(("\n	tags "));
     95       for (i = 0; i < num_tags; i++)
     96 	{
     97 	  DPRINT(("%02d", reach[id].tags[i]));
     98 	  if (i + 1 < num_tags)
     99 	    DPRINT((","));
    100 	}
    101       DPRINT(("\n"));
    102     }
    103   DPRINT(("\n"));
    104 }
    105 #endif /* TRE_DEBUG */
    106 
    107 
    108 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
    109    array.  If `pa' specifies default values, they are taken from
    110    `default_params'. */
    111 inline static void
    112 tre_set_params(tre_tnfa_approx_reach_t *reach,
    113 	       int *pa, regaparams_t default_params)
    114 {
    115   int value;
    116 
    117   /* If depth is increased reset costs and counters to zero for the
    118      new levels. */
    119   value = pa[TRE_PARAM_DEPTH];
    120   assert(value <= TRE_M_MAX_DEPTH);
    121   if (value > reach->depth)
    122     {
    123       int i, j;
    124       for (i = reach->depth + 1; i <= value; i++)
    125 	for (j = 0; j < TRE_M_LAST; j++)
    126 	  reach->costs[i][j] = 0;
    127     }
    128   reach->depth = value;
    129 
    130   /* Set insert cost. */
    131   value = pa[TRE_PARAM_COST_INS];
    132   if (value == TRE_PARAM_DEFAULT)
    133     reach->params.cost_ins = default_params.cost_ins;
    134   else if (value != TRE_PARAM_UNSET)
    135     reach->params.cost_ins = value;
    136 
    137   /* Set delete cost. */
    138   value = pa[TRE_PARAM_COST_DEL];
    139   if (value == TRE_PARAM_DEFAULT)
    140     reach->params.cost_del = default_params.cost_del;
    141   else if (value != TRE_PARAM_UNSET)
    142     reach->params.cost_del = value;
    143 
    144   /* Set substitute cost. */
    145   value = pa[TRE_PARAM_COST_SUBST];
    146   if (value == TRE_PARAM_DEFAULT)
    147     reach->params.cost_subst = default_params.cost_subst;
    148   else
    149     reach->params.cost_subst = value;
    150 
    151   /* Set maximum cost. */
    152   value = pa[TRE_PARAM_COST_MAX];
    153   if (value == TRE_PARAM_DEFAULT)
    154     reach->params.max_cost = default_params.max_cost;
    155   else if (value != TRE_PARAM_UNSET)
    156     reach->params.max_cost = value;
    157 
    158   /* Set maximum inserts. */
    159   value = pa[TRE_PARAM_MAX_INS];
    160   if (value == TRE_PARAM_DEFAULT)
    161     reach->params.max_ins = default_params.max_ins;
    162   else if (value != TRE_PARAM_UNSET)
    163     reach->params.max_ins = value;
    164 
    165   /* Set maximum deletes. */
    166   value = pa[TRE_PARAM_MAX_DEL];
    167   if (value == TRE_PARAM_DEFAULT)
    168     reach->params.max_del = default_params.max_del;
    169   else if (value != TRE_PARAM_UNSET)
    170     reach->params.max_del = value;
    171 
    172   /* Set maximum substitutes. */
    173   value = pa[TRE_PARAM_MAX_SUBST];
    174   if (value == TRE_PARAM_DEFAULT)
    175     reach->params.max_subst = default_params.max_subst;
    176   else if (value != TRE_PARAM_UNSET)
    177     reach->params.max_subst = value;
    178 
    179   /* Set maximum number of errors. */
    180   value = pa[TRE_PARAM_MAX_ERR];
    181   if (value == TRE_PARAM_DEFAULT)
    182     reach->params.max_err = default_params.max_err;
    183   else if (value != TRE_PARAM_UNSET)
    184     reach->params.max_err = value;
    185 }
    186 
    187 reg_errcode_t
    188 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
    189 		    tre_str_type_t type, int *match_tags,
    190 		    regamatch_t *match, regaparams_t default_params,
    191 		    int eflags, int *match_end_ofs)
    192 {
    193   /* State variables required by GET_NEXT_WCHAR. */
    194   tre_char_t prev_c = 0, next_c = 0;
    195   const char *str_byte = string;
    196   int pos = -1;
    197   unsigned int pos_add_next = 1;
    198 #ifdef TRE_WCHAR
    199   const wchar_t *str_wide = string;
    200 #ifdef TRE_MBSTATE
    201   mbstate_t mbstate;
    202 #endif /* !TRE_WCHAR */
    203 #endif /* TRE_WCHAR */
    204   int reg_notbol = eflags & REG_NOTBOL;
    205   int reg_noteol = eflags & REG_NOTEOL;
    206   int reg_newline = tnfa->cflags & REG_NEWLINE;
    207   size_t str_user_end = 0;
    208 
    209   int prev_pos;
    210 
    211   /* Number of tags. */
    212   size_t num_tags;
    213   /* The reach tables. */
    214   tre_tnfa_approx_reach_t *reach, *reach_next;
    215   /* Tag array for temporary use. */
    216   int *tmp_tags;
    217 
    218   /* End offset of best match so far, or -1 if no match found yet. */
    219   int match_eo = -1;
    220   /* Costs of the match. */
    221   int match_costs[TRE_M_LAST];
    222 
    223   /* Space for temporary data required for matching. */
    224   unsigned char *buf;
    225 
    226   size_t i, id;
    227 
    228   if (!match_tags)
    229     num_tags = 0;
    230   else
    231     num_tags = tnfa->num_tags;
    232 
    233 #ifdef TRE_MBSTATE
    234   memset(&mbstate, '\0', sizeof(mbstate));
    235 #endif /* TRE_MBSTATE */
    236 
    237   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
    238 	  "match_tags %p\n",
    239 	  type, len, eflags,
    240 	  match_tags));
    241   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
    242 	  default_params.max_cost,
    243 	  default_params.cost_ins,
    244 	  default_params.cost_del,
    245 	  default_params.cost_subst));
    246 
    247   /* Allocate memory for temporary data required for matching.	This needs to
    248      be done for every matching operation to be thread safe.  This allocates
    249      everything in a single large block from the stack frame using alloca()
    250      or with malloc() if alloca is unavailable. */
    251   {
    252     unsigned char *buf_cursor;
    253     /* Space needed for one array of tags. */
    254     size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
    255     /* Space needed for one reach table. */
    256     size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
    257     /* Total space needed. */
    258     size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
    259     /* Add some extra to make sure we can align the pointers.  The multiplier
    260        used here must be equal to the number of ALIGN calls below. */
    261     total_bytes += (sizeof(long) - 1) * 3;
    262 
    263     /* Allocate the memory. */
    264 #ifdef TRE_USE_ALLOCA
    265     buf = alloca(total_bytes);
    266 #else /* !TRE_USE_ALLOCA */
    267     buf = xmalloc((unsigned)total_bytes);
    268 #endif /* !TRE_USE_ALLOCA */
    269     if (!buf)
    270       return REG_ESPACE;
    271     memset(buf, 0, (size_t)total_bytes);
    272 
    273     /* Allocate `tmp_tags' from `buf'. */
    274     tmp_tags = (void *)buf;
    275     buf_cursor = buf + tag_bytes;
    276     buf_cursor += ALIGN(buf_cursor, long);
    277 
    278     /* Allocate `reach' from `buf'. */
    279     reach = (void *)buf_cursor;
    280     buf_cursor += reach_bytes;
    281     buf_cursor += ALIGN(buf_cursor, long);
    282 
    283     /* Allocate `reach_next' from `buf'. */
    284     reach_next = (void *)buf_cursor;
    285     buf_cursor += reach_bytes;
    286     buf_cursor += ALIGN(buf_cursor, long);
    287 
    288     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
    289     for (i = 0; i < tnfa->num_states; i++)
    290       {
    291 	reach[i].tags = (void *)buf_cursor;
    292 	buf_cursor += tag_bytes;
    293 	reach_next[i].tags = (void *)buf_cursor;
    294 	buf_cursor += tag_bytes;
    295       }
    296     assert(buf_cursor <= buf + total_bytes);
    297   }
    298 
    299   for (i = 0; i < TRE_M_LAST; i++)
    300     match_costs[i] = INT_MAX;
    301 
    302   /* Mark the reach arrays empty. */
    303   for (i = 0; i < tnfa->num_states; i++)
    304     reach[i].pos = reach_next[i].pos = -2;
    305 
    306   prev_pos = pos;
    307   GET_NEXT_WCHAR();
    308   pos = 0;
    309 
    310   while (/*CONSTCOND*/1)
    311     {
    312       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
    313 
    314       /* Add initial states to `reach_next' if an exact match has not yet
    315 	 been found. */
    316       if (match_costs[TRE_M_COST] > 0)
    317 	{
    318 	  tre_tnfa_transition_t *trans;
    319 	  DPRINT(("  init"));
    320 	  for (trans = tnfa->initial; trans->state; trans++)
    321 	    {
    322 	      int stateid = trans->state_id;
    323 
    324 	      /* If this state is not currently in `reach_next', add it
    325 		 there. */
    326 	      if (reach_next[stateid].pos < pos)
    327 		{
    328 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
    329 		    {
    330 		      /* Assertions failed, don't add this state. */
    331 		      DPRINT((" !%d (assert)", stateid));
    332 		      continue;
    333 		    }
    334 		  DPRINT((" %d", stateid));
    335 		  reach_next[stateid].state = trans->state;
    336 		  reach_next[stateid].pos = pos;
    337 
    338 		  /* Compute tag values after this transition. */
    339 		  for (i = 0; i < num_tags; i++)
    340 		    reach_next[stateid].tags[i] = -1;
    341 
    342 		  if (trans->tags)
    343 		    for (i = 0; trans->tags[i] >= 0; i++)
    344 		      if ((size_t)trans->tags[i] < num_tags)
    345 			reach_next[stateid].tags[trans->tags[i]] = pos;
    346 
    347 		  /* Set the parameters, depth, and costs. */
    348 		  reach_next[stateid].params = default_params;
    349 		  reach_next[stateid].depth = 0;
    350 		  for (i = 0; i < TRE_M_LAST; i++)
    351 		    reach_next[stateid].costs[0][i] = 0;
    352 		  if (trans->params)
    353 		    tre_set_params(&reach_next[stateid], trans->params,
    354 				   default_params);
    355 
    356 		  /* If this is the final state, mark the exact match. */
    357 		  if (trans->state == tnfa->final)
    358 		    {
    359 		      match_eo = pos;
    360 		      for (i = 0; i < num_tags; i++)
    361 			match_tags[i] = reach_next[stateid].tags[i];
    362 		      for (i = 0; i < TRE_M_LAST; i++)
    363 			match_costs[i] = 0;
    364 		    }
    365 		}
    366 	    }
    367 	    DPRINT(("\n"));
    368 	}
    369 
    370 
    371       /* Handle inserts.  This is done by pretending there's an epsilon
    372 	 transition from each state in `reach' back to the same state.
    373 	 We don't need to worry about the final state here; this will never
    374 	 give a better match than what we already have. */
    375       for (id = 0; id < tnfa->num_states; id++)
    376 	{
    377 	  int depth;
    378 	  int cost, cost0;
    379 
    380 	  if (reach[id].pos != prev_pos)
    381 	    {
    382 	      DPRINT(("	 insert: %d not reached\n", id));
    383 	      continue;	 /* Not reached. */
    384 	    }
    385 
    386 	  depth = reach[id].depth;
    387 
    388 	  /* Compute and check cost at current depth. */
    389 	  cost = reach[id].costs[depth][TRE_M_COST];
    390 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
    391 	    cost += reach[id].params.cost_ins;
    392 	  if (cost > reach[id].params.max_cost)
    393 	    continue;  /* Cost too large. */
    394 
    395 	  /* Check number of inserts at current depth. */
    396 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
    397 	      > reach[id].params.max_ins)
    398 	    continue;  /* Too many inserts. */
    399 
    400 	  /* Check total number of errors at current depth. */
    401 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
    402 	      > reach[id].params.max_err)
    403 	    continue;  /* Too many errors. */
    404 
    405 	  /* Compute overall cost. */
    406 	  cost0 = cost;
    407 	  if (depth > 0)
    408 	    {
    409 	      cost0 = reach[id].costs[0][TRE_M_COST];
    410 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
    411 		cost0 += reach[id].params.cost_ins;
    412 	      else
    413 		cost0 += default_params.cost_ins;
    414 	    }
    415 
    416 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
    417 		  reach[id].costs[depth][TRE_M_COST]));
    418 	  if (reach_next[id].pos == pos
    419 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
    420 	    {
    421 	      DPRINT(("lose\n"));
    422 	      continue;
    423 	    }
    424 	  DPRINT(("win\n"));
    425 
    426 	  /* Copy state, position, tags, parameters, and depth. */
    427 	  reach_next[id].state = reach[id].state;
    428 	  reach_next[id].pos = pos;
    429 	  for (i = 0; i < num_tags; i++)
    430 	    reach_next[id].tags[i] = reach[id].tags[i];
    431 	  reach_next[id].params = reach[id].params;
    432 	  reach_next[id].depth = reach[id].depth;
    433 
    434 	  /* Set the costs after this transition. */
    435 	  memcpy(reach_next[id].costs, reach[id].costs,
    436 		 sizeof(reach_next[id].costs[0][0])
    437 		 * TRE_M_LAST * (depth + 1));
    438 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
    439 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
    440 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
    441 	  if (depth > 0)
    442 	    {
    443 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
    444 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
    445 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
    446 	    }
    447 
    448 	}
    449 
    450 
    451       /* Handle deletes.  This is done by traversing through the whole TNFA
    452 	 pretending that all transitions are epsilon transitions, until
    453 	 no more states can be reached with better costs. */
    454       {
    455 	/* XXX - dynamic ringbuffer size */
    456 	tre_tnfa_approx_reach_t *ringbuffer[512];
    457 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
    458 
    459 	deque_start = deque_end = ringbuffer;
    460 
    461 	/* Add all states in `reach_next' to the deque. */
    462 	for (id = 0; id < tnfa->num_states; id++)
    463 	  {
    464 	    if (reach_next[id].pos != pos)
    465 	      continue;
    466 	    *deque_end = &reach_next[id];
    467 	    deque_end++;
    468 	    assert(deque_end != deque_start);
    469 	  }
    470 
    471 	/* Repeat until the deque is empty. */
    472 	while (deque_end != deque_start)
    473 	  {
    474 	    tre_tnfa_approx_reach_t *reach_p;
    475 	    int depth;
    476 	    int cost, cost0;
    477 	    tre_tnfa_transition_t *trans;
    478 
    479 	    /* Pop the first item off the deque. */
    480 	    reach_p = *deque_start;
    481 	    id = reach_p - reach_next;
    482 	    depth = reach_p->depth;
    483 
    484 	    /* Compute cost at current depth. */
    485 	    cost = reach_p->costs[depth][TRE_M_COST];
    486 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
    487 	      cost += reach_p->params.cost_del;
    488 
    489 	    /* Check cost, number of deletes, and total number of errors
    490 	       at current depth. */
    491 	    if (cost > reach_p->params.max_cost
    492 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
    493 		    > reach_p->params.max_del)
    494 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
    495 		    > reach_p->params.max_err))
    496 	      {
    497 		/* Too many errors or cost too large. */
    498 		DPRINT(("  delete: from %03d: cost too large\n", id));
    499 		deque_start++;
    500 		if (deque_start >= (ringbuffer + 512))
    501 		  deque_start = ringbuffer;
    502 		continue;
    503 	      }
    504 
    505 	    /* Compute overall cost. */
    506 	    cost0 = cost;
    507 	    if (depth > 0)
    508 	      {
    509 		cost0 = reach_p->costs[0][TRE_M_COST];
    510 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
    511 		  cost0 += reach_p->params.cost_del;
    512 		else
    513 		  cost0 += default_params.cost_del;
    514 	      }
    515 
    516 	    for (trans = reach_p->state; trans->state; trans++)
    517 	      {
    518 		int dest_id = trans->state_id;
    519 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
    520 			id, dest_id, cost0, reach_p->params.max_cost));
    521 
    522 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
    523 		  {
    524 		    DPRINT(("assertion failed\n"));
    525 		    continue;
    526 		  }
    527 
    528 		/* Compute tag values after this transition. */
    529 		for (i = 0; i < num_tags; i++)
    530 		  tmp_tags[i] = reach_p->tags[i];
    531 		if (trans->tags)
    532 		  for (i = 0; trans->tags[i] >= 0; i++)
    533 		    if ((size_t)trans->tags[i] < num_tags)
    534 		      tmp_tags[trans->tags[i]] = pos;
    535 
    536 		/* If another path has also reached this state, choose the one
    537 		   with the smallest cost or best tags if costs are equal. */
    538 		if (reach_next[dest_id].pos == pos
    539 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
    540 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
    541 			    && (!match_tags
    542 				|| !tre_tag_order(num_tags,
    543 						  tnfa->tag_directions,
    544 						  tmp_tags,
    545 						  reach_next[dest_id].tags)))))
    546 		  {
    547 		    DPRINT(("lose, cost0 %d, have %d\n",
    548 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
    549 		    continue;
    550 		  }
    551 		DPRINT(("win\n"));
    552 
    553 		/* Set state, position, tags, parameters, depth, and costs. */
    554 		reach_next[dest_id].state = trans->state;
    555 		reach_next[dest_id].pos = pos;
    556 		for (i = 0; i < num_tags; i++)
    557 		  reach_next[dest_id].tags[i] = tmp_tags[i];
    558 
    559 		reach_next[dest_id].params = reach_p->params;
    560 		if (trans->params)
    561 		  tre_set_params(&reach_next[dest_id], trans->params,
    562 				 default_params);
    563 
    564 		reach_next[dest_id].depth = reach_p->depth;
    565 		memcpy(&reach_next[dest_id].costs,
    566 		       reach_p->costs,
    567 		       sizeof(reach_p->costs[0][0])
    568 		       * TRE_M_LAST * (depth + 1));
    569 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
    570 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
    571 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
    572 		if (depth > 0)
    573 		  {
    574 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
    575 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
    576 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
    577 		  }
    578 
    579 		if (trans->state == tnfa->final
    580 		    && (match_eo < 0
    581 			|| match_costs[TRE_M_COST] > cost0
    582 			|| (match_costs[TRE_M_COST] == cost0
    583 			    && (num_tags > 0
    584 				&& tmp_tags[0] <= match_tags[0]))))
    585 		  {
    586 		    DPRINT(("	 setting new match at %d, cost %d\n",
    587 			    pos, cost0));
    588 		    match_eo = pos;
    589 		    memcpy(match_costs, reach_next[dest_id].costs[0],
    590 			   sizeof(match_costs[0]) * TRE_M_LAST);
    591 		    for (i = 0; i < num_tags; i++)
    592 		      match_tags[i] = tmp_tags[i];
    593 		  }
    594 
    595 		/* Add to the end of the deque. */
    596 		*deque_end = &reach_next[dest_id];
    597 		deque_end++;
    598 		if (deque_end >= (ringbuffer + 512))
    599 		  deque_end = ringbuffer;
    600 		assert(deque_end != deque_start);
    601 	      }
    602 	    deque_start++;
    603 	    if (deque_start >= (ringbuffer + 512))
    604 	      deque_start = ringbuffer;
    605 	  }
    606 
    607       }
    608 
    609 #ifdef TRE_DEBUG
    610       tre_print_reach(tnfa, reach_next, pos, num_tags);
    611 #endif /* TRE_DEBUG */
    612 
    613       /* Check for end of string. */
    614       if (len < 0)
    615 	{
    616 	  if (type == STR_USER)
    617 	    {
    618 	      if (str_user_end)
    619 		break;
    620 	    }
    621 	  else if (next_c == L'\0')
    622 	    break;
    623 	}
    624       else
    625 	{
    626 	  if (pos >= len)
    627 	    break;
    628 	}
    629 
    630       prev_pos = pos;
    631       GET_NEXT_WCHAR();
    632 
    633       /* Swap `reach' and `reach_next'. */
    634       {
    635 	tre_tnfa_approx_reach_t *tmp;
    636 	tmp = reach;
    637 	reach = reach_next;
    638 	reach_next = tmp;
    639       }
    640 
    641       /* Handle exact matches and substitutions. */
    642       for (id = 0; id < tnfa->num_states; id++)
    643 	{
    644 	  tre_tnfa_transition_t *trans;
    645 
    646 	  if (reach[id].pos < prev_pos)
    647 	    continue;  /* Not reached. */
    648 	  for (trans = reach[id].state; trans->state; trans++)
    649 	    {
    650 	      int dest_id;
    651 	      int depth;
    652 	      int cost, cost0, err;
    653 
    654 	      if (trans->assertions
    655 		  && (CHECK_ASSERTIONS(trans->assertions)
    656 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
    657 		{
    658 		  DPRINT(("  exact,  from %d: assert failed\n", id));
    659 		  continue;
    660 		}
    661 
    662 	      depth = reach[id].depth;
    663 	      dest_id = trans->state_id;
    664 
    665 	      cost = reach[id].costs[depth][TRE_M_COST];
    666 	      cost0 = reach[id].costs[0][TRE_M_COST];
    667 	      err = 0;
    668 
    669 	      if (trans->code_min > (tre_cint_t)prev_c
    670 		  || trans->code_max < (tre_cint_t)prev_c)
    671 		{
    672 		  /* Handle substitutes.  The required character was not in
    673 		     the string, so match it in place of whatever was supposed
    674 		     to be there and increase costs accordingly. */
    675 		  err = 1;
    676 
    677 		  /* Compute and check cost at current depth. */
    678 		  cost = reach[id].costs[depth][TRE_M_COST];
    679 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
    680 		    cost += reach[id].params.cost_subst;
    681 		  if (cost > reach[id].params.max_cost)
    682 		    continue; /* Cost too large. */
    683 
    684 		  /* Check number of substitutes at current depth. */
    685 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
    686 		      > reach[id].params.max_subst)
    687 		    continue; /* Too many substitutes. */
    688 
    689 		  /* Check total number of errors at current depth. */
    690 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
    691 		      > reach[id].params.max_err)
    692 		    continue; /* Too many errors. */
    693 
    694 		  /* Compute overall cost. */
    695 		  cost0 = cost;
    696 		  if (depth > 0)
    697 		    {
    698 		      cost0 = reach[id].costs[0][TRE_M_COST];
    699 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
    700 			cost0 += reach[id].params.cost_subst;
    701 		      else
    702 			cost0 += default_params.cost_subst;
    703 		    }
    704 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
    705 			  id, dest_id, cost0));
    706 		}
    707 	      else
    708 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
    709 			id, dest_id, cost0));
    710 
    711 	      /* Compute tag values after this transition. */
    712 	      for (i = 0; i < num_tags; i++)
    713 		tmp_tags[i] = reach[id].tags[i];
    714 	      if (trans->tags)
    715 		for (i = 0; trans->tags[i] >= 0; i++)
    716 		  if ((size_t)trans->tags[i] < num_tags)
    717 		    tmp_tags[trans->tags[i]] = pos;
    718 
    719 	      /* If another path has also reached this state, choose the
    720 		 one with the smallest cost or best tags if costs are equal. */
    721 	      if (reach_next[dest_id].pos == pos
    722 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
    723 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
    724 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
    725 					    tmp_tags,
    726 					    reach_next[dest_id].tags))))
    727 		{
    728 		  DPRINT(("lose\n"));
    729 		  continue;
    730 		}
    731 	      DPRINT(("win %d %d\n",
    732 		      reach_next[dest_id].pos,
    733 		      reach_next[dest_id].costs[0][TRE_M_COST]));
    734 
    735 	      /* Set state, position, tags, and depth. */
    736 	      reach_next[dest_id].state = trans->state;
    737 	      reach_next[dest_id].pos = pos;
    738 	      for (i = 0; i < num_tags; i++)
    739 		reach_next[dest_id].tags[i] = tmp_tags[i];
    740 	      reach_next[dest_id].depth = reach[id].depth;
    741 
    742 	      /* Set parameters. */
    743 	      reach_next[dest_id].params = reach[id].params;
    744 	      if (trans->params)
    745 		tre_set_params(&reach_next[dest_id], trans->params,
    746 			       default_params);
    747 
    748 	      /* Set the costs after this transition. */
    749 		memcpy(&reach_next[dest_id].costs,
    750 		       reach[id].costs,
    751 		       sizeof(reach[id].costs[0][0])
    752 		       * TRE_M_LAST * (depth + 1));
    753 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
    754 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
    755 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
    756 	      if (depth > 0)
    757 		{
    758 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
    759 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
    760 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
    761 		}
    762 
    763 	      if (trans->state == tnfa->final
    764 		  && (match_eo < 0
    765 		      || cost0 < match_costs[TRE_M_COST]
    766 		      || (cost0 == match_costs[TRE_M_COST]
    767 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
    768 		{
    769 		  DPRINT(("    setting new match at %d, cost %d\n",
    770 			  pos, cost0));
    771 		  match_eo = pos;
    772 		  for (i = 0; i < TRE_M_LAST; i++)
    773 		    match_costs[i] = reach_next[dest_id].costs[0][i];
    774 		  for (i = 0; i < num_tags; i++)
    775 		    match_tags[i] = tmp_tags[i];
    776 		}
    777 	    }
    778 	}
    779     }
    780 
    781   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
    782 	  match_costs[TRE_M_COST]));
    783 
    784 #ifndef TRE_USE_ALLOCA
    785   if (buf)
    786     xfree(buf);
    787 #endif /* !TRE_USE_ALLOCA */
    788 
    789   match->cost = match_costs[TRE_M_COST];
    790   match->num_ins = match_costs[TRE_M_NUM_INS];
    791   match->num_del = match_costs[TRE_M_NUM_DEL];
    792   match->num_subst = match_costs[TRE_M_NUM_SUBST];
    793   *match_end_ofs = match_eo;
    794 
    795   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
    796 }
    797