tre-match-approx.c revision 1.3 1 /*
2 tre-match-approx.c - TRE approximate regex matching engine
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif /* HAVE_CONFIG_H */
11
12 #include "tre-alloca.h"
13
14 #define __USE_STRING_INLINES
15 #undef __NO_INLINE__
16
17 #include <assert.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #ifdef HAVE_WCHAR_H
22 #include <wchar.h>
23 #endif /* HAVE_WCHAR_H */
24 #ifdef HAVE_WCTYPE_H
25 #include <wctype.h>
26 #endif /* HAVE_WCTYPE_H */
27 #ifndef TRE_WCHAR
28 #include <ctype.h>
29 #endif /* !TRE_WCHAR */
30 #ifdef HAVE_MALLOC_H
31 #include <malloc.h>
32 #endif /* HAVE_MALLOC_H */
33
34 #include "tre-internal.h"
35 #include "tre-match-utils.h"
36 #include "tre.h"
37 #include "xmalloc.h"
38
39 #define TRE_M_COST 0
40 #define TRE_M_NUM_INS 1
41 #define TRE_M_NUM_DEL 2
42 #define TRE_M_NUM_SUBST 3
43 #define TRE_M_NUM_ERR 4
44 #define TRE_M_LAST 5
45
46 #define TRE_M_MAX_DEPTH 3
47
48 typedef struct {
49 /* State in the TNFA transition table. */
50 tre_tnfa_transition_t *state;
51 /* Position in input string. */
52 int pos;
53 /* Tag values. */
54 int *tags;
55 /* Matching parameters. */
56 regaparams_t params;
57 /* Nesting depth of parameters. This is used as an index in
58 the `costs' array. */
59 int depth;
60 /* Costs and counter values for different parameter nesting depths. */
61 int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
62 } tre_tnfa_approx_reach_t;
63
64
65 #ifdef TRE_DEBUG
66 /* Prints the `reach' array in a readable fashion with DPRINT. */
67 static void
68 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
69 int pos, size_t num_tags)
70 {
71 int id;
72
73 /* Print each state on one line. */
74 DPRINT((" reach:\n"));
75 for (id = 0; id < tnfa->num_states; id++)
76 {
77 int i, j;
78 if (reach[id].pos < pos)
79 continue; /* Not reached. */
80 DPRINT((" %03d, costs ", id));
81 for (i = 0; i <= reach[id].depth; i++)
82 {
83 DPRINT(("["));
84 for (j = 0; j < TRE_M_LAST; j++)
85 {
86 DPRINT(("%2d", reach[id].costs[i][j]));
87 if (j + 1 < TRE_M_LAST)
88 DPRINT((","));
89 }
90 DPRINT(("]"));
91 if (i + 1 <= reach[id].depth)
92 DPRINT((", "));
93 }
94 DPRINT(("\n tags "));
95 for (i = 0; i < num_tags; i++)
96 {
97 DPRINT(("%02d", reach[id].tags[i]));
98 if (i + 1 < num_tags)
99 DPRINT((","));
100 }
101 DPRINT(("\n"));
102 }
103 DPRINT(("\n"));
104 }
105 #endif /* TRE_DEBUG */
106
107
108 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
109 array. If `pa' specifies default values, they are taken from
110 `default_params'. */
111 inline static void
112 tre_set_params(tre_tnfa_approx_reach_t *reach,
113 int *pa, regaparams_t default_params)
114 {
115 int value;
116
117 /* If depth is increased reset costs and counters to zero for the
118 new levels. */
119 value = pa[TRE_PARAM_DEPTH];
120 assert(value <= TRE_M_MAX_DEPTH);
121 if (value > reach->depth)
122 {
123 int i, j;
124 for (i = reach->depth + 1; i <= value; i++)
125 for (j = 0; j < TRE_M_LAST; j++)
126 reach->costs[i][j] = 0;
127 }
128 reach->depth = value;
129
130 /* Set insert cost. */
131 value = pa[TRE_PARAM_COST_INS];
132 if (value == TRE_PARAM_DEFAULT)
133 reach->params.cost_ins = default_params.cost_ins;
134 else if (value != TRE_PARAM_UNSET)
135 reach->params.cost_ins = value;
136
137 /* Set delete cost. */
138 value = pa[TRE_PARAM_COST_DEL];
139 if (value == TRE_PARAM_DEFAULT)
140 reach->params.cost_del = default_params.cost_del;
141 else if (value != TRE_PARAM_UNSET)
142 reach->params.cost_del = value;
143
144 /* Set substitute cost. */
145 value = pa[TRE_PARAM_COST_SUBST];
146 if (value == TRE_PARAM_DEFAULT)
147 reach->params.cost_subst = default_params.cost_subst;
148 else
149 reach->params.cost_subst = value;
150
151 /* Set maximum cost. */
152 value = pa[TRE_PARAM_COST_MAX];
153 if (value == TRE_PARAM_DEFAULT)
154 reach->params.max_cost = default_params.max_cost;
155 else if (value != TRE_PARAM_UNSET)
156 reach->params.max_cost = value;
157
158 /* Set maximum inserts. */
159 value = pa[TRE_PARAM_MAX_INS];
160 if (value == TRE_PARAM_DEFAULT)
161 reach->params.max_ins = default_params.max_ins;
162 else if (value != TRE_PARAM_UNSET)
163 reach->params.max_ins = value;
164
165 /* Set maximum deletes. */
166 value = pa[TRE_PARAM_MAX_DEL];
167 if (value == TRE_PARAM_DEFAULT)
168 reach->params.max_del = default_params.max_del;
169 else if (value != TRE_PARAM_UNSET)
170 reach->params.max_del = value;
171
172 /* Set maximum substitutes. */
173 value = pa[TRE_PARAM_MAX_SUBST];
174 if (value == TRE_PARAM_DEFAULT)
175 reach->params.max_subst = default_params.max_subst;
176 else if (value != TRE_PARAM_UNSET)
177 reach->params.max_subst = value;
178
179 /* Set maximum number of errors. */
180 value = pa[TRE_PARAM_MAX_ERR];
181 if (value == TRE_PARAM_DEFAULT)
182 reach->params.max_err = default_params.max_err;
183 else if (value != TRE_PARAM_UNSET)
184 reach->params.max_err = value;
185 }
186
187 reg_errcode_t
188 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
189 tre_str_type_t type, int *match_tags,
190 regamatch_t *match, regaparams_t default_params,
191 int eflags, int *match_end_ofs)
192 {
193 /* State variables required by GET_NEXT_WCHAR. */
194 tre_char_t prev_c = 0, next_c = 0;
195 const char *str_byte = string;
196 int pos = -1;
197 unsigned int pos_add_next = 1;
198 #ifdef TRE_WCHAR
199 const wchar_t *str_wide = string;
200 #ifdef TRE_MBSTATE
201 mbstate_t mbstate;
202 #endif /* !TRE_WCHAR */
203 #endif /* TRE_WCHAR */
204 int reg_notbol = eflags & REG_NOTBOL;
205 int reg_noteol = eflags & REG_NOTEOL;
206 int reg_newline = tnfa->cflags & REG_NEWLINE;
207 size_t str_user_end = 0;
208
209 int prev_pos;
210
211 /* Number of tags. */
212 size_t num_tags;
213 /* The reach tables. */
214 tre_tnfa_approx_reach_t *reach, *reach_next;
215 /* Tag array for temporary use. */
216 int *tmp_tags;
217
218 /* End offset of best match so far, or -1 if no match found yet. */
219 int match_eo = -1;
220 /* Costs of the match. */
221 int match_costs[TRE_M_LAST];
222
223 /* Space for temporary data required for matching. */
224 unsigned char *buf;
225
226 size_t i, id;
227
228 if (!match_tags)
229 num_tags = 0;
230 else
231 num_tags = tnfa->num_tags;
232
233 #ifdef TRE_MBSTATE
234 memset(&mbstate, '\0', sizeof(mbstate));
235 #endif /* TRE_MBSTATE */
236
237 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
238 "match_tags %p\n",
239 type, len, eflags,
240 match_tags));
241 DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
242 default_params.max_cost,
243 default_params.cost_ins,
244 default_params.cost_del,
245 default_params.cost_subst));
246
247 /* Allocate memory for temporary data required for matching. This needs to
248 be done for every matching operation to be thread safe. This allocates
249 everything in a single large block from the stack frame using alloca()
250 or with malloc() if alloca is unavailable. */
251 {
252 unsigned char *buf_cursor;
253 /* Space needed for one array of tags. */
254 size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
255 /* Space needed for one reach table. */
256 size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
257 /* Total space needed. */
258 size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
259 /* Add some extra to make sure we can align the pointers. The multiplier
260 used here must be equal to the number of ALIGN calls below. */
261 total_bytes += (sizeof(long) - 1) * 3;
262
263 /* Allocate the memory. */
264 #ifdef TRE_USE_ALLOCA
265 buf = alloca(total_bytes);
266 #else /* !TRE_USE_ALLOCA */
267 buf = xmalloc((unsigned)total_bytes);
268 #endif /* !TRE_USE_ALLOCA */
269 if (!buf)
270 return REG_ESPACE;
271 memset(buf, 0, (size_t)total_bytes);
272
273 /* Allocate `tmp_tags' from `buf'. */
274 tmp_tags = (void *)buf;
275 buf_cursor = buf + tag_bytes;
276 buf_cursor += ALIGN(buf_cursor, long);
277
278 /* Allocate `reach' from `buf'. */
279 reach = (void *)buf_cursor;
280 buf_cursor += reach_bytes;
281 buf_cursor += ALIGN(buf_cursor, long);
282
283 /* Allocate `reach_next' from `buf'. */
284 reach_next = (void *)buf_cursor;
285 buf_cursor += reach_bytes;
286 buf_cursor += ALIGN(buf_cursor, long);
287
288 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
289 for (i = 0; i < tnfa->num_states; i++)
290 {
291 reach[i].tags = (void *)buf_cursor;
292 buf_cursor += tag_bytes;
293 reach_next[i].tags = (void *)buf_cursor;
294 buf_cursor += tag_bytes;
295 }
296 assert(buf_cursor <= buf + total_bytes);
297 }
298
299 for (i = 0; i < TRE_M_LAST; i++)
300 match_costs[i] = INT_MAX;
301
302 /* Mark the reach arrays empty. */
303 for (i = 0; i < tnfa->num_states; i++)
304 reach[i].pos = reach_next[i].pos = -2;
305
306 prev_pos = pos;
307 GET_NEXT_WCHAR();
308 pos = 0;
309
310 while (/*CONSTCOND*/(void)1,1)
311 {
312 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
313
314 /* Add initial states to `reach_next' if an exact match has not yet
315 been found. */
316 if (match_costs[TRE_M_COST] > 0)
317 {
318 tre_tnfa_transition_t *trans;
319 DPRINT((" init"));
320 for (trans = tnfa->initial; trans->state; trans++)
321 {
322 int stateid = trans->state_id;
323
324 /* If this state is not currently in `reach_next', add it
325 there. */
326 if (reach_next[stateid].pos < pos)
327 {
328 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
329 {
330 /* Assertions failed, don't add this state. */
331 DPRINT((" !%d (assert)", stateid));
332 continue;
333 }
334 DPRINT((" %d", stateid));
335 reach_next[stateid].state = trans->state;
336 reach_next[stateid].pos = pos;
337
338 /* Compute tag values after this transition. */
339 for (i = 0; i < num_tags; i++)
340 reach_next[stateid].tags[i] = -1;
341
342 if (trans->tags)
343 for (i = 0; trans->tags[i] >= 0; i++)
344 if ((size_t)trans->tags[i] < num_tags)
345 reach_next[stateid].tags[trans->tags[i]] = pos;
346
347 /* Set the parameters, depth, and costs. */
348 reach_next[stateid].params = default_params;
349 reach_next[stateid].depth = 0;
350 for (i = 0; i < TRE_M_LAST; i++)
351 reach_next[stateid].costs[0][i] = 0;
352 if (trans->params)
353 tre_set_params(&reach_next[stateid], trans->params,
354 default_params);
355
356 /* If this is the final state, mark the exact match. */
357 if (trans->state == tnfa->final)
358 {
359 match_eo = pos;
360 for (i = 0; i < num_tags; i++)
361 match_tags[i] = reach_next[stateid].tags[i];
362 for (i = 0; i < TRE_M_LAST; i++)
363 match_costs[i] = 0;
364 }
365 }
366 }
367 DPRINT(("\n"));
368 }
369
370
371 /* Handle inserts. This is done by pretending there's an epsilon
372 transition from each state in `reach' back to the same state.
373 We don't need to worry about the final state here; this will never
374 give a better match than what we already have. */
375 for (id = 0; id < tnfa->num_states; id++)
376 {
377 int depth;
378 int cost, cost0;
379
380 if (reach[id].pos != prev_pos)
381 {
382 DPRINT((" insert: %d not reached\n", id));
383 continue; /* Not reached. */
384 }
385
386 depth = reach[id].depth;
387
388 /* Compute and check cost at current depth. */
389 cost = reach[id].costs[depth][TRE_M_COST];
390 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
391 cost += reach[id].params.cost_ins;
392 if (cost > reach[id].params.max_cost)
393 continue; /* Cost too large. */
394
395 /* Check number of inserts at current depth. */
396 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
397 > reach[id].params.max_ins)
398 continue; /* Too many inserts. */
399
400 /* Check total number of errors at current depth. */
401 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
402 > reach[id].params.max_err)
403 continue; /* Too many errors. */
404
405 /* Compute overall cost. */
406 cost0 = cost;
407 if (depth > 0)
408 {
409 cost0 = reach[id].costs[0][TRE_M_COST];
410 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
411 cost0 += reach[id].params.cost_ins;
412 else
413 cost0 += default_params.cost_ins;
414 }
415
416 DPRINT((" insert: from %d to %d, cost %d: ", id, id,
417 reach[id].costs[depth][TRE_M_COST]));
418 if (reach_next[id].pos == pos
419 && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
420 {
421 DPRINT(("lose\n"));
422 continue;
423 }
424 DPRINT(("win\n"));
425
426 /* Copy state, position, tags, parameters, and depth. */
427 reach_next[id].state = reach[id].state;
428 reach_next[id].pos = pos;
429 for (i = 0; i < num_tags; i++)
430 reach_next[id].tags[i] = reach[id].tags[i];
431 reach_next[id].params = reach[id].params;
432 reach_next[id].depth = reach[id].depth;
433
434 /* Set the costs after this transition. */
435 memcpy(reach_next[id].costs, reach[id].costs,
436 sizeof(reach_next[id].costs[0][0])
437 * TRE_M_LAST * (depth + 1));
438 reach_next[id].costs[depth][TRE_M_COST] = cost;
439 reach_next[id].costs[depth][TRE_M_NUM_INS]++;
440 reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
441 if (depth > 0)
442 {
443 reach_next[id].costs[0][TRE_M_COST] = cost0;
444 reach_next[id].costs[0][TRE_M_NUM_INS]++;
445 reach_next[id].costs[0][TRE_M_NUM_ERR]++;
446 }
447
448 }
449
450
451 /* Handle deletes. This is done by traversing through the whole TNFA
452 pretending that all transitions are epsilon transitions, until
453 no more states can be reached with better costs. */
454 {
455 /* XXX - dynamic ringbuffer size */
456 tre_tnfa_approx_reach_t *ringbuffer[512];
457 tre_tnfa_approx_reach_t **deque_start, **deque_end;
458
459 deque_start = deque_end = ringbuffer;
460
461 /* Add all states in `reach_next' to the deque. */
462 for (id = 0; id < tnfa->num_states; id++)
463 {
464 if (reach_next[id].pos != pos)
465 continue;
466 *deque_end = &reach_next[id];
467 deque_end++;
468 assert(deque_end != deque_start);
469 }
470
471 /* Repeat until the deque is empty. */
472 while (deque_end != deque_start)
473 {
474 tre_tnfa_approx_reach_t *reach_p;
475 int depth;
476 int cost, cost0;
477 tre_tnfa_transition_t *trans;
478
479 /* Pop the first item off the deque. */
480 reach_p = *deque_start;
481 id = reach_p - reach_next;
482 depth = reach_p->depth;
483
484 /* Compute cost at current depth. */
485 cost = reach_p->costs[depth][TRE_M_COST];
486 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
487 cost += reach_p->params.cost_del;
488
489 /* Check cost, number of deletes, and total number of errors
490 at current depth. */
491 if (cost > reach_p->params.max_cost
492 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
493 > reach_p->params.max_del)
494 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
495 > reach_p->params.max_err))
496 {
497 /* Too many errors or cost too large. */
498 DPRINT((" delete: from %03d: cost too large\n", id));
499 deque_start++;
500 if (deque_start >= (ringbuffer + 512))
501 deque_start = ringbuffer;
502 continue;
503 }
504
505 /* Compute overall cost. */
506 cost0 = cost;
507 if (depth > 0)
508 {
509 cost0 = reach_p->costs[0][TRE_M_COST];
510 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
511 cost0 += reach_p->params.cost_del;
512 else
513 cost0 += default_params.cost_del;
514 }
515
516 for (trans = reach_p->state; trans->state; trans++)
517 {
518 int dest_id = trans->state_id;
519 DPRINT((" delete: from %03d to %03d, cost %d (%d): ",
520 id, dest_id, cost0, reach_p->params.max_cost));
521
522 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
523 {
524 DPRINT(("assertion failed\n"));
525 continue;
526 }
527
528 /* Compute tag values after this transition. */
529 for (i = 0; i < num_tags; i++)
530 tmp_tags[i] = reach_p->tags[i];
531 if (trans->tags)
532 for (i = 0; trans->tags[i] >= 0; i++)
533 if ((size_t)trans->tags[i] < num_tags)
534 tmp_tags[trans->tags[i]] = pos;
535
536 /* If another path has also reached this state, choose the one
537 with the smallest cost or best tags if costs are equal. */
538 if (reach_next[dest_id].pos == pos
539 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
540 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
541 && (!match_tags
542 || !tre_tag_order(num_tags,
543 tnfa->tag_directions,
544 tmp_tags,
545 reach_next[dest_id].tags)))))
546 {
547 DPRINT(("lose, cost0 %d, have %d\n",
548 cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
549 continue;
550 }
551 DPRINT(("win\n"));
552
553 /* Set state, position, tags, parameters, depth, and costs. */
554 reach_next[dest_id].state = trans->state;
555 reach_next[dest_id].pos = pos;
556 for (i = 0; i < num_tags; i++)
557 reach_next[dest_id].tags[i] = tmp_tags[i];
558
559 reach_next[dest_id].params = reach_p->params;
560 if (trans->params)
561 tre_set_params(&reach_next[dest_id], trans->params,
562 default_params);
563
564 reach_next[dest_id].depth = reach_p->depth;
565 memcpy(&reach_next[dest_id].costs,
566 reach_p->costs,
567 sizeof(reach_p->costs[0][0])
568 * TRE_M_LAST * (depth + 1));
569 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
570 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
571 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
572 if (depth > 0)
573 {
574 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
575 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
576 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
577 }
578
579 if (trans->state == tnfa->final
580 && (match_eo < 0
581 || match_costs[TRE_M_COST] > cost0
582 || (match_costs[TRE_M_COST] == cost0
583 && (num_tags > 0
584 && tmp_tags[0] <= match_tags[0]))))
585 {
586 DPRINT((" setting new match at %d, cost %d\n",
587 pos, cost0));
588 match_eo = pos;
589 memcpy(match_costs, reach_next[dest_id].costs[0],
590 sizeof(match_costs[0]) * TRE_M_LAST);
591 for (i = 0; i < num_tags; i++)
592 match_tags[i] = tmp_tags[i];
593 }
594
595 /* Add to the end of the deque. */
596 *deque_end = &reach_next[dest_id];
597 deque_end++;
598 if (deque_end >= (ringbuffer + 512))
599 deque_end = ringbuffer;
600 assert(deque_end != deque_start);
601 }
602 deque_start++;
603 if (deque_start >= (ringbuffer + 512))
604 deque_start = ringbuffer;
605 }
606
607 }
608
609 #ifdef TRE_DEBUG
610 tre_print_reach(tnfa, reach_next, pos, num_tags);
611 #endif /* TRE_DEBUG */
612
613 /* Check for end of string. */
614 if (len < 0)
615 {
616 if (type == STR_USER)
617 {
618 if (str_user_end)
619 break;
620 }
621 else if (next_c == L'\0')
622 break;
623 }
624 else
625 {
626 if (pos >= len)
627 break;
628 }
629
630 prev_pos = pos;
631 GET_NEXT_WCHAR();
632
633 /* Swap `reach' and `reach_next'. */
634 {
635 tre_tnfa_approx_reach_t *tmp;
636 tmp = reach;
637 reach = reach_next;
638 reach_next = tmp;
639 }
640
641 /* Handle exact matches and substitutions. */
642 for (id = 0; id < tnfa->num_states; id++)
643 {
644 tre_tnfa_transition_t *trans;
645
646 if (reach[id].pos < prev_pos)
647 continue; /* Not reached. */
648 for (trans = reach[id].state; trans->state; trans++)
649 {
650 int dest_id;
651 int depth;
652 int cost, cost0, err;
653
654 if (trans->assertions
655 && (CHECK_ASSERTIONS(trans->assertions)
656 || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
657 {
658 DPRINT((" exact, from %d: assert failed\n", id));
659 continue;
660 }
661
662 depth = reach[id].depth;
663 dest_id = trans->state_id;
664
665 cost = reach[id].costs[depth][TRE_M_COST];
666 cost0 = reach[id].costs[0][TRE_M_COST];
667 err = 0;
668
669 if (trans->code_min > (tre_cint_t)prev_c
670 || trans->code_max < (tre_cint_t)prev_c)
671 {
672 /* Handle substitutes. The required character was not in
673 the string, so match it in place of whatever was supposed
674 to be there and increase costs accordingly. */
675 err = 1;
676
677 /* Compute and check cost at current depth. */
678 cost = reach[id].costs[depth][TRE_M_COST];
679 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
680 cost += reach[id].params.cost_subst;
681 if (cost > reach[id].params.max_cost)
682 continue; /* Cost too large. */
683
684 /* Check number of substitutes at current depth. */
685 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
686 > reach[id].params.max_subst)
687 continue; /* Too many substitutes. */
688
689 /* Check total number of errors at current depth. */
690 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
691 > reach[id].params.max_err)
692 continue; /* Too many errors. */
693
694 /* Compute overall cost. */
695 cost0 = cost;
696 if (depth > 0)
697 {
698 cost0 = reach[id].costs[0][TRE_M_COST];
699 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
700 cost0 += reach[id].params.cost_subst;
701 else
702 cost0 += default_params.cost_subst;
703 }
704 DPRINT((" subst, from %03d to %03d, cost %d: ",
705 id, dest_id, cost0));
706 }
707 else
708 DPRINT((" exact, from %03d to %03d, cost %d: ",
709 id, dest_id, cost0));
710
711 /* Compute tag values after this transition. */
712 for (i = 0; i < num_tags; i++)
713 tmp_tags[i] = reach[id].tags[i];
714 if (trans->tags)
715 for (i = 0; trans->tags[i] >= 0; i++)
716 if ((size_t)trans->tags[i] < num_tags)
717 tmp_tags[trans->tags[i]] = pos;
718
719 /* If another path has also reached this state, choose the
720 one with the smallest cost or best tags if costs are equal. */
721 if (reach_next[dest_id].pos == pos
722 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
723 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
724 && !tre_tag_order(num_tags, tnfa->tag_directions,
725 tmp_tags,
726 reach_next[dest_id].tags))))
727 {
728 DPRINT(("lose\n"));
729 continue;
730 }
731 DPRINT(("win %d %d\n",
732 reach_next[dest_id].pos,
733 reach_next[dest_id].costs[0][TRE_M_COST]));
734
735 /* Set state, position, tags, and depth. */
736 reach_next[dest_id].state = trans->state;
737 reach_next[dest_id].pos = pos;
738 for (i = 0; i < num_tags; i++)
739 reach_next[dest_id].tags[i] = tmp_tags[i];
740 reach_next[dest_id].depth = reach[id].depth;
741
742 /* Set parameters. */
743 reach_next[dest_id].params = reach[id].params;
744 if (trans->params)
745 tre_set_params(&reach_next[dest_id], trans->params,
746 default_params);
747
748 /* Set the costs after this transition. */
749 memcpy(&reach_next[dest_id].costs,
750 reach[id].costs,
751 sizeof(reach[id].costs[0][0])
752 * TRE_M_LAST * (depth + 1));
753 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
754 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
755 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
756 if (depth > 0)
757 {
758 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
759 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
760 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
761 }
762
763 if (trans->state == tnfa->final
764 && (match_eo < 0
765 || cost0 < match_costs[TRE_M_COST]
766 || (cost0 == match_costs[TRE_M_COST]
767 && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
768 {
769 DPRINT((" setting new match at %d, cost %d\n",
770 pos, cost0));
771 match_eo = pos;
772 for (i = 0; i < TRE_M_LAST; i++)
773 match_costs[i] = reach_next[dest_id].costs[0][i];
774 for (i = 0; i < num_tags; i++)
775 match_tags[i] = tmp_tags[i];
776 }
777 }
778 }
779 }
780
781 DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
782 match_costs[TRE_M_COST]));
783
784 #ifndef TRE_USE_ALLOCA
785 if (buf)
786 xfree(buf);
787 #endif /* !TRE_USE_ALLOCA */
788
789 match->cost = match_costs[TRE_M_COST];
790 match->num_ins = match_costs[TRE_M_NUM_INS];
791 match->num_del = match_costs[TRE_M_NUM_DEL];
792 match->num_subst = match_costs[TRE_M_NUM_SUBST];
793 *match_end_ofs = match_eo;
794
795 return match_eo >= 0 ? REG_OK : REG_NOMATCH;
796 }
797