tre-match-approx.c revision 1.1 1 /*
2 tre-match-approx.c - TRE approximate regex matching engine
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12
13 /* AIX requires this to be the first thing in the file. */
14 #ifdef TRE_USE_ALLOCA
15 #ifndef __GNUC__
16 # if HAVE_ALLOCA_H
17 # include <alloca.h>
18 # else
19 # ifdef _AIX
20 #pragma alloca
21 # else
22 # ifndef alloca /* predefined by HP cc +Olibcalls */
23 char *alloca ();
24 # endif
25 # endif
26 # endif
27 #endif
28 #endif /* TRE_USE_ALLOCA */
29
30 #define __USE_STRING_INLINES
31 #undef __NO_INLINE__
32
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <limits.h>
37 #ifdef HAVE_WCHAR_H
38 #include <wchar.h>
39 #endif /* HAVE_WCHAR_H */
40 #ifdef HAVE_WCTYPE_H
41 #include <wctype.h>
42 #endif /* HAVE_WCTYPE_H */
43 #ifndef TRE_WCHAR
44 #include <ctype.h>
45 #endif /* !TRE_WCHAR */
46 #ifdef HAVE_MALLOC_H
47 #include <malloc.h>
48 #endif /* HAVE_MALLOC_H */
49
50 #include "tre-internal.h"
51 #include "tre-match-utils.h"
52 #include "tre.h"
53 #include "xmalloc.h"
54
55 #define TRE_M_COST 0
56 #define TRE_M_NUM_INS 1
57 #define TRE_M_NUM_DEL 2
58 #define TRE_M_NUM_SUBST 3
59 #define TRE_M_NUM_ERR 4
60 #define TRE_M_LAST 5
61
62 #define TRE_M_MAX_DEPTH 3
63
64 typedef struct {
65 /* State in the TNFA transition table. */
66 tre_tnfa_transition_t *state;
67 /* Position in input string. */
68 int pos;
69 /* Tag values. */
70 int *tags;
71 /* Matching parameters. */
72 regaparams_t params;
73 /* Nesting depth of parameters. This is used as an index in
74 the `costs' array. */
75 int depth;
76 /* Costs and counter values for different parameter nesting depths. */
77 int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
78 } tre_tnfa_approx_reach_t;
79
80
81 #ifdef TRE_DEBUG
82 /* Prints the `reach' array in a readable fashion with DPRINT. */
83 static void
84 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
85 int pos, int num_tags)
86 {
87 int id;
88
89 /* Print each state on one line. */
90 DPRINT((" reach:\n"));
91 for (id = 0; id < tnfa->num_states; id++)
92 {
93 int i, j;
94 if (reach[id].pos < pos)
95 continue; /* Not reached. */
96 DPRINT((" %03d, costs ", id));
97 for (i = 0; i <= reach[id].depth; i++)
98 {
99 DPRINT(("["));
100 for (j = 0; j < TRE_M_LAST; j++)
101 {
102 DPRINT(("%2d", reach[id].costs[i][j]));
103 if (j + 1 < TRE_M_LAST)
104 DPRINT((","));
105 }
106 DPRINT(("]"));
107 if (i + 1 <= reach[id].depth)
108 DPRINT((", "));
109 }
110 DPRINT(("\n tags "));
111 for (i = 0; i < num_tags; i++)
112 {
113 DPRINT(("%02d", reach[id].tags[i]));
114 if (i + 1 < num_tags)
115 DPRINT((","));
116 }
117 DPRINT(("\n"));
118 }
119 DPRINT(("\n"));
120 }
121 #endif /* TRE_DEBUG */
122
123
124 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
125 array. If `pa' specifies default values, they are taken from
126 `default_params'. */
127 inline static void
128 tre_set_params(tre_tnfa_approx_reach_t *reach,
129 int *pa, regaparams_t default_params)
130 {
131 int value;
132
133 /* If depth is increased reset costs and counters to zero for the
134 new levels. */
135 value = pa[TRE_PARAM_DEPTH];
136 assert(value <= TRE_M_MAX_DEPTH);
137 if (value > reach->depth)
138 {
139 int i, j;
140 for (i = reach->depth + 1; i <= value; i++)
141 for (j = 0; j < TRE_M_LAST; j++)
142 reach->costs[i][j] = 0;
143 }
144 reach->depth = value;
145
146 /* Set insert cost. */
147 value = pa[TRE_PARAM_COST_INS];
148 if (value == TRE_PARAM_DEFAULT)
149 reach->params.cost_ins = default_params.cost_ins;
150 else if (value != TRE_PARAM_UNSET)
151 reach->params.cost_ins = value;
152
153 /* Set delete cost. */
154 value = pa[TRE_PARAM_COST_DEL];
155 if (value == TRE_PARAM_DEFAULT)
156 reach->params.cost_del = default_params.cost_del;
157 else if (value != TRE_PARAM_UNSET)
158 reach->params.cost_del = value;
159
160 /* Set substitute cost. */
161 value = pa[TRE_PARAM_COST_SUBST];
162 if (value == TRE_PARAM_DEFAULT)
163 reach->params.cost_subst = default_params.cost_subst;
164 else
165 reach->params.cost_subst = value;
166
167 /* Set maximum cost. */
168 value = pa[TRE_PARAM_COST_MAX];
169 if (value == TRE_PARAM_DEFAULT)
170 reach->params.max_cost = default_params.max_cost;
171 else if (value != TRE_PARAM_UNSET)
172 reach->params.max_cost = value;
173
174 /* Set maximum inserts. */
175 value = pa[TRE_PARAM_MAX_INS];
176 if (value == TRE_PARAM_DEFAULT)
177 reach->params.max_ins = default_params.max_ins;
178 else if (value != TRE_PARAM_UNSET)
179 reach->params.max_ins = value;
180
181 /* Set maximum deletes. */
182 value = pa[TRE_PARAM_MAX_DEL];
183 if (value == TRE_PARAM_DEFAULT)
184 reach->params.max_del = default_params.max_del;
185 else if (value != TRE_PARAM_UNSET)
186 reach->params.max_del = value;
187
188 /* Set maximum substitutes. */
189 value = pa[TRE_PARAM_MAX_SUBST];
190 if (value == TRE_PARAM_DEFAULT)
191 reach->params.max_subst = default_params.max_subst;
192 else if (value != TRE_PARAM_UNSET)
193 reach->params.max_subst = value;
194
195 /* Set maximum number of errors. */
196 value = pa[TRE_PARAM_MAX_ERR];
197 if (value == TRE_PARAM_DEFAULT)
198 reach->params.max_err = default_params.max_err;
199 else if (value != TRE_PARAM_UNSET)
200 reach->params.max_err = value;
201 }
202
203 reg_errcode_t
204 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
205 tre_str_type_t type, int *match_tags,
206 regamatch_t *match, regaparams_t default_params,
207 int eflags, int *match_end_ofs)
208 {
209 /* State variables required by GET_NEXT_WCHAR. */
210 tre_char_t prev_c = 0, next_c = 0;
211 const char *str_byte = string;
212 int pos = -1;
213 unsigned int pos_add_next = 1;
214 #ifdef TRE_WCHAR
215 const wchar_t *str_wide = string;
216 #ifdef TRE_MBSTATE
217 mbstate_t mbstate;
218 #endif /* !TRE_WCHAR */
219 #endif /* TRE_WCHAR */
220 int reg_notbol = eflags & REG_NOTBOL;
221 int reg_noteol = eflags & REG_NOTEOL;
222 int reg_newline = tnfa->cflags & REG_NEWLINE;
223 int str_user_end = 0;
224
225 int prev_pos;
226
227 /* Number of tags. */
228 int num_tags;
229 /* The reach tables. */
230 tre_tnfa_approx_reach_t *reach, *reach_next;
231 /* Tag array for temporary use. */
232 int *tmp_tags;
233
234 /* End offset of best match so far, or -1 if no match found yet. */
235 int match_eo = -1;
236 /* Costs of the match. */
237 int match_costs[TRE_M_LAST];
238
239 /* Space for temporary data required for matching. */
240 unsigned char *buf;
241
242 int i, id;
243
244 if (!match_tags)
245 num_tags = 0;
246 else
247 num_tags = tnfa->num_tags;
248
249 #ifdef TRE_MBSTATE
250 memset(&mbstate, '\0', sizeof(mbstate));
251 #endif /* TRE_MBSTATE */
252
253 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
254 "match_tags %p\n",
255 type, len, eflags,
256 match_tags));
257 DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
258 default_params.max_cost,
259 default_params.cost_ins,
260 default_params.cost_del,
261 default_params.cost_subst));
262
263 /* Allocate memory for temporary data required for matching. This needs to
264 be done for every matching operation to be thread safe. This allocates
265 everything in a single large block from the stack frame using alloca()
266 or with malloc() if alloca is unavailable. */
267 {
268 unsigned char *buf_cursor;
269 /* Space needed for one array of tags. */
270 int tag_bytes = sizeof(*tmp_tags) * num_tags;
271 /* Space needed for one reach table. */
272 int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
273 /* Total space needed. */
274 int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
275 /* Add some extra to make sure we can align the pointers. The multiplier
276 used here must be equal to the number of ALIGN calls below. */
277 total_bytes += (sizeof(long) - 1) * 3;
278
279 /* Allocate the memory. */
280 #ifdef TRE_USE_ALLOCA
281 buf = alloca(total_bytes);
282 #else /* !TRE_USE_ALLOCA */
283 buf = xmalloc((unsigned)total_bytes);
284 #endif /* !TRE_USE_ALLOCA */
285 if (!buf)
286 return REG_ESPACE;
287 memset(buf, 0, (size_t)total_bytes);
288
289 /* Allocate `tmp_tags' from `buf'. */
290 tmp_tags = (void *)buf;
291 buf_cursor = buf + tag_bytes;
292 buf_cursor += ALIGN(buf_cursor, long);
293
294 /* Allocate `reach' from `buf'. */
295 reach = (void *)buf_cursor;
296 buf_cursor += reach_bytes;
297 buf_cursor += ALIGN(buf_cursor, long);
298
299 /* Allocate `reach_next' from `buf'. */
300 reach_next = (void *)buf_cursor;
301 buf_cursor += reach_bytes;
302 buf_cursor += ALIGN(buf_cursor, long);
303
304 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
305 for (i = 0; i < tnfa->num_states; i++)
306 {
307 reach[i].tags = (void *)buf_cursor;
308 buf_cursor += tag_bytes;
309 reach_next[i].tags = (void *)buf_cursor;
310 buf_cursor += tag_bytes;
311 }
312 assert(buf_cursor <= buf + total_bytes);
313 }
314
315 for (i = 0; i < TRE_M_LAST; i++)
316 match_costs[i] = INT_MAX;
317
318 /* Mark the reach arrays empty. */
319 for (i = 0; i < tnfa->num_states; i++)
320 reach[i].pos = reach_next[i].pos = -2;
321
322 prev_pos = pos;
323 GET_NEXT_WCHAR();
324 pos = 0;
325
326 while (/*CONSTCOND*/1)
327 {
328 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
329
330 /* Add initial states to `reach_next' if an exact match has not yet
331 been found. */
332 if (match_costs[TRE_M_COST] > 0)
333 {
334 tre_tnfa_transition_t *trans;
335 DPRINT((" init"));
336 for (trans = tnfa->initial; trans->state; trans++)
337 {
338 int stateid = trans->state_id;
339
340 /* If this state is not currently in `reach_next', add it
341 there. */
342 if (reach_next[stateid].pos < pos)
343 {
344 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
345 {
346 /* Assertions failed, don't add this state. */
347 DPRINT((" !%d (assert)", stateid));
348 continue;
349 }
350 DPRINT((" %d", stateid));
351 reach_next[stateid].state = trans->state;
352 reach_next[stateid].pos = pos;
353
354 /* Compute tag values after this transition. */
355 for (i = 0; i < num_tags; i++)
356 reach_next[stateid].tags[i] = -1;
357
358 if (trans->tags)
359 for (i = 0; trans->tags[i] >= 0; i++)
360 if (trans->tags[i] < num_tags)
361 reach_next[stateid].tags[trans->tags[i]] = pos;
362
363 /* Set the parameters, depth, and costs. */
364 reach_next[stateid].params = default_params;
365 reach_next[stateid].depth = 0;
366 for (i = 0; i < TRE_M_LAST; i++)
367 reach_next[stateid].costs[0][i] = 0;
368 if (trans->params)
369 tre_set_params(&reach_next[stateid], trans->params,
370 default_params);
371
372 /* If this is the final state, mark the exact match. */
373 if (trans->state == tnfa->final)
374 {
375 match_eo = pos;
376 for (i = 0; i < num_tags; i++)
377 match_tags[i] = reach_next[stateid].tags[i];
378 for (i = 0; i < TRE_M_LAST; i++)
379 match_costs[i] = 0;
380 }
381 }
382 }
383 DPRINT(("\n"));
384 }
385
386
387 /* Handle inserts. This is done by pretending there's an epsilon
388 transition from each state in `reach' back to the same state.
389 We don't need to worry about the final state here; this will never
390 give a better match than what we already have. */
391 for (id = 0; id < tnfa->num_states; id++)
392 {
393 int depth;
394 int cost, cost0;
395
396 if (reach[id].pos != prev_pos)
397 {
398 DPRINT((" insert: %d not reached\n", id));
399 continue; /* Not reached. */
400 }
401
402 depth = reach[id].depth;
403
404 /* Compute and check cost at current depth. */
405 cost = reach[id].costs[depth][TRE_M_COST];
406 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
407 cost += reach[id].params.cost_ins;
408 if (cost > reach[id].params.max_cost)
409 continue; /* Cost too large. */
410
411 /* Check number of inserts at current depth. */
412 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
413 > reach[id].params.max_ins)
414 continue; /* Too many inserts. */
415
416 /* Check total number of errors at current depth. */
417 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
418 > reach[id].params.max_err)
419 continue; /* Too many errors. */
420
421 /* Compute overall cost. */
422 cost0 = cost;
423 if (depth > 0)
424 {
425 cost0 = reach[id].costs[0][TRE_M_COST];
426 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
427 cost0 += reach[id].params.cost_ins;
428 else
429 cost0 += default_params.cost_ins;
430 }
431
432 DPRINT((" insert: from %d to %d, cost %d: ", id, id,
433 reach[id].costs[depth][TRE_M_COST]));
434 if (reach_next[id].pos == pos
435 && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
436 {
437 DPRINT(("lose\n"));
438 continue;
439 }
440 DPRINT(("win\n"));
441
442 /* Copy state, position, tags, parameters, and depth. */
443 reach_next[id].state = reach[id].state;
444 reach_next[id].pos = pos;
445 for (i = 0; i < num_tags; i++)
446 reach_next[id].tags[i] = reach[id].tags[i];
447 reach_next[id].params = reach[id].params;
448 reach_next[id].depth = reach[id].depth;
449
450 /* Set the costs after this transition. */
451 memcpy(reach_next[id].costs, reach[id].costs,
452 sizeof(reach_next[id].costs[0][0])
453 * TRE_M_LAST * (depth + 1));
454 reach_next[id].costs[depth][TRE_M_COST] = cost;
455 reach_next[id].costs[depth][TRE_M_NUM_INS]++;
456 reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
457 if (depth > 0)
458 {
459 reach_next[id].costs[0][TRE_M_COST] = cost0;
460 reach_next[id].costs[0][TRE_M_NUM_INS]++;
461 reach_next[id].costs[0][TRE_M_NUM_ERR]++;
462 }
463
464 }
465
466
467 /* Handle deletes. This is done by traversing through the whole TNFA
468 pretending that all transitions are epsilon transitions, until
469 no more states can be reached with better costs. */
470 {
471 /* XXX - dynamic ringbuffer size */
472 tre_tnfa_approx_reach_t *ringbuffer[512];
473 tre_tnfa_approx_reach_t **deque_start, **deque_end;
474
475 deque_start = deque_end = ringbuffer;
476
477 /* Add all states in `reach_next' to the deque. */
478 for (id = 0; id < tnfa->num_states; id++)
479 {
480 if (reach_next[id].pos != pos)
481 continue;
482 *deque_end = &reach_next[id];
483 deque_end++;
484 assert(deque_end != deque_start);
485 }
486
487 /* Repeat until the deque is empty. */
488 while (deque_end != deque_start)
489 {
490 tre_tnfa_approx_reach_t *reach_p;
491 int depth;
492 int cost, cost0;
493 tre_tnfa_transition_t *trans;
494
495 /* Pop the first item off the deque. */
496 reach_p = *deque_start;
497 id = reach_p - reach_next;
498 depth = reach_p->depth;
499
500 /* Compute cost at current depth. */
501 cost = reach_p->costs[depth][TRE_M_COST];
502 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
503 cost += reach_p->params.cost_del;
504
505 /* Check cost, number of deletes, and total number of errors
506 at current depth. */
507 if (cost > reach_p->params.max_cost
508 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
509 > reach_p->params.max_del)
510 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
511 > reach_p->params.max_err))
512 {
513 /* Too many errors or cost too large. */
514 DPRINT((" delete: from %03d: cost too large\n", id));
515 deque_start++;
516 if (deque_start >= (ringbuffer + 512))
517 deque_start = ringbuffer;
518 continue;
519 }
520
521 /* Compute overall cost. */
522 cost0 = cost;
523 if (depth > 0)
524 {
525 cost0 = reach_p->costs[0][TRE_M_COST];
526 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
527 cost0 += reach_p->params.cost_del;
528 else
529 cost0 += default_params.cost_del;
530 }
531
532 for (trans = reach_p->state; trans->state; trans++)
533 {
534 int dest_id = trans->state_id;
535 DPRINT((" delete: from %03d to %03d, cost %d (%d): ",
536 id, dest_id, cost0, reach_p->params.max_cost));
537
538 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
539 {
540 DPRINT(("assertion failed\n"));
541 continue;
542 }
543
544 /* Compute tag values after this transition. */
545 for (i = 0; i < num_tags; i++)
546 tmp_tags[i] = reach_p->tags[i];
547 if (trans->tags)
548 for (i = 0; trans->tags[i] >= 0; i++)
549 if (trans->tags[i] < num_tags)
550 tmp_tags[trans->tags[i]] = pos;
551
552 /* If another path has also reached this state, choose the one
553 with the smallest cost or best tags if costs are equal. */
554 if (reach_next[dest_id].pos == pos
555 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
556 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
557 && (!match_tags
558 || !tre_tag_order(num_tags,
559 tnfa->tag_directions,
560 tmp_tags,
561 reach_next[dest_id].tags)))))
562 {
563 DPRINT(("lose, cost0 %d, have %d\n",
564 cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
565 continue;
566 }
567 DPRINT(("win\n"));
568
569 /* Set state, position, tags, parameters, depth, and costs. */
570 reach_next[dest_id].state = trans->state;
571 reach_next[dest_id].pos = pos;
572 for (i = 0; i < num_tags; i++)
573 reach_next[dest_id].tags[i] = tmp_tags[i];
574
575 reach_next[dest_id].params = reach_p->params;
576 if (trans->params)
577 tre_set_params(&reach_next[dest_id], trans->params,
578 default_params);
579
580 reach_next[dest_id].depth = reach_p->depth;
581 memcpy(&reach_next[dest_id].costs,
582 reach_p->costs,
583 sizeof(reach_p->costs[0][0])
584 * TRE_M_LAST * (depth + 1));
585 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
586 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
587 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
588 if (depth > 0)
589 {
590 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
591 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
592 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
593 }
594
595 if (trans->state == tnfa->final
596 && (match_eo < 0
597 || match_costs[TRE_M_COST] > cost0
598 || (match_costs[TRE_M_COST] == cost0
599 && (num_tags > 0
600 && tmp_tags[0] <= match_tags[0]))))
601 {
602 DPRINT((" setting new match at %d, cost %d\n",
603 pos, cost0));
604 match_eo = pos;
605 memcpy(match_costs, reach_next[dest_id].costs[0],
606 sizeof(match_costs[0]) * TRE_M_LAST);
607 for (i = 0; i < num_tags; i++)
608 match_tags[i] = tmp_tags[i];
609 }
610
611 /* Add to the end of the deque. */
612 *deque_end = &reach_next[dest_id];
613 deque_end++;
614 if (deque_end >= (ringbuffer + 512))
615 deque_end = ringbuffer;
616 assert(deque_end != deque_start);
617 }
618 deque_start++;
619 if (deque_start >= (ringbuffer + 512))
620 deque_start = ringbuffer;
621 }
622
623 }
624
625 #ifdef TRE_DEBUG
626 tre_print_reach(tnfa, reach_next, pos, num_tags);
627 #endif /* TRE_DEBUG */
628
629 /* Check for end of string. */
630 if (len < 0)
631 {
632 if (type == STR_USER)
633 {
634 if (str_user_end)
635 break;
636 }
637 else if (next_c == L'\0')
638 break;
639 }
640 else
641 {
642 if (pos >= len)
643 break;
644 }
645
646 prev_pos = pos;
647 GET_NEXT_WCHAR();
648
649 /* Swap `reach' and `reach_next'. */
650 {
651 tre_tnfa_approx_reach_t *tmp;
652 tmp = reach;
653 reach = reach_next;
654 reach_next = tmp;
655 }
656
657 /* Handle exact matches and substitutions. */
658 for (id = 0; id < tnfa->num_states; id++)
659 {
660 tre_tnfa_transition_t *trans;
661
662 if (reach[id].pos < prev_pos)
663 continue; /* Not reached. */
664 for (trans = reach[id].state; trans->state; trans++)
665 {
666 int dest_id;
667 int depth;
668 int cost, cost0, err;
669
670 if (trans->assertions
671 && (CHECK_ASSERTIONS(trans->assertions)
672 || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
673 {
674 DPRINT((" exact, from %d: assert failed\n", id));
675 continue;
676 }
677
678 depth = reach[id].depth;
679 dest_id = trans->state_id;
680
681 cost = reach[id].costs[depth][TRE_M_COST];
682 cost0 = reach[id].costs[0][TRE_M_COST];
683 err = 0;
684
685 if (trans->code_min > (tre_cint_t)prev_c
686 || trans->code_max < (tre_cint_t)prev_c)
687 {
688 /* Handle substitutes. The required character was not in
689 the string, so match it in place of whatever was supposed
690 to be there and increase costs accordingly. */
691 err = 1;
692
693 /* Compute and check cost at current depth. */
694 cost = reach[id].costs[depth][TRE_M_COST];
695 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
696 cost += reach[id].params.cost_subst;
697 if (cost > reach[id].params.max_cost)
698 continue; /* Cost too large. */
699
700 /* Check number of substitutes at current depth. */
701 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
702 > reach[id].params.max_subst)
703 continue; /* Too many substitutes. */
704
705 /* Check total number of errors at current depth. */
706 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
707 > reach[id].params.max_err)
708 continue; /* Too many errors. */
709
710 /* Compute overall cost. */
711 cost0 = cost;
712 if (depth > 0)
713 {
714 cost0 = reach[id].costs[0][TRE_M_COST];
715 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
716 cost0 += reach[id].params.cost_subst;
717 else
718 cost0 += default_params.cost_subst;
719 }
720 DPRINT((" subst, from %03d to %03d, cost %d: ",
721 id, dest_id, cost0));
722 }
723 else
724 DPRINT((" exact, from %03d to %03d, cost %d: ",
725 id, dest_id, cost0));
726
727 /* Compute tag values after this transition. */
728 for (i = 0; i < num_tags; i++)
729 tmp_tags[i] = reach[id].tags[i];
730 if (trans->tags)
731 for (i = 0; trans->tags[i] >= 0; i++)
732 if (trans->tags[i] < num_tags)
733 tmp_tags[trans->tags[i]] = pos;
734
735 /* If another path has also reached this state, choose the
736 one with the smallest cost or best tags if costs are equal. */
737 if (reach_next[dest_id].pos == pos
738 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
739 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
740 && !tre_tag_order(num_tags, tnfa->tag_directions,
741 tmp_tags,
742 reach_next[dest_id].tags))))
743 {
744 DPRINT(("lose\n"));
745 continue;
746 }
747 DPRINT(("win %d %d\n",
748 reach_next[dest_id].pos,
749 reach_next[dest_id].costs[0][TRE_M_COST]));
750
751 /* Set state, position, tags, and depth. */
752 reach_next[dest_id].state = trans->state;
753 reach_next[dest_id].pos = pos;
754 for (i = 0; i < num_tags; i++)
755 reach_next[dest_id].tags[i] = tmp_tags[i];
756 reach_next[dest_id].depth = reach[id].depth;
757
758 /* Set parameters. */
759 reach_next[dest_id].params = reach[id].params;
760 if (trans->params)
761 tre_set_params(&reach_next[dest_id], trans->params,
762 default_params);
763
764 /* Set the costs after this transition. */
765 memcpy(&reach_next[dest_id].costs,
766 reach[id].costs,
767 sizeof(reach[id].costs[0][0])
768 * TRE_M_LAST * (depth + 1));
769 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
770 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
771 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
772 if (depth > 0)
773 {
774 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
775 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
776 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
777 }
778
779 if (trans->state == tnfa->final
780 && (match_eo < 0
781 || cost0 < match_costs[TRE_M_COST]
782 || (cost0 == match_costs[TRE_M_COST]
783 && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
784 {
785 DPRINT((" setting new match at %d, cost %d\n",
786 pos, cost0));
787 match_eo = pos;
788 for (i = 0; i < TRE_M_LAST; i++)
789 match_costs[i] = reach_next[dest_id].costs[0][i];
790 for (i = 0; i < num_tags; i++)
791 match_tags[i] = tmp_tags[i];
792 }
793 }
794 }
795 }
796
797 DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
798 match_costs[TRE_M_COST]));
799
800 #ifndef TRE_USE_ALLOCA
801 if (buf)
802 xfree(buf);
803 #endif /* !TRE_USE_ALLOCA */
804
805 match->cost = match_costs[TRE_M_COST];
806 match->num_ins = match_costs[TRE_M_NUM_INS];
807 match->num_del = match_costs[TRE_M_NUM_DEL];
808 match->num_subst = match_costs[TRE_M_NUM_SUBST];
809 *match_end_ofs = match_eo;
810
811 return match_eo >= 0 ? REG_OK : REG_NOMATCH;
812 }
813