tre-compile.c revision 1.1.1.2 1 /*
2 tre-compile.c - TRE regex compiler
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10 TODO:
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12 function calls.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22
23 #include "tre-internal.h"
24 #include "tre-mem.h"
25 #include "tre-stack.h"
26 #include "tre-ast.h"
27 #include "tre-parse.h"
28 #include "tre-compile.h"
29 #include "tre.h"
30 #include "xmalloc.h"
31
32 /*
33 Algorithms to setup tags so that submatch addressing can be done.
34 */
35
36
37 /* Inserts a catenation node to the root of the tree given in `node'.
38 As the left child a new tag with number `tag_id' to `node' is added,
39 and the right child is the old root. */
40 static reg_errcode_t
41 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
42 {
43 tre_catenation_t *c;
44
45 DPRINT(("add_tag_left: tag %d\n", tag_id));
46
47 c = tre_mem_alloc(mem, sizeof(*c));
48 if (c == NULL)
49 return REG_ESPACE;
50 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
51 if (c->left == NULL)
52 return REG_ESPACE;
53 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
54 if (c->right == NULL)
55 return REG_ESPACE;
56
57 c->right->obj = node->obj;
58 c->right->type = node->type;
59 c->right->nullable = -1;
60 c->right->submatch_id = -1;
61 c->right->firstpos = NULL;
62 c->right->lastpos = NULL;
63 c->right->num_tags = 0;
64 node->obj = c;
65 node->type = CATENATION;
66 return REG_OK;
67 }
68
69 /* Inserts a catenation node to the root of the tree given in `node'.
70 As the right child a new tag with number `tag_id' to `node' is added,
71 and the left child is the old root. */
72 static reg_errcode_t
73 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
74 {
75 tre_catenation_t *c;
76
77 DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
78
79 c = tre_mem_alloc(mem, sizeof(*c));
80 if (c == NULL)
81 return REG_ESPACE;
82 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
83 if (c->right == NULL)
84 return REG_ESPACE;
85 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
86 if (c->left == NULL)
87 return REG_ESPACE;
88
89 c->left->obj = node->obj;
90 c->left->type = node->type;
91 c->left->nullable = -1;
92 c->left->submatch_id = -1;
93 c->left->firstpos = NULL;
94 c->left->lastpos = NULL;
95 c->left->num_tags = 0;
96 node->obj = c;
97 node->type = CATENATION;
98 return REG_OK;
99 }
100
101 typedef enum {
102 ADDTAGS_RECURSE,
103 ADDTAGS_AFTER_ITERATION,
104 ADDTAGS_AFTER_UNION_LEFT,
105 ADDTAGS_AFTER_UNION_RIGHT,
106 ADDTAGS_AFTER_CAT_LEFT,
107 ADDTAGS_AFTER_CAT_RIGHT,
108 ADDTAGS_SET_SUBMATCH_END
109 } tre_addtags_symbol_t;
110
111
112 typedef struct {
113 int tag;
114 int next_tag;
115 } tre_tag_states_t;
116
117
118 /* Go through `regset' and set submatch data for submatches that are
119 using this tag. */
120 static void
121 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
122 {
123 int i;
124
125 for (i = 0; regset[i] >= 0; i++)
126 {
127 int id = regset[i] / 2;
128 int start = !(regset[i] % 2);
129 DPRINT((" Using tag %d for %s offset of "
130 "submatch %d\n", tag,
131 start ? "start" : "end", id));
132 if (start)
133 tnfa->submatch_data[id].so_tag = tag;
134 else
135 tnfa->submatch_data[id].eo_tag = tag;
136 }
137 regset[0] = -1;
138 }
139
140
141 /* Adds tags to appropriate locations in the parse tree in `tree', so that
142 subexpressions marked for submatch addressing can be traced. */
143 static reg_errcode_t
144 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
145 tre_tnfa_t *tnfa)
146 {
147 reg_errcode_t status = REG_OK;
148 tre_addtags_symbol_t symbol;
149 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
150 int bottom = tre_stack_num_objects(stack);
151 /* True for first pass (counting number of needed tags) */
152 int first_pass = (mem == NULL || tnfa == NULL);
153 int *regset, *orig_regset;
154 int num_tags = 0; /* Total number of tags. */
155 int num_minimals = 0; /* Number of special minimal tags. */
156 int tag = 0; /* The tag that is to be added next. */
157 int next_tag = 1; /* Next tag to use after this one. */
158 int *parents; /* Stack of submatches the current submatch is
159 contained in. */
160 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
161 tre_tag_states_t *saved_states;
162
163 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
164 if (!first_pass)
165 {
166 tnfa->end_tag = 0;
167 tnfa->minimal_tags[0] = -1;
168 }
169
170 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
171 if (regset == NULL)
172 return REG_ESPACE;
173 regset[0] = -1;
174 orig_regset = regset;
175
176 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
177 if (parents == NULL)
178 {
179 xfree(regset);
180 return REG_ESPACE;
181 }
182 parents[0] = -1;
183
184 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
185 if (saved_states == NULL)
186 {
187 xfree(regset);
188 xfree(parents);
189 return REG_ESPACE;
190 }
191 else
192 {
193 unsigned int i;
194 for (i = 0; i <= tnfa->num_submatches; i++)
195 saved_states[i].tag = -1;
196 }
197
198 STACK_PUSH(stack, voidptr, node);
199 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
200
201 while (tre_stack_num_objects(stack) > bottom)
202 {
203 if (status != REG_OK)
204 break;
205
206 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
207 switch (symbol)
208 {
209
210 case ADDTAGS_SET_SUBMATCH_END:
211 {
212 int id = tre_stack_pop_int(stack);
213 int i;
214
215 /* Add end of this submatch to regset. */
216 for (i = 0; regset[i] >= 0; i++);
217 regset[i] = id * 2 + 1;
218 regset[i + 1] = -1;
219
220 /* Pop this submatch from the parents stack. */
221 for (i = 0; parents[i] >= 0; i++);
222 parents[i - 1] = -1;
223 break;
224 }
225
226 case ADDTAGS_RECURSE:
227 node = tre_stack_pop_voidptr(stack);
228
229 if (node->submatch_id >= 0)
230 {
231 int id = node->submatch_id;
232 int i;
233
234
235 /* Add start of this submatch to regset. */
236 for (i = 0; regset[i] >= 0; i++);
237 regset[i] = id * 2;
238 regset[i + 1] = -1;
239
240 if (!first_pass)
241 {
242 for (i = 0; parents[i] >= 0; i++);
243 tnfa->submatch_data[id].parents = NULL;
244 if (i > 0)
245 {
246 int *p = xmalloc(sizeof(*p) * (i + 1));
247 if (p == NULL)
248 {
249 status = REG_ESPACE;
250 break;
251 }
252 assert(tnfa->submatch_data[id].parents == NULL);
253 tnfa->submatch_data[id].parents = p;
254 for (i = 0; parents[i] >= 0; i++)
255 p[i] = parents[i];
256 p[i] = -1;
257 }
258 }
259
260 /* Add end of this submatch to regset after processing this
261 node. */
262 STACK_PUSHX(stack, int, node->submatch_id);
263 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
264 }
265
266 switch (node->type)
267 {
268 case LITERAL:
269 {
270 tre_literal_t *lit = node->obj;
271
272 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
273 {
274 int i;
275 DPRINT(("Literal %d-%d\n",
276 (int)lit->code_min, (int)lit->code_max));
277 if (regset[0] >= 0)
278 {
279 /* Regset is not empty, so add a tag before the
280 literal or backref. */
281 if (!first_pass)
282 {
283 status = tre_add_tag_left(mem, node, tag);
284 tnfa->tag_directions[tag] = direction;
285 if (minimal_tag >= 0)
286 {
287 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
288 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
289 tnfa->minimal_tags[i] = tag;
290 tnfa->minimal_tags[i + 1] = minimal_tag;
291 tnfa->minimal_tags[i + 2] = -1;
292 minimal_tag = -1;
293 num_minimals++;
294 }
295 tre_purge_regset(regset, tnfa, tag);
296 }
297 else
298 {
299 DPRINT((" num_tags = 1\n"));
300 node->num_tags = 1;
301 }
302
303 DPRINT((" num_tags++\n"));
304 regset[0] = -1;
305 tag = next_tag;
306 num_tags++;
307 next_tag++;
308 }
309 }
310 else
311 {
312 assert(!IS_TAG(lit));
313 }
314 break;
315 }
316 case CATENATION:
317 {
318 tre_catenation_t *cat = node->obj;
319 tre_ast_node_t *left = cat->left;
320 tre_ast_node_t *right = cat->right;
321 int reserved_tag = -1;
322 DPRINT(("Catenation, next_tag = %d\n", next_tag));
323
324
325 /* After processing right child. */
326 STACK_PUSHX(stack, voidptr, node);
327 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
328
329 /* Process right child. */
330 STACK_PUSHX(stack, voidptr, right);
331 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
332
333 /* After processing left child. */
334 STACK_PUSHX(stack, int, next_tag + left->num_tags);
335 DPRINT((" Pushing %d for after left\n",
336 next_tag + left->num_tags));
337 if (left->num_tags > 0 && right->num_tags > 0)
338 {
339 /* Reserve the next tag to the right child. */
340 DPRINT((" Reserving next_tag %d to right child\n",
341 next_tag));
342 reserved_tag = next_tag;
343 next_tag++;
344 }
345 STACK_PUSHX(stack, int, reserved_tag);
346 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
347
348 /* Process left child. */
349 STACK_PUSHX(stack, voidptr, left);
350 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
351
352 }
353 break;
354 case ITERATION:
355 {
356 tre_iteration_t *iter = node->obj;
357 DPRINT(("Iteration\n"));
358
359 if (first_pass)
360 {
361 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
362 }
363 else
364 {
365 STACK_PUSHX(stack, int, tag);
366 STACK_PUSHX(stack, int, iter->minimal);
367 }
368 STACK_PUSHX(stack, voidptr, node);
369 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
370
371 STACK_PUSHX(stack, voidptr, iter->arg);
372 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
373
374 /* Regset is not empty, so add a tag here. */
375 if (regset[0] >= 0 || iter->minimal)
376 {
377 if (!first_pass)
378 {
379 int i;
380 status = tre_add_tag_left(mem, node, tag);
381 if (iter->minimal)
382 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
383 else
384 tnfa->tag_directions[tag] = direction;
385 if (minimal_tag >= 0)
386 {
387 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
388 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
389 tnfa->minimal_tags[i] = tag;
390 tnfa->minimal_tags[i + 1] = minimal_tag;
391 tnfa->minimal_tags[i + 2] = -1;
392 minimal_tag = -1;
393 num_minimals++;
394 }
395 tre_purge_regset(regset, tnfa, tag);
396 }
397
398 DPRINT((" num_tags++\n"));
399 regset[0] = -1;
400 tag = next_tag;
401 num_tags++;
402 next_tag++;
403 }
404 direction = TRE_TAG_MINIMIZE;
405 }
406 break;
407 case UNION:
408 {
409 tre_union_t *uni = node->obj;
410 tre_ast_node_t *left = uni->left;
411 tre_ast_node_t *right = uni->right;
412 int left_tag;
413 int right_tag;
414
415 if (regset[0] >= 0)
416 {
417 left_tag = next_tag;
418 right_tag = next_tag + 1;
419 }
420 else
421 {
422 left_tag = tag;
423 right_tag = next_tag;
424 }
425
426 DPRINT(("Union\n"));
427
428 /* After processing right child. */
429 STACK_PUSHX(stack, int, right_tag);
430 STACK_PUSHX(stack, int, left_tag);
431 STACK_PUSHX(stack, voidptr, regset);
432 STACK_PUSHX(stack, int, regset[0] >= 0);
433 STACK_PUSHX(stack, voidptr, node);
434 STACK_PUSHX(stack, voidptr, right);
435 STACK_PUSHX(stack, voidptr, left);
436 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
437
438 /* Process right child. */
439 STACK_PUSHX(stack, voidptr, right);
440 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
441
442 /* After processing left child. */
443 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
444
445 /* Process left child. */
446 STACK_PUSHX(stack, voidptr, left);
447 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
448
449 /* Regset is not empty, so add a tag here. */
450 if (regset[0] >= 0)
451 {
452 if (!first_pass)
453 {
454 int i;
455 status = tre_add_tag_left(mem, node, tag);
456 tnfa->tag_directions[tag] = direction;
457 if (minimal_tag >= 0)
458 {
459 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
460 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
461 tnfa->minimal_tags[i] = tag;
462 tnfa->minimal_tags[i + 1] = minimal_tag;
463 tnfa->minimal_tags[i + 2] = -1;
464 minimal_tag = -1;
465 num_minimals++;
466 }
467 tre_purge_regset(regset, tnfa, tag);
468 }
469
470 DPRINT((" num_tags++\n"));
471 regset[0] = -1;
472 tag = next_tag;
473 num_tags++;
474 next_tag++;
475 }
476
477 if (node->num_submatches > 0)
478 {
479 /* The next two tags are reserved for markers. */
480 next_tag++;
481 tag = next_tag;
482 next_tag++;
483 }
484
485 break;
486 }
487 }
488
489 if (node->submatch_id >= 0)
490 {
491 int i;
492 /* Push this submatch on the parents stack. */
493 for (i = 0; parents[i] >= 0; i++);
494 parents[i] = node->submatch_id;
495 parents[i + 1] = -1;
496 }
497
498 break; /* end case: ADDTAGS_RECURSE */
499
500 case ADDTAGS_AFTER_ITERATION:
501 {
502 int minimal = 0;
503 int enter_tag;
504 node = tre_stack_pop_voidptr(stack);
505 if (first_pass)
506 {
507 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
508 + tre_stack_pop_int(stack);
509 minimal_tag = -1;
510 }
511 else
512 {
513 minimal = tre_stack_pop_int(stack);
514 enter_tag = tre_stack_pop_int(stack);
515 if (minimal)
516 minimal_tag = enter_tag;
517 }
518
519 DPRINT(("After iteration\n"));
520 if (!first_pass)
521 {
522 DPRINT((" Setting direction to %s\n",
523 minimal ? "minimize" : "maximize"));
524 if (minimal)
525 direction = TRE_TAG_MINIMIZE;
526 else
527 direction = TRE_TAG_MAXIMIZE;
528 }
529 break;
530 }
531
532 case ADDTAGS_AFTER_CAT_LEFT:
533 {
534 int new_tag = tre_stack_pop_int(stack);
535 next_tag = tre_stack_pop_int(stack);
536 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
537 tag, next_tag));
538 if (new_tag >= 0)
539 {
540 DPRINT((" Setting tag to %d\n", new_tag));
541 tag = new_tag;
542 }
543 break;
544 }
545
546 case ADDTAGS_AFTER_CAT_RIGHT:
547 DPRINT(("After cat right\n"));
548 node = tre_stack_pop_voidptr(stack);
549 if (first_pass)
550 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
551 + ((tre_catenation_t *)node->obj)->right->num_tags;
552 break;
553
554 case ADDTAGS_AFTER_UNION_LEFT:
555 DPRINT(("After union left\n"));
556 /* Lift the bottom of the `regset' array so that when processing
557 the right operand the items currently in the array are
558 invisible. The original bottom was saved at ADDTAGS_UNION and
559 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
560 while (*regset >= 0)
561 regset++;
562 break;
563
564 case ADDTAGS_AFTER_UNION_RIGHT:
565 {
566 int added_tags, tag_left, tag_right;
567 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
568 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
569 DPRINT(("After union right\n"));
570 node = tre_stack_pop_voidptr(stack);
571 added_tags = tre_stack_pop_int(stack);
572 if (first_pass)
573 {
574 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
575 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
576 + ((node->num_submatches > 0) ? 2 : 0);
577 }
578 regset = tre_stack_pop_voidptr(stack);
579 tag_left = tre_stack_pop_int(stack);
580 tag_right = tre_stack_pop_int(stack);
581
582 /* Add tags after both children, the left child gets a smaller
583 tag than the right child. This guarantees that we prefer
584 the left child over the right child. */
585 /* XXX - This is not always necessary (if the children have
586 tags which must be seen for every match of that child). */
587 /* XXX - Check if this is the only place where tre_add_tag_right
588 is used. If so, use tre_add_tag_left (putting the tag before
589 the child as opposed after the child) and throw away
590 tre_add_tag_right. */
591 if (node->num_submatches > 0)
592 {
593 if (!first_pass)
594 {
595 status = tre_add_tag_right(mem, left, tag_left);
596 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
597 status = tre_add_tag_right(mem, right, tag_right);
598 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
599 }
600 DPRINT((" num_tags += 2\n"));
601 num_tags += 2;
602 }
603 direction = TRE_TAG_MAXIMIZE;
604 break;
605 }
606
607 default:
608 assert(0);
609 break;
610
611 } /* end switch(symbol) */
612 } /* end while(tre_stack_num_objects(stack) > bottom) */
613
614 if (!first_pass)
615 tre_purge_regset(regset, tnfa, tag);
616
617 if (!first_pass && minimal_tag >= 0)
618 {
619 int i;
620 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
621 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
622 tnfa->minimal_tags[i] = tag;
623 tnfa->minimal_tags[i + 1] = minimal_tag;
624 tnfa->minimal_tags[i + 2] = -1;
625 minimal_tag = -1;
626 num_minimals++;
627 }
628
629 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
630 first_pass? "First pass" : "Second pass", num_tags));
631
632 assert(tree->num_tags == num_tags);
633 tnfa->end_tag = num_tags;
634 tnfa->num_tags = num_tags;
635 tnfa->num_minimals = num_minimals;
636 xfree(orig_regset);
637 xfree(parents);
638 xfree(saved_states);
639 return status;
640 }
641
642
643
644 /*
645 AST to TNFA compilation routines.
646 */
647
648 typedef enum {
649 COPY_RECURSE,
650 COPY_SET_RESULT_PTR
651 } tre_copyast_symbol_t;
652
653 /* Flags for tre_copy_ast(). */
654 #define COPY_REMOVE_TAGS 1
655 #define COPY_MAXIMIZE_FIRST_TAG 2
656
657 static reg_errcode_t
658 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
659 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
660 tre_ast_node_t **copy, int *max_pos)
661 {
662 reg_errcode_t status = REG_OK;
663 int bottom = tre_stack_num_objects(stack);
664 int num_copied = 0;
665 int first_tag = 1;
666 tre_ast_node_t **result = copy;
667 tre_copyast_symbol_t symbol;
668
669 STACK_PUSH(stack, voidptr, ast);
670 STACK_PUSH(stack, int, COPY_RECURSE);
671
672 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
673 {
674 tre_ast_node_t *node;
675 if (status != REG_OK)
676 break;
677
678 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
679 switch (symbol)
680 {
681 case COPY_SET_RESULT_PTR:
682 result = tre_stack_pop_voidptr(stack);
683 break;
684 case COPY_RECURSE:
685 node = tre_stack_pop_voidptr(stack);
686 switch (node->type)
687 {
688 case LITERAL:
689 {
690 tre_literal_t *lit = node->obj;
691 int pos = lit->position;
692 int min = lit->code_min;
693 int max = lit->code_max;
694 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
695 {
696 /* XXX - e.g. [ab] has only one position but two
697 nodes, so we are creating holes in the state space
698 here. Not fatal, just wastes memory. */
699 pos += *pos_add;
700 num_copied++;
701 }
702 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
703 {
704 /* Change this tag to empty. */
705 min = EMPTY;
706 max = pos = -1;
707 }
708 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
709 && first_tag)
710 {
711 /* Maximize the first tag. */
712 tag_directions[max] = TRE_TAG_MAXIMIZE;
713 first_tag = 0;
714 }
715 *result = tre_ast_new_literal(mem, min, max, pos);
716 if (*result == NULL)
717 status = REG_ESPACE;
718
719 if (pos > *max_pos)
720 *max_pos = pos;
721 break;
722 }
723 case UNION:
724 {
725 tre_union_t *uni = node->obj;
726 tre_union_t *tmp;
727 *result = tre_ast_new_union(mem, uni->left, uni->right);
728 if (*result == NULL)
729 {
730 status = REG_ESPACE;
731 break;
732 }
733 tmp = (*result)->obj;
734 result = &tmp->left;
735 STACK_PUSHX(stack, voidptr, uni->right);
736 STACK_PUSHX(stack, int, COPY_RECURSE);
737 STACK_PUSHX(stack, voidptr, &tmp->right);
738 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
739 STACK_PUSHX(stack, voidptr, uni->left);
740 STACK_PUSHX(stack, int, COPY_RECURSE);
741 break;
742 }
743 case CATENATION:
744 {
745 tre_catenation_t *cat = node->obj;
746 tre_catenation_t *tmp;
747 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
748 if (*result == NULL)
749 {
750 status = REG_ESPACE;
751 break;
752 }
753 tmp = (*result)->obj;
754 tmp->left = NULL;
755 tmp->right = NULL;
756 result = &tmp->left;
757
758 STACK_PUSHX(stack, voidptr, cat->right);
759 STACK_PUSHX(stack, int, COPY_RECURSE);
760 STACK_PUSHX(stack, voidptr, &tmp->right);
761 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
762 STACK_PUSHX(stack, voidptr, cat->left);
763 STACK_PUSHX(stack, int, COPY_RECURSE);
764 break;
765 }
766 case ITERATION:
767 {
768 tre_iteration_t *iter = node->obj;
769 STACK_PUSHX(stack, voidptr, iter->arg);
770 STACK_PUSHX(stack, int, COPY_RECURSE);
771 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
772 iter->max, iter->minimal);
773 if (*result == NULL)
774 {
775 status = REG_ESPACE;
776 break;
777 }
778 iter = (*result)->obj;
779 result = &iter->arg;
780 break;
781 }
782 default:
783 assert(0);
784 break;
785 }
786 break;
787 }
788 }
789 *pos_add += num_copied;
790 return status;
791 }
792
793 typedef enum {
794 EXPAND_RECURSE,
795 EXPAND_AFTER_ITER
796 } tre_expand_ast_symbol_t;
797
798 /* Expands each iteration node that has a finite nonzero minimum or maximum
799 iteration count to a catenated sequence of copies of the node. */
800 static reg_errcode_t
801 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
802 int *position, tre_tag_direction_t *tag_directions,
803 int *max_depth)
804 {
805 reg_errcode_t status = REG_OK;
806 int bottom = tre_stack_num_objects(stack);
807 int pos_add = 0;
808 int pos_add_total = 0;
809 int max_pos = 0;
810 /* Current approximate matching parameters. */
811 int params[TRE_PARAM_LAST];
812 /* Approximate parameter nesting level. */
813 int params_depth = 0;
814 int iter_depth = 0;
815 int i;
816
817 for (i = 0; i < TRE_PARAM_LAST; i++)
818 params[i] = TRE_PARAM_DEFAULT;
819
820 STACK_PUSHR(stack, voidptr, ast);
821 STACK_PUSHR(stack, int, EXPAND_RECURSE);
822 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
823 {
824 tre_ast_node_t *node;
825 tre_expand_ast_symbol_t symbol;
826
827 if (status != REG_OK)
828 break;
829
830 DPRINT(("pos_add %d\n", pos_add));
831
832 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
833 node = tre_stack_pop_voidptr(stack);
834 switch (symbol)
835 {
836 case EXPAND_RECURSE:
837 switch (node->type)
838 {
839 case LITERAL:
840 {
841 tre_literal_t *lit= node->obj;
842 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
843 {
844 lit->position += pos_add;
845 if (lit->position > max_pos)
846 max_pos = lit->position;
847 }
848 break;
849 }
850 case UNION:
851 {
852 tre_union_t *uni = node->obj;
853 STACK_PUSHX(stack, voidptr, uni->right);
854 STACK_PUSHX(stack, int, EXPAND_RECURSE);
855 STACK_PUSHX(stack, voidptr, uni->left);
856 STACK_PUSHX(stack, int, EXPAND_RECURSE);
857 break;
858 }
859 case CATENATION:
860 {
861 tre_catenation_t *cat = node->obj;
862 STACK_PUSHX(stack, voidptr, cat->right);
863 STACK_PUSHX(stack, int, EXPAND_RECURSE);
864 STACK_PUSHX(stack, voidptr, cat->left);
865 STACK_PUSHX(stack, int, EXPAND_RECURSE);
866 break;
867 }
868 case ITERATION:
869 {
870 tre_iteration_t *iter = node->obj;
871 STACK_PUSHX(stack, int, pos_add);
872 STACK_PUSHX(stack, voidptr, node);
873 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
874 STACK_PUSHX(stack, voidptr, iter->arg);
875 STACK_PUSHX(stack, int, EXPAND_RECURSE);
876 /* If we are going to expand this node at EXPAND_AFTER_ITER
877 then don't increase the `pos' fields of the nodes now, it
878 will get done when expanding. */
879 if (iter->min > 1 || iter->max > 1)
880 pos_add = 0;
881 iter_depth++;
882 DPRINT(("iter\n"));
883 break;
884 }
885 default:
886 assert(0);
887 break;
888 }
889 break;
890 case EXPAND_AFTER_ITER:
891 {
892 tre_iteration_t *iter = node->obj;
893 int pos_add_last;
894 pos_add = tre_stack_pop_int(stack);
895 pos_add_last = pos_add;
896 if (iter->min > 1 || iter->max > 1)
897 {
898 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
899 int j;
900 int pos_add_save = pos_add;
901
902 /* Create a catenated sequence of copies of the node. */
903 for (j = 0; j < iter->min; j++)
904 {
905 tre_ast_node_t *copy;
906 /* Remove tags from all but the last copy. */
907 int flags = ((j + 1 < iter->min)
908 ? COPY_REMOVE_TAGS
909 : COPY_MAXIMIZE_FIRST_TAG);
910 DPRINT((" pos_add %d\n", pos_add));
911 pos_add_save = pos_add;
912 status = tre_copy_ast(mem, stack, iter->arg, flags,
913 &pos_add, tag_directions, ©,
914 &max_pos);
915 if (status != REG_OK)
916 return status;
917 if (seq1 != NULL)
918 seq1 = tre_ast_new_catenation(mem, seq1, copy);
919 else
920 seq1 = copy;
921 if (seq1 == NULL)
922 return REG_ESPACE;
923 }
924
925 if (iter->max == -1)
926 {
927 /* No upper limit. */
928 pos_add_save = pos_add;
929 status = tre_copy_ast(mem, stack, iter->arg, 0,
930 &pos_add, NULL, &seq2, &max_pos);
931 if (status != REG_OK)
932 return status;
933 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
934 if (seq2 == NULL)
935 return REG_ESPACE;
936 }
937 else
938 {
939 for (j = iter->min; j < iter->max; j++)
940 {
941 tre_ast_node_t *tmp, *copy;
942 pos_add_save = pos_add;
943 status = tre_copy_ast(mem, stack, iter->arg, 0,
944 &pos_add, NULL, ©, &max_pos);
945 if (status != REG_OK)
946 return status;
947 if (seq2 != NULL)
948 seq2 = tre_ast_new_catenation(mem, copy, seq2);
949 else
950 seq2 = copy;
951 if (seq2 == NULL)
952 return REG_ESPACE;
953 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
954 if (tmp == NULL)
955 return REG_ESPACE;
956 seq2 = tre_ast_new_union(mem, tmp, seq2);
957 if (seq2 == NULL)
958 return REG_ESPACE;
959 }
960 }
961
962 pos_add = pos_add_save;
963 if (seq1 == NULL)
964 seq1 = seq2;
965 else if (seq2 != NULL)
966 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
967 if (seq1 == NULL)
968 return REG_ESPACE;
969 node->obj = seq1->obj;
970 node->type = seq1->type;
971 }
972
973 iter_depth--;
974 pos_add_total += pos_add - pos_add_last;
975 if (iter_depth == 0)
976 pos_add = pos_add_total;
977
978 /* If approximate parameters are specified, surround the result
979 with two parameter setting nodes. The one on the left sets
980 the specified parameters, and the one on the right restores
981 the old parameters. */
982 if (iter->params)
983 {
984 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
985 int *old_params;
986
987 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
988 if (!tmp_l)
989 return REG_ESPACE;
990 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
991 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
992 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
993 if (!tmp_r)
994 return REG_ESPACE;
995 old_params = tre_mem_alloc(mem, sizeof(*old_params)
996 * TRE_PARAM_LAST);
997 if (!old_params)
998 return REG_ESPACE;
999 for (i = 0; i < TRE_PARAM_LAST; i++)
1000 old_params[i] = params[i];
1001 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
1002 old_params[TRE_PARAM_DEPTH] = params_depth;
1003 /* XXX - this is the only place where ast_new_node is
1004 needed -- should be moved inside AST module. */
1005 node_copy = tre_ast_new_node(mem, ITERATION,
1006 sizeof(tre_iteration_t));
1007 if (!node_copy)
1008 return REG_ESPACE;
1009 node_copy->obj = node->obj;
1010 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
1011 if (!tmp_node)
1012 return REG_ESPACE;
1013 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
1014 if (!tmp_node)
1015 return REG_ESPACE;
1016 /* Replace the contents of `node' with `tmp_node'. */
1017 memcpy(node, tmp_node, sizeof(*node));
1018 node->obj = tmp_node->obj;
1019 node->type = tmp_node->type;
1020 params_depth++;
1021 if (params_depth > *max_depth)
1022 *max_depth = params_depth;
1023 }
1024 break;
1025 }
1026 default:
1027 assert(0);
1028 break;
1029 }
1030 }
1031
1032 *position += pos_add_total;
1033
1034 /* `max_pos' should never be larger than `*position' if the above
1035 code works, but just an extra safeguard let's make sure
1036 `*position' is set large enough so enough memory will be
1037 allocated for the transition table. */
1038 if (max_pos > *position)
1039 *position = max_pos;
1040
1041 #ifdef TRE_DEBUG
1042 DPRINT(("Expanded AST:\n"));
1043 tre_ast_print(ast);
1044 DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
1045 #endif
1046
1047 return status;
1048 }
1049
1050 static tre_pos_and_tags_t *
1051 tre_set_empty(tre_mem_t mem)
1052 {
1053 tre_pos_and_tags_t *new_set;
1054
1055 new_set = tre_mem_calloc(mem, sizeof(*new_set));
1056 if (new_set == NULL)
1057 return NULL;
1058
1059 new_set[0].position = -1;
1060 new_set[0].code_min = -1;
1061 new_set[0].code_max = -1;
1062
1063 return new_set;
1064 }
1065
1066 static tre_pos_and_tags_t *
1067 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1068 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1069 {
1070 tre_pos_and_tags_t *new_set;
1071
1072 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1073 if (new_set == NULL)
1074 return NULL;
1075
1076 new_set[0].position = position;
1077 new_set[0].code_min = code_min;
1078 new_set[0].code_max = code_max;
1079 new_set[0].class = class;
1080 new_set[0].neg_classes = neg_classes;
1081 new_set[0].backref = backref;
1082 new_set[1].position = -1;
1083 new_set[1].code_min = -1;
1084 new_set[1].code_max = -1;
1085
1086 return new_set;
1087 }
1088
1089 static tre_pos_and_tags_t *
1090 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
1091 int *tags, int assertions, int *params)
1092 {
1093 int s1, s2, i, j;
1094 tre_pos_and_tags_t *new_set;
1095 int *new_tags;
1096 int num_tags;
1097
1098 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
1099 for (s1 = 0; set1[s1].position >= 0; s1++);
1100 for (s2 = 0; set2[s2].position >= 0; s2++);
1101 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
1102 if (!new_set )
1103 return NULL;
1104
1105 for (s1 = 0; set1[s1].position >= 0; s1++)
1106 {
1107 new_set[s1].position = set1[s1].position;
1108 new_set[s1].code_min = set1[s1].code_min;
1109 new_set[s1].code_max = set1[s1].code_max;
1110 new_set[s1].assertions = set1[s1].assertions | assertions;
1111 new_set[s1].class = set1[s1].class;
1112 new_set[s1].neg_classes = set1[s1].neg_classes;
1113 new_set[s1].backref = set1[s1].backref;
1114 if (set1[s1].tags == NULL && tags == NULL)
1115 new_set[s1].tags = NULL;
1116 else
1117 {
1118 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
1119 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
1120 * (i + num_tags + 1)));
1121 if (new_tags == NULL)
1122 return NULL;
1123 for (j = 0; j < i; j++)
1124 new_tags[j] = set1[s1].tags[j];
1125 for (i = 0; i < num_tags; i++)
1126 new_tags[j + i] = tags[i];
1127 new_tags[j + i] = -1;
1128 new_set[s1].tags = new_tags;
1129 }
1130 if (set1[s1].params)
1131 new_set[s1].params = set1[s1].params;
1132 if (params)
1133 {
1134 if (!new_set[s1].params)
1135 new_set[s1].params = params;
1136 else
1137 {
1138 new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
1139 TRE_PARAM_LAST);
1140 if (!new_set[s1].params)
1141 return NULL;
1142 for (i = 0; i < TRE_PARAM_LAST; i++)
1143 if (params[i] != TRE_PARAM_UNSET)
1144 new_set[s1].params[i] = params[i];
1145 }
1146 }
1147 }
1148
1149 for (s2 = 0; set2[s2].position >= 0; s2++)
1150 {
1151 new_set[s1 + s2].position = set2[s2].position;
1152 new_set[s1 + s2].code_min = set2[s2].code_min;
1153 new_set[s1 + s2].code_max = set2[s2].code_max;
1154 /* XXX - why not | assertions here as well? */
1155 new_set[s1 + s2].assertions = set2[s2].assertions;
1156 new_set[s1 + s2].class = set2[s2].class;
1157 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
1158 new_set[s1 + s2].backref = set2[s2].backref;
1159 if (set2[s2].tags == NULL)
1160 new_set[s1 + s2].tags = NULL;
1161 else
1162 {
1163 for (i = 0; set2[s2].tags[i] >= 0; i++);
1164 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
1165 if (new_tags == NULL)
1166 return NULL;
1167 for (j = 0; j < i; j++)
1168 new_tags[j] = set2[s2].tags[j];
1169 new_tags[j] = -1;
1170 new_set[s1 + s2].tags = new_tags;
1171 }
1172 if (set2[s2].params)
1173 new_set[s1 + s2].params = set2[s2].params;
1174 if (params)
1175 {
1176 if (!new_set[s1 + s2].params)
1177 new_set[s1 + s2].params = params;
1178 else
1179 {
1180 new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
1181 TRE_PARAM_LAST);
1182 if (!new_set[s1 + s2].params)
1183 return NULL;
1184 for (i = 0; i < TRE_PARAM_LAST; i++)
1185 if (params[i] != TRE_PARAM_UNSET)
1186 new_set[s1 + s2].params[i] = params[i];
1187 }
1188 }
1189 }
1190 new_set[s1 + s2].position = -1;
1191 return new_set;
1192 }
1193
1194 /* Finds the empty path through `node' which is the one that should be
1195 taken according to POSIX.2 rules, and adds the tags on that path to
1196 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
1197 set to the number of tags seen on the path. */
1198 static reg_errcode_t
1199 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
1200 int *assertions, int *params, int *num_tags_seen,
1201 int *params_seen)
1202 {
1203 tre_literal_t *lit;
1204 tre_union_t *uni;
1205 tre_catenation_t *cat;
1206 tre_iteration_t *iter;
1207 int i;
1208 int bottom = tre_stack_num_objects(stack);
1209 reg_errcode_t status = REG_OK;
1210 if (num_tags_seen)
1211 *num_tags_seen = 0;
1212 if (params_seen)
1213 *params_seen = 0;
1214
1215 status = tre_stack_push_voidptr(stack, node);
1216
1217 /* Walk through the tree recursively. */
1218 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1219 {
1220 node = tre_stack_pop_voidptr(stack);
1221
1222 switch (node->type)
1223 {
1224 case LITERAL:
1225 lit = (tre_literal_t *)node->obj;
1226 switch (lit->code_min)
1227 {
1228 case TAG:
1229 if (lit->code_max >= 0)
1230 {
1231 if (tags != NULL)
1232 {
1233 /* Add the tag to `tags'. */
1234 for (i = 0; tags[i] >= 0; i++)
1235 if (tags[i] == lit->code_max)
1236 break;
1237 if (tags[i] < 0)
1238 {
1239 tags[i] = lit->code_max;
1240 tags[i + 1] = -1;
1241 }
1242 }
1243 if (num_tags_seen)
1244 (*num_tags_seen)++;
1245 }
1246 break;
1247 case ASSERTION:
1248 assert(lit->code_max >= 1
1249 || lit->code_max <= ASSERT_LAST);
1250 if (assertions != NULL)
1251 *assertions |= lit->code_max;
1252 break;
1253 case PARAMETER:
1254 if (params != NULL)
1255 for (i = 0; i < TRE_PARAM_LAST; i++)
1256 params[i] = lit->u.params[i];
1257 if (params_seen != NULL)
1258 *params_seen = 1;
1259 break;
1260 case EMPTY:
1261 break;
1262 default:
1263 assert(0);
1264 break;
1265 }
1266 break;
1267
1268 case UNION:
1269 /* Subexpressions starting earlier take priority over ones
1270 starting later, so we prefer the left subexpression over the
1271 right subexpression. */
1272 uni = (tre_union_t *)node->obj;
1273 if (uni->left->nullable)
1274 STACK_PUSHX(stack, voidptr, uni->left)
1275 else if (uni->right->nullable)
1276 STACK_PUSHX(stack, voidptr, uni->right)
1277 else
1278 assert(0);
1279 break;
1280
1281 case CATENATION:
1282 /* The path must go through both children. */
1283 cat = (tre_catenation_t *)node->obj;
1284 assert(cat->left->nullable);
1285 assert(cat->right->nullable);
1286 STACK_PUSHX(stack, voidptr, cat->left);
1287 STACK_PUSHX(stack, voidptr, cat->right);
1288 break;
1289
1290 case ITERATION:
1291 /* A match with an empty string is preferred over no match at
1292 all, so we go through the argument if possible. */
1293 iter = (tre_iteration_t *)node->obj;
1294 if (iter->arg->nullable)
1295 STACK_PUSHX(stack, voidptr, iter->arg);
1296 break;
1297
1298 default:
1299 assert(0);
1300 break;
1301 }
1302 }
1303
1304 return status;
1305 }
1306
1307
1308 typedef enum {
1309 NFL_RECURSE,
1310 NFL_POST_UNION,
1311 NFL_POST_CATENATION,
1312 NFL_POST_ITERATION
1313 } tre_nfl_stack_symbol_t;
1314
1315
1316 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
1317 the nodes of the AST `tree'. */
1318 static reg_errcode_t
1319 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
1320 {
1321 int bottom = tre_stack_num_objects(stack);
1322
1323 STACK_PUSHR(stack, voidptr, tree);
1324 STACK_PUSHR(stack, int, NFL_RECURSE);
1325
1326 while (tre_stack_num_objects(stack) > bottom)
1327 {
1328 tre_nfl_stack_symbol_t symbol;
1329 tre_ast_node_t *node;
1330
1331 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
1332 node = tre_stack_pop_voidptr(stack);
1333 switch (symbol)
1334 {
1335 case NFL_RECURSE:
1336 switch (node->type)
1337 {
1338 case LITERAL:
1339 {
1340 tre_literal_t *lit = (tre_literal_t *)node->obj;
1341 if (IS_BACKREF(lit))
1342 {
1343 /* Back references: nullable = false, firstpos = {i},
1344 lastpos = {i}. */
1345 node->nullable = 0;
1346 node->firstpos = tre_set_one(mem, lit->position, 0,
1347 TRE_CHAR_MAX, 0, NULL, -1);
1348 if (!node->firstpos)
1349 return REG_ESPACE;
1350 node->lastpos = tre_set_one(mem, lit->position, 0,
1351 TRE_CHAR_MAX, 0, NULL,
1352 (int)lit->code_max);
1353 if (!node->lastpos)
1354 return REG_ESPACE;
1355 }
1356 else if (lit->code_min < 0)
1357 {
1358 /* Tags, empty strings, params, and zero width assertions:
1359 nullable = true, firstpos = {}, and lastpos = {}. */
1360 node->nullable = 1;
1361 node->firstpos = tre_set_empty(mem);
1362 if (!node->firstpos)
1363 return REG_ESPACE;
1364 node->lastpos = tre_set_empty(mem);
1365 if (!node->lastpos)
1366 return REG_ESPACE;
1367 }
1368 else
1369 {
1370 /* Literal at position i: nullable = false, firstpos = {i},
1371 lastpos = {i}. */
1372 node->nullable = 0;
1373 node->firstpos =
1374 tre_set_one(mem, lit->position, (int)lit->code_min,
1375 (int)lit->code_max, 0, NULL, -1);
1376 if (!node->firstpos)
1377 return REG_ESPACE;
1378 node->lastpos = tre_set_one(mem, lit->position,
1379 (int)lit->code_min,
1380 (int)lit->code_max,
1381 lit->u.class, lit->neg_classes,
1382 -1);
1383 if (!node->lastpos)
1384 return REG_ESPACE;
1385 }
1386 break;
1387 }
1388
1389 case UNION:
1390 /* Compute the attributes for the two subtrees, and after that
1391 for this node. */
1392 STACK_PUSHR(stack, voidptr, node);
1393 STACK_PUSHR(stack, int, NFL_POST_UNION);
1394 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
1395 STACK_PUSHR(stack, int, NFL_RECURSE);
1396 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
1397 STACK_PUSHR(stack, int, NFL_RECURSE);
1398 break;
1399
1400 case CATENATION:
1401 /* Compute the attributes for the two subtrees, and after that
1402 for this node. */
1403 STACK_PUSHR(stack, voidptr, node);
1404 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
1405 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
1406 STACK_PUSHR(stack, int, NFL_RECURSE);
1407 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
1408 STACK_PUSHR(stack, int, NFL_RECURSE);
1409 break;
1410
1411 case ITERATION:
1412 /* Compute the attributes for the subtree, and after that for
1413 this node. */
1414 STACK_PUSHR(stack, voidptr, node);
1415 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
1416 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
1417 STACK_PUSHR(stack, int, NFL_RECURSE);
1418 break;
1419 }
1420 break; /* end case: NFL_RECURSE */
1421
1422 case NFL_POST_UNION:
1423 {
1424 tre_union_t *uni = (tre_union_t *)node->obj;
1425 node->nullable = uni->left->nullable || uni->right->nullable;
1426 node->firstpos = tre_set_union(mem, uni->left->firstpos,
1427 uni->right->firstpos, NULL, 0, NULL);
1428 if (!node->firstpos)
1429 return REG_ESPACE;
1430 node->lastpos = tre_set_union(mem, uni->left->lastpos,
1431 uni->right->lastpos, NULL, 0, NULL);
1432 if (!node->lastpos)
1433 return REG_ESPACE;
1434 break;
1435 }
1436
1437 case NFL_POST_ITERATION:
1438 {
1439 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1440
1441 if (iter->min == 0 || iter->arg->nullable)
1442 node->nullable = 1;
1443 else
1444 node->nullable = 0;
1445 node->firstpos = iter->arg->firstpos;
1446 node->lastpos = iter->arg->lastpos;
1447 break;
1448 }
1449
1450 case NFL_POST_CATENATION:
1451 {
1452 int num_tags, *tags, assertions, params_seen;
1453 int *params;
1454 reg_errcode_t status;
1455 tre_catenation_t *cat = node->obj;
1456 node->nullable = cat->left->nullable && cat->right->nullable;
1457
1458 /* Compute firstpos. */
1459 if (cat->left->nullable)
1460 {
1461 /* The left side matches the empty string. Make a first pass
1462 with tre_match_empty() to get the number of tags and
1463 parameters. */
1464 status = tre_match_empty(stack, cat->left,
1465 NULL, NULL, NULL, &num_tags,
1466 ¶ms_seen);
1467 if (status != REG_OK)
1468 return status;
1469 /* Allocate arrays for the tags and parameters. */
1470 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
1471 if (!tags)
1472 return REG_ESPACE;
1473 tags[0] = -1;
1474 assertions = 0;
1475 params = NULL;
1476 if (params_seen)
1477 {
1478 params = tre_mem_alloc(mem, sizeof(*params)
1479 * TRE_PARAM_LAST);
1480 if (!params)
1481 {
1482 xfree(tags);
1483 return REG_ESPACE;
1484 }
1485 }
1486 /* Second pass with tre_mach_empty() to get the list of
1487 tags and parameters. */
1488 status = tre_match_empty(stack, cat->left, tags,
1489 &assertions, params, NULL, NULL);
1490 if (status != REG_OK)
1491 {
1492 xfree(tags);
1493 return status;
1494 }
1495 node->firstpos =
1496 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
1497 tags, assertions, params);
1498 xfree(tags);
1499 if (!node->firstpos)
1500 return REG_ESPACE;
1501 }
1502 else
1503 {
1504 node->firstpos = cat->left->firstpos;
1505 }
1506
1507 /* Compute lastpos. */
1508 if (cat->right->nullable)
1509 {
1510 /* The right side matches the empty string. Make a first pass
1511 with tre_match_empty() to get the number of tags and
1512 parameters. */
1513 status = tre_match_empty(stack, cat->right,
1514 NULL, NULL, NULL, &num_tags,
1515 ¶ms_seen);
1516 if (status != REG_OK)
1517 return status;
1518 /* Allocate arrays for the tags and parameters. */
1519 tags = xmalloc(sizeof(int) * (num_tags + 1));
1520 if (!tags)
1521 return REG_ESPACE;
1522 tags[0] = -1;
1523 assertions = 0;
1524 params = NULL;
1525 if (params_seen)
1526 {
1527 params = tre_mem_alloc(mem, sizeof(*params)
1528 * TRE_PARAM_LAST);
1529 if (!params)
1530 {
1531 xfree(tags);
1532 return REG_ESPACE;
1533 }
1534 }
1535 /* Second pass with tre_mach_empty() to get the list of
1536 tags and parameters. */
1537 status = tre_match_empty(stack, cat->right, tags,
1538 &assertions, params, NULL, NULL);
1539 if (status != REG_OK)
1540 {
1541 xfree(tags);
1542 return status;
1543 }
1544 node->lastpos =
1545 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
1546 tags, assertions, params);
1547 xfree(tags);
1548 if (!node->lastpos)
1549 return REG_ESPACE;
1550 }
1551 else
1552 {
1553 node->lastpos = cat->right->lastpos;
1554 }
1555 break;
1556 }
1557
1558 default:
1559 assert(0);
1560 break;
1561 }
1562 }
1563
1564 return REG_OK;
1565 }
1566
1567
1568 /* Adds a transition from each position in `p1' to each position in `p2'. */
1569 static reg_errcode_t
1570 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
1571 tre_tnfa_transition_t *transitions,
1572 int *counts, int *offs)
1573 {
1574 tre_pos_and_tags_t *orig_p2 = p2;
1575 tre_tnfa_transition_t *trans;
1576 int i, j, k, l, dup, prev_p2_pos;
1577
1578 if (transitions != NULL)
1579 while (p1->position >= 0)
1580 {
1581 p2 = orig_p2;
1582 prev_p2_pos = -1;
1583 while (p2->position >= 0)
1584 {
1585 /* Optimization: if this position was already handled, skip it. */
1586 if (p2->position == prev_p2_pos)
1587 {
1588 p2++;
1589 continue;
1590 }
1591 prev_p2_pos = p2->position;
1592 /* Set `trans' to point to the next unused transition from
1593 position `p1->position'. */
1594 trans = transitions + offs[p1->position];
1595 while (trans->state != NULL)
1596 {
1597 #if 0
1598 /* If we find a previous transition from `p1->position' to
1599 `p2->position', it is overwritten. This can happen only
1600 if there are nested loops in the regexp, like in "((a)*)*".
1601 In POSIX.2 repetition using the outer loop is always
1602 preferred over using the inner loop. Therefore the
1603 transition for the inner loop is useless and can be thrown
1604 away. */
1605 /* XXX - The same position is used for all nodes in a bracket
1606 expression, so this optimization cannot be used (it will
1607 break bracket expressions) unless I figure out a way to
1608 detect it here. */
1609 if (trans->state_id == p2->position)
1610 {
1611 DPRINT(("*"));
1612 break;
1613 }
1614 #endif
1615 trans++;
1616 }
1617
1618 if (trans->state == NULL)
1619 (trans + 1)->state = NULL;
1620 /* Use the character ranges, assertions, etc. from `p1' for
1621 the transition from `p1' to `p2'. */
1622 trans->code_min = (tre_cint_t) p1->code_min;
1623 trans->code_max = (tre_cint_t) p1->code_max;
1624 trans->state = transitions + offs[p2->position];
1625 trans->state_id = p2->position;
1626 trans->assertions = p1->assertions | p2->assertions
1627 | (p1->class ? ASSERT_CHAR_CLASS : 0)
1628 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
1629 if (p1->backref >= 0)
1630 {
1631 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
1632 assert(p2->backref < 0);
1633 trans->u.backref = p1->backref;
1634 trans->assertions |= ASSERT_BACKREF;
1635 }
1636 else
1637 trans->u.class = p1->class;
1638 if (p1->neg_classes != NULL)
1639 {
1640 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
1641 trans->neg_classes =
1642 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
1643 if (trans->neg_classes == NULL)
1644 return REG_ESPACE;
1645 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
1646 trans->neg_classes[i] = p1->neg_classes[i];
1647 trans->neg_classes[i] = (tre_ctype_t)0;
1648 }
1649 else
1650 trans->neg_classes = NULL;
1651
1652 /* Find out how many tags this transition has. */
1653 i = 0;
1654 if (p1->tags != NULL)
1655 while(p1->tags[i] >= 0)
1656 i++;
1657 j = 0;
1658 if (p2->tags != NULL)
1659 while(p2->tags[j] >= 0)
1660 j++;
1661
1662 /* If we are overwriting a transition, free the old tag array. */
1663 if (trans->tags != NULL)
1664 xfree(trans->tags);
1665 trans->tags = NULL;
1666
1667 /* If there were any tags, allocate an array and fill it. */
1668 if (i + j > 0)
1669 {
1670 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
1671 if (!trans->tags)
1672 return REG_ESPACE;
1673 i = 0;
1674 if (p1->tags != NULL)
1675 while(p1->tags[i] >= 0)
1676 {
1677 trans->tags[i] = p1->tags[i];
1678 i++;
1679 }
1680 l = i;
1681 j = 0;
1682 if (p2->tags != NULL)
1683 while (p2->tags[j] >= 0)
1684 {
1685 /* Don't add duplicates. */
1686 dup = 0;
1687 for (k = 0; k < i; k++)
1688 if (trans->tags[k] == p2->tags[j])
1689 {
1690 dup = 1;
1691 break;
1692 }
1693 if (!dup)
1694 trans->tags[l++] = p2->tags[j];
1695 j++;
1696 }
1697 trans->tags[l] = -1;
1698 }
1699
1700 /* Set the parameter array. If both `p2' and `p1' have same
1701 parameters, the values in `p2' override those in `p1'. */
1702 if (p1->params || p2->params)
1703 {
1704 if (!trans->params)
1705 trans->params = xmalloc(sizeof(*trans->params)
1706 * TRE_PARAM_LAST);
1707 if (!trans->params)
1708 return REG_ESPACE;
1709 for (i = 0; i < TRE_PARAM_LAST; i++)
1710 {
1711 trans->params[i] = TRE_PARAM_UNSET;
1712 if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
1713 trans->params[i] = p1->params[i];
1714 if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
1715 trans->params[i] = p2->params[i];
1716 }
1717 }
1718 else
1719 {
1720 if (trans->params)
1721 xfree(trans->params);
1722 trans->params = NULL;
1723 }
1724
1725
1726 #ifdef TRE_DEBUG
1727 {
1728 int *tags;
1729
1730 DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
1731 p1->code_min));
1732 if (p1->code_max != p1->code_min)
1733 DPRINT(("-%3d", p1->code_max));
1734 tags = trans->tags;
1735 if (tags)
1736 {
1737 DPRINT((", tags ["));
1738 while (*tags >= 0)
1739 {
1740 DPRINT(("%d", *tags));
1741 tags++;
1742 if (*tags >= 0)
1743 DPRINT((","));
1744 }
1745 DPRINT(("]"));
1746 }
1747 if (trans->assertions)
1748 DPRINT((", assert %d", trans->assertions));
1749 if (trans->assertions & ASSERT_BACKREF)
1750 DPRINT((", backref %d", trans->u.backref));
1751 else if (trans->u.class)
1752 DPRINT((", class %ld", (long)trans->u.class));
1753 if (trans->neg_classes)
1754 DPRINT((", neg_classes %p", trans->neg_classes));
1755 if (trans->params)
1756 {
1757 DPRINT((", "));
1758 tre_print_params(trans->params);
1759 }
1760 DPRINT(("\n"));
1761 }
1762 #endif /* TRE_DEBUG */
1763 p2++;
1764 }
1765 p1++;
1766 }
1767 else
1768 /* Compute a maximum limit for the number of transitions leaving
1769 from each state. */
1770 while (p1->position >= 0)
1771 {
1772 p2 = orig_p2;
1773 while (p2->position >= 0)
1774 {
1775 counts[p1->position]++;
1776 p2++;
1777 }
1778 p1++;
1779 }
1780 return REG_OK;
1781 }
1782
1783 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
1784 labelled with one character range (there are no transitions on empty
1785 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
1786 the regexp. */
1787 static reg_errcode_t
1788 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
1789 int *counts, int *offs)
1790 {
1791 tre_union_t *uni;
1792 tre_catenation_t *cat;
1793 tre_iteration_t *iter;
1794 reg_errcode_t errcode = REG_OK;
1795
1796 /* XXX - recurse using a stack!. */
1797 switch (node->type)
1798 {
1799 case LITERAL:
1800 break;
1801 case UNION:
1802 uni = (tre_union_t *)node->obj;
1803 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
1804 if (errcode != REG_OK)
1805 return errcode;
1806 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
1807 break;
1808
1809 case CATENATION:
1810 cat = (tre_catenation_t *)node->obj;
1811 /* Add a transition from each position in cat->left->lastpos
1812 to each position in cat->right->firstpos. */
1813 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
1814 transitions, counts, offs);
1815 if (errcode != REG_OK)
1816 return errcode;
1817 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
1818 if (errcode != REG_OK)
1819 return errcode;
1820 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
1821 break;
1822
1823 case ITERATION:
1824 iter = (tre_iteration_t *)node->obj;
1825 assert(iter->max == -1 || iter->max == 1);
1826
1827 if (iter->max == -1)
1828 {
1829 assert(iter->min == 0 || iter->min == 1);
1830 /* Add a transition from each last position in the iterated
1831 expression to each first position. */
1832 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
1833 transitions, counts, offs);
1834 if (errcode != REG_OK)
1835 return errcode;
1836 }
1837 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
1838 break;
1839 }
1840 return errcode;
1841 }
1842
1843
1844 #define ERROR_EXIT(err) \
1845 do \
1846 { \
1847 errcode = err; \
1848 if (/*CONSTCOND*/(void)1,1) \
1849 goto error_exit; \
1850 } \
1851 while (/*CONSTCOND*/(void)0,0)
1852
1853
1854 int
1855 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
1856 {
1857 tre_stack_t *stack;
1858 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
1859 tre_pos_and_tags_t *p;
1860 int *counts = NULL, *offs = NULL;
1861 int i, add = 0;
1862 tre_tnfa_transition_t *transitions, *initial;
1863 tre_tnfa_t *tnfa = NULL;
1864 tre_submatch_data_t *submatch_data;
1865 tre_tag_direction_t *tag_directions = NULL;
1866 reg_errcode_t errcode;
1867 tre_mem_t mem;
1868
1869 /* Parse context. */
1870 tre_parse_ctx_t parse_ctx;
1871
1872 /* Allocate a stack used throughout the compilation process for various
1873 purposes. */
1874 stack = tre_stack_new(512, 10240, 128);
1875 if (!stack)
1876 return REG_ESPACE;
1877 /* Allocate a fast memory allocator. */
1878 mem = tre_mem_new();
1879 if (!mem)
1880 {
1881 tre_stack_destroy(stack);
1882 return REG_ESPACE;
1883 }
1884
1885 /* Parse the regexp. */
1886 memset(&parse_ctx, 0, sizeof(parse_ctx));
1887 parse_ctx.mem = mem;
1888 parse_ctx.stack = stack;
1889 parse_ctx.re = regex;
1890 parse_ctx.len = n;
1891 parse_ctx.cflags = cflags;
1892 parse_ctx.max_backref = -1;
1893 /* workaround for PR#14408: use 8-bit optimizations in 8-bit mode */
1894 parse_ctx.cur_max = (cflags & REG_USEBYTES) ? 1 : TRE_MB_CUR_MAX;
1895 DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
1896 errcode = tre_parse(&parse_ctx);
1897 if (errcode != REG_OK)
1898 ERROR_EXIT(errcode);
1899 preg->re_nsub = parse_ctx.submatch_id - 1;
1900 tree = parse_ctx.result;
1901
1902 /* Back references and approximate matching cannot currently be used
1903 in the same regexp. */
1904 if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
1905 ERROR_EXIT(REG_BADPAT);
1906
1907 #ifdef TRE_DEBUG
1908 tre_ast_print(tree);
1909 #endif /* TRE_DEBUG */
1910
1911 /* Referring to nonexistent subexpressions is illegal. */
1912 if (parse_ctx.max_backref > (int)preg->re_nsub)
1913 ERROR_EXIT(REG_ESUBREG);
1914
1915 /* Allocate the TNFA struct. */
1916 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
1917 if (tnfa == NULL)
1918 ERROR_EXIT(REG_ESPACE);
1919 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
1920 tnfa->have_approx = parse_ctx.have_approx;
1921 tnfa->num_submatches = parse_ctx.submatch_id;
1922
1923 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
1924 regexp does not have back references, this can be skipped. */
1925 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
1926 {
1927 DPRINT(("tre_compile: setting up tags\n"));
1928
1929 /* Figure out how many tags we will need. */
1930 errcode = tre_add_tags(NULL, stack, tree, tnfa);
1931 if (errcode != REG_OK)
1932 ERROR_EXIT(errcode);
1933 #ifdef TRE_DEBUG
1934 tre_ast_print(tree);
1935 #endif /* TRE_DEBUG */
1936
1937 if (tnfa->num_tags > 0)
1938 {
1939 tag_directions = xmalloc(sizeof(*tag_directions)
1940 * (tnfa->num_tags + 1));
1941 if (tag_directions == NULL)
1942 ERROR_EXIT(REG_ESPACE);
1943 tnfa->tag_directions = tag_directions;
1944 memset(tag_directions, -1,
1945 sizeof(*tag_directions) * (tnfa->num_tags + 1));
1946 }
1947 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
1948 sizeof(tnfa->minimal_tags));
1949 if (tnfa->minimal_tags == NULL)
1950 ERROR_EXIT(REG_ESPACE);
1951
1952 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
1953 sizeof(*submatch_data));
1954 if (submatch_data == NULL)
1955 ERROR_EXIT(REG_ESPACE);
1956 tnfa->submatch_data = submatch_data;
1957
1958 errcode = tre_add_tags(mem, stack, tree, tnfa);
1959 if (errcode != REG_OK)
1960 ERROR_EXIT(errcode);
1961
1962 #ifdef TRE_DEBUG
1963 for (i = 0; i < parse_ctx.submatch_id; i++)
1964 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
1965 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
1966 for (i = 0; i < tnfa->num_tags; i++)
1967 DPRINT(("t%d is %s\n", i,
1968 tag_directions[i] == TRE_TAG_MINIMIZE ?
1969 "minimized" : "maximized"));
1970 #endif /* TRE_DEBUG */
1971 }
1972
1973 /* Expand iteration nodes. */
1974 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
1975 tag_directions, &tnfa->params_depth);
1976 if (errcode != REG_OK)
1977 ERROR_EXIT(errcode);
1978
1979 /* Add a dummy node for the final state.
1980 XXX - For certain patterns this dummy node can be optimized away,
1981 for example "a*" or "ab*". Figure out a simple way to detect
1982 this possibility. */
1983 tmp_ast_l = tree;
1984 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
1985 if (tmp_ast_r == NULL)
1986 ERROR_EXIT(REG_ESPACE);
1987
1988 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
1989 if (tree == NULL)
1990 ERROR_EXIT(REG_ESPACE);
1991
1992 #ifdef TRE_DEBUG
1993 tre_ast_print(tree);
1994 DPRINT(("Number of states: %d\n", parse_ctx.position));
1995 #endif /* TRE_DEBUG */
1996
1997 errcode = tre_compute_nfl(mem, stack, tree);
1998 if (errcode != REG_OK)
1999 ERROR_EXIT(errcode);
2000
2001 counts = xmalloc(sizeof(int) * parse_ctx.position);
2002 if (counts == NULL)
2003 ERROR_EXIT(REG_ESPACE);
2004
2005 offs = xmalloc(sizeof(int) * parse_ctx.position);
2006 if (offs == NULL)
2007 ERROR_EXIT(REG_ESPACE);
2008
2009 for (i = 0; i < parse_ctx.position; i++)
2010 counts[i] = 0;
2011 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2012
2013 add = 0;
2014 for (i = 0; i < parse_ctx.position; i++)
2015 {
2016 offs[i] = add;
2017 add += counts[i] + 1;
2018 counts[i] = 0;
2019 }
2020 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2021 if (transitions == NULL)
2022 ERROR_EXIT(REG_ESPACE);
2023 tnfa->transitions = transitions;
2024 tnfa->num_transitions = add;
2025
2026 DPRINT(("Converting to TNFA:\n"));
2027 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2028 if (errcode != REG_OK)
2029 ERROR_EXIT(errcode);
2030
2031 /* If in eight bit mode, compute a table of characters that can be the
2032 first character of a match. */
2033 tnfa->first_char = -1;
2034 if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
2035 {
2036 int count = 0;
2037 tre_cint_t k;
2038 DPRINT(("Characters that can start a match:"));
2039 tnfa->firstpos_chars = xcalloc(256, sizeof(char));
2040 if (tnfa->firstpos_chars == NULL)
2041 ERROR_EXIT(REG_ESPACE);
2042 for (p = tree->firstpos; p->position >= 0; p++)
2043 {
2044 tre_tnfa_transition_t *j = transitions + offs[p->position];
2045 while (j->state != NULL)
2046 {
2047 for (k = j->code_min; k <= j->code_max && k < 256; k++)
2048 {
2049 DPRINT((" %d", k));
2050 tnfa->firstpos_chars[k] = 1;
2051 count++;
2052 }
2053 j++;
2054 }
2055 }
2056 DPRINT(("\n"));
2057 #define TRE_OPTIMIZE_FIRST_CHAR 1
2058 #if TRE_OPTIMIZE_FIRST_CHAR
2059 if (count == 1)
2060 {
2061 for (k = 0; k < 256; k++)
2062 if (tnfa->firstpos_chars[k])
2063 {
2064 DPRINT(("first char must be %d\n", k));
2065 tnfa->first_char = k;
2066 xfree(tnfa->firstpos_chars);
2067 tnfa->firstpos_chars = NULL;
2068 break;
2069 }
2070 }
2071 #endif
2072
2073 }
2074 else
2075 tnfa->firstpos_chars = NULL;
2076
2077
2078 p = tree->firstpos;
2079 i = 0;
2080 while (p->position >= 0)
2081 {
2082 i++;
2083
2084 #ifdef TRE_DEBUG
2085 {
2086 int *tags;
2087 DPRINT(("initial: %d", p->position));
2088 tags = p->tags;
2089 if (tags != NULL)
2090 {
2091 if (*tags >= 0)
2092 DPRINT(("/"));
2093 while (*tags >= 0)
2094 {
2095 DPRINT(("%d", *tags));
2096 tags++;
2097 if (*tags >= 0)
2098 DPRINT((","));
2099 }
2100 }
2101 DPRINT((", assert %d", p->assertions));
2102 if (p->params)
2103 {
2104 DPRINT((", "));
2105 tre_print_params(p->params);
2106 }
2107 DPRINT(("\n"));
2108 }
2109 #endif /* TRE_DEBUG */
2110
2111 p++;
2112 }
2113
2114 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2115 if (initial == NULL)
2116 ERROR_EXIT(REG_ESPACE);
2117 tnfa->initial = initial;
2118
2119 i = 0;
2120 for (p = tree->firstpos; p->position >= 0; p++)
2121 {
2122 initial[i].state = transitions + offs[p->position];
2123 initial[i].state_id = p->position;
2124 initial[i].tags = NULL;
2125 /* Copy the arrays p->tags, and p->params, they are allocated
2126 from a tre_mem object. */
2127 if (p->tags)
2128 {
2129 int j;
2130 for (j = 0; p->tags[j] >= 0; j++);
2131 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2132 if (!initial[i].tags)
2133 ERROR_EXIT(REG_ESPACE);
2134 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2135 }
2136 initial[i].params = NULL;
2137 if (p->params)
2138 {
2139 initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
2140 if (!initial[i].params)
2141 ERROR_EXIT(REG_ESPACE);
2142 memcpy(initial[i].params, p->params,
2143 sizeof(*p->params) * TRE_PARAM_LAST);
2144 }
2145 initial[i].assertions = p->assertions;
2146 i++;
2147 }
2148 initial[i].state = NULL;
2149
2150 tnfa->num_transitions = add;
2151 tnfa->final = transitions + offs[tree->lastpos[0].position];
2152 tnfa->num_states = parse_ctx.position;
2153 tnfa->cflags = cflags;
2154
2155 DPRINT(("final state %p\n", (void *)tnfa->final));
2156
2157 tre_mem_destroy(mem);
2158 tre_stack_destroy(stack);
2159 xfree(counts);
2160 xfree(offs);
2161
2162 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2163 return REG_OK;
2164
2165 error_exit:
2166 /* Free everything that was allocated and return the error code. */
2167 tre_mem_destroy(mem);
2168 if (stack != NULL)
2169 tre_stack_destroy(stack);
2170 if (counts != NULL)
2171 xfree(counts);
2172 if (offs != NULL)
2173 xfree(offs);
2174 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2175 tre_free(preg);
2176 return errcode;
2177 }
2178
2179
2180
2181
2182 void
2183 tre_free(regex_t *preg)
2184 {
2185 tre_tnfa_t *tnfa;
2186 unsigned int i;
2187 tre_tnfa_transition_t *trans;
2188
2189 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2190 if (!tnfa)
2191 return;
2192
2193 for (i = 0; i < tnfa->num_transitions; i++)
2194 if (tnfa->transitions[i].state)
2195 {
2196 if (tnfa->transitions[i].tags)
2197 xfree(tnfa->transitions[i].tags);
2198 if (tnfa->transitions[i].neg_classes)
2199 xfree(tnfa->transitions[i].neg_classes);
2200 if (tnfa->transitions[i].params)
2201 xfree(tnfa->transitions[i].params);
2202 }
2203 if (tnfa->transitions)
2204 xfree(tnfa->transitions);
2205
2206 if (tnfa->initial)
2207 {
2208 for (trans = tnfa->initial; trans->state; trans++)
2209 {
2210 if (trans->tags)
2211 xfree(trans->tags);
2212 if (trans->params)
2213 xfree(trans->params);
2214 }
2215 xfree(tnfa->initial);
2216 }
2217
2218 if (tnfa->submatch_data)
2219 {
2220 for (i = 0; i < tnfa->num_submatches; i++)
2221 if (tnfa->submatch_data[i].parents)
2222 xfree(tnfa->submatch_data[i].parents);
2223 xfree(tnfa->submatch_data);
2224 }
2225
2226 if (tnfa->tag_directions)
2227 xfree(tnfa->tag_directions);
2228 if (tnfa->firstpos_chars)
2229 xfree(tnfa->firstpos_chars);
2230 if (tnfa->minimal_tags)
2231 xfree(tnfa->minimal_tags);
2232 xfree(tnfa);
2233 }
2234
2235 char *
2236 tre_version(void)
2237 {
2238 static char str[256];
2239 char *version;
2240
2241 if (str[0] == 0)
2242 {
2243 (void) tre_config(TRE_CONFIG_VERSION, &version);
2244 (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
2245 }
2246 return str;
2247 }
2248
2249 int
2250 tre_config(int query, void *result)
2251 {
2252 int *int_result = result;
2253 const char **string_result = result;
2254
2255 switch (query)
2256 {
2257 case TRE_CONFIG_APPROX:
2258 #ifdef TRE_APPROX
2259 *int_result = 1;
2260 #else /* !TRE_APPROX */
2261 *int_result = 0;
2262 #endif /* !TRE_APPROX */
2263 return REG_OK;
2264
2265 case TRE_CONFIG_WCHAR:
2266 #ifdef TRE_WCHAR
2267 *int_result = 1;
2268 #else /* !TRE_WCHAR */
2269 *int_result = 0;
2270 #endif /* !TRE_WCHAR */
2271 return REG_OK;
2272
2273 case TRE_CONFIG_MULTIBYTE:
2274 #ifdef TRE_MULTIBYTE
2275 *int_result = 1;
2276 #else /* !TRE_MULTIBYTE */
2277 *int_result = 0;
2278 #endif /* !TRE_MULTIBYTE */
2279 return REG_OK;
2280
2281 case TRE_CONFIG_SYSTEM_ABI:
2282 #ifdef TRE_CONFIG_SYSTEM_ABI
2283 *int_result = 1;
2284 #else /* !TRE_CONFIG_SYSTEM_ABI */
2285 *int_result = 0;
2286 #endif /* !TRE_CONFIG_SYSTEM_ABI */
2287 return REG_OK;
2288
2289 case TRE_CONFIG_VERSION:
2290 *string_result = TRE_VERSION;
2291 return REG_OK;
2292 }
2293
2294 return REG_NOMATCH;
2295 }
2296
2297
2298 /* EOF */
2299