tre-compile.c revision 1.5 1 /*
2 tre-compile.c - TRE regex compiler
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10 TODO:
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12 function calls.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22
23 #include "tre-internal.h"
24 #include "tre-mem.h"
25 #include "tre-stack.h"
26 #include "tre-ast.h"
27 #include "tre-parse.h"
28 #include "tre-compile.h"
29 #include "tre.h"
30 #include "xmalloc.h"
31
32 /*
33 Algorithms to setup tags so that submatch addressing can be done.
34 */
35
36
37 /* Inserts a catenation node to the root of the tree given in `node'.
38 As the left child a new tag with number `tag_id' to `node' is added,
39 and the right child is the old root. */
40 static reg_errcode_t
41 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
42 {
43 tre_catenation_t *c;
44
45 DPRINT(("add_tag_left: tag %d\n", tag_id));
46
47 c = tre_mem_alloc(mem, sizeof(*c));
48 if (c == NULL)
49 return REG_ESPACE;
50 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
51 if (c->left == NULL)
52 return REG_ESPACE;
53 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
54 if (c->right == NULL)
55 return REG_ESPACE;
56
57 c->right->obj = node->obj;
58 c->right->type = node->type;
59 c->right->nullable = -1;
60 c->right->submatch_id = -1;
61 c->right->firstpos = NULL;
62 c->right->lastpos = NULL;
63 c->right->num_tags = 0;
64 node->obj = c;
65 node->type = CATENATION;
66 return REG_OK;
67 }
68
69 /* Inserts a catenation node to the root of the tree given in `node'.
70 As the right child a new tag with number `tag_id' to `node' is added,
71 and the left child is the old root. */
72 static reg_errcode_t
73 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
74 {
75 tre_catenation_t *c;
76
77 DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
78
79 c = tre_mem_alloc(mem, sizeof(*c));
80 if (c == NULL)
81 return REG_ESPACE;
82 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
83 if (c->right == NULL)
84 return REG_ESPACE;
85 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
86 if (c->left == NULL)
87 return REG_ESPACE;
88
89 c->left->obj = node->obj;
90 c->left->type = node->type;
91 c->left->nullable = -1;
92 c->left->submatch_id = -1;
93 c->left->firstpos = NULL;
94 c->left->lastpos = NULL;
95 c->left->num_tags = 0;
96 node->obj = c;
97 node->type = CATENATION;
98 return REG_OK;
99 }
100
101 typedef enum {
102 ADDTAGS_RECURSE,
103 ADDTAGS_AFTER_ITERATION,
104 ADDTAGS_AFTER_UNION_LEFT,
105 ADDTAGS_AFTER_UNION_RIGHT,
106 ADDTAGS_AFTER_CAT_LEFT,
107 ADDTAGS_AFTER_CAT_RIGHT,
108 ADDTAGS_SET_SUBMATCH_END
109 } tre_addtags_symbol_t;
110
111
112 typedef struct {
113 int tag;
114 int next_tag;
115 } tre_tag_states_t;
116
117
118 /* Go through `regset' and set submatch data for submatches that are
119 using this tag. */
120 static void
121 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
122 {
123 int i;
124
125 for (i = 0; regset[i] >= 0; i++)
126 {
127 int id = regset[i] / 2;
128 int start = !(regset[i] % 2);
129 DPRINT((" Using tag %d for %s offset of "
130 "submatch %d\n", tag,
131 start ? "start" : "end", id));
132 if (start)
133 tnfa->submatch_data[id].so_tag = tag;
134 else
135 tnfa->submatch_data[id].eo_tag = tag;
136 }
137 regset[0] = -1;
138 }
139
140
141 /* Adds tags to appropriate locations in the parse tree in `tree', so that
142 subexpressions marked for submatch addressing can be traced. */
143 static reg_errcode_t
144 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
145 tre_tnfa_t *tnfa)
146 {
147 reg_errcode_t status = REG_OK;
148 tre_addtags_symbol_t symbol;
149 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
150 int bottom = tre_stack_num_objects(stack);
151 /* True for first pass (counting number of needed tags) */
152 int first_pass = (mem == NULL || tnfa == NULL);
153 int *regset, *orig_regset;
154 int num_tags = 0; /* Total number of tags. */
155 int num_minimals = 0; /* Number of special minimal tags. */
156 int tag = 0; /* The tag that is to be added next. */
157 int next_tag = 1; /* Next tag to use after this one. */
158 int *parents; /* Stack of submatches the current submatch is
159 contained in. */
160 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
161 tre_tag_states_t *saved_states;
162
163 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
164 if (!first_pass)
165 {
166 tnfa->end_tag = 0;
167 tnfa->minimal_tags[0] = -1;
168 }
169
170 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
171 if (regset == NULL)
172 return REG_ESPACE;
173 regset[0] = -1;
174 orig_regset = regset;
175
176 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
177 if (parents == NULL)
178 {
179 xfree(regset);
180 return REG_ESPACE;
181 }
182 parents[0] = -1;
183
184 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
185 if (saved_states == NULL)
186 {
187 xfree(regset);
188 xfree(parents);
189 return REG_ESPACE;
190 }
191 else
192 {
193 unsigned int i;
194 for (i = 0; i <= tnfa->num_submatches; i++)
195 saved_states[i].tag = -1;
196 }
197
198 STACK_PUSH(stack, voidptr, node);
199 STACK_PUSH(stack, long, ADDTAGS_RECURSE);
200
201 while (tre_stack_num_objects(stack) > bottom)
202 {
203 if (status != REG_OK)
204 break;
205
206 symbol = (tre_addtags_symbol_t)tre_stack_pop_long(stack);
207 switch (symbol)
208 {
209
210 case ADDTAGS_SET_SUBMATCH_END:
211 {
212 int id = (int)tre_stack_pop_long(stack);
213 int i;
214
215 /* Add end of this submatch to regset. */
216 for (i = 0; regset[i] >= 0; i++);
217 regset[i] = id * 2 + 1;
218 regset[i + 1] = -1;
219
220 /* Pop this submatch from the parents stack. */
221 for (i = 0; parents[i] >= 0; i++);
222 parents[i - 1] = -1;
223 break;
224 }
225
226 case ADDTAGS_RECURSE:
227 node = tre_stack_pop_voidptr(stack);
228
229 if (node->submatch_id >= 0)
230 {
231 int id = node->submatch_id;
232 int i;
233
234
235 /* Add start of this submatch to regset. */
236 for (i = 0; regset[i] >= 0; i++);
237 regset[i] = id * 2;
238 regset[i + 1] = -1;
239
240 if (!first_pass)
241 {
242 for (i = 0; parents[i] >= 0; i++);
243 tnfa->submatch_data[id].parents = NULL;
244 if (i > 0)
245 {
246 int *p = xmalloc(sizeof(*p) * (i + 1));
247 if (p == NULL)
248 {
249 status = REG_ESPACE;
250 break;
251 }
252 assert(tnfa->submatch_data[id].parents == NULL);
253 tnfa->submatch_data[id].parents = p;
254 for (i = 0; parents[i] >= 0; i++)
255 p[i] = parents[i];
256 p[i] = -1;
257 }
258 }
259
260 /* Add end of this submatch to regset after processing this
261 node. */
262 STACK_PUSHX(stack, long, node->submatch_id);
263 STACK_PUSHX(stack, long, ADDTAGS_SET_SUBMATCH_END);
264 }
265
266 switch (node->type)
267 {
268 case LITERAL:
269 {
270 tre_literal_t *lit = node->obj;
271
272 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
273 {
274 int i;
275 DPRINT(("Literal %d-%d\n",
276 (int)lit->code_min, (int)lit->code_max));
277 if (regset[0] >= 0)
278 {
279 /* Regset is not empty, so add a tag before the
280 literal or backref. */
281 if (!first_pass)
282 {
283 status = tre_add_tag_left(mem, node, tag);
284 tnfa->tag_directions[tag] = direction;
285 if (minimal_tag >= 0)
286 {
287 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
288 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
289 tnfa->minimal_tags[i] = tag;
290 tnfa->minimal_tags[i + 1] = minimal_tag;
291 tnfa->minimal_tags[i + 2] = -1;
292 minimal_tag = -1;
293 num_minimals++;
294 }
295 tre_purge_regset(regset, tnfa, tag);
296 }
297 else
298 {
299 DPRINT((" num_tags = 1\n"));
300 node->num_tags = 1;
301 }
302
303 DPRINT((" num_tags++\n"));
304 regset[0] = -1;
305 tag = next_tag;
306 num_tags++;
307 next_tag++;
308 }
309 }
310 else
311 {
312 assert(!IS_TAG(lit));
313 }
314 break;
315 }
316 case CATENATION:
317 {
318 tre_catenation_t *cat = node->obj;
319 tre_ast_node_t *left = cat->left;
320 tre_ast_node_t *right = cat->right;
321 int reserved_tag = -1;
322 DPRINT(("Catenation, next_tag = %d\n", next_tag));
323
324
325 /* After processing right child. */
326 STACK_PUSHX(stack, voidptr, node);
327 STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_RIGHT);
328
329 /* Process right child. */
330 STACK_PUSHX(stack, voidptr, right);
331 STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
332
333 /* After processing left child. */
334 STACK_PUSHX(stack, long, (long)(next_tag + left->num_tags));
335 DPRINT((" Pushing %d for after left\n",
336 next_tag + left->num_tags));
337 if (left->num_tags > 0 && right->num_tags > 0)
338 {
339 /* Reserve the next tag to the right child. */
340 DPRINT((" Reserving next_tag %d to right child\n",
341 next_tag));
342 reserved_tag = next_tag;
343 next_tag++;
344 }
345 STACK_PUSHX(stack, long, reserved_tag);
346 STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_LEFT);
347
348 /* Process left child. */
349 STACK_PUSHX(stack, voidptr, left);
350 STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
351
352 }
353 break;
354 case ITERATION:
355 {
356 tre_iteration_t *iter = node->obj;
357 DPRINT(("Iteration\n"));
358
359 if (first_pass)
360 {
361 STACK_PUSHX(stack, long, regset[0] >= 0 || iter->minimal);
362 }
363 else
364 {
365 STACK_PUSHX(stack, long, tag);
366 STACK_PUSHX(stack, long, iter->minimal);
367 }
368 STACK_PUSHX(stack, voidptr, node);
369 STACK_PUSHX(stack, long, ADDTAGS_AFTER_ITERATION);
370
371 STACK_PUSHX(stack, voidptr, iter->arg);
372 STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
373
374 /* Regset is not empty, so add a tag here. */
375 if (regset[0] >= 0 || iter->minimal)
376 {
377 if (!first_pass)
378 {
379 int i;
380 status = tre_add_tag_left(mem, node, tag);
381 if (iter->minimal)
382 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
383 else
384 tnfa->tag_directions[tag] = direction;
385 if (minimal_tag >= 0)
386 {
387 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
388 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
389 tnfa->minimal_tags[i] = tag;
390 tnfa->minimal_tags[i + 1] = minimal_tag;
391 tnfa->minimal_tags[i + 2] = -1;
392 minimal_tag = -1;
393 num_minimals++;
394 }
395 tre_purge_regset(regset, tnfa, tag);
396 }
397
398 DPRINT((" num_tags++\n"));
399 regset[0] = -1;
400 tag = next_tag;
401 num_tags++;
402 next_tag++;
403 }
404 direction = TRE_TAG_MINIMIZE;
405 }
406 break;
407 case UNION:
408 {
409 tre_union_t *uni = node->obj;
410 tre_ast_node_t *left = uni->left;
411 tre_ast_node_t *right = uni->right;
412 int left_tag;
413 int right_tag;
414
415 if (regset[0] >= 0)
416 {
417 left_tag = next_tag;
418 right_tag = next_tag + 1;
419 }
420 else
421 {
422 left_tag = tag;
423 right_tag = next_tag;
424 }
425
426 DPRINT(("Union\n"));
427
428 /* After processing right child. */
429 STACK_PUSHX(stack, long, right_tag);
430 STACK_PUSHX(stack, long, left_tag);
431 STACK_PUSHX(stack, voidptr, regset);
432 STACK_PUSHX(stack, long, regset[0] >= 0);
433 STACK_PUSHX(stack, voidptr, node);
434 STACK_PUSHX(stack, voidptr, right);
435 STACK_PUSHX(stack, voidptr, left);
436 STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_RIGHT);
437
438 /* Process right child. */
439 STACK_PUSHX(stack, voidptr, right);
440 STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
441
442 /* After processing left child. */
443 STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_LEFT);
444
445 /* Process left child. */
446 STACK_PUSHX(stack, voidptr, left);
447 STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
448
449 /* Regset is not empty, so add a tag here. */
450 if (regset[0] >= 0)
451 {
452 if (!first_pass)
453 {
454 int i;
455 status = tre_add_tag_left(mem, node, tag);
456 tnfa->tag_directions[tag] = direction;
457 if (minimal_tag >= 0)
458 {
459 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
460 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
461 tnfa->minimal_tags[i] = tag;
462 tnfa->minimal_tags[i + 1] = minimal_tag;
463 tnfa->minimal_tags[i + 2] = -1;
464 minimal_tag = -1;
465 num_minimals++;
466 }
467 tre_purge_regset(regset, tnfa, tag);
468 }
469
470 DPRINT((" num_tags++\n"));
471 regset[0] = -1;
472 tag = next_tag;
473 num_tags++;
474 next_tag++;
475 }
476
477 if (node->num_submatches > 0)
478 {
479 /* The next two tags are reserved for markers. */
480 next_tag++;
481 tag = next_tag;
482 next_tag++;
483 }
484
485 break;
486 }
487 }
488
489 if (node->submatch_id >= 0)
490 {
491 int i;
492 /* Push this submatch on the parents stack. */
493 for (i = 0; parents[i] >= 0; i++);
494 parents[i] = node->submatch_id;
495 parents[i + 1] = -1;
496 }
497
498 break; /* end case: ADDTAGS_RECURSE */
499
500 case ADDTAGS_AFTER_ITERATION:
501 {
502 int minimal = 0;
503 int enter_tag;
504 node = tre_stack_pop_voidptr(stack);
505 if (first_pass)
506 {
507 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
508 + tre_stack_pop_long(stack);
509 minimal_tag = -1;
510 }
511 else
512 {
513 minimal = tre_stack_pop_int(stack);
514 enter_tag = tre_stack_pop_int(stack);
515 if (minimal)
516 minimal_tag = enter_tag;
517 }
518
519 DPRINT(("After iteration\n"));
520 if (!first_pass)
521 {
522 DPRINT((" Setting direction to %s\n",
523 minimal ? "minimize" : "maximize"));
524 if (minimal)
525 direction = TRE_TAG_MINIMIZE;
526 else
527 direction = TRE_TAG_MAXIMIZE;
528 }
529 break;
530 }
531
532 case ADDTAGS_AFTER_CAT_LEFT:
533 {
534 int new_tag = tre_stack_pop_int(stack);
535 next_tag = tre_stack_pop_int(stack);
536 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
537 tag, next_tag));
538 if (new_tag >= 0)
539 {
540 DPRINT((" Setting tag to %d\n", new_tag));
541 tag = new_tag;
542 }
543 break;
544 }
545
546 case ADDTAGS_AFTER_CAT_RIGHT:
547 DPRINT(("After cat right\n"));
548 node = tre_stack_pop_voidptr(stack);
549 if (first_pass)
550 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
551 + ((tre_catenation_t *)node->obj)->right->num_tags;
552 break;
553
554 case ADDTAGS_AFTER_UNION_LEFT:
555 DPRINT(("After union left\n"));
556 /* Lift the bottom of the `regset' array so that when processing
557 the right operand the items currently in the array are
558 invisible. The original bottom was saved at ADDTAGS_UNION and
559 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
560 while (*regset >= 0)
561 regset++;
562 break;
563
564 case ADDTAGS_AFTER_UNION_RIGHT:
565 {
566 int added_tags, tag_left, tag_right;
567 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
568 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
569 DPRINT(("After union right\n"));
570 node = tre_stack_pop_voidptr(stack);
571 added_tags = tre_stack_pop_int(stack);
572 if (first_pass)
573 {
574 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
575 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
576 + ((node->num_submatches > 0) ? 2 : 0);
577 }
578 regset = tre_stack_pop_voidptr(stack);
579 tag_left = tre_stack_pop_int(stack);
580 tag_right = tre_stack_pop_int(stack);
581
582 /* Add tags after both children, the left child gets a smaller
583 tag than the right child. This guarantees that we prefer
584 the left child over the right child. */
585 /* XXX - This is not always necessary (if the children have
586 tags which must be seen for every match of that child). */
587 /* XXX - Check if this is the only place where tre_add_tag_right
588 is used. If so, use tre_add_tag_left (putting the tag before
589 the child as opposed after the child) and throw away
590 tre_add_tag_right. */
591 if (node->num_submatches > 0)
592 {
593 if (!first_pass)
594 {
595 status = tre_add_tag_right(mem, left, tag_left);
596 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
597 status = tre_add_tag_right(mem, right, tag_right);
598 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
599 }
600 DPRINT((" num_tags += 2\n"));
601 num_tags += 2;
602 }
603 direction = TRE_TAG_MAXIMIZE;
604 break;
605 }
606
607 default:
608 assert(0);
609 break;
610
611 } /* end switch(symbol) */
612 } /* end while(tre_stack_num_objects(stack) > bottom) */
613
614 if (!first_pass)
615 tre_purge_regset(regset, tnfa, tag);
616
617 if (!first_pass && minimal_tag >= 0)
618 {
619 int i;
620 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
621 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
622 tnfa->minimal_tags[i] = tag;
623 tnfa->minimal_tags[i + 1] = minimal_tag;
624 tnfa->minimal_tags[i + 2] = -1;
625 minimal_tag = -1;
626 num_minimals++;
627 }
628
629 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
630 first_pass? "First pass" : "Second pass", num_tags));
631
632 assert(tree->num_tags == num_tags);
633 tnfa->end_tag = num_tags;
634 tnfa->num_tags = num_tags;
635 tnfa->num_minimals = num_minimals;
636 xfree(orig_regset);
637 xfree(parents);
638 xfree(saved_states);
639 return status;
640 }
641
642
643
644 /*
645 AST to TNFA compilation routines.
646 */
647
648 typedef enum {
649 COPY_RECURSE,
650 COPY_SET_RESULT_PTR
651 } tre_copyast_symbol_t;
652
653 /* Flags for tre_copy_ast(). */
654 #define COPY_REMOVE_TAGS 1
655 #define COPY_MAXIMIZE_FIRST_TAG 2
656
657 static reg_errcode_t
658 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
659 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
660 tre_ast_node_t **copy, int *max_pos)
661 {
662 reg_errcode_t status = REG_OK;
663 int bottom = tre_stack_num_objects(stack);
664 int num_copied = 0;
665 int first_tag = 1;
666 tre_ast_node_t **result = copy;
667 tre_copyast_symbol_t symbol;
668
669 STACK_PUSH(stack, voidptr, ast);
670 STACK_PUSH(stack, long, COPY_RECURSE);
671
672 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
673 {
674 tre_ast_node_t *node;
675 if (status != REG_OK)
676 break;
677
678 symbol = (tre_copyast_symbol_t)tre_stack_pop_long(stack);
679 switch (symbol)
680 {
681 case COPY_SET_RESULT_PTR:
682 result = tre_stack_pop_voidptr(stack);
683 break;
684 case COPY_RECURSE:
685 node = tre_stack_pop_voidptr(stack);
686 switch (node->type)
687 {
688 case LITERAL:
689 {
690 tre_literal_t *lit = node->obj;
691 int pos = lit->position;
692 int min = lit->code_min;
693 int max = lit->code_max;
694 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
695 {
696 /* XXX - e.g. [ab] has only one position but two
697 nodes, so we are creating holes in the state space
698 here. Not fatal, just wastes memory. */
699 pos += *pos_add;
700 num_copied++;
701 }
702 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
703 {
704 /* Change this tag to empty. */
705 min = EMPTY;
706 max = pos = -1;
707 }
708 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
709 && first_tag)
710 {
711 /* Maximize the first tag. */
712 tag_directions[max] = TRE_TAG_MAXIMIZE;
713 first_tag = 0;
714 }
715 *result = tre_ast_new_literal(mem, min, max, pos);
716 if (*result == NULL)
717 status = REG_ESPACE;
718
719 if (pos > *max_pos)
720 *max_pos = pos;
721 break;
722 }
723 case UNION:
724 {
725 tre_union_t *uni = node->obj;
726 tre_union_t *tmp;
727 *result = tre_ast_new_union(mem, uni->left, uni->right);
728 if (*result == NULL)
729 {
730 status = REG_ESPACE;
731 break;
732 }
733 tmp = (*result)->obj;
734 result = &tmp->left;
735 STACK_PUSHX(stack, voidptr, uni->right);
736 STACK_PUSHX(stack, long, COPY_RECURSE);
737 STACK_PUSHX(stack, voidptr, &tmp->right);
738 STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
739 STACK_PUSHX(stack, voidptr, uni->left);
740 STACK_PUSHX(stack, long, COPY_RECURSE);
741 break;
742 }
743 case CATENATION:
744 {
745 tre_catenation_t *cat = node->obj;
746 tre_catenation_t *tmp;
747 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
748 if (*result == NULL)
749 {
750 status = REG_ESPACE;
751 break;
752 }
753 tmp = (*result)->obj;
754 tmp->left = NULL;
755 tmp->right = NULL;
756 result = &tmp->left;
757
758 STACK_PUSHX(stack, voidptr, cat->right);
759 STACK_PUSHX(stack, long, COPY_RECURSE);
760 STACK_PUSHX(stack, voidptr, &tmp->right);
761 STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
762 STACK_PUSHX(stack, voidptr, cat->left);
763 STACK_PUSHX(stack, long, COPY_RECURSE);
764 break;
765 }
766 case ITERATION:
767 {
768 tre_iteration_t *iter = node->obj;
769 STACK_PUSHX(stack, voidptr, iter->arg);
770 STACK_PUSHX(stack, long, COPY_RECURSE);
771 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
772 iter->max, iter->minimal);
773 if (*result == NULL)
774 {
775 status = REG_ESPACE;
776 break;
777 }
778 iter = (*result)->obj;
779 result = &iter->arg;
780 break;
781 }
782 default:
783 assert(0);
784 break;
785 }
786 break;
787 }
788 }
789 *pos_add += num_copied;
790 return status;
791 }
792
793 typedef enum {
794 EXPAND_RECURSE,
795 EXPAND_AFTER_ITER
796 } tre_expand_ast_symbol_t;
797
798 /* Expands each iteration node that has a finite nonzero minimum or maximum
799 iteration count to a catenated sequence of copies of the node. */
800 static reg_errcode_t
801 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
802 int *position, tre_tag_direction_t *tag_directions,
803 int *max_depth)
804 {
805 reg_errcode_t status = REG_OK;
806 int bottom = tre_stack_num_objects(stack);
807 int pos_add = 0;
808 int pos_add_total = 0;
809 int max_pos = 0;
810 /* Current approximate matching parameters. */
811 int params[TRE_PARAM_LAST];
812 /* Approximate parameter nesting level. */
813 int params_depth = 0;
814 int iter_depth = 0;
815 int i;
816
817 for (i = 0; i < TRE_PARAM_LAST; i++)
818 params[i] = TRE_PARAM_DEFAULT;
819
820 STACK_PUSHR(stack, voidptr, ast);
821 STACK_PUSHR(stack, long, EXPAND_RECURSE);
822 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
823 {
824 tre_ast_node_t *node;
825 tre_expand_ast_symbol_t symbol;
826
827 if (status != REG_OK)
828 break;
829
830 DPRINT(("pos_add %d\n", pos_add));
831
832 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_long(stack);
833 node = tre_stack_pop_voidptr(stack);
834 switch (symbol)
835 {
836 case EXPAND_RECURSE:
837 switch (node->type)
838 {
839 case LITERAL:
840 {
841 tre_literal_t *lit= node->obj;
842 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
843 {
844 lit->position += pos_add;
845 if (lit->position > max_pos)
846 max_pos = lit->position;
847 }
848 break;
849 }
850 case UNION:
851 {
852 tre_union_t *uni = node->obj;
853 STACK_PUSHX(stack, voidptr, uni->right);
854 STACK_PUSHX(stack, long, EXPAND_RECURSE);
855 STACK_PUSHX(stack, voidptr, uni->left);
856 STACK_PUSHX(stack, long, EXPAND_RECURSE);
857 break;
858 }
859 case CATENATION:
860 {
861 tre_catenation_t *cat = node->obj;
862 STACK_PUSHX(stack, voidptr, cat->right);
863 STACK_PUSHX(stack, long, EXPAND_RECURSE);
864 STACK_PUSHX(stack, voidptr, cat->left);
865 STACK_PUSHX(stack, long, EXPAND_RECURSE);
866 break;
867 }
868 case ITERATION:
869 {
870 tre_iteration_t *iter = node->obj;
871 STACK_PUSHX(stack, long, pos_add);
872 STACK_PUSHX(stack, voidptr, node);
873 STACK_PUSHX(stack, long, EXPAND_AFTER_ITER);
874 STACK_PUSHX(stack, voidptr, iter->arg);
875 STACK_PUSHX(stack, long, EXPAND_RECURSE);
876 /* If we are going to expand this node at EXPAND_AFTER_ITER
877 then don't increase the `pos' fields of the nodes now, it
878 will get done when expanding. */
879 if (iter->min > 1 || iter->max > 1)
880 pos_add = 0;
881 iter_depth++;
882 DPRINT(("iter\n"));
883 break;
884 }
885 default:
886 assert(0);
887 break;
888 }
889 break;
890 case EXPAND_AFTER_ITER:
891 {
892 tre_iteration_t *iter = node->obj;
893 int pos_add_last;
894 pos_add = tre_stack_pop_int(stack);
895 pos_add_last = pos_add;
896 if (iter->min > 1 || iter->max > 1)
897 {
898 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
899 int j;
900 int pos_add_save = pos_add;
901
902 /* Create a catenated sequence of copies of the node. */
903 for (j = 0; j < iter->min; j++)
904 {
905 tre_ast_node_t *copy;
906 /* Remove tags from all but the last copy. */
907 int flags = ((j + 1 < iter->min)
908 ? COPY_REMOVE_TAGS
909 : COPY_MAXIMIZE_FIRST_TAG);
910 DPRINT((" pos_add %d\n", pos_add));
911 pos_add_save = pos_add;
912 status = tre_copy_ast(mem, stack, iter->arg, flags,
913 &pos_add, tag_directions, ©,
914 &max_pos);
915 if (status != REG_OK)
916 return status;
917 if (seq1 != NULL)
918 seq1 = tre_ast_new_catenation(mem, seq1, copy);
919 else
920 seq1 = copy;
921 if (seq1 == NULL)
922 return REG_ESPACE;
923 }
924
925 if (iter->max == -1)
926 {
927 /* No upper limit. */
928 pos_add_save = pos_add;
929 status = tre_copy_ast(mem, stack, iter->arg, 0,
930 &pos_add, NULL, &seq2, &max_pos);
931 if (status != REG_OK)
932 return status;
933 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
934 if (seq2 == NULL)
935 return REG_ESPACE;
936 }
937 else
938 {
939 for (j = iter->min; j < iter->max; j++)
940 {
941 tre_ast_node_t *tmp, *copy;
942 pos_add_save = pos_add;
943 status = tre_copy_ast(mem, stack, iter->arg, 0,
944 &pos_add, NULL, ©, &max_pos);
945 if (status != REG_OK)
946 return status;
947 if (seq2 != NULL)
948 seq2 = tre_ast_new_catenation(mem, copy, seq2);
949 else
950 seq2 = copy;
951 if (seq2 == NULL)
952 return REG_ESPACE;
953 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
954 if (tmp == NULL)
955 return REG_ESPACE;
956 seq2 = tre_ast_new_union(mem, tmp, seq2);
957 if (seq2 == NULL)
958 return REG_ESPACE;
959 }
960 }
961
962 pos_add = pos_add_save;
963 if (seq1 == NULL)
964 seq1 = seq2;
965 else if (seq2 != NULL)
966 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
967 if (seq1 == NULL)
968 return REG_ESPACE;
969 node->obj = seq1->obj;
970 node->type = seq1->type;
971 }
972
973 iter_depth--;
974 pos_add_total += pos_add - pos_add_last;
975 if (iter_depth == 0)
976 pos_add = pos_add_total;
977
978 /* If approximate parameters are specified, surround the result
979 with two parameter setting nodes. The one on the left sets
980 the specified parameters, and the one on the right restores
981 the old parameters. */
982 if (iter->params)
983 {
984 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
985 int *old_params;
986
987 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
988 if (!tmp_l)
989 return REG_ESPACE;
990 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
991 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
992 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
993 if (!tmp_r)
994 return REG_ESPACE;
995 old_params = tre_mem_alloc(mem, sizeof(*old_params)
996 * TRE_PARAM_LAST);
997 if (!old_params)
998 return REG_ESPACE;
999 for (i = 0; i < TRE_PARAM_LAST; i++)
1000 old_params[i] = params[i];
1001 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
1002 old_params[TRE_PARAM_DEPTH] = params_depth;
1003 /* XXX - this is the only place where ast_new_node is
1004 needed -- should be moved inside AST module. */
1005 node_copy = tre_ast_new_node(mem, ITERATION,
1006 sizeof(tre_iteration_t));
1007 if (!node_copy)
1008 return REG_ESPACE;
1009 node_copy->obj = node->obj;
1010 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
1011 if (!tmp_node)
1012 return REG_ESPACE;
1013 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
1014 if (!tmp_node)
1015 return REG_ESPACE;
1016 /* Replace the contents of `node' with `tmp_node'. */
1017 memcpy(node, tmp_node, sizeof(*node));
1018 node->obj = tmp_node->obj;
1019 node->type = tmp_node->type;
1020 params_depth++;
1021 if (params_depth > *max_depth)
1022 *max_depth = params_depth;
1023 }
1024 break;
1025 }
1026 default:
1027 assert(0);
1028 break;
1029 }
1030 }
1031
1032 *position += pos_add_total;
1033
1034 /* `max_pos' should never be larger than `*position' if the above
1035 code works, but just an extra safeguard let's make sure
1036 `*position' is set large enough so enough memory will be
1037 allocated for the transition table. */
1038 if (max_pos > *position)
1039 *position = max_pos;
1040
1041 #ifdef TRE_DEBUG
1042 DPRINT(("Expanded AST:\n"));
1043 tre_ast_print(ast);
1044 DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
1045 #endif
1046
1047 return status;
1048 }
1049
1050 static tre_pos_and_tags_t *
1051 tre_set_empty(tre_mem_t mem)
1052 {
1053 tre_pos_and_tags_t *new_set;
1054
1055 new_set = tre_mem_calloc(mem, sizeof(*new_set));
1056 if (new_set == NULL)
1057 return NULL;
1058
1059 new_set[0].position = -1;
1060 new_set[0].code_min = -1;
1061 new_set[0].code_max = -1;
1062
1063 return new_set;
1064 }
1065
1066 static tre_pos_and_tags_t *
1067 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1068 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1069 {
1070 tre_pos_and_tags_t *new_set;
1071
1072 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1073 if (new_set == NULL)
1074 return NULL;
1075
1076 new_set[0].position = position;
1077 new_set[0].code_min = code_min;
1078 new_set[0].code_max = code_max;
1079 new_set[0].class = class;
1080 new_set[0].neg_classes = neg_classes;
1081 new_set[0].backref = backref;
1082 new_set[1].position = -1;
1083 new_set[1].code_min = -1;
1084 new_set[1].code_max = -1;
1085
1086 return new_set;
1087 }
1088
1089 static tre_pos_and_tags_t *
1090 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
1091 int *tags, int assertions, int *params)
1092 {
1093 int s1, s2, i, j;
1094 tre_pos_and_tags_t *new_set;
1095 int *new_tags;
1096 int num_tags;
1097
1098 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
1099 for (s1 = 0; set1[s1].position >= 0; s1++);
1100 for (s2 = 0; set2[s2].position >= 0; s2++);
1101 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
1102 if (!new_set )
1103 return NULL;
1104
1105 for (s1 = 0; set1[s1].position >= 0; s1++)
1106 {
1107 new_set[s1].position = set1[s1].position;
1108 new_set[s1].code_min = set1[s1].code_min;
1109 new_set[s1].code_max = set1[s1].code_max;
1110 new_set[s1].assertions = set1[s1].assertions | assertions;
1111 new_set[s1].class = set1[s1].class;
1112 new_set[s1].neg_classes = set1[s1].neg_classes;
1113 new_set[s1].backref = set1[s1].backref;
1114 if (set1[s1].tags == NULL && tags == NULL)
1115 new_set[s1].tags = NULL;
1116 else
1117 {
1118 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
1119 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
1120 * (i + num_tags + 1)));
1121 if (new_tags == NULL)
1122 return NULL;
1123 for (j = 0; j < i; j++)
1124 new_tags[j] = set1[s1].tags[j];
1125 for (i = 0; i < num_tags; i++)
1126 new_tags[j + i] = tags[i];
1127 new_tags[j + i] = -1;
1128 new_set[s1].tags = new_tags;
1129 }
1130 if (set1[s1].params)
1131 new_set[s1].params = set1[s1].params;
1132 if (params)
1133 {
1134 if (!new_set[s1].params)
1135 new_set[s1].params = params;
1136 else
1137 {
1138 new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
1139 TRE_PARAM_LAST);
1140 if (!new_set[s1].params)
1141 return NULL;
1142 for (i = 0; i < TRE_PARAM_LAST; i++)
1143 if (params[i] != TRE_PARAM_UNSET)
1144 new_set[s1].params[i] = params[i];
1145 }
1146 }
1147 }
1148
1149 for (s2 = 0; set2[s2].position >= 0; s2++)
1150 {
1151 new_set[s1 + s2].position = set2[s2].position;
1152 new_set[s1 + s2].code_min = set2[s2].code_min;
1153 new_set[s1 + s2].code_max = set2[s2].code_max;
1154 /* XXX - why not | assertions here as well? */
1155 new_set[s1 + s2].assertions = set2[s2].assertions;
1156 new_set[s1 + s2].class = set2[s2].class;
1157 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
1158 new_set[s1 + s2].backref = set2[s2].backref;
1159 if (set2[s2].tags == NULL)
1160 new_set[s1 + s2].tags = NULL;
1161 else
1162 {
1163 for (i = 0; set2[s2].tags[i] >= 0; i++);
1164 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
1165 if (new_tags == NULL)
1166 return NULL;
1167 for (j = 0; j < i; j++)
1168 new_tags[j] = set2[s2].tags[j];
1169 new_tags[j] = -1;
1170 new_set[s1 + s2].tags = new_tags;
1171 }
1172 if (set2[s2].params)
1173 new_set[s1 + s2].params = set2[s2].params;
1174 if (params)
1175 {
1176 if (!new_set[s1 + s2].params)
1177 new_set[s1 + s2].params = params;
1178 else
1179 {
1180 new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
1181 TRE_PARAM_LAST);
1182 if (!new_set[s1 + s2].params)
1183 return NULL;
1184 for (i = 0; i < TRE_PARAM_LAST; i++)
1185 if (params[i] != TRE_PARAM_UNSET)
1186 new_set[s1 + s2].params[i] = params[i];
1187 }
1188 }
1189 }
1190 new_set[s1 + s2].position = -1;
1191 return new_set;
1192 }
1193
1194 /* Finds the empty path through `node' which is the one that should be
1195 taken according to POSIX.2 rules, and adds the tags on that path to
1196 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
1197 set to the number of tags seen on the path. */
1198 static reg_errcode_t
1199 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
1200 int *assertions, int *params, int *num_tags_seen,
1201 int *params_seen)
1202 {
1203 tre_literal_t *lit;
1204 tre_union_t *uni;
1205 tre_catenation_t *cat;
1206 tre_iteration_t *iter;
1207 int i;
1208 int bottom = tre_stack_num_objects(stack);
1209 reg_errcode_t status = REG_OK;
1210 if (num_tags_seen)
1211 *num_tags_seen = 0;
1212 if (params_seen)
1213 *params_seen = 0;
1214
1215 status = tre_stack_push_voidptr(stack, node);
1216
1217 /* Walk through the tree recursively. */
1218 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1219 {
1220 node = tre_stack_pop_voidptr(stack);
1221
1222 switch (node->type)
1223 {
1224 case LITERAL:
1225 lit = (tre_literal_t *)node->obj;
1226 switch (lit->code_min)
1227 {
1228 case TAG:
1229 if (lit->code_max >= 0)
1230 {
1231 if (tags != NULL)
1232 {
1233 /* Add the tag to `tags'. */
1234 for (i = 0; tags[i] >= 0; i++)
1235 if (tags[i] == lit->code_max)
1236 break;
1237 if (tags[i] < 0)
1238 {
1239 tags[i] = lit->code_max;
1240 tags[i + 1] = -1;
1241 }
1242 }
1243 if (num_tags_seen)
1244 (*num_tags_seen)++;
1245 }
1246 break;
1247 case ASSERTION:
1248 assert(lit->code_max >= 1
1249 || lit->code_max <= ASSERT_LAST);
1250 if (assertions != NULL)
1251 *assertions |= lit->code_max;
1252 break;
1253 case PARAMETER:
1254 if (params != NULL)
1255 for (i = 0; i < TRE_PARAM_LAST; i++)
1256 params[i] = lit->u.params[i];
1257 if (params_seen != NULL)
1258 *params_seen = 1;
1259 break;
1260 case EMPTY:
1261 break;
1262 default:
1263 assert(0);
1264 break;
1265 }
1266 break;
1267
1268 case UNION:
1269 /* Subexpressions starting earlier take priority over ones
1270 starting later, so we prefer the left subexpression over the
1271 right subexpression. */
1272 uni = (tre_union_t *)node->obj;
1273 if (uni->left->nullable)
1274 STACK_PUSHX(stack, voidptr, uni->left)
1275 else if (uni->right->nullable)
1276 STACK_PUSHX(stack, voidptr, uni->right)
1277 else
1278 assert(0);
1279 break;
1280
1281 case CATENATION:
1282 /* The path must go through both children. */
1283 cat = (tre_catenation_t *)node->obj;
1284 assert(cat->left->nullable);
1285 assert(cat->right->nullable);
1286 STACK_PUSHX(stack, voidptr, cat->left);
1287 STACK_PUSHX(stack, voidptr, cat->right);
1288 break;
1289
1290 case ITERATION:
1291 /* A match with an empty string is preferred over no match at
1292 all, so we go through the argument if possible. */
1293 iter = (tre_iteration_t *)node->obj;
1294 if (iter->arg->nullable)
1295 STACK_PUSHX(stack, voidptr, iter->arg);
1296 break;
1297
1298 default:
1299 assert(0);
1300 break;
1301 }
1302 }
1303
1304 return status;
1305 }
1306
1307
1308 typedef enum {
1309 NFL_RECURSE,
1310 NFL_POST_UNION,
1311 NFL_POST_CATENATION,
1312 NFL_POST_ITERATION
1313 } tre_nfl_stack_symbol_t;
1314
1315
1316 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
1317 the nodes of the AST `tree'. */
1318 static reg_errcode_t
1319 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
1320 {
1321 int bottom = tre_stack_num_objects(stack);
1322
1323 STACK_PUSHR(stack, voidptr, tree);
1324 STACK_PUSHR(stack, long, NFL_RECURSE);
1325
1326 while (tre_stack_num_objects(stack) > bottom)
1327 {
1328 tre_nfl_stack_symbol_t symbol;
1329 tre_ast_node_t *node;
1330
1331 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_long(stack);
1332 node = tre_stack_pop_voidptr(stack);
1333 switch (symbol)
1334 {
1335 case NFL_RECURSE:
1336 switch (node->type)
1337 {
1338 case LITERAL:
1339 {
1340 tre_literal_t *lit = (tre_literal_t *)node->obj;
1341 if (IS_BACKREF(lit))
1342 {
1343 /* Back references: nullable = false, firstpos = {i},
1344 lastpos = {i}. */
1345 node->nullable = 0;
1346 node->firstpos = tre_set_one(mem, lit->position, 0,
1347 TRE_CHAR_MAX, 0, NULL, -1);
1348 if (!node->firstpos)
1349 return REG_ESPACE;
1350 node->lastpos = tre_set_one(mem, lit->position, 0,
1351 TRE_CHAR_MAX, 0, NULL,
1352 (int)lit->code_max);
1353 if (!node->lastpos)
1354 return REG_ESPACE;
1355 }
1356 else if (lit->code_min < 0)
1357 {
1358 /* Tags, empty strings, params, and zero width assertions:
1359 nullable = true, firstpos = {}, and lastpos = {}. */
1360 node->nullable = 1;
1361 node->firstpos = tre_set_empty(mem);
1362 if (!node->firstpos)
1363 return REG_ESPACE;
1364 node->lastpos = tre_set_empty(mem);
1365 if (!node->lastpos)
1366 return REG_ESPACE;
1367 }
1368 else
1369 {
1370 /* Literal at position i: nullable = false, firstpos = {i},
1371 lastpos = {i}. */
1372 node->nullable = 0;
1373 node->firstpos =
1374 tre_set_one(mem, lit->position, (int)lit->code_min,
1375 (int)lit->code_max, 0, NULL, -1);
1376 if (!node->firstpos)
1377 return REG_ESPACE;
1378 node->lastpos = tre_set_one(mem, lit->position,
1379 (int)lit->code_min,
1380 (int)lit->code_max,
1381 lit->u.class, lit->neg_classes,
1382 -1);
1383 if (!node->lastpos)
1384 return REG_ESPACE;
1385 }
1386 break;
1387 }
1388
1389 case UNION:
1390 /* Compute the attributes for the two subtrees, and after that
1391 for this node. */
1392 STACK_PUSHR(stack, voidptr, node);
1393 STACK_PUSHR(stack, long, NFL_POST_UNION);
1394 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
1395 STACK_PUSHR(stack, long, NFL_RECURSE);
1396 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
1397 STACK_PUSHR(stack, long, NFL_RECURSE);
1398 break;
1399
1400 case CATENATION:
1401 /* Compute the attributes for the two subtrees, and after that
1402 for this node. */
1403 STACK_PUSHR(stack, voidptr, node);
1404 STACK_PUSHR(stack, long, NFL_POST_CATENATION);
1405 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
1406 STACK_PUSHR(stack, long, NFL_RECURSE);
1407 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
1408 STACK_PUSHR(stack, long, NFL_RECURSE);
1409 break;
1410
1411 case ITERATION:
1412 /* Compute the attributes for the subtree, and after that for
1413 this node. */
1414 STACK_PUSHR(stack, voidptr, node);
1415 STACK_PUSHR(stack, long, NFL_POST_ITERATION);
1416 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
1417 STACK_PUSHR(stack, long, NFL_RECURSE);
1418 break;
1419 }
1420 break; /* end case: NFL_RECURSE */
1421
1422 case NFL_POST_UNION:
1423 {
1424 tre_union_t *uni = (tre_union_t *)node->obj;
1425 node->nullable = uni->left->nullable || uni->right->nullable;
1426 node->firstpos = tre_set_union(mem, uni->left->firstpos,
1427 uni->right->firstpos, NULL, 0, NULL);
1428 if (!node->firstpos)
1429 return REG_ESPACE;
1430 node->lastpos = tre_set_union(mem, uni->left->lastpos,
1431 uni->right->lastpos, NULL, 0, NULL);
1432 if (!node->lastpos)
1433 return REG_ESPACE;
1434 break;
1435 }
1436
1437 case NFL_POST_ITERATION:
1438 {
1439 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1440
1441 if (iter->min == 0 || iter->arg->nullable)
1442 node->nullable = 1;
1443 else
1444 node->nullable = 0;
1445 node->firstpos = iter->arg->firstpos;
1446 node->lastpos = iter->arg->lastpos;
1447 break;
1448 }
1449
1450 case NFL_POST_CATENATION:
1451 {
1452 int num_tags, *tags, assertions, params_seen;
1453 int *params;
1454 reg_errcode_t status;
1455 tre_catenation_t *cat = node->obj;
1456 node->nullable = cat->left->nullable && cat->right->nullable;
1457
1458 /* Compute firstpos. */
1459 if (cat->left->nullable)
1460 {
1461 /* The left side matches the empty string. Make a first pass
1462 with tre_match_empty() to get the number of tags and
1463 parameters. */
1464 status = tre_match_empty(stack, cat->left,
1465 NULL, NULL, NULL, &num_tags,
1466 ¶ms_seen);
1467 if (status != REG_OK)
1468 return status;
1469 /* Allocate arrays for the tags and parameters. */
1470 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
1471 if (!tags)
1472 return REG_ESPACE;
1473 tags[0] = -1;
1474 assertions = 0;
1475 params = NULL;
1476 if (params_seen)
1477 {
1478 params = tre_mem_alloc(mem, sizeof(*params)
1479 * TRE_PARAM_LAST);
1480 if (!params)
1481 {
1482 xfree(tags);
1483 return REG_ESPACE;
1484 }
1485 }
1486 /* Second pass with tre_mach_empty() to get the list of
1487 tags and parameters. */
1488 status = tre_match_empty(stack, cat->left, tags,
1489 &assertions, params, NULL, NULL);
1490 if (status != REG_OK)
1491 {
1492 xfree(tags);
1493 return status;
1494 }
1495 node->firstpos =
1496 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
1497 tags, assertions, params);
1498 xfree(tags);
1499 if (!node->firstpos)
1500 return REG_ESPACE;
1501 }
1502 else
1503 {
1504 node->firstpos = cat->left->firstpos;
1505 }
1506
1507 /* Compute lastpos. */
1508 if (cat->right->nullable)
1509 {
1510 /* The right side matches the empty string. Make a first pass
1511 with tre_match_empty() to get the number of tags and
1512 parameters. */
1513 status = tre_match_empty(stack, cat->right,
1514 NULL, NULL, NULL, &num_tags,
1515 ¶ms_seen);
1516 if (status != REG_OK)
1517 return status;
1518 /* Allocate arrays for the tags and parameters. */
1519 tags = xmalloc(sizeof(int) * (num_tags + 1));
1520 if (!tags)
1521 return REG_ESPACE;
1522 tags[0] = -1;
1523 assertions = 0;
1524 params = NULL;
1525 if (params_seen)
1526 {
1527 params = tre_mem_alloc(mem, sizeof(*params)
1528 * TRE_PARAM_LAST);
1529 if (!params)
1530 {
1531 xfree(tags);
1532 return REG_ESPACE;
1533 }
1534 }
1535 /* Second pass with tre_mach_empty() to get the list of
1536 tags and parameters. */
1537 status = tre_match_empty(stack, cat->right, tags,
1538 &assertions, params, NULL, NULL);
1539 if (status != REG_OK)
1540 {
1541 xfree(tags);
1542 return status;
1543 }
1544 node->lastpos =
1545 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
1546 tags, assertions, params);
1547 xfree(tags);
1548 if (!node->lastpos)
1549 return REG_ESPACE;
1550 }
1551 else
1552 {
1553 node->lastpos = cat->right->lastpos;
1554 }
1555 break;
1556 }
1557
1558 default:
1559 assert(0);
1560 break;
1561 }
1562 }
1563
1564 return REG_OK;
1565 }
1566
1567
1568 /* Adds a transition from each position in `p1' to each position in `p2'. */
1569 static reg_errcode_t
1570 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
1571 tre_tnfa_transition_t *transitions,
1572 int *counts, int *offs)
1573 {
1574 tre_pos_and_tags_t *orig_p2 = p2;
1575 tre_tnfa_transition_t *trans;
1576 int i, j, k, l, dup, prev_p2_pos;
1577
1578 if (transitions != NULL)
1579 while (p1->position >= 0)
1580 {
1581 p2 = orig_p2;
1582 prev_p2_pos = -1;
1583 while (p2->position >= 0)
1584 {
1585 /* Optimization: if this position was already handled, skip it. */
1586 if (p2->position == prev_p2_pos)
1587 {
1588 p2++;
1589 continue;
1590 }
1591 prev_p2_pos = p2->position;
1592 /* Set `trans' to point to the next unused transition from
1593 position `p1->position'. */
1594 trans = transitions + offs[p1->position];
1595 while (trans->state != NULL)
1596 {
1597 #if 0
1598 /* If we find a previous transition from `p1->position' to
1599 `p2->position', it is overwritten. This can happen only
1600 if there are nested loops in the regexp, like in "((a)*)*".
1601 In POSIX.2 repetition using the outer loop is always
1602 preferred over using the inner loop. Therefore the
1603 transition for the inner loop is useless and can be thrown
1604 away. */
1605 /* XXX - The same position is used for all nodes in a bracket
1606 expression, so this optimization cannot be used (it will
1607 break bracket expressions) unless I figure out a way to
1608 detect it here. */
1609 if (trans->state_id == p2->position)
1610 {
1611 DPRINT(("*"));
1612 break;
1613 }
1614 #endif
1615 trans++;
1616 }
1617
1618 if (trans->state == NULL)
1619 (trans + 1)->state = NULL;
1620 /* Use the character ranges, assertions, etc. from `p1' for
1621 the transition from `p1' to `p2'. */
1622 trans->code_min = (tre_cint_t) p1->code_min;
1623 trans->code_max = (tre_cint_t) p1->code_max;
1624 trans->state = transitions + offs[p2->position];
1625 trans->state_id = p2->position;
1626 trans->assertions = p1->assertions | p2->assertions
1627 | (p1->class ? ASSERT_CHAR_CLASS : 0)
1628 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
1629 if (p1->backref >= 0)
1630 {
1631 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
1632 assert(p2->backref < 0);
1633 trans->u.backref = p1->backref;
1634 trans->assertions |= ASSERT_BACKREF;
1635 }
1636 else
1637 trans->u.class = p1->class;
1638 if (p1->neg_classes != NULL)
1639 {
1640 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
1641 trans->neg_classes =
1642 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
1643 if (trans->neg_classes == NULL)
1644 return REG_ESPACE;
1645 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
1646 trans->neg_classes[i] = p1->neg_classes[i];
1647 trans->neg_classes[i] = (tre_ctype_t)0;
1648 }
1649 else
1650 trans->neg_classes = NULL;
1651
1652 /* Find out how many tags this transition has. */
1653 i = 0;
1654 if (p1->tags != NULL)
1655 while(p1->tags[i] >= 0)
1656 i++;
1657 j = 0;
1658 if (p2->tags != NULL)
1659 while(p2->tags[j] >= 0)
1660 j++;
1661
1662 /* If we are overwriting a transition, free the old tag array. */
1663 if (trans->tags != NULL)
1664 xfree(trans->tags);
1665 trans->tags = NULL;
1666
1667 /* If there were any tags, allocate an array and fill it. */
1668 if (i + j > 0)
1669 {
1670 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
1671 if (!trans->tags)
1672 return REG_ESPACE;
1673 i = 0;
1674 if (p1->tags != NULL)
1675 while(p1->tags[i] >= 0)
1676 {
1677 trans->tags[i] = p1->tags[i];
1678 i++;
1679 }
1680 l = i;
1681 j = 0;
1682 if (p2->tags != NULL)
1683 while (p2->tags[j] >= 0)
1684 {
1685 /* Don't add duplicates. */
1686 dup = 0;
1687 for (k = 0; k < i; k++)
1688 if (trans->tags[k] == p2->tags[j])
1689 {
1690 dup = 1;
1691 break;
1692 }
1693 if (!dup)
1694 trans->tags[l++] = p2->tags[j];
1695 j++;
1696 }
1697 trans->tags[l] = -1;
1698 }
1699
1700 /* Set the parameter array. If both `p2' and `p1' have same
1701 parameters, the values in `p2' override those in `p1'. */
1702 if (p1->params || p2->params)
1703 {
1704 if (!trans->params)
1705 trans->params = xmalloc(sizeof(*trans->params)
1706 * TRE_PARAM_LAST);
1707 if (!trans->params)
1708 return REG_ESPACE;
1709 for (i = 0; i < TRE_PARAM_LAST; i++)
1710 {
1711 trans->params[i] = TRE_PARAM_UNSET;
1712 if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
1713 trans->params[i] = p1->params[i];
1714 if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
1715 trans->params[i] = p2->params[i];
1716 }
1717 }
1718 else
1719 {
1720 if (trans->params)
1721 xfree(trans->params);
1722 trans->params = NULL;
1723 }
1724
1725
1726 #ifdef TRE_DEBUG
1727 {
1728 int *tags;
1729
1730 DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
1731 p1->code_min));
1732 if (p1->code_max != p1->code_min)
1733 DPRINT(("-%3d", p1->code_max));
1734 tags = trans->tags;
1735 if (tags)
1736 {
1737 DPRINT((", tags ["));
1738 while (*tags >= 0)
1739 {
1740 DPRINT(("%d", *tags));
1741 tags++;
1742 if (*tags >= 0)
1743 DPRINT((","));
1744 }
1745 DPRINT(("]"));
1746 }
1747 if (trans->assertions)
1748 DPRINT((", assert %d", trans->assertions));
1749 if (trans->assertions & ASSERT_BACKREF)
1750 DPRINT((", backref %d", trans->u.backref));
1751 else if (trans->u.class)
1752 DPRINT((", class %ld", (long)trans->u.class));
1753 if (trans->neg_classes)
1754 DPRINT((", neg_classes %p", trans->neg_classes));
1755 if (trans->params)
1756 {
1757 DPRINT((", "));
1758 tre_print_params(trans->params);
1759 }
1760 DPRINT(("\n"));
1761 }
1762 #endif /* TRE_DEBUG */
1763 p2++;
1764 }
1765 p1++;
1766 }
1767 else
1768 /* Compute a maximum limit for the number of transitions leaving
1769 from each state. */
1770 while (p1->position >= 0)
1771 {
1772 p2 = orig_p2;
1773 while (p2->position >= 0)
1774 {
1775 counts[p1->position]++;
1776 p2++;
1777 }
1778 p1++;
1779 }
1780 return REG_OK;
1781 }
1782
1783 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
1784 labelled with one character range (there are no transitions on empty
1785 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
1786 the regexp. */
1787 static reg_errcode_t
1788 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
1789 int *counts, int *offs)
1790 {
1791 tre_union_t *uni;
1792 tre_catenation_t *cat;
1793 tre_iteration_t *iter;
1794 reg_errcode_t errcode = REG_OK;
1795
1796 /* XXX - recurse using a stack!. */
1797 switch (node->type)
1798 {
1799 case LITERAL:
1800 break;
1801 case UNION:
1802 uni = (tre_union_t *)node->obj;
1803 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
1804 if (errcode != REG_OK)
1805 return errcode;
1806 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
1807 break;
1808
1809 case CATENATION:
1810 cat = (tre_catenation_t *)node->obj;
1811 /* Add a transition from each position in cat->left->lastpos
1812 to each position in cat->right->firstpos. */
1813 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
1814 transitions, counts, offs);
1815 if (errcode != REG_OK)
1816 return errcode;
1817 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
1818 if (errcode != REG_OK)
1819 return errcode;
1820 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
1821 break;
1822
1823 case ITERATION:
1824 iter = (tre_iteration_t *)node->obj;
1825 assert(iter->max == -1 || iter->max == 1);
1826
1827 if (iter->max == -1)
1828 {
1829 assert(iter->min == 0 || iter->min == 1);
1830 /* Add a transition from each last position in the iterated
1831 expression to each first position. */
1832 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
1833 transitions, counts, offs);
1834 if (errcode != REG_OK)
1835 return errcode;
1836 }
1837 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
1838 break;
1839 }
1840 return errcode;
1841 }
1842
1843
1844 #define ERROR_EXIT(err) \
1845 do \
1846 { \
1847 errcode = err; \
1848 if (/*CONSTCOND*/(void)1,1) \
1849 goto error_exit; \
1850 } \
1851 while (/*CONSTCOND*/(void)0,0)
1852
1853
1854 int
1855 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
1856 {
1857 tre_stack_t *stack;
1858 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
1859 tre_pos_and_tags_t *p;
1860 int *counts = NULL, *offs = NULL;
1861 int i, add = 0;
1862 tre_tnfa_transition_t *transitions, *initial;
1863 tre_tnfa_t *tnfa = NULL;
1864 tre_submatch_data_t *submatch_data;
1865 tre_tag_direction_t *tag_directions = NULL;
1866 reg_errcode_t errcode;
1867 tre_mem_t mem;
1868
1869 /* Parse context. */
1870 tre_parse_ctx_t parse_ctx;
1871
1872 /* Allocate a stack used throughout the compilation process for various
1873 purposes. */
1874 stack = tre_stack_new(512, 10240, 128);
1875 if (!stack)
1876 return REG_ESPACE;
1877 /* Allocate a fast memory allocator. */
1878 mem = tre_mem_new();
1879 if (!mem)
1880 {
1881 tre_stack_destroy(stack);
1882 return REG_ESPACE;
1883 }
1884
1885 /* Parse the regexp. */
1886 memset(&parse_ctx, 0, sizeof(parse_ctx));
1887 parse_ctx.mem = mem;
1888 parse_ctx.stack = stack;
1889 parse_ctx.re = regex;
1890 parse_ctx.len = n;
1891 parse_ctx.cflags = cflags;
1892 parse_ctx.max_backref = -1;
1893 /* workaround for PR#14408: use 8-bit optimizations in 8-bit mode */
1894 #ifdef REG_USEBYTES
1895 parse_ctx.cur_max = (cflags & REG_USEBYTES) ? 1 : TRE_MB_CUR_MAX;
1896 #else
1897 parse_ctx.cur_max = TRE_MB_CUR_MAX;
1898 #endif
1899 DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
1900 errcode = tre_parse(&parse_ctx);
1901 if (errcode != REG_OK)
1902 ERROR_EXIT(errcode);
1903 preg->re_nsub = parse_ctx.submatch_id - 1;
1904 tree = parse_ctx.result;
1905
1906 /* Back references and approximate matching cannot currently be used
1907 in the same regexp. */
1908 if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
1909 ERROR_EXIT(REG_BADPAT);
1910
1911 #ifdef TRE_DEBUG
1912 tre_ast_print(tree);
1913 #endif /* TRE_DEBUG */
1914
1915 /* Referring to nonexistent subexpressions is illegal. */
1916 if (parse_ctx.max_backref > (int)preg->re_nsub)
1917 ERROR_EXIT(REG_ESUBREG);
1918
1919 /* Allocate the TNFA struct. */
1920 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
1921 if (tnfa == NULL)
1922 ERROR_EXIT(REG_ESPACE);
1923 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
1924 tnfa->have_approx = parse_ctx.have_approx;
1925 tnfa->num_submatches = parse_ctx.submatch_id;
1926
1927 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
1928 regexp does not have back references, this can be skipped. */
1929 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
1930 {
1931 DPRINT(("tre_compile: setting up tags\n"));
1932
1933 /* Figure out how many tags we will need. */
1934 errcode = tre_add_tags(NULL, stack, tree, tnfa);
1935 if (errcode != REG_OK)
1936 ERROR_EXIT(errcode);
1937 #ifdef TRE_DEBUG
1938 tre_ast_print(tree);
1939 #endif /* TRE_DEBUG */
1940
1941 if (tnfa->num_tags > 0)
1942 {
1943 tag_directions = xmalloc(sizeof(*tag_directions)
1944 * (tnfa->num_tags + 1));
1945 if (tag_directions == NULL)
1946 ERROR_EXIT(REG_ESPACE);
1947 tnfa->tag_directions = tag_directions;
1948 memset(tag_directions, -1,
1949 sizeof(*tag_directions) * (tnfa->num_tags + 1));
1950 }
1951 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
1952 sizeof(*tnfa->minimal_tags));
1953 if (tnfa->minimal_tags == NULL)
1954 ERROR_EXIT(REG_ESPACE);
1955
1956 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
1957 sizeof(*submatch_data));
1958 if (submatch_data == NULL)
1959 ERROR_EXIT(REG_ESPACE);
1960 tnfa->submatch_data = submatch_data;
1961
1962 errcode = tre_add_tags(mem, stack, tree, tnfa);
1963 if (errcode != REG_OK)
1964 ERROR_EXIT(errcode);
1965
1966 #ifdef TRE_DEBUG
1967 for (i = 0; i < parse_ctx.submatch_id; i++)
1968 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
1969 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
1970 for (i = 0; i < tnfa->num_tags; i++)
1971 DPRINT(("t%d is %s\n", i,
1972 tag_directions[i] == TRE_TAG_MINIMIZE ?
1973 "minimized" : "maximized"));
1974 #endif /* TRE_DEBUG */
1975 }
1976
1977 /* Expand iteration nodes. */
1978 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
1979 tag_directions, &tnfa->params_depth);
1980 if (errcode != REG_OK)
1981 ERROR_EXIT(errcode);
1982
1983 /* Add a dummy node for the final state.
1984 XXX - For certain patterns this dummy node can be optimized away,
1985 for example "a*" or "ab*". Figure out a simple way to detect
1986 this possibility. */
1987 tmp_ast_l = tree;
1988 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
1989 if (tmp_ast_r == NULL)
1990 ERROR_EXIT(REG_ESPACE);
1991
1992 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
1993 if (tree == NULL)
1994 ERROR_EXIT(REG_ESPACE);
1995
1996 #ifdef TRE_DEBUG
1997 tre_ast_print(tree);
1998 DPRINT(("Number of states: %d\n", parse_ctx.position));
1999 #endif /* TRE_DEBUG */
2000
2001 errcode = tre_compute_nfl(mem, stack, tree);
2002 if (errcode != REG_OK)
2003 ERROR_EXIT(errcode);
2004
2005 counts = xmalloc(sizeof(int) * parse_ctx.position);
2006 if (counts == NULL)
2007 ERROR_EXIT(REG_ESPACE);
2008
2009 offs = xmalloc(sizeof(int) * parse_ctx.position);
2010 if (offs == NULL)
2011 ERROR_EXIT(REG_ESPACE);
2012
2013 for (i = 0; i < parse_ctx.position; i++)
2014 counts[i] = 0;
2015 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2016
2017 add = 0;
2018 for (i = 0; i < parse_ctx.position; i++)
2019 {
2020 offs[i] = add;
2021 add += counts[i] + 1;
2022 counts[i] = 0;
2023 }
2024 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2025 if (transitions == NULL)
2026 ERROR_EXIT(REG_ESPACE);
2027 tnfa->transitions = transitions;
2028 tnfa->num_transitions = add;
2029
2030 DPRINT(("Converting to TNFA:\n"));
2031 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2032 if (errcode != REG_OK)
2033 ERROR_EXIT(errcode);
2034
2035 /* If in eight bit mode, compute a table of characters that can be the
2036 first character of a match. */
2037 tnfa->first_char = -1;
2038 if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
2039 {
2040 int count = 0;
2041 tre_cint_t k;
2042 DPRINT(("Characters that can start a match:"));
2043 tnfa->firstpos_chars = xcalloc(256, sizeof(char));
2044 if (tnfa->firstpos_chars == NULL)
2045 ERROR_EXIT(REG_ESPACE);
2046 for (p = tree->firstpos; p->position >= 0; p++)
2047 {
2048 tre_tnfa_transition_t *j = transitions + offs[p->position];
2049 while (j->state != NULL)
2050 {
2051 for (k = j->code_min; k <= j->code_max && k < 256; k++)
2052 {
2053 DPRINT((" %d", k));
2054 tnfa->firstpos_chars[k] = 1;
2055 count++;
2056 }
2057 j++;
2058 }
2059 }
2060 DPRINT(("\n"));
2061 #define TRE_OPTIMIZE_FIRST_CHAR 1
2062 #if TRE_OPTIMIZE_FIRST_CHAR
2063 if (count == 1)
2064 {
2065 for (k = 0; k < 256; k++)
2066 if (tnfa->firstpos_chars[k])
2067 {
2068 DPRINT(("first char must be %d\n", k));
2069 tnfa->first_char = k;
2070 xfree(tnfa->firstpos_chars);
2071 tnfa->firstpos_chars = NULL;
2072 break;
2073 }
2074 }
2075 #endif
2076
2077 }
2078 else
2079 tnfa->firstpos_chars = NULL;
2080
2081
2082 p = tree->firstpos;
2083 i = 0;
2084 while (p->position >= 0)
2085 {
2086 i++;
2087
2088 #ifdef TRE_DEBUG
2089 {
2090 int *tags;
2091 DPRINT(("initial: %d", p->position));
2092 tags = p->tags;
2093 if (tags != NULL)
2094 {
2095 if (*tags >= 0)
2096 DPRINT(("/"));
2097 while (*tags >= 0)
2098 {
2099 DPRINT(("%d", *tags));
2100 tags++;
2101 if (*tags >= 0)
2102 DPRINT((","));
2103 }
2104 }
2105 DPRINT((", assert %d", p->assertions));
2106 if (p->params)
2107 {
2108 DPRINT((", "));
2109 tre_print_params(p->params);
2110 }
2111 DPRINT(("\n"));
2112 }
2113 #endif /* TRE_DEBUG */
2114
2115 p++;
2116 }
2117
2118 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2119 if (initial == NULL)
2120 ERROR_EXIT(REG_ESPACE);
2121 tnfa->initial = initial;
2122
2123 i = 0;
2124 for (p = tree->firstpos; p->position >= 0; p++)
2125 {
2126 initial[i].state = transitions + offs[p->position];
2127 initial[i].state_id = p->position;
2128 initial[i].tags = NULL;
2129 /* Copy the arrays p->tags, and p->params, they are allocated
2130 from a tre_mem object. */
2131 if (p->tags)
2132 {
2133 int j;
2134 for (j = 0; p->tags[j] >= 0; j++);
2135 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2136 if (!initial[i].tags)
2137 ERROR_EXIT(REG_ESPACE);
2138 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2139 }
2140 initial[i].params = NULL;
2141 if (p->params)
2142 {
2143 initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
2144 if (!initial[i].params)
2145 ERROR_EXIT(REG_ESPACE);
2146 memcpy(initial[i].params, p->params,
2147 sizeof(*p->params) * TRE_PARAM_LAST);
2148 }
2149 initial[i].assertions = p->assertions;
2150 i++;
2151 }
2152 initial[i].state = NULL;
2153
2154 tnfa->num_transitions = add;
2155 tnfa->final = transitions + offs[tree->lastpos[0].position];
2156 tnfa->num_states = parse_ctx.position;
2157 tnfa->cflags = cflags;
2158
2159 DPRINT(("final state %p\n", (void *)tnfa->final));
2160
2161 tre_mem_destroy(mem);
2162 tre_stack_destroy(stack);
2163 xfree(counts);
2164 xfree(offs);
2165
2166 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2167 return REG_OK;
2168
2169 error_exit:
2170 /* Free everything that was allocated and return the error code. */
2171 tre_mem_destroy(mem);
2172 if (stack != NULL)
2173 tre_stack_destroy(stack);
2174 if (counts != NULL)
2175 xfree(counts);
2176 if (offs != NULL)
2177 xfree(offs);
2178 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2179 tre_free(preg);
2180 return errcode;
2181 }
2182
2183
2184
2185
2186 void
2187 tre_free(regex_t *preg)
2188 {
2189 tre_tnfa_t *tnfa;
2190 unsigned int i;
2191 tre_tnfa_transition_t *trans;
2192
2193 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2194 if (!tnfa)
2195 return;
2196
2197 for (i = 0; i < tnfa->num_transitions; i++)
2198 if (tnfa->transitions[i].state)
2199 {
2200 if (tnfa->transitions[i].tags)
2201 xfree(tnfa->transitions[i].tags);
2202 if (tnfa->transitions[i].neg_classes)
2203 xfree(tnfa->transitions[i].neg_classes);
2204 if (tnfa->transitions[i].params)
2205 xfree(tnfa->transitions[i].params);
2206 }
2207 if (tnfa->transitions)
2208 xfree(tnfa->transitions);
2209
2210 if (tnfa->initial)
2211 {
2212 for (trans = tnfa->initial; trans->state; trans++)
2213 {
2214 if (trans->tags)
2215 xfree(trans->tags);
2216 if (trans->params)
2217 xfree(trans->params);
2218 }
2219 xfree(tnfa->initial);
2220 }
2221
2222 if (tnfa->submatch_data)
2223 {
2224 for (i = 0; i < tnfa->num_submatches; i++)
2225 if (tnfa->submatch_data[i].parents)
2226 xfree(tnfa->submatch_data[i].parents);
2227 xfree(tnfa->submatch_data);
2228 }
2229
2230 if (tnfa->tag_directions)
2231 xfree(tnfa->tag_directions);
2232 if (tnfa->firstpos_chars)
2233 xfree(tnfa->firstpos_chars);
2234 if (tnfa->minimal_tags)
2235 xfree(tnfa->minimal_tags);
2236 xfree(tnfa);
2237 }
2238
2239 char *
2240 tre_version(void)
2241 {
2242 static char str[256];
2243 char *version;
2244
2245 if (str[0] == 0)
2246 {
2247 (void) tre_config(TRE_CONFIG_VERSION, &version);
2248 (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
2249 }
2250 return str;
2251 }
2252
2253 int
2254 tre_config(int query, void *result)
2255 {
2256 int *int_result = result;
2257 const char **string_result = result;
2258
2259 switch (query)
2260 {
2261 case TRE_CONFIG_APPROX:
2262 #ifdef TRE_APPROX
2263 *int_result = 1;
2264 #else /* !TRE_APPROX */
2265 *int_result = 0;
2266 #endif /* !TRE_APPROX */
2267 return REG_OK;
2268
2269 case TRE_CONFIG_WCHAR:
2270 #ifdef TRE_WCHAR
2271 *int_result = 1;
2272 #else /* !TRE_WCHAR */
2273 *int_result = 0;
2274 #endif /* !TRE_WCHAR */
2275 return REG_OK;
2276
2277 case TRE_CONFIG_MULTIBYTE:
2278 #ifdef TRE_MULTIBYTE
2279 *int_result = 1;
2280 #else /* !TRE_MULTIBYTE */
2281 *int_result = 0;
2282 #endif /* !TRE_MULTIBYTE */
2283 return REG_OK;
2284
2285 case TRE_CONFIG_SYSTEM_ABI:
2286 #ifdef TRE_CONFIG_SYSTEM_ABI
2287 *int_result = 1;
2288 #else /* !TRE_CONFIG_SYSTEM_ABI */
2289 *int_result = 0;
2290 #endif /* !TRE_CONFIG_SYSTEM_ABI */
2291 return REG_OK;
2292
2293 case TRE_CONFIG_VERSION:
2294 *string_result = TRE_VERSION;
2295 return REG_OK;
2296 }
2297
2298 return REG_NOMATCH;
2299 }
2300
2301
2302 /* EOF */
2303