tre-compile.c revision 1.2 1 1.1 agc /*
2 1.1 agc tre-compile.c - TRE regex compiler
3 1.1 agc
4 1.1 agc This software is released under a BSD-style license.
5 1.1 agc See the file LICENSE for details and copyright.
6 1.1 agc
7 1.1 agc */
8 1.1 agc
9 1.1 agc /*
10 1.1 agc TODO:
11 1.1 agc - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12 1.1 agc function calls.
13 1.1 agc */
14 1.1 agc
15 1.1 agc
16 1.1 agc #ifdef HAVE_CONFIG_H
17 1.1 agc #include <config.h>
18 1.1 agc #endif /* HAVE_CONFIG_H */
19 1.1 agc #include <stdio.h>
20 1.1 agc #include <assert.h>
21 1.1 agc #include <string.h>
22 1.1 agc
23 1.1 agc #include "tre-internal.h"
24 1.1 agc #include "tre-mem.h"
25 1.1 agc #include "tre-stack.h"
26 1.1 agc #include "tre-ast.h"
27 1.1 agc #include "tre-parse.h"
28 1.1 agc #include "tre-compile.h"
29 1.1 agc #include "tre.h"
30 1.1 agc #include "xmalloc.h"
31 1.1 agc
32 1.1 agc /*
33 1.1 agc Algorithms to setup tags so that submatch addressing can be done.
34 1.1 agc */
35 1.1 agc
36 1.1 agc
37 1.1 agc /* Inserts a catenation node to the root of the tree given in `node'.
38 1.1 agc As the left child a new tag with number `tag_id' to `node' is added,
39 1.1 agc and the right child is the old root. */
40 1.1 agc static reg_errcode_t
41 1.1 agc tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
42 1.1 agc {
43 1.1 agc tre_catenation_t *c;
44 1.1 agc
45 1.1 agc DPRINT(("add_tag_left: tag %d\n", tag_id));
46 1.1 agc
47 1.1 agc c = tre_mem_alloc(mem, sizeof(*c));
48 1.1 agc if (c == NULL)
49 1.1 agc return REG_ESPACE;
50 1.1 agc c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
51 1.1 agc if (c->left == NULL)
52 1.1 agc return REG_ESPACE;
53 1.1 agc c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
54 1.1 agc if (c->right == NULL)
55 1.1 agc return REG_ESPACE;
56 1.1 agc
57 1.1 agc c->right->obj = node->obj;
58 1.1 agc c->right->type = node->type;
59 1.1 agc c->right->nullable = -1;
60 1.1 agc c->right->submatch_id = -1;
61 1.1 agc c->right->firstpos = NULL;
62 1.1 agc c->right->lastpos = NULL;
63 1.1 agc c->right->num_tags = 0;
64 1.1 agc node->obj = c;
65 1.1 agc node->type = CATENATION;
66 1.1 agc return REG_OK;
67 1.1 agc }
68 1.1 agc
69 1.1 agc /* Inserts a catenation node to the root of the tree given in `node'.
70 1.1 agc As the right child a new tag with number `tag_id' to `node' is added,
71 1.1 agc and the left child is the old root. */
72 1.1 agc static reg_errcode_t
73 1.1 agc tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
74 1.1 agc {
75 1.1 agc tre_catenation_t *c;
76 1.1 agc
77 1.1 agc DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
78 1.1 agc
79 1.1 agc c = tre_mem_alloc(mem, sizeof(*c));
80 1.1 agc if (c == NULL)
81 1.1 agc return REG_ESPACE;
82 1.1 agc c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
83 1.1 agc if (c->right == NULL)
84 1.1 agc return REG_ESPACE;
85 1.1 agc c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
86 1.1 agc if (c->left == NULL)
87 1.1 agc return REG_ESPACE;
88 1.1 agc
89 1.1 agc c->left->obj = node->obj;
90 1.1 agc c->left->type = node->type;
91 1.1 agc c->left->nullable = -1;
92 1.1 agc c->left->submatch_id = -1;
93 1.1 agc c->left->firstpos = NULL;
94 1.1 agc c->left->lastpos = NULL;
95 1.1 agc c->left->num_tags = 0;
96 1.1 agc node->obj = c;
97 1.1 agc node->type = CATENATION;
98 1.1 agc return REG_OK;
99 1.1 agc }
100 1.1 agc
101 1.1 agc typedef enum {
102 1.1 agc ADDTAGS_RECURSE,
103 1.1 agc ADDTAGS_AFTER_ITERATION,
104 1.1 agc ADDTAGS_AFTER_UNION_LEFT,
105 1.1 agc ADDTAGS_AFTER_UNION_RIGHT,
106 1.1 agc ADDTAGS_AFTER_CAT_LEFT,
107 1.1 agc ADDTAGS_AFTER_CAT_RIGHT,
108 1.1 agc ADDTAGS_SET_SUBMATCH_END
109 1.1 agc } tre_addtags_symbol_t;
110 1.1 agc
111 1.1 agc
112 1.1 agc typedef struct {
113 1.1 agc int tag;
114 1.1 agc int next_tag;
115 1.1 agc } tre_tag_states_t;
116 1.1 agc
117 1.1 agc
118 1.1 agc /* Go through `regset' and set submatch data for submatches that are
119 1.1 agc using this tag. */
120 1.1 agc static void
121 1.1 agc tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
122 1.1 agc {
123 1.1 agc int i;
124 1.1 agc
125 1.1 agc for (i = 0; regset[i] >= 0; i++)
126 1.1 agc {
127 1.1 agc int id = regset[i] / 2;
128 1.1 agc int start = !(regset[i] % 2);
129 1.1 agc DPRINT((" Using tag %d for %s offset of "
130 1.1 agc "submatch %d\n", tag,
131 1.1 agc start ? "start" : "end", id));
132 1.1 agc if (start)
133 1.1 agc tnfa->submatch_data[id].so_tag = tag;
134 1.1 agc else
135 1.1 agc tnfa->submatch_data[id].eo_tag = tag;
136 1.1 agc }
137 1.1 agc regset[0] = -1;
138 1.1 agc }
139 1.1 agc
140 1.1 agc
141 1.1 agc /* Adds tags to appropriate locations in the parse tree in `tree', so that
142 1.1 agc subexpressions marked for submatch addressing can be traced. */
143 1.1 agc static reg_errcode_t
144 1.1 agc tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
145 1.1 agc tre_tnfa_t *tnfa)
146 1.1 agc {
147 1.1 agc reg_errcode_t status = REG_OK;
148 1.1 agc tre_addtags_symbol_t symbol;
149 1.1 agc tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
150 1.1 agc int bottom = tre_stack_num_objects(stack);
151 1.1 agc /* True for first pass (counting number of needed tags) */
152 1.1 agc int first_pass = (mem == NULL || tnfa == NULL);
153 1.1 agc int *regset, *orig_regset;
154 1.1 agc int num_tags = 0; /* Total number of tags. */
155 1.1 agc int num_minimals = 0; /* Number of special minimal tags. */
156 1.1 agc int tag = 0; /* The tag that is to be added next. */
157 1.1 agc int next_tag = 1; /* Next tag to use after this one. */
158 1.1 agc int *parents; /* Stack of submatches the current submatch is
159 1.1 agc contained in. */
160 1.1 agc int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
161 1.1 agc tre_tag_states_t *saved_states;
162 1.1 agc
163 1.1 agc tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
164 1.1 agc if (!first_pass)
165 1.1 agc {
166 1.1 agc tnfa->end_tag = 0;
167 1.1 agc tnfa->minimal_tags[0] = -1;
168 1.1 agc }
169 1.1 agc
170 1.1 agc regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
171 1.1 agc if (regset == NULL)
172 1.1 agc return REG_ESPACE;
173 1.1 agc regset[0] = -1;
174 1.1 agc orig_regset = regset;
175 1.1 agc
176 1.1 agc parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
177 1.1 agc if (parents == NULL)
178 1.1 agc {
179 1.1 agc xfree(regset);
180 1.1 agc return REG_ESPACE;
181 1.1 agc }
182 1.1 agc parents[0] = -1;
183 1.1 agc
184 1.1 agc saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
185 1.1 agc if (saved_states == NULL)
186 1.1 agc {
187 1.1 agc xfree(regset);
188 1.1 agc xfree(parents);
189 1.1 agc return REG_ESPACE;
190 1.1 agc }
191 1.1 agc else
192 1.1 agc {
193 1.1 agc unsigned int i;
194 1.1 agc for (i = 0; i <= tnfa->num_submatches; i++)
195 1.1 agc saved_states[i].tag = -1;
196 1.1 agc }
197 1.1 agc
198 1.1 agc STACK_PUSH(stack, voidptr, node);
199 1.2 christos STACK_PUSH(stack, long, ADDTAGS_RECURSE);
200 1.1 agc
201 1.1 agc while (tre_stack_num_objects(stack) > bottom)
202 1.1 agc {
203 1.1 agc if (status != REG_OK)
204 1.1 agc break;
205 1.1 agc
206 1.2 christos symbol = (tre_addtags_symbol_t)tre_stack_pop_long(stack);
207 1.1 agc switch (symbol)
208 1.1 agc {
209 1.1 agc
210 1.1 agc case ADDTAGS_SET_SUBMATCH_END:
211 1.1 agc {
212 1.2 christos int id = (int)tre_stack_pop_long(stack);
213 1.1 agc int i;
214 1.1 agc
215 1.1 agc /* Add end of this submatch to regset. */
216 1.1 agc for (i = 0; regset[i] >= 0; i++);
217 1.1 agc regset[i] = id * 2 + 1;
218 1.1 agc regset[i + 1] = -1;
219 1.1 agc
220 1.1 agc /* Pop this submatch from the parents stack. */
221 1.1 agc for (i = 0; parents[i] >= 0; i++);
222 1.1 agc parents[i - 1] = -1;
223 1.1 agc break;
224 1.1 agc }
225 1.1 agc
226 1.1 agc case ADDTAGS_RECURSE:
227 1.1 agc node = tre_stack_pop_voidptr(stack);
228 1.1 agc
229 1.1 agc if (node->submatch_id >= 0)
230 1.1 agc {
231 1.1 agc int id = node->submatch_id;
232 1.1 agc int i;
233 1.1 agc
234 1.1 agc
235 1.1 agc /* Add start of this submatch to regset. */
236 1.1 agc for (i = 0; regset[i] >= 0; i++);
237 1.1 agc regset[i] = id * 2;
238 1.1 agc regset[i + 1] = -1;
239 1.1 agc
240 1.1 agc if (!first_pass)
241 1.1 agc {
242 1.1 agc for (i = 0; parents[i] >= 0; i++);
243 1.1 agc tnfa->submatch_data[id].parents = NULL;
244 1.1 agc if (i > 0)
245 1.1 agc {
246 1.1 agc int *p = xmalloc(sizeof(*p) * (i + 1));
247 1.1 agc if (p == NULL)
248 1.1 agc {
249 1.1 agc status = REG_ESPACE;
250 1.1 agc break;
251 1.1 agc }
252 1.1 agc assert(tnfa->submatch_data[id].parents == NULL);
253 1.1 agc tnfa->submatch_data[id].parents = p;
254 1.1 agc for (i = 0; parents[i] >= 0; i++)
255 1.1 agc p[i] = parents[i];
256 1.1 agc p[i] = -1;
257 1.1 agc }
258 1.1 agc }
259 1.1 agc
260 1.1 agc /* Add end of this submatch to regset after processing this
261 1.1 agc node. */
262 1.2 christos STACK_PUSHX(stack, long, node->submatch_id);
263 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_SET_SUBMATCH_END);
264 1.1 agc }
265 1.1 agc
266 1.1 agc switch (node->type)
267 1.1 agc {
268 1.1 agc case LITERAL:
269 1.1 agc {
270 1.1 agc tre_literal_t *lit = node->obj;
271 1.1 agc
272 1.1 agc if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
273 1.1 agc {
274 1.1 agc int i;
275 1.1 agc DPRINT(("Literal %d-%d\n",
276 1.1 agc (int)lit->code_min, (int)lit->code_max));
277 1.1 agc if (regset[0] >= 0)
278 1.1 agc {
279 1.1 agc /* Regset is not empty, so add a tag before the
280 1.1 agc literal or backref. */
281 1.1 agc if (!first_pass)
282 1.1 agc {
283 1.1 agc status = tre_add_tag_left(mem, node, tag);
284 1.1 agc tnfa->tag_directions[tag] = direction;
285 1.1 agc if (minimal_tag >= 0)
286 1.1 agc {
287 1.1 agc DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
288 1.1 agc for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
289 1.1 agc tnfa->minimal_tags[i] = tag;
290 1.1 agc tnfa->minimal_tags[i + 1] = minimal_tag;
291 1.1 agc tnfa->minimal_tags[i + 2] = -1;
292 1.1 agc minimal_tag = -1;
293 1.1 agc num_minimals++;
294 1.1 agc }
295 1.1 agc tre_purge_regset(regset, tnfa, tag);
296 1.1 agc }
297 1.1 agc else
298 1.1 agc {
299 1.1 agc DPRINT((" num_tags = 1\n"));
300 1.1 agc node->num_tags = 1;
301 1.1 agc }
302 1.1 agc
303 1.1 agc DPRINT((" num_tags++\n"));
304 1.1 agc regset[0] = -1;
305 1.1 agc tag = next_tag;
306 1.1 agc num_tags++;
307 1.1 agc next_tag++;
308 1.1 agc }
309 1.1 agc }
310 1.1 agc else
311 1.1 agc {
312 1.1 agc assert(!IS_TAG(lit));
313 1.1 agc }
314 1.1 agc break;
315 1.1 agc }
316 1.1 agc case CATENATION:
317 1.1 agc {
318 1.1 agc tre_catenation_t *cat = node->obj;
319 1.1 agc tre_ast_node_t *left = cat->left;
320 1.1 agc tre_ast_node_t *right = cat->right;
321 1.1 agc int reserved_tag = -1;
322 1.1 agc DPRINT(("Catenation, next_tag = %d\n", next_tag));
323 1.1 agc
324 1.1 agc
325 1.1 agc /* After processing right child. */
326 1.1 agc STACK_PUSHX(stack, voidptr, node);
327 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_RIGHT);
328 1.1 agc
329 1.1 agc /* Process right child. */
330 1.1 agc STACK_PUSHX(stack, voidptr, right);
331 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
332 1.1 agc
333 1.1 agc /* After processing left child. */
334 1.2 christos STACK_PUSHX(stack, long, (long)(next_tag + left->num_tags));
335 1.1 agc DPRINT((" Pushing %d for after left\n",
336 1.1 agc next_tag + left->num_tags));
337 1.1 agc if (left->num_tags > 0 && right->num_tags > 0)
338 1.1 agc {
339 1.1 agc /* Reserve the next tag to the right child. */
340 1.1 agc DPRINT((" Reserving next_tag %d to right child\n",
341 1.1 agc next_tag));
342 1.1 agc reserved_tag = next_tag;
343 1.1 agc next_tag++;
344 1.1 agc }
345 1.2 christos STACK_PUSHX(stack, long, reserved_tag);
346 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_LEFT);
347 1.1 agc
348 1.1 agc /* Process left child. */
349 1.1 agc STACK_PUSHX(stack, voidptr, left);
350 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
351 1.1 agc
352 1.1 agc }
353 1.1 agc break;
354 1.1 agc case ITERATION:
355 1.1 agc {
356 1.1 agc tre_iteration_t *iter = node->obj;
357 1.1 agc DPRINT(("Iteration\n"));
358 1.1 agc
359 1.1 agc if (first_pass)
360 1.1 agc {
361 1.2 christos STACK_PUSHX(stack, long, regset[0] >= 0 || iter->minimal);
362 1.1 agc }
363 1.1 agc else
364 1.1 agc {
365 1.2 christos STACK_PUSHX(stack, long, tag);
366 1.2 christos STACK_PUSHX(stack, long, iter->minimal);
367 1.1 agc }
368 1.1 agc STACK_PUSHX(stack, voidptr, node);
369 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_AFTER_ITERATION);
370 1.1 agc
371 1.1 agc STACK_PUSHX(stack, voidptr, iter->arg);
372 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
373 1.1 agc
374 1.1 agc /* Regset is not empty, so add a tag here. */
375 1.1 agc if (regset[0] >= 0 || iter->minimal)
376 1.1 agc {
377 1.1 agc if (!first_pass)
378 1.1 agc {
379 1.1 agc int i;
380 1.1 agc status = tre_add_tag_left(mem, node, tag);
381 1.1 agc if (iter->minimal)
382 1.1 agc tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
383 1.1 agc else
384 1.1 agc tnfa->tag_directions[tag] = direction;
385 1.1 agc if (minimal_tag >= 0)
386 1.1 agc {
387 1.1 agc DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
388 1.1 agc for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
389 1.1 agc tnfa->minimal_tags[i] = tag;
390 1.1 agc tnfa->minimal_tags[i + 1] = minimal_tag;
391 1.1 agc tnfa->minimal_tags[i + 2] = -1;
392 1.1 agc minimal_tag = -1;
393 1.1 agc num_minimals++;
394 1.1 agc }
395 1.1 agc tre_purge_regset(regset, tnfa, tag);
396 1.1 agc }
397 1.1 agc
398 1.1 agc DPRINT((" num_tags++\n"));
399 1.1 agc regset[0] = -1;
400 1.1 agc tag = next_tag;
401 1.1 agc num_tags++;
402 1.1 agc next_tag++;
403 1.1 agc }
404 1.1 agc direction = TRE_TAG_MINIMIZE;
405 1.1 agc }
406 1.1 agc break;
407 1.1 agc case UNION:
408 1.1 agc {
409 1.1 agc tre_union_t *uni = node->obj;
410 1.1 agc tre_ast_node_t *left = uni->left;
411 1.1 agc tre_ast_node_t *right = uni->right;
412 1.1 agc int left_tag;
413 1.1 agc int right_tag;
414 1.1 agc
415 1.1 agc if (regset[0] >= 0)
416 1.1 agc {
417 1.1 agc left_tag = next_tag;
418 1.1 agc right_tag = next_tag + 1;
419 1.1 agc }
420 1.1 agc else
421 1.1 agc {
422 1.1 agc left_tag = tag;
423 1.1 agc right_tag = next_tag;
424 1.1 agc }
425 1.1 agc
426 1.1 agc DPRINT(("Union\n"));
427 1.1 agc
428 1.1 agc /* After processing right child. */
429 1.2 christos STACK_PUSHX(stack, long, right_tag);
430 1.2 christos STACK_PUSHX(stack, long, left_tag);
431 1.1 agc STACK_PUSHX(stack, voidptr, regset);
432 1.2 christos STACK_PUSHX(stack, long, regset[0] >= 0);
433 1.1 agc STACK_PUSHX(stack, voidptr, node);
434 1.1 agc STACK_PUSHX(stack, voidptr, right);
435 1.1 agc STACK_PUSHX(stack, voidptr, left);
436 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_RIGHT);
437 1.1 agc
438 1.1 agc /* Process right child. */
439 1.1 agc STACK_PUSHX(stack, voidptr, right);
440 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
441 1.1 agc
442 1.1 agc /* After processing left child. */
443 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_LEFT);
444 1.1 agc
445 1.1 agc /* Process left child. */
446 1.1 agc STACK_PUSHX(stack, voidptr, left);
447 1.2 christos STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
448 1.1 agc
449 1.1 agc /* Regset is not empty, so add a tag here. */
450 1.1 agc if (regset[0] >= 0)
451 1.1 agc {
452 1.1 agc if (!first_pass)
453 1.1 agc {
454 1.1 agc int i;
455 1.1 agc status = tre_add_tag_left(mem, node, tag);
456 1.1 agc tnfa->tag_directions[tag] = direction;
457 1.1 agc if (minimal_tag >= 0)
458 1.1 agc {
459 1.1 agc DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
460 1.1 agc for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
461 1.1 agc tnfa->minimal_tags[i] = tag;
462 1.1 agc tnfa->minimal_tags[i + 1] = minimal_tag;
463 1.1 agc tnfa->minimal_tags[i + 2] = -1;
464 1.1 agc minimal_tag = -1;
465 1.1 agc num_minimals++;
466 1.1 agc }
467 1.1 agc tre_purge_regset(regset, tnfa, tag);
468 1.1 agc }
469 1.1 agc
470 1.1 agc DPRINT((" num_tags++\n"));
471 1.1 agc regset[0] = -1;
472 1.1 agc tag = next_tag;
473 1.1 agc num_tags++;
474 1.1 agc next_tag++;
475 1.1 agc }
476 1.1 agc
477 1.1 agc if (node->num_submatches > 0)
478 1.1 agc {
479 1.1 agc /* The next two tags are reserved for markers. */
480 1.1 agc next_tag++;
481 1.1 agc tag = next_tag;
482 1.1 agc next_tag++;
483 1.1 agc }
484 1.1 agc
485 1.1 agc break;
486 1.1 agc }
487 1.1 agc }
488 1.1 agc
489 1.1 agc if (node->submatch_id >= 0)
490 1.1 agc {
491 1.1 agc int i;
492 1.1 agc /* Push this submatch on the parents stack. */
493 1.1 agc for (i = 0; parents[i] >= 0; i++);
494 1.1 agc parents[i] = node->submatch_id;
495 1.1 agc parents[i + 1] = -1;
496 1.1 agc }
497 1.1 agc
498 1.1 agc break; /* end case: ADDTAGS_RECURSE */
499 1.1 agc
500 1.1 agc case ADDTAGS_AFTER_ITERATION:
501 1.1 agc {
502 1.1 agc int minimal = 0;
503 1.1 agc int enter_tag;
504 1.1 agc node = tre_stack_pop_voidptr(stack);
505 1.1 agc if (first_pass)
506 1.1 agc {
507 1.1 agc node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
508 1.2 christos + tre_stack_pop_long(stack);
509 1.1 agc minimal_tag = -1;
510 1.1 agc }
511 1.1 agc else
512 1.1 agc {
513 1.1 agc minimal = tre_stack_pop_int(stack);
514 1.1 agc enter_tag = tre_stack_pop_int(stack);
515 1.1 agc if (minimal)
516 1.1 agc minimal_tag = enter_tag;
517 1.1 agc }
518 1.1 agc
519 1.1 agc DPRINT(("After iteration\n"));
520 1.1 agc if (!first_pass)
521 1.1 agc {
522 1.1 agc DPRINT((" Setting direction to %s\n",
523 1.1 agc minimal ? "minimize" : "maximize"));
524 1.1 agc if (minimal)
525 1.1 agc direction = TRE_TAG_MINIMIZE;
526 1.1 agc else
527 1.1 agc direction = TRE_TAG_MAXIMIZE;
528 1.1 agc }
529 1.1 agc break;
530 1.1 agc }
531 1.1 agc
532 1.1 agc case ADDTAGS_AFTER_CAT_LEFT:
533 1.1 agc {
534 1.1 agc int new_tag = tre_stack_pop_int(stack);
535 1.1 agc next_tag = tre_stack_pop_int(stack);
536 1.1 agc DPRINT(("After cat left, tag = %d, next_tag = %d\n",
537 1.1 agc tag, next_tag));
538 1.1 agc if (new_tag >= 0)
539 1.1 agc {
540 1.1 agc DPRINT((" Setting tag to %d\n", new_tag));
541 1.1 agc tag = new_tag;
542 1.1 agc }
543 1.1 agc break;
544 1.1 agc }
545 1.1 agc
546 1.1 agc case ADDTAGS_AFTER_CAT_RIGHT:
547 1.1 agc DPRINT(("After cat right\n"));
548 1.1 agc node = tre_stack_pop_voidptr(stack);
549 1.1 agc if (first_pass)
550 1.1 agc node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
551 1.1 agc + ((tre_catenation_t *)node->obj)->right->num_tags;
552 1.1 agc break;
553 1.1 agc
554 1.1 agc case ADDTAGS_AFTER_UNION_LEFT:
555 1.1 agc DPRINT(("After union left\n"));
556 1.1 agc /* Lift the bottom of the `regset' array so that when processing
557 1.1 agc the right operand the items currently in the array are
558 1.1 agc invisible. The original bottom was saved at ADDTAGS_UNION and
559 1.1 agc will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
560 1.1 agc while (*regset >= 0)
561 1.1 agc regset++;
562 1.1 agc break;
563 1.1 agc
564 1.1 agc case ADDTAGS_AFTER_UNION_RIGHT:
565 1.1 agc {
566 1.1 agc int added_tags, tag_left, tag_right;
567 1.1 agc tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
568 1.1 agc tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
569 1.1 agc DPRINT(("After union right\n"));
570 1.1 agc node = tre_stack_pop_voidptr(stack);
571 1.1 agc added_tags = tre_stack_pop_int(stack);
572 1.1 agc if (first_pass)
573 1.1 agc {
574 1.1 agc node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
575 1.1 agc + ((tre_union_t *)node->obj)->right->num_tags + added_tags
576 1.1 agc + ((node->num_submatches > 0) ? 2 : 0);
577 1.1 agc }
578 1.1 agc regset = tre_stack_pop_voidptr(stack);
579 1.1 agc tag_left = tre_stack_pop_int(stack);
580 1.1 agc tag_right = tre_stack_pop_int(stack);
581 1.1 agc
582 1.1 agc /* Add tags after both children, the left child gets a smaller
583 1.1 agc tag than the right child. This guarantees that we prefer
584 1.1 agc the left child over the right child. */
585 1.1 agc /* XXX - This is not always necessary (if the children have
586 1.1 agc tags which must be seen for every match of that child). */
587 1.1 agc /* XXX - Check if this is the only place where tre_add_tag_right
588 1.1 agc is used. If so, use tre_add_tag_left (putting the tag before
589 1.1 agc the child as opposed after the child) and throw away
590 1.1 agc tre_add_tag_right. */
591 1.1 agc if (node->num_submatches > 0)
592 1.1 agc {
593 1.1 agc if (!first_pass)
594 1.1 agc {
595 1.1 agc status = tre_add_tag_right(mem, left, tag_left);
596 1.1 agc tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
597 1.1 agc status = tre_add_tag_right(mem, right, tag_right);
598 1.1 agc tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
599 1.1 agc }
600 1.1 agc DPRINT((" num_tags += 2\n"));
601 1.1 agc num_tags += 2;
602 1.1 agc }
603 1.1 agc direction = TRE_TAG_MAXIMIZE;
604 1.1 agc break;
605 1.1 agc }
606 1.1 agc
607 1.1 agc default:
608 1.1 agc assert(0);
609 1.1 agc break;
610 1.1 agc
611 1.1 agc } /* end switch(symbol) */
612 1.1 agc } /* end while(tre_stack_num_objects(stack) > bottom) */
613 1.1 agc
614 1.1 agc if (!first_pass)
615 1.1 agc tre_purge_regset(regset, tnfa, tag);
616 1.1 agc
617 1.1 agc if (!first_pass && minimal_tag >= 0)
618 1.1 agc {
619 1.1 agc int i;
620 1.1 agc DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
621 1.1 agc for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
622 1.1 agc tnfa->minimal_tags[i] = tag;
623 1.1 agc tnfa->minimal_tags[i + 1] = minimal_tag;
624 1.1 agc tnfa->minimal_tags[i + 2] = -1;
625 1.1 agc minimal_tag = -1;
626 1.1 agc num_minimals++;
627 1.1 agc }
628 1.1 agc
629 1.1 agc DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
630 1.1 agc first_pass? "First pass" : "Second pass", num_tags));
631 1.1 agc
632 1.1 agc assert(tree->num_tags == num_tags);
633 1.1 agc tnfa->end_tag = num_tags;
634 1.1 agc tnfa->num_tags = num_tags;
635 1.1 agc tnfa->num_minimals = num_minimals;
636 1.1 agc xfree(orig_regset);
637 1.1 agc xfree(parents);
638 1.1 agc xfree(saved_states);
639 1.1 agc return status;
640 1.1 agc }
641 1.1 agc
642 1.1 agc
643 1.1 agc
644 1.1 agc /*
645 1.1 agc AST to TNFA compilation routines.
646 1.1 agc */
647 1.1 agc
648 1.1 agc typedef enum {
649 1.1 agc COPY_RECURSE,
650 1.1 agc COPY_SET_RESULT_PTR
651 1.1 agc } tre_copyast_symbol_t;
652 1.1 agc
653 1.1 agc /* Flags for tre_copy_ast(). */
654 1.1 agc #define COPY_REMOVE_TAGS 1
655 1.1 agc #define COPY_MAXIMIZE_FIRST_TAG 2
656 1.1 agc
657 1.1 agc static reg_errcode_t
658 1.1 agc tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
659 1.1 agc int flags, int *pos_add, tre_tag_direction_t *tag_directions,
660 1.1 agc tre_ast_node_t **copy, int *max_pos)
661 1.1 agc {
662 1.1 agc reg_errcode_t status = REG_OK;
663 1.1 agc int bottom = tre_stack_num_objects(stack);
664 1.1 agc int num_copied = 0;
665 1.1 agc int first_tag = 1;
666 1.1 agc tre_ast_node_t **result = copy;
667 1.1 agc tre_copyast_symbol_t symbol;
668 1.1 agc
669 1.1 agc STACK_PUSH(stack, voidptr, ast);
670 1.2 christos STACK_PUSH(stack, long, COPY_RECURSE);
671 1.1 agc
672 1.1 agc while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
673 1.1 agc {
674 1.1 agc tre_ast_node_t *node;
675 1.1 agc if (status != REG_OK)
676 1.1 agc break;
677 1.1 agc
678 1.2 christos symbol = (tre_copyast_symbol_t)tre_stack_pop_long(stack);
679 1.1 agc switch (symbol)
680 1.1 agc {
681 1.1 agc case COPY_SET_RESULT_PTR:
682 1.1 agc result = tre_stack_pop_voidptr(stack);
683 1.1 agc break;
684 1.1 agc case COPY_RECURSE:
685 1.1 agc node = tre_stack_pop_voidptr(stack);
686 1.1 agc switch (node->type)
687 1.1 agc {
688 1.1 agc case LITERAL:
689 1.1 agc {
690 1.1 agc tre_literal_t *lit = node->obj;
691 1.1 agc int pos = lit->position;
692 1.1 agc int min = lit->code_min;
693 1.1 agc int max = lit->code_max;
694 1.1 agc if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
695 1.1 agc {
696 1.1 agc /* XXX - e.g. [ab] has only one position but two
697 1.1 agc nodes, so we are creating holes in the state space
698 1.1 agc here. Not fatal, just wastes memory. */
699 1.1 agc pos += *pos_add;
700 1.1 agc num_copied++;
701 1.1 agc }
702 1.1 agc else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
703 1.1 agc {
704 1.1 agc /* Change this tag to empty. */
705 1.1 agc min = EMPTY;
706 1.1 agc max = pos = -1;
707 1.1 agc }
708 1.1 agc else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
709 1.1 agc && first_tag)
710 1.1 agc {
711 1.1 agc /* Maximize the first tag. */
712 1.1 agc tag_directions[max] = TRE_TAG_MAXIMIZE;
713 1.1 agc first_tag = 0;
714 1.1 agc }
715 1.1 agc *result = tre_ast_new_literal(mem, min, max, pos);
716 1.1 agc if (*result == NULL)
717 1.1 agc status = REG_ESPACE;
718 1.1 agc
719 1.1 agc if (pos > *max_pos)
720 1.1 agc *max_pos = pos;
721 1.1 agc break;
722 1.1 agc }
723 1.1 agc case UNION:
724 1.1 agc {
725 1.1 agc tre_union_t *uni = node->obj;
726 1.1 agc tre_union_t *tmp;
727 1.1 agc *result = tre_ast_new_union(mem, uni->left, uni->right);
728 1.1 agc if (*result == NULL)
729 1.1 agc {
730 1.1 agc status = REG_ESPACE;
731 1.1 agc break;
732 1.1 agc }
733 1.1 agc tmp = (*result)->obj;
734 1.1 agc result = &tmp->left;
735 1.1 agc STACK_PUSHX(stack, voidptr, uni->right);
736 1.2 christos STACK_PUSHX(stack, long, COPY_RECURSE);
737 1.1 agc STACK_PUSHX(stack, voidptr, &tmp->right);
738 1.2 christos STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
739 1.1 agc STACK_PUSHX(stack, voidptr, uni->left);
740 1.2 christos STACK_PUSHX(stack, long, COPY_RECURSE);
741 1.1 agc break;
742 1.1 agc }
743 1.1 agc case CATENATION:
744 1.1 agc {
745 1.1 agc tre_catenation_t *cat = node->obj;
746 1.1 agc tre_catenation_t *tmp;
747 1.1 agc *result = tre_ast_new_catenation(mem, cat->left, cat->right);
748 1.1 agc if (*result == NULL)
749 1.1 agc {
750 1.1 agc status = REG_ESPACE;
751 1.1 agc break;
752 1.1 agc }
753 1.1 agc tmp = (*result)->obj;
754 1.1 agc tmp->left = NULL;
755 1.1 agc tmp->right = NULL;
756 1.1 agc result = &tmp->left;
757 1.1 agc
758 1.1 agc STACK_PUSHX(stack, voidptr, cat->right);
759 1.2 christos STACK_PUSHX(stack, long, COPY_RECURSE);
760 1.1 agc STACK_PUSHX(stack, voidptr, &tmp->right);
761 1.2 christos STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
762 1.1 agc STACK_PUSHX(stack, voidptr, cat->left);
763 1.2 christos STACK_PUSHX(stack, long, COPY_RECURSE);
764 1.1 agc break;
765 1.1 agc }
766 1.1 agc case ITERATION:
767 1.1 agc {
768 1.1 agc tre_iteration_t *iter = node->obj;
769 1.1 agc STACK_PUSHX(stack, voidptr, iter->arg);
770 1.2 christos STACK_PUSHX(stack, long, COPY_RECURSE);
771 1.1 agc *result = tre_ast_new_iter(mem, iter->arg, iter->min,
772 1.1 agc iter->max, iter->minimal);
773 1.1 agc if (*result == NULL)
774 1.1 agc {
775 1.1 agc status = REG_ESPACE;
776 1.1 agc break;
777 1.1 agc }
778 1.1 agc iter = (*result)->obj;
779 1.1 agc result = &iter->arg;
780 1.1 agc break;
781 1.1 agc }
782 1.1 agc default:
783 1.1 agc assert(0);
784 1.1 agc break;
785 1.1 agc }
786 1.1 agc break;
787 1.1 agc }
788 1.1 agc }
789 1.1 agc *pos_add += num_copied;
790 1.1 agc return status;
791 1.1 agc }
792 1.1 agc
793 1.1 agc typedef enum {
794 1.1 agc EXPAND_RECURSE,
795 1.1 agc EXPAND_AFTER_ITER
796 1.1 agc } tre_expand_ast_symbol_t;
797 1.1 agc
798 1.1 agc /* Expands each iteration node that has a finite nonzero minimum or maximum
799 1.1 agc iteration count to a catenated sequence of copies of the node. */
800 1.1 agc static reg_errcode_t
801 1.1 agc tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
802 1.1 agc int *position, tre_tag_direction_t *tag_directions,
803 1.1 agc int *max_depth)
804 1.1 agc {
805 1.1 agc reg_errcode_t status = REG_OK;
806 1.1 agc int bottom = tre_stack_num_objects(stack);
807 1.1 agc int pos_add = 0;
808 1.1 agc int pos_add_total = 0;
809 1.1 agc int max_pos = 0;
810 1.1 agc /* Current approximate matching parameters. */
811 1.1 agc int params[TRE_PARAM_LAST];
812 1.1 agc /* Approximate parameter nesting level. */
813 1.1 agc int params_depth = 0;
814 1.1 agc int iter_depth = 0;
815 1.1 agc int i;
816 1.1 agc
817 1.1 agc for (i = 0; i < TRE_PARAM_LAST; i++)
818 1.1 agc params[i] = TRE_PARAM_DEFAULT;
819 1.1 agc
820 1.1 agc STACK_PUSHR(stack, voidptr, ast);
821 1.2 christos STACK_PUSHR(stack, long, EXPAND_RECURSE);
822 1.1 agc while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
823 1.1 agc {
824 1.1 agc tre_ast_node_t *node;
825 1.1 agc tre_expand_ast_symbol_t symbol;
826 1.1 agc
827 1.1 agc if (status != REG_OK)
828 1.1 agc break;
829 1.1 agc
830 1.1 agc DPRINT(("pos_add %d\n", pos_add));
831 1.1 agc
832 1.2 christos symbol = (tre_expand_ast_symbol_t)tre_stack_pop_long(stack);
833 1.1 agc node = tre_stack_pop_voidptr(stack);
834 1.1 agc switch (symbol)
835 1.1 agc {
836 1.1 agc case EXPAND_RECURSE:
837 1.1 agc switch (node->type)
838 1.1 agc {
839 1.1 agc case LITERAL:
840 1.1 agc {
841 1.1 agc tre_literal_t *lit= node->obj;
842 1.1 agc if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
843 1.1 agc {
844 1.1 agc lit->position += pos_add;
845 1.1 agc if (lit->position > max_pos)
846 1.1 agc max_pos = lit->position;
847 1.1 agc }
848 1.1 agc break;
849 1.1 agc }
850 1.1 agc case UNION:
851 1.1 agc {
852 1.1 agc tre_union_t *uni = node->obj;
853 1.1 agc STACK_PUSHX(stack, voidptr, uni->right);
854 1.2 christos STACK_PUSHX(stack, long, EXPAND_RECURSE);
855 1.1 agc STACK_PUSHX(stack, voidptr, uni->left);
856 1.2 christos STACK_PUSHX(stack, long, EXPAND_RECURSE);
857 1.1 agc break;
858 1.1 agc }
859 1.1 agc case CATENATION:
860 1.1 agc {
861 1.1 agc tre_catenation_t *cat = node->obj;
862 1.1 agc STACK_PUSHX(stack, voidptr, cat->right);
863 1.2 christos STACK_PUSHX(stack, long, EXPAND_RECURSE);
864 1.1 agc STACK_PUSHX(stack, voidptr, cat->left);
865 1.2 christos STACK_PUSHX(stack, long, EXPAND_RECURSE);
866 1.1 agc break;
867 1.1 agc }
868 1.1 agc case ITERATION:
869 1.1 agc {
870 1.1 agc tre_iteration_t *iter = node->obj;
871 1.2 christos STACK_PUSHX(stack, long, pos_add);
872 1.1 agc STACK_PUSHX(stack, voidptr, node);
873 1.2 christos STACK_PUSHX(stack, long, EXPAND_AFTER_ITER);
874 1.1 agc STACK_PUSHX(stack, voidptr, iter->arg);
875 1.2 christos STACK_PUSHX(stack, long, EXPAND_RECURSE);
876 1.1 agc /* If we are going to expand this node at EXPAND_AFTER_ITER
877 1.1 agc then don't increase the `pos' fields of the nodes now, it
878 1.1 agc will get done when expanding. */
879 1.1 agc if (iter->min > 1 || iter->max > 1)
880 1.1 agc pos_add = 0;
881 1.1 agc iter_depth++;
882 1.1 agc DPRINT(("iter\n"));
883 1.1 agc break;
884 1.1 agc }
885 1.1 agc default:
886 1.1 agc assert(0);
887 1.1 agc break;
888 1.1 agc }
889 1.1 agc break;
890 1.1 agc case EXPAND_AFTER_ITER:
891 1.1 agc {
892 1.1 agc tre_iteration_t *iter = node->obj;
893 1.1 agc int pos_add_last;
894 1.1 agc pos_add = tre_stack_pop_int(stack);
895 1.1 agc pos_add_last = pos_add;
896 1.1 agc if (iter->min > 1 || iter->max > 1)
897 1.1 agc {
898 1.1 agc tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
899 1.1 agc int j;
900 1.1 agc int pos_add_save = pos_add;
901 1.1 agc
902 1.1 agc /* Create a catenated sequence of copies of the node. */
903 1.1 agc for (j = 0; j < iter->min; j++)
904 1.1 agc {
905 1.1 agc tre_ast_node_t *copy;
906 1.1 agc /* Remove tags from all but the last copy. */
907 1.1 agc int flags = ((j + 1 < iter->min)
908 1.1 agc ? COPY_REMOVE_TAGS
909 1.1 agc : COPY_MAXIMIZE_FIRST_TAG);
910 1.1 agc DPRINT((" pos_add %d\n", pos_add));
911 1.1 agc pos_add_save = pos_add;
912 1.1 agc status = tre_copy_ast(mem, stack, iter->arg, flags,
913 1.1 agc &pos_add, tag_directions, ©,
914 1.1 agc &max_pos);
915 1.1 agc if (status != REG_OK)
916 1.1 agc return status;
917 1.1 agc if (seq1 != NULL)
918 1.1 agc seq1 = tre_ast_new_catenation(mem, seq1, copy);
919 1.1 agc else
920 1.1 agc seq1 = copy;
921 1.1 agc if (seq1 == NULL)
922 1.1 agc return REG_ESPACE;
923 1.1 agc }
924 1.1 agc
925 1.1 agc if (iter->max == -1)
926 1.1 agc {
927 1.1 agc /* No upper limit. */
928 1.1 agc pos_add_save = pos_add;
929 1.1 agc status = tre_copy_ast(mem, stack, iter->arg, 0,
930 1.1 agc &pos_add, NULL, &seq2, &max_pos);
931 1.1 agc if (status != REG_OK)
932 1.1 agc return status;
933 1.1 agc seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
934 1.1 agc if (seq2 == NULL)
935 1.1 agc return REG_ESPACE;
936 1.1 agc }
937 1.1 agc else
938 1.1 agc {
939 1.1 agc for (j = iter->min; j < iter->max; j++)
940 1.1 agc {
941 1.1 agc tre_ast_node_t *tmp, *copy;
942 1.1 agc pos_add_save = pos_add;
943 1.1 agc status = tre_copy_ast(mem, stack, iter->arg, 0,
944 1.1 agc &pos_add, NULL, ©, &max_pos);
945 1.1 agc if (status != REG_OK)
946 1.1 agc return status;
947 1.1 agc if (seq2 != NULL)
948 1.1 agc seq2 = tre_ast_new_catenation(mem, copy, seq2);
949 1.1 agc else
950 1.1 agc seq2 = copy;
951 1.1 agc if (seq2 == NULL)
952 1.1 agc return REG_ESPACE;
953 1.1 agc tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
954 1.1 agc if (tmp == NULL)
955 1.1 agc return REG_ESPACE;
956 1.1 agc seq2 = tre_ast_new_union(mem, tmp, seq2);
957 1.1 agc if (seq2 == NULL)
958 1.1 agc return REG_ESPACE;
959 1.1 agc }
960 1.1 agc }
961 1.1 agc
962 1.1 agc pos_add = pos_add_save;
963 1.1 agc if (seq1 == NULL)
964 1.1 agc seq1 = seq2;
965 1.1 agc else if (seq2 != NULL)
966 1.1 agc seq1 = tre_ast_new_catenation(mem, seq1, seq2);
967 1.1 agc if (seq1 == NULL)
968 1.1 agc return REG_ESPACE;
969 1.1 agc node->obj = seq1->obj;
970 1.1 agc node->type = seq1->type;
971 1.1 agc }
972 1.1 agc
973 1.1 agc iter_depth--;
974 1.1 agc pos_add_total += pos_add - pos_add_last;
975 1.1 agc if (iter_depth == 0)
976 1.1 agc pos_add = pos_add_total;
977 1.1 agc
978 1.1 agc /* If approximate parameters are specified, surround the result
979 1.1 agc with two parameter setting nodes. The one on the left sets
980 1.1 agc the specified parameters, and the one on the right restores
981 1.1 agc the old parameters. */
982 1.1 agc if (iter->params)
983 1.1 agc {
984 1.1 agc tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
985 1.1 agc int *old_params;
986 1.1 agc
987 1.1 agc tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
988 1.1 agc if (!tmp_l)
989 1.1 agc return REG_ESPACE;
990 1.1 agc ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
991 1.1 agc iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
992 1.1 agc tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
993 1.1 agc if (!tmp_r)
994 1.1 agc return REG_ESPACE;
995 1.1 agc old_params = tre_mem_alloc(mem, sizeof(*old_params)
996 1.1 agc * TRE_PARAM_LAST);
997 1.1 agc if (!old_params)
998 1.1 agc return REG_ESPACE;
999 1.1 agc for (i = 0; i < TRE_PARAM_LAST; i++)
1000 1.1 agc old_params[i] = params[i];
1001 1.1 agc ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
1002 1.1 agc old_params[TRE_PARAM_DEPTH] = params_depth;
1003 1.1 agc /* XXX - this is the only place where ast_new_node is
1004 1.1 agc needed -- should be moved inside AST module. */
1005 1.1 agc node_copy = tre_ast_new_node(mem, ITERATION,
1006 1.1 agc sizeof(tre_iteration_t));
1007 1.1 agc if (!node_copy)
1008 1.1 agc return REG_ESPACE;
1009 1.1 agc node_copy->obj = node->obj;
1010 1.1 agc tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
1011 1.1 agc if (!tmp_node)
1012 1.1 agc return REG_ESPACE;
1013 1.1 agc tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
1014 1.1 agc if (!tmp_node)
1015 1.1 agc return REG_ESPACE;
1016 1.1 agc /* Replace the contents of `node' with `tmp_node'. */
1017 1.1 agc memcpy(node, tmp_node, sizeof(*node));
1018 1.1 agc node->obj = tmp_node->obj;
1019 1.1 agc node->type = tmp_node->type;
1020 1.1 agc params_depth++;
1021 1.1 agc if (params_depth > *max_depth)
1022 1.1 agc *max_depth = params_depth;
1023 1.1 agc }
1024 1.1 agc break;
1025 1.1 agc }
1026 1.1 agc default:
1027 1.1 agc assert(0);
1028 1.1 agc break;
1029 1.1 agc }
1030 1.1 agc }
1031 1.1 agc
1032 1.1 agc *position += pos_add_total;
1033 1.1 agc
1034 1.1 agc /* `max_pos' should never be larger than `*position' if the above
1035 1.1 agc code works, but just an extra safeguard let's make sure
1036 1.1 agc `*position' is set large enough so enough memory will be
1037 1.1 agc allocated for the transition table. */
1038 1.1 agc if (max_pos > *position)
1039 1.1 agc *position = max_pos;
1040 1.1 agc
1041 1.1 agc #ifdef TRE_DEBUG
1042 1.1 agc DPRINT(("Expanded AST:\n"));
1043 1.1 agc tre_ast_print(ast);
1044 1.1 agc DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
1045 1.1 agc #endif
1046 1.1 agc
1047 1.1 agc return status;
1048 1.1 agc }
1049 1.1 agc
1050 1.1 agc static tre_pos_and_tags_t *
1051 1.1 agc tre_set_empty(tre_mem_t mem)
1052 1.1 agc {
1053 1.1 agc tre_pos_and_tags_t *new_set;
1054 1.1 agc
1055 1.1 agc new_set = tre_mem_calloc(mem, sizeof(*new_set));
1056 1.1 agc if (new_set == NULL)
1057 1.1 agc return NULL;
1058 1.1 agc
1059 1.1 agc new_set[0].position = -1;
1060 1.1 agc new_set[0].code_min = -1;
1061 1.1 agc new_set[0].code_max = -1;
1062 1.1 agc
1063 1.1 agc return new_set;
1064 1.1 agc }
1065 1.1 agc
1066 1.1 agc static tre_pos_and_tags_t *
1067 1.1 agc tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1068 1.1 agc tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1069 1.1 agc {
1070 1.1 agc tre_pos_and_tags_t *new_set;
1071 1.1 agc
1072 1.1 agc new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1073 1.1 agc if (new_set == NULL)
1074 1.1 agc return NULL;
1075 1.1 agc
1076 1.1 agc new_set[0].position = position;
1077 1.1 agc new_set[0].code_min = code_min;
1078 1.1 agc new_set[0].code_max = code_max;
1079 1.1 agc new_set[0].class = class;
1080 1.1 agc new_set[0].neg_classes = neg_classes;
1081 1.1 agc new_set[0].backref = backref;
1082 1.1 agc new_set[1].position = -1;
1083 1.1 agc new_set[1].code_min = -1;
1084 1.1 agc new_set[1].code_max = -1;
1085 1.1 agc
1086 1.1 agc return new_set;
1087 1.1 agc }
1088 1.1 agc
1089 1.1 agc static tre_pos_and_tags_t *
1090 1.1 agc tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
1091 1.1 agc int *tags, int assertions, int *params)
1092 1.1 agc {
1093 1.1 agc int s1, s2, i, j;
1094 1.1 agc tre_pos_and_tags_t *new_set;
1095 1.1 agc int *new_tags;
1096 1.1 agc int num_tags;
1097 1.1 agc
1098 1.1 agc for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
1099 1.1 agc for (s1 = 0; set1[s1].position >= 0; s1++);
1100 1.1 agc for (s2 = 0; set2[s2].position >= 0; s2++);
1101 1.1 agc new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
1102 1.1 agc if (!new_set )
1103 1.1 agc return NULL;
1104 1.1 agc
1105 1.1 agc for (s1 = 0; set1[s1].position >= 0; s1++)
1106 1.1 agc {
1107 1.1 agc new_set[s1].position = set1[s1].position;
1108 1.1 agc new_set[s1].code_min = set1[s1].code_min;
1109 1.1 agc new_set[s1].code_max = set1[s1].code_max;
1110 1.1 agc new_set[s1].assertions = set1[s1].assertions | assertions;
1111 1.1 agc new_set[s1].class = set1[s1].class;
1112 1.1 agc new_set[s1].neg_classes = set1[s1].neg_classes;
1113 1.1 agc new_set[s1].backref = set1[s1].backref;
1114 1.1 agc if (set1[s1].tags == NULL && tags == NULL)
1115 1.1 agc new_set[s1].tags = NULL;
1116 1.1 agc else
1117 1.1 agc {
1118 1.1 agc for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
1119 1.1 agc new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
1120 1.1 agc * (i + num_tags + 1)));
1121 1.1 agc if (new_tags == NULL)
1122 1.1 agc return NULL;
1123 1.1 agc for (j = 0; j < i; j++)
1124 1.1 agc new_tags[j] = set1[s1].tags[j];
1125 1.1 agc for (i = 0; i < num_tags; i++)
1126 1.1 agc new_tags[j + i] = tags[i];
1127 1.1 agc new_tags[j + i] = -1;
1128 1.1 agc new_set[s1].tags = new_tags;
1129 1.1 agc }
1130 1.1 agc if (set1[s1].params)
1131 1.1 agc new_set[s1].params = set1[s1].params;
1132 1.1 agc if (params)
1133 1.1 agc {
1134 1.1 agc if (!new_set[s1].params)
1135 1.1 agc new_set[s1].params = params;
1136 1.1 agc else
1137 1.1 agc {
1138 1.1 agc new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
1139 1.1 agc TRE_PARAM_LAST);
1140 1.1 agc if (!new_set[s1].params)
1141 1.1 agc return NULL;
1142 1.1 agc for (i = 0; i < TRE_PARAM_LAST; i++)
1143 1.1 agc if (params[i] != TRE_PARAM_UNSET)
1144 1.1 agc new_set[s1].params[i] = params[i];
1145 1.1 agc }
1146 1.1 agc }
1147 1.1 agc }
1148 1.1 agc
1149 1.1 agc for (s2 = 0; set2[s2].position >= 0; s2++)
1150 1.1 agc {
1151 1.1 agc new_set[s1 + s2].position = set2[s2].position;
1152 1.1 agc new_set[s1 + s2].code_min = set2[s2].code_min;
1153 1.1 agc new_set[s1 + s2].code_max = set2[s2].code_max;
1154 1.1 agc /* XXX - why not | assertions here as well? */
1155 1.1 agc new_set[s1 + s2].assertions = set2[s2].assertions;
1156 1.1 agc new_set[s1 + s2].class = set2[s2].class;
1157 1.1 agc new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
1158 1.1 agc new_set[s1 + s2].backref = set2[s2].backref;
1159 1.1 agc if (set2[s2].tags == NULL)
1160 1.1 agc new_set[s1 + s2].tags = NULL;
1161 1.1 agc else
1162 1.1 agc {
1163 1.1 agc for (i = 0; set2[s2].tags[i] >= 0; i++);
1164 1.1 agc new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
1165 1.1 agc if (new_tags == NULL)
1166 1.1 agc return NULL;
1167 1.1 agc for (j = 0; j < i; j++)
1168 1.1 agc new_tags[j] = set2[s2].tags[j];
1169 1.1 agc new_tags[j] = -1;
1170 1.1 agc new_set[s1 + s2].tags = new_tags;
1171 1.1 agc }
1172 1.1 agc if (set2[s2].params)
1173 1.1 agc new_set[s1 + s2].params = set2[s2].params;
1174 1.1 agc if (params)
1175 1.1 agc {
1176 1.1 agc if (!new_set[s1 + s2].params)
1177 1.1 agc new_set[s1 + s2].params = params;
1178 1.1 agc else
1179 1.1 agc {
1180 1.1 agc new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
1181 1.1 agc TRE_PARAM_LAST);
1182 1.1 agc if (!new_set[s1 + s2].params)
1183 1.1 agc return NULL;
1184 1.1 agc for (i = 0; i < TRE_PARAM_LAST; i++)
1185 1.1 agc if (params[i] != TRE_PARAM_UNSET)
1186 1.1 agc new_set[s1 + s2].params[i] = params[i];
1187 1.1 agc }
1188 1.1 agc }
1189 1.1 agc }
1190 1.1 agc new_set[s1 + s2].position = -1;
1191 1.1 agc return new_set;
1192 1.1 agc }
1193 1.1 agc
1194 1.1 agc /* Finds the empty path through `node' which is the one that should be
1195 1.1 agc taken according to POSIX.2 rules, and adds the tags on that path to
1196 1.1 agc `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
1197 1.1 agc set to the number of tags seen on the path. */
1198 1.1 agc static reg_errcode_t
1199 1.1 agc tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
1200 1.1 agc int *assertions, int *params, int *num_tags_seen,
1201 1.1 agc int *params_seen)
1202 1.1 agc {
1203 1.1 agc tre_literal_t *lit;
1204 1.1 agc tre_union_t *uni;
1205 1.1 agc tre_catenation_t *cat;
1206 1.1 agc tre_iteration_t *iter;
1207 1.1 agc int i;
1208 1.1 agc int bottom = tre_stack_num_objects(stack);
1209 1.1 agc reg_errcode_t status = REG_OK;
1210 1.1 agc if (num_tags_seen)
1211 1.1 agc *num_tags_seen = 0;
1212 1.1 agc if (params_seen)
1213 1.1 agc *params_seen = 0;
1214 1.1 agc
1215 1.1 agc status = tre_stack_push_voidptr(stack, node);
1216 1.1 agc
1217 1.1 agc /* Walk through the tree recursively. */
1218 1.1 agc while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1219 1.1 agc {
1220 1.1 agc node = tre_stack_pop_voidptr(stack);
1221 1.1 agc
1222 1.1 agc switch (node->type)
1223 1.1 agc {
1224 1.1 agc case LITERAL:
1225 1.1 agc lit = (tre_literal_t *)node->obj;
1226 1.1 agc switch (lit->code_min)
1227 1.1 agc {
1228 1.1 agc case TAG:
1229 1.1 agc if (lit->code_max >= 0)
1230 1.1 agc {
1231 1.1 agc if (tags != NULL)
1232 1.1 agc {
1233 1.1 agc /* Add the tag to `tags'. */
1234 1.1 agc for (i = 0; tags[i] >= 0; i++)
1235 1.1 agc if (tags[i] == lit->code_max)
1236 1.1 agc break;
1237 1.1 agc if (tags[i] < 0)
1238 1.1 agc {
1239 1.1 agc tags[i] = lit->code_max;
1240 1.1 agc tags[i + 1] = -1;
1241 1.1 agc }
1242 1.1 agc }
1243 1.1 agc if (num_tags_seen)
1244 1.1 agc (*num_tags_seen)++;
1245 1.1 agc }
1246 1.1 agc break;
1247 1.1 agc case ASSERTION:
1248 1.1 agc assert(lit->code_max >= 1
1249 1.1 agc || lit->code_max <= ASSERT_LAST);
1250 1.1 agc if (assertions != NULL)
1251 1.1 agc *assertions |= lit->code_max;
1252 1.1 agc break;
1253 1.1 agc case PARAMETER:
1254 1.1 agc if (params != NULL)
1255 1.1 agc for (i = 0; i < TRE_PARAM_LAST; i++)
1256 1.1 agc params[i] = lit->u.params[i];
1257 1.1 agc if (params_seen != NULL)
1258 1.1 agc *params_seen = 1;
1259 1.1 agc break;
1260 1.1 agc case EMPTY:
1261 1.1 agc break;
1262 1.1 agc default:
1263 1.1 agc assert(0);
1264 1.1 agc break;
1265 1.1 agc }
1266 1.1 agc break;
1267 1.1 agc
1268 1.1 agc case UNION:
1269 1.1 agc /* Subexpressions starting earlier take priority over ones
1270 1.1 agc starting later, so we prefer the left subexpression over the
1271 1.1 agc right subexpression. */
1272 1.1 agc uni = (tre_union_t *)node->obj;
1273 1.1 agc if (uni->left->nullable)
1274 1.1 agc STACK_PUSHX(stack, voidptr, uni->left)
1275 1.1 agc else if (uni->right->nullable)
1276 1.1 agc STACK_PUSHX(stack, voidptr, uni->right)
1277 1.1 agc else
1278 1.1 agc assert(0);
1279 1.1 agc break;
1280 1.1 agc
1281 1.1 agc case CATENATION:
1282 1.1 agc /* The path must go through both children. */
1283 1.1 agc cat = (tre_catenation_t *)node->obj;
1284 1.1 agc assert(cat->left->nullable);
1285 1.1 agc assert(cat->right->nullable);
1286 1.1 agc STACK_PUSHX(stack, voidptr, cat->left);
1287 1.1 agc STACK_PUSHX(stack, voidptr, cat->right);
1288 1.1 agc break;
1289 1.1 agc
1290 1.1 agc case ITERATION:
1291 1.1 agc /* A match with an empty string is preferred over no match at
1292 1.1 agc all, so we go through the argument if possible. */
1293 1.1 agc iter = (tre_iteration_t *)node->obj;
1294 1.1 agc if (iter->arg->nullable)
1295 1.1 agc STACK_PUSHX(stack, voidptr, iter->arg);
1296 1.1 agc break;
1297 1.1 agc
1298 1.1 agc default:
1299 1.1 agc assert(0);
1300 1.1 agc break;
1301 1.1 agc }
1302 1.1 agc }
1303 1.1 agc
1304 1.1 agc return status;
1305 1.1 agc }
1306 1.1 agc
1307 1.1 agc
1308 1.1 agc typedef enum {
1309 1.1 agc NFL_RECURSE,
1310 1.1 agc NFL_POST_UNION,
1311 1.1 agc NFL_POST_CATENATION,
1312 1.1 agc NFL_POST_ITERATION
1313 1.1 agc } tre_nfl_stack_symbol_t;
1314 1.1 agc
1315 1.1 agc
1316 1.1 agc /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
1317 1.1 agc the nodes of the AST `tree'. */
1318 1.1 agc static reg_errcode_t
1319 1.1 agc tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
1320 1.1 agc {
1321 1.1 agc int bottom = tre_stack_num_objects(stack);
1322 1.1 agc
1323 1.1 agc STACK_PUSHR(stack, voidptr, tree);
1324 1.2 christos STACK_PUSHR(stack, long, NFL_RECURSE);
1325 1.1 agc
1326 1.1 agc while (tre_stack_num_objects(stack) > bottom)
1327 1.1 agc {
1328 1.1 agc tre_nfl_stack_symbol_t symbol;
1329 1.1 agc tre_ast_node_t *node;
1330 1.1 agc
1331 1.2 christos symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_long(stack);
1332 1.1 agc node = tre_stack_pop_voidptr(stack);
1333 1.1 agc switch (symbol)
1334 1.1 agc {
1335 1.1 agc case NFL_RECURSE:
1336 1.1 agc switch (node->type)
1337 1.1 agc {
1338 1.1 agc case LITERAL:
1339 1.1 agc {
1340 1.1 agc tre_literal_t *lit = (tre_literal_t *)node->obj;
1341 1.1 agc if (IS_BACKREF(lit))
1342 1.1 agc {
1343 1.1 agc /* Back references: nullable = false, firstpos = {i},
1344 1.1 agc lastpos = {i}. */
1345 1.1 agc node->nullable = 0;
1346 1.1 agc node->firstpos = tre_set_one(mem, lit->position, 0,
1347 1.1 agc TRE_CHAR_MAX, 0, NULL, -1);
1348 1.1 agc if (!node->firstpos)
1349 1.1 agc return REG_ESPACE;
1350 1.1 agc node->lastpos = tre_set_one(mem, lit->position, 0,
1351 1.1 agc TRE_CHAR_MAX, 0, NULL,
1352 1.1 agc (int)lit->code_max);
1353 1.1 agc if (!node->lastpos)
1354 1.1 agc return REG_ESPACE;
1355 1.1 agc }
1356 1.1 agc else if (lit->code_min < 0)
1357 1.1 agc {
1358 1.1 agc /* Tags, empty strings, params, and zero width assertions:
1359 1.1 agc nullable = true, firstpos = {}, and lastpos = {}. */
1360 1.1 agc node->nullable = 1;
1361 1.1 agc node->firstpos = tre_set_empty(mem);
1362 1.1 agc if (!node->firstpos)
1363 1.1 agc return REG_ESPACE;
1364 1.1 agc node->lastpos = tre_set_empty(mem);
1365 1.1 agc if (!node->lastpos)
1366 1.1 agc return REG_ESPACE;
1367 1.1 agc }
1368 1.1 agc else
1369 1.1 agc {
1370 1.1 agc /* Literal at position i: nullable = false, firstpos = {i},
1371 1.1 agc lastpos = {i}. */
1372 1.1 agc node->nullable = 0;
1373 1.1 agc node->firstpos =
1374 1.1 agc tre_set_one(mem, lit->position, (int)lit->code_min,
1375 1.1 agc (int)lit->code_max, 0, NULL, -1);
1376 1.1 agc if (!node->firstpos)
1377 1.1 agc return REG_ESPACE;
1378 1.1 agc node->lastpos = tre_set_one(mem, lit->position,
1379 1.1 agc (int)lit->code_min,
1380 1.1 agc (int)lit->code_max,
1381 1.1 agc lit->u.class, lit->neg_classes,
1382 1.1 agc -1);
1383 1.1 agc if (!node->lastpos)
1384 1.1 agc return REG_ESPACE;
1385 1.1 agc }
1386 1.1 agc break;
1387 1.1 agc }
1388 1.1 agc
1389 1.1 agc case UNION:
1390 1.1 agc /* Compute the attributes for the two subtrees, and after that
1391 1.1 agc for this node. */
1392 1.1 agc STACK_PUSHR(stack, voidptr, node);
1393 1.2 christos STACK_PUSHR(stack, long, NFL_POST_UNION);
1394 1.1 agc STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
1395 1.2 christos STACK_PUSHR(stack, long, NFL_RECURSE);
1396 1.1 agc STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
1397 1.2 christos STACK_PUSHR(stack, long, NFL_RECURSE);
1398 1.1 agc break;
1399 1.1 agc
1400 1.1 agc case CATENATION:
1401 1.1 agc /* Compute the attributes for the two subtrees, and after that
1402 1.1 agc for this node. */
1403 1.1 agc STACK_PUSHR(stack, voidptr, node);
1404 1.2 christos STACK_PUSHR(stack, long, NFL_POST_CATENATION);
1405 1.1 agc STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
1406 1.2 christos STACK_PUSHR(stack, long, NFL_RECURSE);
1407 1.1 agc STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
1408 1.2 christos STACK_PUSHR(stack, long, NFL_RECURSE);
1409 1.1 agc break;
1410 1.1 agc
1411 1.1 agc case ITERATION:
1412 1.1 agc /* Compute the attributes for the subtree, and after that for
1413 1.1 agc this node. */
1414 1.1 agc STACK_PUSHR(stack, voidptr, node);
1415 1.2 christos STACK_PUSHR(stack, long, NFL_POST_ITERATION);
1416 1.1 agc STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
1417 1.2 christos STACK_PUSHR(stack, long, NFL_RECURSE);
1418 1.1 agc break;
1419 1.1 agc }
1420 1.1 agc break; /* end case: NFL_RECURSE */
1421 1.1 agc
1422 1.1 agc case NFL_POST_UNION:
1423 1.1 agc {
1424 1.1 agc tre_union_t *uni = (tre_union_t *)node->obj;
1425 1.1 agc node->nullable = uni->left->nullable || uni->right->nullable;
1426 1.1 agc node->firstpos = tre_set_union(mem, uni->left->firstpos,
1427 1.1 agc uni->right->firstpos, NULL, 0, NULL);
1428 1.1 agc if (!node->firstpos)
1429 1.1 agc return REG_ESPACE;
1430 1.1 agc node->lastpos = tre_set_union(mem, uni->left->lastpos,
1431 1.1 agc uni->right->lastpos, NULL, 0, NULL);
1432 1.1 agc if (!node->lastpos)
1433 1.1 agc return REG_ESPACE;
1434 1.1 agc break;
1435 1.1 agc }
1436 1.1 agc
1437 1.1 agc case NFL_POST_ITERATION:
1438 1.1 agc {
1439 1.1 agc tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1440 1.1 agc
1441 1.1 agc if (iter->min == 0 || iter->arg->nullable)
1442 1.1 agc node->nullable = 1;
1443 1.1 agc else
1444 1.1 agc node->nullable = 0;
1445 1.1 agc node->firstpos = iter->arg->firstpos;
1446 1.1 agc node->lastpos = iter->arg->lastpos;
1447 1.1 agc break;
1448 1.1 agc }
1449 1.1 agc
1450 1.1 agc case NFL_POST_CATENATION:
1451 1.1 agc {
1452 1.1 agc int num_tags, *tags, assertions, params_seen;
1453 1.1 agc int *params;
1454 1.1 agc reg_errcode_t status;
1455 1.1 agc tre_catenation_t *cat = node->obj;
1456 1.1 agc node->nullable = cat->left->nullable && cat->right->nullable;
1457 1.1 agc
1458 1.1 agc /* Compute firstpos. */
1459 1.1 agc if (cat->left->nullable)
1460 1.1 agc {
1461 1.1 agc /* The left side matches the empty string. Make a first pass
1462 1.1 agc with tre_match_empty() to get the number of tags and
1463 1.1 agc parameters. */
1464 1.1 agc status = tre_match_empty(stack, cat->left,
1465 1.1 agc NULL, NULL, NULL, &num_tags,
1466 1.1 agc ¶ms_seen);
1467 1.1 agc if (status != REG_OK)
1468 1.1 agc return status;
1469 1.1 agc /* Allocate arrays for the tags and parameters. */
1470 1.1 agc tags = xmalloc(sizeof(*tags) * (num_tags + 1));
1471 1.1 agc if (!tags)
1472 1.1 agc return REG_ESPACE;
1473 1.1 agc tags[0] = -1;
1474 1.1 agc assertions = 0;
1475 1.1 agc params = NULL;
1476 1.1 agc if (params_seen)
1477 1.1 agc {
1478 1.1 agc params = tre_mem_alloc(mem, sizeof(*params)
1479 1.1 agc * TRE_PARAM_LAST);
1480 1.1 agc if (!params)
1481 1.1 agc {
1482 1.1 agc xfree(tags);
1483 1.1 agc return REG_ESPACE;
1484 1.1 agc }
1485 1.1 agc }
1486 1.1 agc /* Second pass with tre_mach_empty() to get the list of
1487 1.1 agc tags and parameters. */
1488 1.1 agc status = tre_match_empty(stack, cat->left, tags,
1489 1.1 agc &assertions, params, NULL, NULL);
1490 1.1 agc if (status != REG_OK)
1491 1.1 agc {
1492 1.1 agc xfree(tags);
1493 1.1 agc return status;
1494 1.1 agc }
1495 1.1 agc node->firstpos =
1496 1.1 agc tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
1497 1.1 agc tags, assertions, params);
1498 1.1 agc xfree(tags);
1499 1.1 agc if (!node->firstpos)
1500 1.1 agc return REG_ESPACE;
1501 1.1 agc }
1502 1.1 agc else
1503 1.1 agc {
1504 1.1 agc node->firstpos = cat->left->firstpos;
1505 1.1 agc }
1506 1.1 agc
1507 1.1 agc /* Compute lastpos. */
1508 1.1 agc if (cat->right->nullable)
1509 1.1 agc {
1510 1.1 agc /* The right side matches the empty string. Make a first pass
1511 1.1 agc with tre_match_empty() to get the number of tags and
1512 1.1 agc parameters. */
1513 1.1 agc status = tre_match_empty(stack, cat->right,
1514 1.1 agc NULL, NULL, NULL, &num_tags,
1515 1.1 agc ¶ms_seen);
1516 1.1 agc if (status != REG_OK)
1517 1.1 agc return status;
1518 1.1 agc /* Allocate arrays for the tags and parameters. */
1519 1.1 agc tags = xmalloc(sizeof(int) * (num_tags + 1));
1520 1.1 agc if (!tags)
1521 1.1 agc return REG_ESPACE;
1522 1.1 agc tags[0] = -1;
1523 1.1 agc assertions = 0;
1524 1.1 agc params = NULL;
1525 1.1 agc if (params_seen)
1526 1.1 agc {
1527 1.1 agc params = tre_mem_alloc(mem, sizeof(*params)
1528 1.1 agc * TRE_PARAM_LAST);
1529 1.1 agc if (!params)
1530 1.1 agc {
1531 1.1 agc xfree(tags);
1532 1.1 agc return REG_ESPACE;
1533 1.1 agc }
1534 1.1 agc }
1535 1.1 agc /* Second pass with tre_mach_empty() to get the list of
1536 1.1 agc tags and parameters. */
1537 1.1 agc status = tre_match_empty(stack, cat->right, tags,
1538 1.1 agc &assertions, params, NULL, NULL);
1539 1.1 agc if (status != REG_OK)
1540 1.1 agc {
1541 1.1 agc xfree(tags);
1542 1.1 agc return status;
1543 1.1 agc }
1544 1.1 agc node->lastpos =
1545 1.1 agc tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
1546 1.1 agc tags, assertions, params);
1547 1.1 agc xfree(tags);
1548 1.1 agc if (!node->lastpos)
1549 1.1 agc return REG_ESPACE;
1550 1.1 agc }
1551 1.1 agc else
1552 1.1 agc {
1553 1.1 agc node->lastpos = cat->right->lastpos;
1554 1.1 agc }
1555 1.1 agc break;
1556 1.1 agc }
1557 1.1 agc
1558 1.1 agc default:
1559 1.1 agc assert(0);
1560 1.1 agc break;
1561 1.1 agc }
1562 1.1 agc }
1563 1.1 agc
1564 1.1 agc return REG_OK;
1565 1.1 agc }
1566 1.1 agc
1567 1.1 agc
1568 1.1 agc /* Adds a transition from each position in `p1' to each position in `p2'. */
1569 1.1 agc static reg_errcode_t
1570 1.1 agc tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
1571 1.1 agc tre_tnfa_transition_t *transitions,
1572 1.1 agc int *counts, int *offs)
1573 1.1 agc {
1574 1.1 agc tre_pos_and_tags_t *orig_p2 = p2;
1575 1.1 agc tre_tnfa_transition_t *trans;
1576 1.1 agc int i, j, k, l, dup, prev_p2_pos;
1577 1.1 agc
1578 1.1 agc if (transitions != NULL)
1579 1.1 agc while (p1->position >= 0)
1580 1.1 agc {
1581 1.1 agc p2 = orig_p2;
1582 1.1 agc prev_p2_pos = -1;
1583 1.1 agc while (p2->position >= 0)
1584 1.1 agc {
1585 1.1 agc /* Optimization: if this position was already handled, skip it. */
1586 1.1 agc if (p2->position == prev_p2_pos)
1587 1.1 agc {
1588 1.1 agc p2++;
1589 1.1 agc continue;
1590 1.1 agc }
1591 1.1 agc prev_p2_pos = p2->position;
1592 1.1 agc /* Set `trans' to point to the next unused transition from
1593 1.1 agc position `p1->position'. */
1594 1.1 agc trans = transitions + offs[p1->position];
1595 1.1 agc while (trans->state != NULL)
1596 1.1 agc {
1597 1.1 agc #if 0
1598 1.1 agc /* If we find a previous transition from `p1->position' to
1599 1.1 agc `p2->position', it is overwritten. This can happen only
1600 1.1 agc if there are nested loops in the regexp, like in "((a)*)*".
1601 1.1 agc In POSIX.2 repetition using the outer loop is always
1602 1.1 agc preferred over using the inner loop. Therefore the
1603 1.1 agc transition for the inner loop is useless and can be thrown
1604 1.1 agc away. */
1605 1.1 agc /* XXX - The same position is used for all nodes in a bracket
1606 1.1 agc expression, so this optimization cannot be used (it will
1607 1.1 agc break bracket expressions) unless I figure out a way to
1608 1.1 agc detect it here. */
1609 1.1 agc if (trans->state_id == p2->position)
1610 1.1 agc {
1611 1.1 agc DPRINT(("*"));
1612 1.1 agc break;
1613 1.1 agc }
1614 1.1 agc #endif
1615 1.1 agc trans++;
1616 1.1 agc }
1617 1.1 agc
1618 1.1 agc if (trans->state == NULL)
1619 1.1 agc (trans + 1)->state = NULL;
1620 1.1 agc /* Use the character ranges, assertions, etc. from `p1' for
1621 1.1 agc the transition from `p1' to `p2'. */
1622 1.1 agc trans->code_min = p1->code_min;
1623 1.1 agc trans->code_max = p1->code_max;
1624 1.1 agc trans->state = transitions + offs[p2->position];
1625 1.1 agc trans->state_id = p2->position;
1626 1.1 agc trans->assertions = p1->assertions | p2->assertions
1627 1.1 agc | (p1->class ? ASSERT_CHAR_CLASS : 0)
1628 1.1 agc | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
1629 1.1 agc if (p1->backref >= 0)
1630 1.1 agc {
1631 1.1 agc assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
1632 1.1 agc assert(p2->backref < 0);
1633 1.1 agc trans->u.backref = p1->backref;
1634 1.1 agc trans->assertions |= ASSERT_BACKREF;
1635 1.1 agc }
1636 1.1 agc else
1637 1.1 agc trans->u.class = p1->class;
1638 1.1 agc if (p1->neg_classes != NULL)
1639 1.1 agc {
1640 1.1 agc for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
1641 1.1 agc trans->neg_classes =
1642 1.1 agc xmalloc(sizeof(*trans->neg_classes) * (i + 1));
1643 1.1 agc if (trans->neg_classes == NULL)
1644 1.1 agc return REG_ESPACE;
1645 1.1 agc for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
1646 1.1 agc trans->neg_classes[i] = p1->neg_classes[i];
1647 1.1 agc trans->neg_classes[i] = (tre_ctype_t)0;
1648 1.1 agc }
1649 1.1 agc else
1650 1.1 agc trans->neg_classes = NULL;
1651 1.1 agc
1652 1.1 agc /* Find out how many tags this transition has. */
1653 1.1 agc i = 0;
1654 1.1 agc if (p1->tags != NULL)
1655 1.1 agc while(p1->tags[i] >= 0)
1656 1.1 agc i++;
1657 1.1 agc j = 0;
1658 1.1 agc if (p2->tags != NULL)
1659 1.1 agc while(p2->tags[j] >= 0)
1660 1.1 agc j++;
1661 1.1 agc
1662 1.1 agc /* If we are overwriting a transition, free the old tag array. */
1663 1.1 agc if (trans->tags != NULL)
1664 1.1 agc xfree(trans->tags);
1665 1.1 agc trans->tags = NULL;
1666 1.1 agc
1667 1.1 agc /* If there were any tags, allocate an array and fill it. */
1668 1.1 agc if (i + j > 0)
1669 1.1 agc {
1670 1.1 agc trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
1671 1.1 agc if (!trans->tags)
1672 1.1 agc return REG_ESPACE;
1673 1.1 agc i = 0;
1674 1.1 agc if (p1->tags != NULL)
1675 1.1 agc while(p1->tags[i] >= 0)
1676 1.1 agc {
1677 1.1 agc trans->tags[i] = p1->tags[i];
1678 1.1 agc i++;
1679 1.1 agc }
1680 1.1 agc l = i;
1681 1.1 agc j = 0;
1682 1.1 agc if (p2->tags != NULL)
1683 1.1 agc while (p2->tags[j] >= 0)
1684 1.1 agc {
1685 1.1 agc /* Don't add duplicates. */
1686 1.1 agc dup = 0;
1687 1.1 agc for (k = 0; k < i; k++)
1688 1.1 agc if (trans->tags[k] == p2->tags[j])
1689 1.1 agc {
1690 1.1 agc dup = 1;
1691 1.1 agc break;
1692 1.1 agc }
1693 1.1 agc if (!dup)
1694 1.1 agc trans->tags[l++] = p2->tags[j];
1695 1.1 agc j++;
1696 1.1 agc }
1697 1.1 agc trans->tags[l] = -1;
1698 1.1 agc }
1699 1.1 agc
1700 1.1 agc /* Set the parameter array. If both `p2' and `p1' have same
1701 1.1 agc parameters, the values in `p2' override those in `p1'. */
1702 1.1 agc if (p1->params || p2->params)
1703 1.1 agc {
1704 1.1 agc if (!trans->params)
1705 1.1 agc trans->params = xmalloc(sizeof(*trans->params)
1706 1.1 agc * TRE_PARAM_LAST);
1707 1.1 agc if (!trans->params)
1708 1.1 agc return REG_ESPACE;
1709 1.1 agc for (i = 0; i < TRE_PARAM_LAST; i++)
1710 1.1 agc {
1711 1.1 agc trans->params[i] = TRE_PARAM_UNSET;
1712 1.1 agc if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
1713 1.1 agc trans->params[i] = p1->params[i];
1714 1.1 agc if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
1715 1.1 agc trans->params[i] = p2->params[i];
1716 1.1 agc }
1717 1.1 agc }
1718 1.1 agc else
1719 1.1 agc {
1720 1.1 agc if (trans->params)
1721 1.1 agc xfree(trans->params);
1722 1.1 agc trans->params = NULL;
1723 1.1 agc }
1724 1.1 agc
1725 1.1 agc
1726 1.1 agc #ifdef TRE_DEBUG
1727 1.1 agc {
1728 1.1 agc int *tags;
1729 1.1 agc
1730 1.1 agc DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
1731 1.1 agc p1->code_min));
1732 1.1 agc if (p1->code_max != p1->code_min)
1733 1.1 agc DPRINT(("-%3d", p1->code_max));
1734 1.1 agc tags = trans->tags;
1735 1.1 agc if (tags)
1736 1.1 agc {
1737 1.1 agc DPRINT((", tags ["));
1738 1.1 agc while (*tags >= 0)
1739 1.1 agc {
1740 1.1 agc DPRINT(("%d", *tags));
1741 1.1 agc tags++;
1742 1.1 agc if (*tags >= 0)
1743 1.1 agc DPRINT((","));
1744 1.1 agc }
1745 1.1 agc DPRINT(("]"));
1746 1.1 agc }
1747 1.1 agc if (trans->assertions)
1748 1.1 agc DPRINT((", assert %d", trans->assertions));
1749 1.1 agc if (trans->assertions & ASSERT_BACKREF)
1750 1.1 agc DPRINT((", backref %d", trans->u.backref));
1751 1.1 agc else if (trans->u.class)
1752 1.1 agc DPRINT((", class %ld", (long)trans->u.class));
1753 1.1 agc if (trans->neg_classes)
1754 1.1 agc DPRINT((", neg_classes %p", trans->neg_classes));
1755 1.1 agc if (trans->params)
1756 1.1 agc {
1757 1.1 agc DPRINT((", "));
1758 1.1 agc tre_print_params(trans->params);
1759 1.1 agc }
1760 1.1 agc DPRINT(("\n"));
1761 1.1 agc }
1762 1.1 agc #endif /* TRE_DEBUG */
1763 1.1 agc p2++;
1764 1.1 agc }
1765 1.1 agc p1++;
1766 1.1 agc }
1767 1.1 agc else
1768 1.1 agc /* Compute a maximum limit for the number of transitions leaving
1769 1.1 agc from each state. */
1770 1.1 agc while (p1->position >= 0)
1771 1.1 agc {
1772 1.1 agc p2 = orig_p2;
1773 1.1 agc while (p2->position >= 0)
1774 1.1 agc {
1775 1.1 agc counts[p1->position]++;
1776 1.1 agc p2++;
1777 1.1 agc }
1778 1.1 agc p1++;
1779 1.1 agc }
1780 1.1 agc return REG_OK;
1781 1.1 agc }
1782 1.1 agc
1783 1.1 agc /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
1784 1.1 agc labelled with one character range (there are no transitions on empty
1785 1.1 agc strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
1786 1.1 agc the regexp. */
1787 1.1 agc static reg_errcode_t
1788 1.1 agc tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
1789 1.1 agc int *counts, int *offs)
1790 1.1 agc {
1791 1.1 agc tre_union_t *uni;
1792 1.1 agc tre_catenation_t *cat;
1793 1.1 agc tre_iteration_t *iter;
1794 1.1 agc reg_errcode_t errcode = REG_OK;
1795 1.1 agc
1796 1.1 agc /* XXX - recurse using a stack!. */
1797 1.1 agc switch (node->type)
1798 1.1 agc {
1799 1.1 agc case LITERAL:
1800 1.1 agc break;
1801 1.1 agc case UNION:
1802 1.1 agc uni = (tre_union_t *)node->obj;
1803 1.1 agc errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
1804 1.1 agc if (errcode != REG_OK)
1805 1.1 agc return errcode;
1806 1.1 agc errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
1807 1.1 agc break;
1808 1.1 agc
1809 1.1 agc case CATENATION:
1810 1.1 agc cat = (tre_catenation_t *)node->obj;
1811 1.1 agc /* Add a transition from each position in cat->left->lastpos
1812 1.1 agc to each position in cat->right->firstpos. */
1813 1.1 agc errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
1814 1.1 agc transitions, counts, offs);
1815 1.1 agc if (errcode != REG_OK)
1816 1.1 agc return errcode;
1817 1.1 agc errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
1818 1.1 agc if (errcode != REG_OK)
1819 1.1 agc return errcode;
1820 1.1 agc errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
1821 1.1 agc break;
1822 1.1 agc
1823 1.1 agc case ITERATION:
1824 1.1 agc iter = (tre_iteration_t *)node->obj;
1825 1.1 agc assert(iter->max == -1 || iter->max == 1);
1826 1.1 agc
1827 1.1 agc if (iter->max == -1)
1828 1.1 agc {
1829 1.1 agc assert(iter->min == 0 || iter->min == 1);
1830 1.1 agc /* Add a transition from each last position in the iterated
1831 1.1 agc expression to each first position. */
1832 1.1 agc errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
1833 1.1 agc transitions, counts, offs);
1834 1.1 agc if (errcode != REG_OK)
1835 1.1 agc return errcode;
1836 1.1 agc }
1837 1.1 agc errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
1838 1.1 agc break;
1839 1.1 agc }
1840 1.1 agc return errcode;
1841 1.1 agc }
1842 1.1 agc
1843 1.1 agc
1844 1.1 agc #define ERROR_EXIT(err) \
1845 1.1 agc do \
1846 1.1 agc { \
1847 1.1 agc errcode = err; \
1848 1.1 agc if (/*CONSTCOND*/1) \
1849 1.1 agc goto error_exit; \
1850 1.1 agc } \
1851 1.1 agc while (/*CONSTCOND*/0)
1852 1.1 agc
1853 1.1 agc
1854 1.1 agc int
1855 1.1 agc tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
1856 1.1 agc {
1857 1.1 agc tre_stack_t *stack;
1858 1.1 agc tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
1859 1.1 agc tre_pos_and_tags_t *p;
1860 1.1 agc int *counts = NULL, *offs = NULL;
1861 1.1 agc int i, add = 0;
1862 1.1 agc tre_tnfa_transition_t *transitions, *initial;
1863 1.1 agc tre_tnfa_t *tnfa = NULL;
1864 1.1 agc tre_submatch_data_t *submatch_data;
1865 1.1 agc tre_tag_direction_t *tag_directions = NULL;
1866 1.1 agc reg_errcode_t errcode;
1867 1.1 agc tre_mem_t mem;
1868 1.1 agc
1869 1.1 agc /* Parse context. */
1870 1.1 agc tre_parse_ctx_t parse_ctx;
1871 1.1 agc
1872 1.1 agc /* Allocate a stack used throughout the compilation process for various
1873 1.1 agc purposes. */
1874 1.1 agc stack = tre_stack_new(512, 10240, 128);
1875 1.1 agc if (!stack)
1876 1.1 agc return REG_ESPACE;
1877 1.1 agc /* Allocate a fast memory allocator. */
1878 1.1 agc mem = tre_mem_new();
1879 1.1 agc if (!mem)
1880 1.1 agc {
1881 1.1 agc tre_stack_destroy(stack);
1882 1.1 agc return REG_ESPACE;
1883 1.1 agc }
1884 1.1 agc
1885 1.1 agc /* Parse the regexp. */
1886 1.1 agc memset(&parse_ctx, 0, sizeof(parse_ctx));
1887 1.1 agc parse_ctx.mem = mem;
1888 1.1 agc parse_ctx.stack = stack;
1889 1.1 agc parse_ctx.re = regex;
1890 1.1 agc parse_ctx.len = n;
1891 1.1 agc parse_ctx.cflags = cflags;
1892 1.1 agc parse_ctx.max_backref = -1;
1893 1.1 agc DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
1894 1.1 agc errcode = tre_parse(&parse_ctx);
1895 1.1 agc if (errcode != REG_OK)
1896 1.1 agc ERROR_EXIT(errcode);
1897 1.1 agc preg->re_nsub = parse_ctx.submatch_id - 1;
1898 1.1 agc tree = parse_ctx.result;
1899 1.1 agc
1900 1.1 agc /* Back references and approximate matching cannot currently be used
1901 1.1 agc in the same regexp. */
1902 1.1 agc if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
1903 1.1 agc ERROR_EXIT(REG_BADPAT);
1904 1.1 agc
1905 1.1 agc #ifdef TRE_DEBUG
1906 1.1 agc tre_ast_print(tree);
1907 1.1 agc #endif /* TRE_DEBUG */
1908 1.1 agc
1909 1.1 agc /* Referring to nonexistent subexpressions is illegal. */
1910 1.1 agc if (parse_ctx.max_backref > (int)preg->re_nsub)
1911 1.1 agc ERROR_EXIT(REG_ESUBREG);
1912 1.1 agc
1913 1.1 agc /* Allocate the TNFA struct. */
1914 1.1 agc tnfa = xcalloc(1, sizeof(tre_tnfa_t));
1915 1.1 agc if (tnfa == NULL)
1916 1.1 agc ERROR_EXIT(REG_ESPACE);
1917 1.1 agc tnfa->have_backrefs = parse_ctx.max_backref >= 0;
1918 1.1 agc tnfa->have_approx = parse_ctx.have_approx;
1919 1.1 agc tnfa->num_submatches = parse_ctx.submatch_id;
1920 1.1 agc
1921 1.1 agc /* Set up tags for submatch addressing. If REG_NOSUB is set and the
1922 1.1 agc regexp does not have back references, this can be skipped. */
1923 1.1 agc if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
1924 1.1 agc {
1925 1.1 agc DPRINT(("tre_compile: setting up tags\n"));
1926 1.1 agc
1927 1.1 agc /* Figure out how many tags we will need. */
1928 1.1 agc errcode = tre_add_tags(NULL, stack, tree, tnfa);
1929 1.1 agc if (errcode != REG_OK)
1930 1.1 agc ERROR_EXIT(errcode);
1931 1.1 agc #ifdef TRE_DEBUG
1932 1.1 agc tre_ast_print(tree);
1933 1.1 agc #endif /* TRE_DEBUG */
1934 1.1 agc
1935 1.1 agc if (tnfa->num_tags > 0)
1936 1.1 agc {
1937 1.1 agc tag_directions = xmalloc(sizeof(*tag_directions)
1938 1.1 agc * (tnfa->num_tags + 1));
1939 1.1 agc if (tag_directions == NULL)
1940 1.1 agc ERROR_EXIT(REG_ESPACE);
1941 1.1 agc tnfa->tag_directions = tag_directions;
1942 1.1 agc memset(tag_directions, -1,
1943 1.1 agc sizeof(*tag_directions) * (tnfa->num_tags + 1));
1944 1.1 agc }
1945 1.1 agc tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
1946 1.1 agc sizeof(tnfa->minimal_tags));
1947 1.1 agc if (tnfa->minimal_tags == NULL)
1948 1.1 agc ERROR_EXIT(REG_ESPACE);
1949 1.1 agc
1950 1.1 agc submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
1951 1.1 agc sizeof(*submatch_data));
1952 1.1 agc if (submatch_data == NULL)
1953 1.1 agc ERROR_EXIT(REG_ESPACE);
1954 1.1 agc tnfa->submatch_data = submatch_data;
1955 1.1 agc
1956 1.1 agc errcode = tre_add_tags(mem, stack, tree, tnfa);
1957 1.1 agc if (errcode != REG_OK)
1958 1.1 agc ERROR_EXIT(errcode);
1959 1.1 agc
1960 1.1 agc #ifdef TRE_DEBUG
1961 1.1 agc for (i = 0; i < parse_ctx.submatch_id; i++)
1962 1.1 agc DPRINT(("pmatch[%d] = {t%d, t%d}\n",
1963 1.1 agc i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
1964 1.1 agc for (i = 0; i < tnfa->num_tags; i++)
1965 1.1 agc DPRINT(("t%d is %s\n", i,
1966 1.1 agc tag_directions[i] == TRE_TAG_MINIMIZE ?
1967 1.1 agc "minimized" : "maximized"));
1968 1.1 agc #endif /* TRE_DEBUG */
1969 1.1 agc }
1970 1.1 agc
1971 1.1 agc /* Expand iteration nodes. */
1972 1.1 agc errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
1973 1.1 agc tag_directions, &tnfa->params_depth);
1974 1.1 agc if (errcode != REG_OK)
1975 1.1 agc ERROR_EXIT(errcode);
1976 1.1 agc
1977 1.1 agc /* Add a dummy node for the final state.
1978 1.1 agc XXX - For certain patterns this dummy node can be optimized away,
1979 1.1 agc for example "a*" or "ab*". Figure out a simple way to detect
1980 1.1 agc this possibility. */
1981 1.1 agc tmp_ast_l = tree;
1982 1.1 agc tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
1983 1.1 agc if (tmp_ast_r == NULL)
1984 1.1 agc ERROR_EXIT(REG_ESPACE);
1985 1.1 agc
1986 1.1 agc tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
1987 1.1 agc if (tree == NULL)
1988 1.1 agc ERROR_EXIT(REG_ESPACE);
1989 1.1 agc
1990 1.1 agc #ifdef TRE_DEBUG
1991 1.1 agc tre_ast_print(tree);
1992 1.1 agc DPRINT(("Number of states: %d\n", parse_ctx.position));
1993 1.1 agc #endif /* TRE_DEBUG */
1994 1.1 agc
1995 1.1 agc errcode = tre_compute_nfl(mem, stack, tree);
1996 1.1 agc if (errcode != REG_OK)
1997 1.1 agc ERROR_EXIT(errcode);
1998 1.1 agc
1999 1.1 agc counts = xmalloc(sizeof(int) * parse_ctx.position);
2000 1.1 agc if (counts == NULL)
2001 1.1 agc ERROR_EXIT(REG_ESPACE);
2002 1.1 agc
2003 1.1 agc offs = xmalloc(sizeof(int) * parse_ctx.position);
2004 1.1 agc if (offs == NULL)
2005 1.1 agc ERROR_EXIT(REG_ESPACE);
2006 1.1 agc
2007 1.1 agc for (i = 0; i < parse_ctx.position; i++)
2008 1.1 agc counts[i] = 0;
2009 1.1 agc tre_ast_to_tnfa(tree, NULL, counts, NULL);
2010 1.1 agc
2011 1.1 agc add = 0;
2012 1.1 agc for (i = 0; i < parse_ctx.position; i++)
2013 1.1 agc {
2014 1.1 agc offs[i] = add;
2015 1.1 agc add += counts[i] + 1;
2016 1.1 agc counts[i] = 0;
2017 1.1 agc }
2018 1.1 agc transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2019 1.1 agc if (transitions == NULL)
2020 1.1 agc ERROR_EXIT(REG_ESPACE);
2021 1.1 agc tnfa->transitions = transitions;
2022 1.1 agc tnfa->num_transitions = add;
2023 1.1 agc
2024 1.1 agc DPRINT(("Converting to TNFA:\n"));
2025 1.1 agc errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2026 1.1 agc if (errcode != REG_OK)
2027 1.1 agc ERROR_EXIT(errcode);
2028 1.1 agc
2029 1.1 agc /* If in eight bit mode, compute a table of characters that can be the
2030 1.1 agc first character of a match. */
2031 1.1 agc tnfa->first_char = -1;
2032 1.1 agc if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
2033 1.1 agc {
2034 1.1 agc int count = 0;
2035 1.1 agc tre_cint_t k;
2036 1.1 agc DPRINT(("Characters that can start a match:"));
2037 1.1 agc tnfa->firstpos_chars = xcalloc(256, sizeof(char));
2038 1.1 agc if (tnfa->firstpos_chars == NULL)
2039 1.1 agc ERROR_EXIT(REG_ESPACE);
2040 1.1 agc for (p = tree->firstpos; p->position >= 0; p++)
2041 1.1 agc {
2042 1.1 agc tre_tnfa_transition_t *j = transitions + offs[p->position];
2043 1.1 agc while (j->state != NULL)
2044 1.1 agc {
2045 1.1 agc for (k = j->code_min; k <= j->code_max && k < 256; k++)
2046 1.1 agc {
2047 1.1 agc DPRINT((" %d", k));
2048 1.1 agc tnfa->firstpos_chars[k] = 1;
2049 1.1 agc count++;
2050 1.1 agc }
2051 1.1 agc j++;
2052 1.1 agc }
2053 1.1 agc }
2054 1.1 agc DPRINT(("\n"));
2055 1.1 agc #define TRE_OPTIMIZE_FIRST_CHAR 1
2056 1.1 agc #if TRE_OPTIMIZE_FIRST_CHAR
2057 1.1 agc if (count == 1)
2058 1.1 agc {
2059 1.1 agc for (k = 0; k < 256; k++)
2060 1.1 agc if (tnfa->firstpos_chars[k])
2061 1.1 agc {
2062 1.1 agc DPRINT(("first char must be %d\n", k));
2063 1.1 agc tnfa->first_char = k;
2064 1.1 agc xfree(tnfa->firstpos_chars);
2065 1.1 agc tnfa->firstpos_chars = NULL;
2066 1.1 agc break;
2067 1.1 agc }
2068 1.1 agc }
2069 1.1 agc #endif
2070 1.1 agc
2071 1.1 agc }
2072 1.1 agc else
2073 1.1 agc tnfa->firstpos_chars = NULL;
2074 1.1 agc
2075 1.1 agc
2076 1.1 agc p = tree->firstpos;
2077 1.1 agc i = 0;
2078 1.1 agc while (p->position >= 0)
2079 1.1 agc {
2080 1.1 agc i++;
2081 1.1 agc
2082 1.1 agc #ifdef TRE_DEBUG
2083 1.1 agc {
2084 1.1 agc int *tags;
2085 1.1 agc DPRINT(("initial: %d", p->position));
2086 1.1 agc tags = p->tags;
2087 1.1 agc if (tags != NULL)
2088 1.1 agc {
2089 1.1 agc if (*tags >= 0)
2090 1.1 agc DPRINT(("/"));
2091 1.1 agc while (*tags >= 0)
2092 1.1 agc {
2093 1.1 agc DPRINT(("%d", *tags));
2094 1.1 agc tags++;
2095 1.1 agc if (*tags >= 0)
2096 1.1 agc DPRINT((","));
2097 1.1 agc }
2098 1.1 agc }
2099 1.1 agc DPRINT((", assert %d", p->assertions));
2100 1.1 agc if (p->params)
2101 1.1 agc {
2102 1.1 agc DPRINT((", "));
2103 1.1 agc tre_print_params(p->params);
2104 1.1 agc }
2105 1.1 agc DPRINT(("\n"));
2106 1.1 agc }
2107 1.1 agc #endif /* TRE_DEBUG */
2108 1.1 agc
2109 1.1 agc p++;
2110 1.1 agc }
2111 1.1 agc
2112 1.1 agc initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2113 1.1 agc if (initial == NULL)
2114 1.1 agc ERROR_EXIT(REG_ESPACE);
2115 1.1 agc tnfa->initial = initial;
2116 1.1 agc
2117 1.1 agc i = 0;
2118 1.1 agc for (p = tree->firstpos; p->position >= 0; p++)
2119 1.1 agc {
2120 1.1 agc initial[i].state = transitions + offs[p->position];
2121 1.1 agc initial[i].state_id = p->position;
2122 1.1 agc initial[i].tags = NULL;
2123 1.1 agc /* Copy the arrays p->tags, and p->params, they are allocated
2124 1.1 agc from a tre_mem object. */
2125 1.1 agc if (p->tags)
2126 1.1 agc {
2127 1.1 agc int j;
2128 1.1 agc for (j = 0; p->tags[j] >= 0; j++);
2129 1.1 agc initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2130 1.1 agc if (!initial[i].tags)
2131 1.1 agc ERROR_EXIT(REG_ESPACE);
2132 1.1 agc memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2133 1.1 agc }
2134 1.1 agc initial[i].params = NULL;
2135 1.1 agc if (p->params)
2136 1.1 agc {
2137 1.1 agc initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
2138 1.1 agc if (!initial[i].params)
2139 1.1 agc ERROR_EXIT(REG_ESPACE);
2140 1.1 agc memcpy(initial[i].params, p->params,
2141 1.1 agc sizeof(*p->params) * TRE_PARAM_LAST);
2142 1.1 agc }
2143 1.1 agc initial[i].assertions = p->assertions;
2144 1.1 agc i++;
2145 1.1 agc }
2146 1.1 agc initial[i].state = NULL;
2147 1.1 agc
2148 1.1 agc tnfa->num_transitions = add;
2149 1.1 agc tnfa->final = transitions + offs[tree->lastpos[0].position];
2150 1.1 agc tnfa->num_states = parse_ctx.position;
2151 1.1 agc tnfa->cflags = cflags;
2152 1.1 agc
2153 1.1 agc DPRINT(("final state %p\n", (void *)tnfa->final));
2154 1.1 agc
2155 1.1 agc tre_mem_destroy(mem);
2156 1.1 agc tre_stack_destroy(stack);
2157 1.1 agc xfree(counts);
2158 1.1 agc xfree(offs);
2159 1.1 agc
2160 1.1 agc preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2161 1.1 agc return REG_OK;
2162 1.1 agc
2163 1.1 agc error_exit:
2164 1.1 agc /* Free everything that was allocated and return the error code. */
2165 1.1 agc tre_mem_destroy(mem);
2166 1.1 agc if (stack != NULL)
2167 1.1 agc tre_stack_destroy(stack);
2168 1.1 agc if (counts != NULL)
2169 1.1 agc xfree(counts);
2170 1.1 agc if (offs != NULL)
2171 1.1 agc xfree(offs);
2172 1.1 agc preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2173 1.1 agc tre_free(preg);
2174 1.1 agc return errcode;
2175 1.1 agc }
2176 1.1 agc
2177 1.1 agc
2178 1.1 agc
2179 1.1 agc
2180 1.1 agc void
2181 1.1 agc tre_free(regex_t *preg)
2182 1.1 agc {
2183 1.1 agc tre_tnfa_t *tnfa;
2184 1.1 agc unsigned int i;
2185 1.1 agc tre_tnfa_transition_t *trans;
2186 1.1 agc
2187 1.1 agc tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2188 1.1 agc if (!tnfa)
2189 1.1 agc return;
2190 1.1 agc
2191 1.1 agc for (i = 0; i < tnfa->num_transitions; i++)
2192 1.1 agc if (tnfa->transitions[i].state)
2193 1.1 agc {
2194 1.1 agc if (tnfa->transitions[i].tags)
2195 1.1 agc xfree(tnfa->transitions[i].tags);
2196 1.1 agc if (tnfa->transitions[i].neg_classes)
2197 1.1 agc xfree(tnfa->transitions[i].neg_classes);
2198 1.1 agc if (tnfa->transitions[i].params)
2199 1.1 agc xfree(tnfa->transitions[i].params);
2200 1.1 agc }
2201 1.1 agc if (tnfa->transitions)
2202 1.1 agc xfree(tnfa->transitions);
2203 1.1 agc
2204 1.1 agc if (tnfa->initial)
2205 1.1 agc {
2206 1.1 agc for (trans = tnfa->initial; trans->state; trans++)
2207 1.1 agc {
2208 1.1 agc if (trans->tags)
2209 1.1 agc xfree(trans->tags);
2210 1.1 agc if (trans->params)
2211 1.1 agc xfree(trans->params);
2212 1.1 agc }
2213 1.1 agc xfree(tnfa->initial);
2214 1.1 agc }
2215 1.1 agc
2216 1.1 agc if (tnfa->submatch_data)
2217 1.1 agc {
2218 1.1 agc for (i = 0; i < tnfa->num_submatches; i++)
2219 1.1 agc if (tnfa->submatch_data[i].parents)
2220 1.1 agc xfree(tnfa->submatch_data[i].parents);
2221 1.1 agc xfree(tnfa->submatch_data);
2222 1.1 agc }
2223 1.1 agc
2224 1.1 agc if (tnfa->tag_directions)
2225 1.1 agc xfree(tnfa->tag_directions);
2226 1.1 agc if (tnfa->firstpos_chars)
2227 1.1 agc xfree(tnfa->firstpos_chars);
2228 1.1 agc if (tnfa->minimal_tags)
2229 1.1 agc xfree(tnfa->minimal_tags);
2230 1.1 agc xfree(tnfa);
2231 1.1 agc }
2232 1.1 agc
2233 1.1 agc char *
2234 1.1 agc tre_version(void)
2235 1.1 agc {
2236 1.1 agc static char str[256];
2237 1.1 agc char *version;
2238 1.1 agc
2239 1.1 agc if (str[0] == 0)
2240 1.1 agc {
2241 1.1 agc (void) tre_config(TRE_CONFIG_VERSION, &version);
2242 1.1 agc (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
2243 1.1 agc }
2244 1.1 agc return str;
2245 1.1 agc }
2246 1.1 agc
2247 1.1 agc int
2248 1.1 agc tre_config(int query, void *result)
2249 1.1 agc {
2250 1.1 agc int *int_result = result;
2251 1.1 agc const char **string_result = result;
2252 1.1 agc
2253 1.1 agc switch (query)
2254 1.1 agc {
2255 1.1 agc case TRE_CONFIG_APPROX:
2256 1.1 agc #ifdef TRE_APPROX
2257 1.1 agc *int_result = 1;
2258 1.1 agc #else /* !TRE_APPROX */
2259 1.1 agc *int_result = 0;
2260 1.1 agc #endif /* !TRE_APPROX */
2261 1.1 agc return REG_OK;
2262 1.1 agc
2263 1.1 agc case TRE_CONFIG_WCHAR:
2264 1.1 agc #ifdef TRE_WCHAR
2265 1.1 agc *int_result = 1;
2266 1.1 agc #else /* !TRE_WCHAR */
2267 1.1 agc *int_result = 0;
2268 1.1 agc #endif /* !TRE_WCHAR */
2269 1.1 agc return REG_OK;
2270 1.1 agc
2271 1.1 agc case TRE_CONFIG_MULTIBYTE:
2272 1.1 agc #ifdef TRE_MULTIBYTE
2273 1.1 agc *int_result = 1;
2274 1.1 agc #else /* !TRE_MULTIBYTE */
2275 1.1 agc *int_result = 0;
2276 1.1 agc #endif /* !TRE_MULTIBYTE */
2277 1.1 agc return REG_OK;
2278 1.1 agc
2279 1.1 agc case TRE_CONFIG_SYSTEM_ABI:
2280 1.1 agc #ifdef TRE_CONFIG_SYSTEM_ABI
2281 1.1 agc *int_result = 1;
2282 1.1 agc #else /* !TRE_CONFIG_SYSTEM_ABI */
2283 1.1 agc *int_result = 0;
2284 1.1 agc #endif /* !TRE_CONFIG_SYSTEM_ABI */
2285 1.1 agc return REG_OK;
2286 1.1 agc
2287 1.1 agc case TRE_CONFIG_VERSION:
2288 1.1 agc *string_result = TRE_VERSION;
2289 1.1 agc return REG_OK;
2290 1.1 agc }
2291 1.1 agc
2292 1.1 agc return REG_NOMATCH;
2293 1.1 agc }
2294 1.1 agc
2295 1.1 agc
2296 1.1 agc /* EOF */
2297