Home | History | Annotate | Line # | Download | only in lib
tre-compile.c revision 1.2
      1  1.1       agc /*
      2  1.1       agc   tre-compile.c - TRE regex compiler
      3  1.1       agc 
      4  1.1       agc   This software is released under a BSD-style license.
      5  1.1       agc   See the file LICENSE for details and copyright.
      6  1.1       agc 
      7  1.1       agc */
      8  1.1       agc 
      9  1.1       agc /*
     10  1.1       agc   TODO:
     11  1.1       agc    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
     12  1.1       agc      function calls.
     13  1.1       agc */
     14  1.1       agc 
     15  1.1       agc 
     16  1.1       agc #ifdef HAVE_CONFIG_H
     17  1.1       agc #include <config.h>
     18  1.1       agc #endif /* HAVE_CONFIG_H */
     19  1.1       agc #include <stdio.h>
     20  1.1       agc #include <assert.h>
     21  1.1       agc #include <string.h>
     22  1.1       agc 
     23  1.1       agc #include "tre-internal.h"
     24  1.1       agc #include "tre-mem.h"
     25  1.1       agc #include "tre-stack.h"
     26  1.1       agc #include "tre-ast.h"
     27  1.1       agc #include "tre-parse.h"
     28  1.1       agc #include "tre-compile.h"
     29  1.1       agc #include "tre.h"
     30  1.1       agc #include "xmalloc.h"
     31  1.1       agc 
     32  1.1       agc /*
     33  1.1       agc   Algorithms to setup tags so that submatch addressing can be done.
     34  1.1       agc */
     35  1.1       agc 
     36  1.1       agc 
     37  1.1       agc /* Inserts a catenation node to the root of the tree given in `node'.
     38  1.1       agc    As the left child a new tag with number `tag_id' to `node' is added,
     39  1.1       agc    and the right child is the old root. */
     40  1.1       agc static reg_errcode_t
     41  1.1       agc tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
     42  1.1       agc {
     43  1.1       agc   tre_catenation_t *c;
     44  1.1       agc 
     45  1.1       agc   DPRINT(("add_tag_left: tag %d\n", tag_id));
     46  1.1       agc 
     47  1.1       agc   c = tre_mem_alloc(mem, sizeof(*c));
     48  1.1       agc   if (c == NULL)
     49  1.1       agc     return REG_ESPACE;
     50  1.1       agc   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
     51  1.1       agc   if (c->left == NULL)
     52  1.1       agc     return REG_ESPACE;
     53  1.1       agc   c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
     54  1.1       agc   if (c->right == NULL)
     55  1.1       agc     return REG_ESPACE;
     56  1.1       agc 
     57  1.1       agc   c->right->obj = node->obj;
     58  1.1       agc   c->right->type = node->type;
     59  1.1       agc   c->right->nullable = -1;
     60  1.1       agc   c->right->submatch_id = -1;
     61  1.1       agc   c->right->firstpos = NULL;
     62  1.1       agc   c->right->lastpos = NULL;
     63  1.1       agc   c->right->num_tags = 0;
     64  1.1       agc   node->obj = c;
     65  1.1       agc   node->type = CATENATION;
     66  1.1       agc   return REG_OK;
     67  1.1       agc }
     68  1.1       agc 
     69  1.1       agc /* Inserts a catenation node to the root of the tree given in `node'.
     70  1.1       agc    As the right child a new tag with number `tag_id' to `node' is added,
     71  1.1       agc    and the left child is the old root. */
     72  1.1       agc static reg_errcode_t
     73  1.1       agc tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
     74  1.1       agc {
     75  1.1       agc   tre_catenation_t *c;
     76  1.1       agc 
     77  1.1       agc   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
     78  1.1       agc 
     79  1.1       agc   c = tre_mem_alloc(mem, sizeof(*c));
     80  1.1       agc   if (c == NULL)
     81  1.1       agc     return REG_ESPACE;
     82  1.1       agc   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
     83  1.1       agc   if (c->right == NULL)
     84  1.1       agc     return REG_ESPACE;
     85  1.1       agc   c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
     86  1.1       agc   if (c->left == NULL)
     87  1.1       agc     return REG_ESPACE;
     88  1.1       agc 
     89  1.1       agc   c->left->obj = node->obj;
     90  1.1       agc   c->left->type = node->type;
     91  1.1       agc   c->left->nullable = -1;
     92  1.1       agc   c->left->submatch_id = -1;
     93  1.1       agc   c->left->firstpos = NULL;
     94  1.1       agc   c->left->lastpos = NULL;
     95  1.1       agc   c->left->num_tags = 0;
     96  1.1       agc   node->obj = c;
     97  1.1       agc   node->type = CATENATION;
     98  1.1       agc   return REG_OK;
     99  1.1       agc }
    100  1.1       agc 
    101  1.1       agc typedef enum {
    102  1.1       agc   ADDTAGS_RECURSE,
    103  1.1       agc   ADDTAGS_AFTER_ITERATION,
    104  1.1       agc   ADDTAGS_AFTER_UNION_LEFT,
    105  1.1       agc   ADDTAGS_AFTER_UNION_RIGHT,
    106  1.1       agc   ADDTAGS_AFTER_CAT_LEFT,
    107  1.1       agc   ADDTAGS_AFTER_CAT_RIGHT,
    108  1.1       agc   ADDTAGS_SET_SUBMATCH_END
    109  1.1       agc } tre_addtags_symbol_t;
    110  1.1       agc 
    111  1.1       agc 
    112  1.1       agc typedef struct {
    113  1.1       agc   int tag;
    114  1.1       agc   int next_tag;
    115  1.1       agc } tre_tag_states_t;
    116  1.1       agc 
    117  1.1       agc 
    118  1.1       agc /* Go through `regset' and set submatch data for submatches that are
    119  1.1       agc    using this tag. */
    120  1.1       agc static void
    121  1.1       agc tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
    122  1.1       agc {
    123  1.1       agc   int i;
    124  1.1       agc 
    125  1.1       agc   for (i = 0; regset[i] >= 0; i++)
    126  1.1       agc     {
    127  1.1       agc       int id = regset[i] / 2;
    128  1.1       agc       int start = !(regset[i] % 2);
    129  1.1       agc       DPRINT(("  Using tag %d for %s offset of "
    130  1.1       agc 	      "submatch %d\n", tag,
    131  1.1       agc 	      start ? "start" : "end", id));
    132  1.1       agc       if (start)
    133  1.1       agc 	tnfa->submatch_data[id].so_tag = tag;
    134  1.1       agc       else
    135  1.1       agc 	tnfa->submatch_data[id].eo_tag = tag;
    136  1.1       agc     }
    137  1.1       agc   regset[0] = -1;
    138  1.1       agc }
    139  1.1       agc 
    140  1.1       agc 
    141  1.1       agc /* Adds tags to appropriate locations in the parse tree in `tree', so that
    142  1.1       agc    subexpressions marked for submatch addressing can be traced. */
    143  1.1       agc static reg_errcode_t
    144  1.1       agc tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
    145  1.1       agc 	     tre_tnfa_t *tnfa)
    146  1.1       agc {
    147  1.1       agc   reg_errcode_t status = REG_OK;
    148  1.1       agc   tre_addtags_symbol_t symbol;
    149  1.1       agc   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
    150  1.1       agc   int bottom = tre_stack_num_objects(stack);
    151  1.1       agc   /* True for first pass (counting number of needed tags) */
    152  1.1       agc   int first_pass = (mem == NULL || tnfa == NULL);
    153  1.1       agc   int *regset, *orig_regset;
    154  1.1       agc   int num_tags = 0; /* Total number of tags. */
    155  1.1       agc   int num_minimals = 0;	 /* Number of special minimal tags. */
    156  1.1       agc   int tag = 0;	    /* The tag that is to be added next. */
    157  1.1       agc   int next_tag = 1; /* Next tag to use after this one. */
    158  1.1       agc   int *parents;	    /* Stack of submatches the current submatch is
    159  1.1       agc 		       contained in. */
    160  1.1       agc   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
    161  1.1       agc   tre_tag_states_t *saved_states;
    162  1.1       agc 
    163  1.1       agc   tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
    164  1.1       agc   if (!first_pass)
    165  1.1       agc     {
    166  1.1       agc       tnfa->end_tag = 0;
    167  1.1       agc       tnfa->minimal_tags[0] = -1;
    168  1.1       agc     }
    169  1.1       agc 
    170  1.1       agc   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
    171  1.1       agc   if (regset == NULL)
    172  1.1       agc     return REG_ESPACE;
    173  1.1       agc   regset[0] = -1;
    174  1.1       agc   orig_regset = regset;
    175  1.1       agc 
    176  1.1       agc   parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
    177  1.1       agc   if (parents == NULL)
    178  1.1       agc     {
    179  1.1       agc       xfree(regset);
    180  1.1       agc       return REG_ESPACE;
    181  1.1       agc     }
    182  1.1       agc   parents[0] = -1;
    183  1.1       agc 
    184  1.1       agc   saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
    185  1.1       agc   if (saved_states == NULL)
    186  1.1       agc     {
    187  1.1       agc       xfree(regset);
    188  1.1       agc       xfree(parents);
    189  1.1       agc       return REG_ESPACE;
    190  1.1       agc     }
    191  1.1       agc   else
    192  1.1       agc     {
    193  1.1       agc       unsigned int i;
    194  1.1       agc       for (i = 0; i <= tnfa->num_submatches; i++)
    195  1.1       agc 	saved_states[i].tag = -1;
    196  1.1       agc     }
    197  1.1       agc 
    198  1.1       agc   STACK_PUSH(stack, voidptr, node);
    199  1.2  christos   STACK_PUSH(stack, long, ADDTAGS_RECURSE);
    200  1.1       agc 
    201  1.1       agc   while (tre_stack_num_objects(stack) > bottom)
    202  1.1       agc     {
    203  1.1       agc       if (status != REG_OK)
    204  1.1       agc 	break;
    205  1.1       agc 
    206  1.2  christos       symbol = (tre_addtags_symbol_t)tre_stack_pop_long(stack);
    207  1.1       agc       switch (symbol)
    208  1.1       agc 	{
    209  1.1       agc 
    210  1.1       agc 	case ADDTAGS_SET_SUBMATCH_END:
    211  1.1       agc 	  {
    212  1.2  christos 	    int id = (int)tre_stack_pop_long(stack);
    213  1.1       agc 	    int i;
    214  1.1       agc 
    215  1.1       agc 	    /* Add end of this submatch to regset. */
    216  1.1       agc 	    for (i = 0; regset[i] >= 0; i++);
    217  1.1       agc 	    regset[i] = id * 2 + 1;
    218  1.1       agc 	    regset[i + 1] = -1;
    219  1.1       agc 
    220  1.1       agc 	    /* Pop this submatch from the parents stack. */
    221  1.1       agc 	    for (i = 0; parents[i] >= 0; i++);
    222  1.1       agc 	    parents[i - 1] = -1;
    223  1.1       agc 	    break;
    224  1.1       agc 	  }
    225  1.1       agc 
    226  1.1       agc 	case ADDTAGS_RECURSE:
    227  1.1       agc 	  node = tre_stack_pop_voidptr(stack);
    228  1.1       agc 
    229  1.1       agc 	  if (node->submatch_id >= 0)
    230  1.1       agc 	    {
    231  1.1       agc 	      int id = node->submatch_id;
    232  1.1       agc 	      int i;
    233  1.1       agc 
    234  1.1       agc 
    235  1.1       agc 	      /* Add start of this submatch to regset. */
    236  1.1       agc 	      for (i = 0; regset[i] >= 0; i++);
    237  1.1       agc 	      regset[i] = id * 2;
    238  1.1       agc 	      regset[i + 1] = -1;
    239  1.1       agc 
    240  1.1       agc 	      if (!first_pass)
    241  1.1       agc 		{
    242  1.1       agc 		  for (i = 0; parents[i] >= 0; i++);
    243  1.1       agc 		  tnfa->submatch_data[id].parents = NULL;
    244  1.1       agc 		  if (i > 0)
    245  1.1       agc 		    {
    246  1.1       agc 		      int *p = xmalloc(sizeof(*p) * (i + 1));
    247  1.1       agc 		      if (p == NULL)
    248  1.1       agc 			{
    249  1.1       agc 			  status = REG_ESPACE;
    250  1.1       agc 			  break;
    251  1.1       agc 			}
    252  1.1       agc 		      assert(tnfa->submatch_data[id].parents == NULL);
    253  1.1       agc 		      tnfa->submatch_data[id].parents = p;
    254  1.1       agc 		      for (i = 0; parents[i] >= 0; i++)
    255  1.1       agc 			p[i] = parents[i];
    256  1.1       agc 		      p[i] = -1;
    257  1.1       agc 		    }
    258  1.1       agc 		}
    259  1.1       agc 
    260  1.1       agc 	      /* Add end of this submatch to regset after processing this
    261  1.1       agc 		 node. */
    262  1.2  christos 	      STACK_PUSHX(stack, long, node->submatch_id);
    263  1.2  christos 	      STACK_PUSHX(stack, long, ADDTAGS_SET_SUBMATCH_END);
    264  1.1       agc 	    }
    265  1.1       agc 
    266  1.1       agc 	  switch (node->type)
    267  1.1       agc 	    {
    268  1.1       agc 	    case LITERAL:
    269  1.1       agc 	      {
    270  1.1       agc 		tre_literal_t *lit = node->obj;
    271  1.1       agc 
    272  1.1       agc 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
    273  1.1       agc 		  {
    274  1.1       agc 		    int i;
    275  1.1       agc 		    DPRINT(("Literal %d-%d\n",
    276  1.1       agc 			    (int)lit->code_min, (int)lit->code_max));
    277  1.1       agc 		    if (regset[0] >= 0)
    278  1.1       agc 		      {
    279  1.1       agc 			/* Regset is not empty, so add a tag before the
    280  1.1       agc 			   literal or backref. */
    281  1.1       agc 			if (!first_pass)
    282  1.1       agc 			  {
    283  1.1       agc 			    status = tre_add_tag_left(mem, node, tag);
    284  1.1       agc 			    tnfa->tag_directions[tag] = direction;
    285  1.1       agc 			    if (minimal_tag >= 0)
    286  1.1       agc 			      {
    287  1.1       agc 				DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
    288  1.1       agc 				for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
    289  1.1       agc 				tnfa->minimal_tags[i] = tag;
    290  1.1       agc 				tnfa->minimal_tags[i + 1] = minimal_tag;
    291  1.1       agc 				tnfa->minimal_tags[i + 2] = -1;
    292  1.1       agc 				minimal_tag = -1;
    293  1.1       agc 				num_minimals++;
    294  1.1       agc 			      }
    295  1.1       agc 			    tre_purge_regset(regset, tnfa, tag);
    296  1.1       agc 			  }
    297  1.1       agc 			else
    298  1.1       agc 			  {
    299  1.1       agc 			    DPRINT(("  num_tags = 1\n"));
    300  1.1       agc 			    node->num_tags = 1;
    301  1.1       agc 			  }
    302  1.1       agc 
    303  1.1       agc 			DPRINT(("  num_tags++\n"));
    304  1.1       agc 			regset[0] = -1;
    305  1.1       agc 			tag = next_tag;
    306  1.1       agc 			num_tags++;
    307  1.1       agc 			next_tag++;
    308  1.1       agc 		      }
    309  1.1       agc 		  }
    310  1.1       agc 		else
    311  1.1       agc 		  {
    312  1.1       agc 		    assert(!IS_TAG(lit));
    313  1.1       agc 		  }
    314  1.1       agc 		break;
    315  1.1       agc 	      }
    316  1.1       agc 	    case CATENATION:
    317  1.1       agc 	      {
    318  1.1       agc 		tre_catenation_t *cat = node->obj;
    319  1.1       agc 		tre_ast_node_t *left = cat->left;
    320  1.1       agc 		tre_ast_node_t *right = cat->right;
    321  1.1       agc 		int reserved_tag = -1;
    322  1.1       agc 		DPRINT(("Catenation, next_tag = %d\n", next_tag));
    323  1.1       agc 
    324  1.1       agc 
    325  1.1       agc 		/* After processing right child. */
    326  1.1       agc 		STACK_PUSHX(stack, voidptr, node);
    327  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_RIGHT);
    328  1.1       agc 
    329  1.1       agc 		/* Process right child. */
    330  1.1       agc 		STACK_PUSHX(stack, voidptr, right);
    331  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
    332  1.1       agc 
    333  1.1       agc 		/* After processing left child. */
    334  1.2  christos 		STACK_PUSHX(stack, long, (long)(next_tag + left->num_tags));
    335  1.1       agc 		DPRINT(("  Pushing %d for after left\n",
    336  1.1       agc 			next_tag + left->num_tags));
    337  1.1       agc 		if (left->num_tags > 0 && right->num_tags > 0)
    338  1.1       agc 		  {
    339  1.1       agc 		    /* Reserve the next tag to the right child. */
    340  1.1       agc 		    DPRINT(("  Reserving next_tag %d to right child\n",
    341  1.1       agc 			    next_tag));
    342  1.1       agc 		    reserved_tag = next_tag;
    343  1.1       agc 		    next_tag++;
    344  1.1       agc 		  }
    345  1.2  christos 		STACK_PUSHX(stack, long, reserved_tag);
    346  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_AFTER_CAT_LEFT);
    347  1.1       agc 
    348  1.1       agc 		/* Process left child. */
    349  1.1       agc 		STACK_PUSHX(stack, voidptr, left);
    350  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
    351  1.1       agc 
    352  1.1       agc 		}
    353  1.1       agc 	      break;
    354  1.1       agc 	    case ITERATION:
    355  1.1       agc 	      {
    356  1.1       agc 		tre_iteration_t *iter = node->obj;
    357  1.1       agc 		DPRINT(("Iteration\n"));
    358  1.1       agc 
    359  1.1       agc 		if (first_pass)
    360  1.1       agc 		  {
    361  1.2  christos 		    STACK_PUSHX(stack, long, regset[0] >= 0 || iter->minimal);
    362  1.1       agc 		  }
    363  1.1       agc 		else
    364  1.1       agc 		  {
    365  1.2  christos 		    STACK_PUSHX(stack, long, tag);
    366  1.2  christos 		    STACK_PUSHX(stack, long, iter->minimal);
    367  1.1       agc 		  }
    368  1.1       agc 		STACK_PUSHX(stack, voidptr, node);
    369  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_AFTER_ITERATION);
    370  1.1       agc 
    371  1.1       agc 		STACK_PUSHX(stack, voidptr, iter->arg);
    372  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
    373  1.1       agc 
    374  1.1       agc 		/* Regset is not empty, so add a tag here. */
    375  1.1       agc 		if (regset[0] >= 0 || iter->minimal)
    376  1.1       agc 		  {
    377  1.1       agc 		    if (!first_pass)
    378  1.1       agc 		      {
    379  1.1       agc 			int i;
    380  1.1       agc 			status = tre_add_tag_left(mem, node, tag);
    381  1.1       agc 			if (iter->minimal)
    382  1.1       agc 			  tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
    383  1.1       agc 			else
    384  1.1       agc 			  tnfa->tag_directions[tag] = direction;
    385  1.1       agc 			if (minimal_tag >= 0)
    386  1.1       agc 			  {
    387  1.1       agc 			    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
    388  1.1       agc 			    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
    389  1.1       agc 			    tnfa->minimal_tags[i] = tag;
    390  1.1       agc 			    tnfa->minimal_tags[i + 1] = minimal_tag;
    391  1.1       agc 			    tnfa->minimal_tags[i + 2] = -1;
    392  1.1       agc 			    minimal_tag = -1;
    393  1.1       agc 			    num_minimals++;
    394  1.1       agc 			  }
    395  1.1       agc 			tre_purge_regset(regset, tnfa, tag);
    396  1.1       agc 		      }
    397  1.1       agc 
    398  1.1       agc 		    DPRINT(("  num_tags++\n"));
    399  1.1       agc 		    regset[0] = -1;
    400  1.1       agc 		    tag = next_tag;
    401  1.1       agc 		    num_tags++;
    402  1.1       agc 		    next_tag++;
    403  1.1       agc 		  }
    404  1.1       agc 		direction = TRE_TAG_MINIMIZE;
    405  1.1       agc 	      }
    406  1.1       agc 	      break;
    407  1.1       agc 	    case UNION:
    408  1.1       agc 	      {
    409  1.1       agc 		tre_union_t *uni = node->obj;
    410  1.1       agc 		tre_ast_node_t *left = uni->left;
    411  1.1       agc 		tre_ast_node_t *right = uni->right;
    412  1.1       agc 		int left_tag;
    413  1.1       agc 		int right_tag;
    414  1.1       agc 
    415  1.1       agc 		if (regset[0] >= 0)
    416  1.1       agc 		  {
    417  1.1       agc 		    left_tag = next_tag;
    418  1.1       agc 		    right_tag = next_tag + 1;
    419  1.1       agc 		  }
    420  1.1       agc 		else
    421  1.1       agc 		  {
    422  1.1       agc 		    left_tag = tag;
    423  1.1       agc 		    right_tag = next_tag;
    424  1.1       agc 		  }
    425  1.1       agc 
    426  1.1       agc 		DPRINT(("Union\n"));
    427  1.1       agc 
    428  1.1       agc 		/* After processing right child. */
    429  1.2  christos 		STACK_PUSHX(stack, long, right_tag);
    430  1.2  christos 		STACK_PUSHX(stack, long, left_tag);
    431  1.1       agc 		STACK_PUSHX(stack, voidptr, regset);
    432  1.2  christos 		STACK_PUSHX(stack, long, regset[0] >= 0);
    433  1.1       agc 		STACK_PUSHX(stack, voidptr, node);
    434  1.1       agc 		STACK_PUSHX(stack, voidptr, right);
    435  1.1       agc 		STACK_PUSHX(stack, voidptr, left);
    436  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_RIGHT);
    437  1.1       agc 
    438  1.1       agc 		/* Process right child. */
    439  1.1       agc 		STACK_PUSHX(stack, voidptr, right);
    440  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
    441  1.1       agc 
    442  1.1       agc 		/* After processing left child. */
    443  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_AFTER_UNION_LEFT);
    444  1.1       agc 
    445  1.1       agc 		/* Process left child. */
    446  1.1       agc 		STACK_PUSHX(stack, voidptr, left);
    447  1.2  christos 		STACK_PUSHX(stack, long, ADDTAGS_RECURSE);
    448  1.1       agc 
    449  1.1       agc 		/* Regset is not empty, so add a tag here. */
    450  1.1       agc 		if (regset[0] >= 0)
    451  1.1       agc 		  {
    452  1.1       agc 		    if (!first_pass)
    453  1.1       agc 		      {
    454  1.1       agc 			int i;
    455  1.1       agc 			status = tre_add_tag_left(mem, node, tag);
    456  1.1       agc 			tnfa->tag_directions[tag] = direction;
    457  1.1       agc 			if (minimal_tag >= 0)
    458  1.1       agc 			  {
    459  1.1       agc 			    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
    460  1.1       agc 			    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
    461  1.1       agc 			    tnfa->minimal_tags[i] = tag;
    462  1.1       agc 			    tnfa->minimal_tags[i + 1] = minimal_tag;
    463  1.1       agc 			    tnfa->minimal_tags[i + 2] = -1;
    464  1.1       agc 			    minimal_tag = -1;
    465  1.1       agc 			    num_minimals++;
    466  1.1       agc 			  }
    467  1.1       agc 			tre_purge_regset(regset, tnfa, tag);
    468  1.1       agc 		      }
    469  1.1       agc 
    470  1.1       agc 		    DPRINT(("  num_tags++\n"));
    471  1.1       agc 		    regset[0] = -1;
    472  1.1       agc 		    tag = next_tag;
    473  1.1       agc 		    num_tags++;
    474  1.1       agc 		    next_tag++;
    475  1.1       agc 		  }
    476  1.1       agc 
    477  1.1       agc 		if (node->num_submatches > 0)
    478  1.1       agc 		  {
    479  1.1       agc 		    /* The next two tags are reserved for markers. */
    480  1.1       agc 		    next_tag++;
    481  1.1       agc 		    tag = next_tag;
    482  1.1       agc 		    next_tag++;
    483  1.1       agc 		  }
    484  1.1       agc 
    485  1.1       agc 		break;
    486  1.1       agc 	      }
    487  1.1       agc 	    }
    488  1.1       agc 
    489  1.1       agc 	  if (node->submatch_id >= 0)
    490  1.1       agc 	    {
    491  1.1       agc 	      int i;
    492  1.1       agc 	      /* Push this submatch on the parents stack. */
    493  1.1       agc 	      for (i = 0; parents[i] >= 0; i++);
    494  1.1       agc 	      parents[i] = node->submatch_id;
    495  1.1       agc 	      parents[i + 1] = -1;
    496  1.1       agc 	    }
    497  1.1       agc 
    498  1.1       agc 	  break; /* end case: ADDTAGS_RECURSE */
    499  1.1       agc 
    500  1.1       agc 	case ADDTAGS_AFTER_ITERATION:
    501  1.1       agc 	  {
    502  1.1       agc 	    int minimal = 0;
    503  1.1       agc 	    int enter_tag;
    504  1.1       agc 	    node = tre_stack_pop_voidptr(stack);
    505  1.1       agc 	    if (first_pass)
    506  1.1       agc 	      {
    507  1.1       agc 		node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
    508  1.2  christos 		  + tre_stack_pop_long(stack);
    509  1.1       agc 		minimal_tag = -1;
    510  1.1       agc 	      }
    511  1.1       agc 	    else
    512  1.1       agc 	      {
    513  1.1       agc 		minimal = tre_stack_pop_int(stack);
    514  1.1       agc 		enter_tag = tre_stack_pop_int(stack);
    515  1.1       agc 		if (minimal)
    516  1.1       agc 		  minimal_tag = enter_tag;
    517  1.1       agc 	      }
    518  1.1       agc 
    519  1.1       agc 	    DPRINT(("After iteration\n"));
    520  1.1       agc 	    if (!first_pass)
    521  1.1       agc 	      {
    522  1.1       agc 		DPRINT(("  Setting direction to %s\n",
    523  1.1       agc 			minimal ? "minimize" : "maximize"));
    524  1.1       agc 		if (minimal)
    525  1.1       agc 		  direction = TRE_TAG_MINIMIZE;
    526  1.1       agc 		else
    527  1.1       agc 		  direction = TRE_TAG_MAXIMIZE;
    528  1.1       agc 	      }
    529  1.1       agc 	    break;
    530  1.1       agc 	  }
    531  1.1       agc 
    532  1.1       agc 	case ADDTAGS_AFTER_CAT_LEFT:
    533  1.1       agc 	  {
    534  1.1       agc 	    int new_tag = tre_stack_pop_int(stack);
    535  1.1       agc 	    next_tag = tre_stack_pop_int(stack);
    536  1.1       agc 	    DPRINT(("After cat left, tag = %d, next_tag = %d\n",
    537  1.1       agc 		    tag, next_tag));
    538  1.1       agc 	    if (new_tag >= 0)
    539  1.1       agc 	      {
    540  1.1       agc 		DPRINT(("  Setting tag to %d\n", new_tag));
    541  1.1       agc 		tag = new_tag;
    542  1.1       agc 	      }
    543  1.1       agc 	    break;
    544  1.1       agc 	  }
    545  1.1       agc 
    546  1.1       agc 	case ADDTAGS_AFTER_CAT_RIGHT:
    547  1.1       agc 	  DPRINT(("After cat right\n"));
    548  1.1       agc 	  node = tre_stack_pop_voidptr(stack);
    549  1.1       agc 	  if (first_pass)
    550  1.1       agc 	    node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
    551  1.1       agc 	      + ((tre_catenation_t *)node->obj)->right->num_tags;
    552  1.1       agc 	  break;
    553  1.1       agc 
    554  1.1       agc 	case ADDTAGS_AFTER_UNION_LEFT:
    555  1.1       agc 	  DPRINT(("After union left\n"));
    556  1.1       agc 	  /* Lift the bottom of the `regset' array so that when processing
    557  1.1       agc 	     the right operand the items currently in the array are
    558  1.1       agc 	     invisible.	 The original bottom was saved at ADDTAGS_UNION and
    559  1.1       agc 	     will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
    560  1.1       agc 	  while (*regset >= 0)
    561  1.1       agc 	    regset++;
    562  1.1       agc 	  break;
    563  1.1       agc 
    564  1.1       agc 	case ADDTAGS_AFTER_UNION_RIGHT:
    565  1.1       agc 	  {
    566  1.1       agc 	    int added_tags, tag_left, tag_right;
    567  1.1       agc 	    tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
    568  1.1       agc 	    tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
    569  1.1       agc 	    DPRINT(("After union right\n"));
    570  1.1       agc 	    node = tre_stack_pop_voidptr(stack);
    571  1.1       agc 	    added_tags = tre_stack_pop_int(stack);
    572  1.1       agc 	    if (first_pass)
    573  1.1       agc 	      {
    574  1.1       agc 		node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
    575  1.1       agc 		  + ((tre_union_t *)node->obj)->right->num_tags + added_tags
    576  1.1       agc 		  + ((node->num_submatches > 0) ? 2 : 0);
    577  1.1       agc 	      }
    578  1.1       agc 	    regset = tre_stack_pop_voidptr(stack);
    579  1.1       agc 	    tag_left = tre_stack_pop_int(stack);
    580  1.1       agc 	    tag_right = tre_stack_pop_int(stack);
    581  1.1       agc 
    582  1.1       agc 	    /* Add tags after both children, the left child gets a smaller
    583  1.1       agc 	       tag than the right child.  This guarantees that we prefer
    584  1.1       agc 	       the left child over the right child. */
    585  1.1       agc 	    /* XXX - This is not always necessary (if the children have
    586  1.1       agc 	       tags which must be seen for every match of that child). */
    587  1.1       agc 	    /* XXX - Check if this is the only place where tre_add_tag_right
    588  1.1       agc 	       is used.	 If so, use tre_add_tag_left (putting the tag before
    589  1.1       agc 	       the child as opposed after the child) and throw away
    590  1.1       agc 	       tre_add_tag_right. */
    591  1.1       agc 	    if (node->num_submatches > 0)
    592  1.1       agc 	      {
    593  1.1       agc 		if (!first_pass)
    594  1.1       agc 		  {
    595  1.1       agc 		    status = tre_add_tag_right(mem, left, tag_left);
    596  1.1       agc 		    tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
    597  1.1       agc 		    status = tre_add_tag_right(mem, right, tag_right);
    598  1.1       agc 		    tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
    599  1.1       agc 		  }
    600  1.1       agc 		DPRINT(("  num_tags += 2\n"));
    601  1.1       agc 		num_tags += 2;
    602  1.1       agc 	      }
    603  1.1       agc 	    direction = TRE_TAG_MAXIMIZE;
    604  1.1       agc 	    break;
    605  1.1       agc 	  }
    606  1.1       agc 
    607  1.1       agc 	default:
    608  1.1       agc 	  assert(0);
    609  1.1       agc 	  break;
    610  1.1       agc 
    611  1.1       agc 	} /* end switch(symbol) */
    612  1.1       agc     } /* end while(tre_stack_num_objects(stack) > bottom) */
    613  1.1       agc 
    614  1.1       agc   if (!first_pass)
    615  1.1       agc     tre_purge_regset(regset, tnfa, tag);
    616  1.1       agc 
    617  1.1       agc   if (!first_pass && minimal_tag >= 0)
    618  1.1       agc     {
    619  1.1       agc       int i;
    620  1.1       agc       DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
    621  1.1       agc       for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
    622  1.1       agc       tnfa->minimal_tags[i] = tag;
    623  1.1       agc       tnfa->minimal_tags[i + 1] = minimal_tag;
    624  1.1       agc       tnfa->minimal_tags[i + 2] = -1;
    625  1.1       agc       minimal_tag = -1;
    626  1.1       agc       num_minimals++;
    627  1.1       agc     }
    628  1.1       agc 
    629  1.1       agc   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
    630  1.1       agc 	  first_pass? "First pass" : "Second pass", num_tags));
    631  1.1       agc 
    632  1.1       agc   assert(tree->num_tags == num_tags);
    633  1.1       agc   tnfa->end_tag = num_tags;
    634  1.1       agc   tnfa->num_tags = num_tags;
    635  1.1       agc   tnfa->num_minimals = num_minimals;
    636  1.1       agc   xfree(orig_regset);
    637  1.1       agc   xfree(parents);
    638  1.1       agc   xfree(saved_states);
    639  1.1       agc   return status;
    640  1.1       agc }
    641  1.1       agc 
    642  1.1       agc 
    643  1.1       agc 
    644  1.1       agc /*
    645  1.1       agc   AST to TNFA compilation routines.
    646  1.1       agc */
    647  1.1       agc 
    648  1.1       agc typedef enum {
    649  1.1       agc   COPY_RECURSE,
    650  1.1       agc   COPY_SET_RESULT_PTR
    651  1.1       agc } tre_copyast_symbol_t;
    652  1.1       agc 
    653  1.1       agc /* Flags for tre_copy_ast(). */
    654  1.1       agc #define COPY_REMOVE_TAGS	 1
    655  1.1       agc #define COPY_MAXIMIZE_FIRST_TAG	 2
    656  1.1       agc 
    657  1.1       agc static reg_errcode_t
    658  1.1       agc tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
    659  1.1       agc 	     int flags, int *pos_add, tre_tag_direction_t *tag_directions,
    660  1.1       agc 	     tre_ast_node_t **copy, int *max_pos)
    661  1.1       agc {
    662  1.1       agc   reg_errcode_t status = REG_OK;
    663  1.1       agc   int bottom = tre_stack_num_objects(stack);
    664  1.1       agc   int num_copied = 0;
    665  1.1       agc   int first_tag = 1;
    666  1.1       agc   tre_ast_node_t **result = copy;
    667  1.1       agc   tre_copyast_symbol_t symbol;
    668  1.1       agc 
    669  1.1       agc   STACK_PUSH(stack, voidptr, ast);
    670  1.2  christos   STACK_PUSH(stack, long, COPY_RECURSE);
    671  1.1       agc 
    672  1.1       agc   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
    673  1.1       agc     {
    674  1.1       agc       tre_ast_node_t *node;
    675  1.1       agc       if (status != REG_OK)
    676  1.1       agc 	break;
    677  1.1       agc 
    678  1.2  christos       symbol = (tre_copyast_symbol_t)tre_stack_pop_long(stack);
    679  1.1       agc       switch (symbol)
    680  1.1       agc 	{
    681  1.1       agc 	case COPY_SET_RESULT_PTR:
    682  1.1       agc 	  result = tre_stack_pop_voidptr(stack);
    683  1.1       agc 	  break;
    684  1.1       agc 	case COPY_RECURSE:
    685  1.1       agc 	  node = tre_stack_pop_voidptr(stack);
    686  1.1       agc 	  switch (node->type)
    687  1.1       agc 	    {
    688  1.1       agc 	    case LITERAL:
    689  1.1       agc 	      {
    690  1.1       agc 		tre_literal_t *lit = node->obj;
    691  1.1       agc 		int pos = lit->position;
    692  1.1       agc 		int min = lit->code_min;
    693  1.1       agc 		int max = lit->code_max;
    694  1.1       agc 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
    695  1.1       agc 		  {
    696  1.1       agc 		    /* XXX - e.g. [ab] has only one position but two
    697  1.1       agc 		       nodes, so we are creating holes in the state space
    698  1.1       agc 		       here.  Not fatal, just wastes memory. */
    699  1.1       agc 		    pos += *pos_add;
    700  1.1       agc 		    num_copied++;
    701  1.1       agc 		  }
    702  1.1       agc 		else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
    703  1.1       agc 		  {
    704  1.1       agc 		    /* Change this tag to empty. */
    705  1.1       agc 		    min = EMPTY;
    706  1.1       agc 		    max = pos = -1;
    707  1.1       agc 		  }
    708  1.1       agc 		else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
    709  1.1       agc 			 && first_tag)
    710  1.1       agc 		  {
    711  1.1       agc 		    /* Maximize the first tag. */
    712  1.1       agc 		    tag_directions[max] = TRE_TAG_MAXIMIZE;
    713  1.1       agc 		    first_tag = 0;
    714  1.1       agc 		  }
    715  1.1       agc 		*result = tre_ast_new_literal(mem, min, max, pos);
    716  1.1       agc 		if (*result == NULL)
    717  1.1       agc 		  status = REG_ESPACE;
    718  1.1       agc 
    719  1.1       agc 		if (pos > *max_pos)
    720  1.1       agc 		  *max_pos = pos;
    721  1.1       agc 		break;
    722  1.1       agc 	      }
    723  1.1       agc 	    case UNION:
    724  1.1       agc 	      {
    725  1.1       agc 		tre_union_t *uni = node->obj;
    726  1.1       agc 		tre_union_t *tmp;
    727  1.1       agc 		*result = tre_ast_new_union(mem, uni->left, uni->right);
    728  1.1       agc 		if (*result == NULL)
    729  1.1       agc 		  {
    730  1.1       agc 		    status = REG_ESPACE;
    731  1.1       agc 		    break;
    732  1.1       agc 		  }
    733  1.1       agc 		tmp = (*result)->obj;
    734  1.1       agc 		result = &tmp->left;
    735  1.1       agc 		STACK_PUSHX(stack, voidptr, uni->right);
    736  1.2  christos 		STACK_PUSHX(stack, long, COPY_RECURSE);
    737  1.1       agc 		STACK_PUSHX(stack, voidptr, &tmp->right);
    738  1.2  christos 		STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
    739  1.1       agc 		STACK_PUSHX(stack, voidptr, uni->left);
    740  1.2  christos 		STACK_PUSHX(stack, long, COPY_RECURSE);
    741  1.1       agc 		break;
    742  1.1       agc 	      }
    743  1.1       agc 	    case CATENATION:
    744  1.1       agc 	      {
    745  1.1       agc 		tre_catenation_t *cat = node->obj;
    746  1.1       agc 		tre_catenation_t *tmp;
    747  1.1       agc 		*result = tre_ast_new_catenation(mem, cat->left, cat->right);
    748  1.1       agc 		if (*result == NULL)
    749  1.1       agc 		  {
    750  1.1       agc 		    status = REG_ESPACE;
    751  1.1       agc 		    break;
    752  1.1       agc 		  }
    753  1.1       agc 		tmp = (*result)->obj;
    754  1.1       agc 		tmp->left = NULL;
    755  1.1       agc 		tmp->right = NULL;
    756  1.1       agc 		result = &tmp->left;
    757  1.1       agc 
    758  1.1       agc 		STACK_PUSHX(stack, voidptr, cat->right);
    759  1.2  christos 		STACK_PUSHX(stack, long, COPY_RECURSE);
    760  1.1       agc 		STACK_PUSHX(stack, voidptr, &tmp->right);
    761  1.2  christos 		STACK_PUSHX(stack, long, COPY_SET_RESULT_PTR);
    762  1.1       agc 		STACK_PUSHX(stack, voidptr, cat->left);
    763  1.2  christos 		STACK_PUSHX(stack, long, COPY_RECURSE);
    764  1.1       agc 		break;
    765  1.1       agc 	      }
    766  1.1       agc 	    case ITERATION:
    767  1.1       agc 	      {
    768  1.1       agc 		tre_iteration_t *iter = node->obj;
    769  1.1       agc 		STACK_PUSHX(stack, voidptr, iter->arg);
    770  1.2  christos 		STACK_PUSHX(stack, long, COPY_RECURSE);
    771  1.1       agc 		*result = tre_ast_new_iter(mem, iter->arg, iter->min,
    772  1.1       agc 					   iter->max, iter->minimal);
    773  1.1       agc 		if (*result == NULL)
    774  1.1       agc 		  {
    775  1.1       agc 		    status = REG_ESPACE;
    776  1.1       agc 		    break;
    777  1.1       agc 		  }
    778  1.1       agc 		iter = (*result)->obj;
    779  1.1       agc 		result = &iter->arg;
    780  1.1       agc 		break;
    781  1.1       agc 	      }
    782  1.1       agc 	    default:
    783  1.1       agc 	      assert(0);
    784  1.1       agc 	      break;
    785  1.1       agc 	    }
    786  1.1       agc 	  break;
    787  1.1       agc 	}
    788  1.1       agc     }
    789  1.1       agc   *pos_add += num_copied;
    790  1.1       agc   return status;
    791  1.1       agc }
    792  1.1       agc 
    793  1.1       agc typedef enum {
    794  1.1       agc   EXPAND_RECURSE,
    795  1.1       agc   EXPAND_AFTER_ITER
    796  1.1       agc } tre_expand_ast_symbol_t;
    797  1.1       agc 
    798  1.1       agc /* Expands each iteration node that has a finite nonzero minimum or maximum
    799  1.1       agc    iteration count to a catenated sequence of copies of the node. */
    800  1.1       agc static reg_errcode_t
    801  1.1       agc tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
    802  1.1       agc 	       int *position, tre_tag_direction_t *tag_directions,
    803  1.1       agc 	       int *max_depth)
    804  1.1       agc {
    805  1.1       agc   reg_errcode_t status = REG_OK;
    806  1.1       agc   int bottom = tre_stack_num_objects(stack);
    807  1.1       agc   int pos_add = 0;
    808  1.1       agc   int pos_add_total = 0;
    809  1.1       agc   int max_pos = 0;
    810  1.1       agc   /* Current approximate matching parameters. */
    811  1.1       agc   int params[TRE_PARAM_LAST];
    812  1.1       agc   /* Approximate parameter nesting level. */
    813  1.1       agc   int params_depth = 0;
    814  1.1       agc   int iter_depth = 0;
    815  1.1       agc   int i;
    816  1.1       agc 
    817  1.1       agc   for (i = 0; i < TRE_PARAM_LAST; i++)
    818  1.1       agc     params[i] = TRE_PARAM_DEFAULT;
    819  1.1       agc 
    820  1.1       agc   STACK_PUSHR(stack, voidptr, ast);
    821  1.2  christos   STACK_PUSHR(stack, long, EXPAND_RECURSE);
    822  1.1       agc   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
    823  1.1       agc     {
    824  1.1       agc       tre_ast_node_t *node;
    825  1.1       agc       tre_expand_ast_symbol_t symbol;
    826  1.1       agc 
    827  1.1       agc       if (status != REG_OK)
    828  1.1       agc 	break;
    829  1.1       agc 
    830  1.1       agc       DPRINT(("pos_add %d\n", pos_add));
    831  1.1       agc 
    832  1.2  christos       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_long(stack);
    833  1.1       agc       node = tre_stack_pop_voidptr(stack);
    834  1.1       agc       switch (symbol)
    835  1.1       agc 	{
    836  1.1       agc 	case EXPAND_RECURSE:
    837  1.1       agc 	  switch (node->type)
    838  1.1       agc 	    {
    839  1.1       agc 	    case LITERAL:
    840  1.1       agc 	      {
    841  1.1       agc 		tre_literal_t *lit= node->obj;
    842  1.1       agc 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
    843  1.1       agc 		  {
    844  1.1       agc 		    lit->position += pos_add;
    845  1.1       agc 		    if (lit->position > max_pos)
    846  1.1       agc 		      max_pos = lit->position;
    847  1.1       agc 		  }
    848  1.1       agc 		break;
    849  1.1       agc 	      }
    850  1.1       agc 	    case UNION:
    851  1.1       agc 	      {
    852  1.1       agc 		tre_union_t *uni = node->obj;
    853  1.1       agc 		STACK_PUSHX(stack, voidptr, uni->right);
    854  1.2  christos 		STACK_PUSHX(stack, long, EXPAND_RECURSE);
    855  1.1       agc 		STACK_PUSHX(stack, voidptr, uni->left);
    856  1.2  christos 		STACK_PUSHX(stack, long, EXPAND_RECURSE);
    857  1.1       agc 		break;
    858  1.1       agc 	      }
    859  1.1       agc 	    case CATENATION:
    860  1.1       agc 	      {
    861  1.1       agc 		tre_catenation_t *cat = node->obj;
    862  1.1       agc 		STACK_PUSHX(stack, voidptr, cat->right);
    863  1.2  christos 		STACK_PUSHX(stack, long, EXPAND_RECURSE);
    864  1.1       agc 		STACK_PUSHX(stack, voidptr, cat->left);
    865  1.2  christos 		STACK_PUSHX(stack, long, EXPAND_RECURSE);
    866  1.1       agc 		break;
    867  1.1       agc 	      }
    868  1.1       agc 	    case ITERATION:
    869  1.1       agc 	      {
    870  1.1       agc 		tre_iteration_t *iter = node->obj;
    871  1.2  christos 		STACK_PUSHX(stack, long, pos_add);
    872  1.1       agc 		STACK_PUSHX(stack, voidptr, node);
    873  1.2  christos 		STACK_PUSHX(stack, long, EXPAND_AFTER_ITER);
    874  1.1       agc 		STACK_PUSHX(stack, voidptr, iter->arg);
    875  1.2  christos 		STACK_PUSHX(stack, long, EXPAND_RECURSE);
    876  1.1       agc 		/* If we are going to expand this node at EXPAND_AFTER_ITER
    877  1.1       agc 		   then don't increase the `pos' fields of the nodes now, it
    878  1.1       agc 		   will get done when expanding. */
    879  1.1       agc 		if (iter->min > 1 || iter->max > 1)
    880  1.1       agc 		  pos_add = 0;
    881  1.1       agc 		iter_depth++;
    882  1.1       agc 		DPRINT(("iter\n"));
    883  1.1       agc 		break;
    884  1.1       agc 	      }
    885  1.1       agc 	    default:
    886  1.1       agc 	      assert(0);
    887  1.1       agc 	      break;
    888  1.1       agc 	    }
    889  1.1       agc 	  break;
    890  1.1       agc 	case EXPAND_AFTER_ITER:
    891  1.1       agc 	  {
    892  1.1       agc 	    tre_iteration_t *iter = node->obj;
    893  1.1       agc 	    int pos_add_last;
    894  1.1       agc 	    pos_add = tre_stack_pop_int(stack);
    895  1.1       agc 	    pos_add_last = pos_add;
    896  1.1       agc 	    if (iter->min > 1 || iter->max > 1)
    897  1.1       agc 	      {
    898  1.1       agc 		tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
    899  1.1       agc 		int j;
    900  1.1       agc 		int pos_add_save = pos_add;
    901  1.1       agc 
    902  1.1       agc 		/* Create a catenated sequence of copies of the node. */
    903  1.1       agc 		for (j = 0; j < iter->min; j++)
    904  1.1       agc 		  {
    905  1.1       agc 		    tre_ast_node_t *copy;
    906  1.1       agc 		    /* Remove tags from all but the last copy. */
    907  1.1       agc 		    int flags = ((j + 1 < iter->min)
    908  1.1       agc 				 ? COPY_REMOVE_TAGS
    909  1.1       agc 				 : COPY_MAXIMIZE_FIRST_TAG);
    910  1.1       agc 		    DPRINT(("  pos_add %d\n", pos_add));
    911  1.1       agc 		    pos_add_save = pos_add;
    912  1.1       agc 		    status = tre_copy_ast(mem, stack, iter->arg, flags,
    913  1.1       agc 					  &pos_add, tag_directions, &copy,
    914  1.1       agc 					  &max_pos);
    915  1.1       agc 		    if (status != REG_OK)
    916  1.1       agc 		      return status;
    917  1.1       agc 		    if (seq1 != NULL)
    918  1.1       agc 		      seq1 = tre_ast_new_catenation(mem, seq1, copy);
    919  1.1       agc 		    else
    920  1.1       agc 		      seq1 = copy;
    921  1.1       agc 		    if (seq1 == NULL)
    922  1.1       agc 		      return REG_ESPACE;
    923  1.1       agc 		  }
    924  1.1       agc 
    925  1.1       agc 		if (iter->max == -1)
    926  1.1       agc 		  {
    927  1.1       agc 		    /* No upper limit. */
    928  1.1       agc 		    pos_add_save = pos_add;
    929  1.1       agc 		    status = tre_copy_ast(mem, stack, iter->arg, 0,
    930  1.1       agc 					  &pos_add, NULL, &seq2, &max_pos);
    931  1.1       agc 		    if (status != REG_OK)
    932  1.1       agc 		      return status;
    933  1.1       agc 		    seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
    934  1.1       agc 		    if (seq2 == NULL)
    935  1.1       agc 		      return REG_ESPACE;
    936  1.1       agc 		  }
    937  1.1       agc 		else
    938  1.1       agc 		  {
    939  1.1       agc 		    for (j = iter->min; j < iter->max; j++)
    940  1.1       agc 		      {
    941  1.1       agc 			tre_ast_node_t *tmp, *copy;
    942  1.1       agc 			pos_add_save = pos_add;
    943  1.1       agc 			status = tre_copy_ast(mem, stack, iter->arg, 0,
    944  1.1       agc 					      &pos_add, NULL, &copy, &max_pos);
    945  1.1       agc 			if (status != REG_OK)
    946  1.1       agc 			  return status;
    947  1.1       agc 			if (seq2 != NULL)
    948  1.1       agc 			  seq2 = tre_ast_new_catenation(mem, copy, seq2);
    949  1.1       agc 			else
    950  1.1       agc 			  seq2 = copy;
    951  1.1       agc 			if (seq2 == NULL)
    952  1.1       agc 			  return REG_ESPACE;
    953  1.1       agc 			tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
    954  1.1       agc 			if (tmp == NULL)
    955  1.1       agc 			  return REG_ESPACE;
    956  1.1       agc 			seq2 = tre_ast_new_union(mem, tmp, seq2);
    957  1.1       agc 			if (seq2 == NULL)
    958  1.1       agc 			  return REG_ESPACE;
    959  1.1       agc 		      }
    960  1.1       agc 		  }
    961  1.1       agc 
    962  1.1       agc 		pos_add = pos_add_save;
    963  1.1       agc 		if (seq1 == NULL)
    964  1.1       agc 		  seq1 = seq2;
    965  1.1       agc 		else if (seq2 != NULL)
    966  1.1       agc 		  seq1 = tre_ast_new_catenation(mem, seq1, seq2);
    967  1.1       agc 		if (seq1 == NULL)
    968  1.1       agc 		  return REG_ESPACE;
    969  1.1       agc 		node->obj = seq1->obj;
    970  1.1       agc 		node->type = seq1->type;
    971  1.1       agc 	      }
    972  1.1       agc 
    973  1.1       agc 	    iter_depth--;
    974  1.1       agc 	    pos_add_total += pos_add - pos_add_last;
    975  1.1       agc 	    if (iter_depth == 0)
    976  1.1       agc 	      pos_add = pos_add_total;
    977  1.1       agc 
    978  1.1       agc 	    /* If approximate parameters are specified, surround the result
    979  1.1       agc 	       with two parameter setting nodes.  The one on the left sets
    980  1.1       agc 	       the specified parameters, and the one on the right restores
    981  1.1       agc 	       the old parameters. */
    982  1.1       agc 	    if (iter->params)
    983  1.1       agc 	      {
    984  1.1       agc 		tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
    985  1.1       agc 		int *old_params;
    986  1.1       agc 
    987  1.1       agc 		tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
    988  1.1       agc 		if (!tmp_l)
    989  1.1       agc 		  return REG_ESPACE;
    990  1.1       agc 		((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
    991  1.1       agc 		iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
    992  1.1       agc 		tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
    993  1.1       agc 		if (!tmp_r)
    994  1.1       agc 		  return REG_ESPACE;
    995  1.1       agc 		old_params = tre_mem_alloc(mem, sizeof(*old_params)
    996  1.1       agc 					   * TRE_PARAM_LAST);
    997  1.1       agc 		if (!old_params)
    998  1.1       agc 		  return REG_ESPACE;
    999  1.1       agc 		for (i = 0; i < TRE_PARAM_LAST; i++)
   1000  1.1       agc 		  old_params[i] = params[i];
   1001  1.1       agc 		((tre_literal_t *)tmp_r->obj)->u.params = old_params;
   1002  1.1       agc 		old_params[TRE_PARAM_DEPTH] = params_depth;
   1003  1.1       agc 		/* XXX - this is the only place where ast_new_node is
   1004  1.1       agc 		   needed -- should be moved inside AST module. */
   1005  1.1       agc 		node_copy = tre_ast_new_node(mem, ITERATION,
   1006  1.1       agc 					     sizeof(tre_iteration_t));
   1007  1.1       agc 		if (!node_copy)
   1008  1.1       agc 		  return REG_ESPACE;
   1009  1.1       agc 		node_copy->obj = node->obj;
   1010  1.1       agc 		tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
   1011  1.1       agc 		if (!tmp_node)
   1012  1.1       agc 		  return REG_ESPACE;
   1013  1.1       agc 		tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
   1014  1.1       agc 		if (!tmp_node)
   1015  1.1       agc 		  return REG_ESPACE;
   1016  1.1       agc 		/* Replace the contents of `node' with `tmp_node'. */
   1017  1.1       agc 		memcpy(node, tmp_node, sizeof(*node));
   1018  1.1       agc 		node->obj = tmp_node->obj;
   1019  1.1       agc 		node->type = tmp_node->type;
   1020  1.1       agc 		params_depth++;
   1021  1.1       agc 		if (params_depth > *max_depth)
   1022  1.1       agc 		  *max_depth = params_depth;
   1023  1.1       agc 	      }
   1024  1.1       agc 	    break;
   1025  1.1       agc 	  }
   1026  1.1       agc 	default:
   1027  1.1       agc 	  assert(0);
   1028  1.1       agc 	  break;
   1029  1.1       agc 	}
   1030  1.1       agc     }
   1031  1.1       agc 
   1032  1.1       agc   *position += pos_add_total;
   1033  1.1       agc 
   1034  1.1       agc   /* `max_pos' should never be larger than `*position' if the above
   1035  1.1       agc      code works, but just an extra safeguard let's make sure
   1036  1.1       agc      `*position' is set large enough so enough memory will be
   1037  1.1       agc      allocated for the transition table. */
   1038  1.1       agc   if (max_pos > *position)
   1039  1.1       agc     *position = max_pos;
   1040  1.1       agc 
   1041  1.1       agc #ifdef TRE_DEBUG
   1042  1.1       agc   DPRINT(("Expanded AST:\n"));
   1043  1.1       agc   tre_ast_print(ast);
   1044  1.1       agc   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
   1045  1.1       agc #endif
   1046  1.1       agc 
   1047  1.1       agc   return status;
   1048  1.1       agc }
   1049  1.1       agc 
   1050  1.1       agc static tre_pos_and_tags_t *
   1051  1.1       agc tre_set_empty(tre_mem_t mem)
   1052  1.1       agc {
   1053  1.1       agc   tre_pos_and_tags_t *new_set;
   1054  1.1       agc 
   1055  1.1       agc   new_set = tre_mem_calloc(mem, sizeof(*new_set));
   1056  1.1       agc   if (new_set == NULL)
   1057  1.1       agc     return NULL;
   1058  1.1       agc 
   1059  1.1       agc   new_set[0].position = -1;
   1060  1.1       agc   new_set[0].code_min = -1;
   1061  1.1       agc   new_set[0].code_max = -1;
   1062  1.1       agc 
   1063  1.1       agc   return new_set;
   1064  1.1       agc }
   1065  1.1       agc 
   1066  1.1       agc static tre_pos_and_tags_t *
   1067  1.1       agc tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
   1068  1.1       agc 	    tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
   1069  1.1       agc {
   1070  1.1       agc   tre_pos_and_tags_t *new_set;
   1071  1.1       agc 
   1072  1.1       agc   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
   1073  1.1       agc   if (new_set == NULL)
   1074  1.1       agc     return NULL;
   1075  1.1       agc 
   1076  1.1       agc   new_set[0].position = position;
   1077  1.1       agc   new_set[0].code_min = code_min;
   1078  1.1       agc   new_set[0].code_max = code_max;
   1079  1.1       agc   new_set[0].class = class;
   1080  1.1       agc   new_set[0].neg_classes = neg_classes;
   1081  1.1       agc   new_set[0].backref = backref;
   1082  1.1       agc   new_set[1].position = -1;
   1083  1.1       agc   new_set[1].code_min = -1;
   1084  1.1       agc   new_set[1].code_max = -1;
   1085  1.1       agc 
   1086  1.1       agc   return new_set;
   1087  1.1       agc }
   1088  1.1       agc 
   1089  1.1       agc static tre_pos_and_tags_t *
   1090  1.1       agc tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
   1091  1.1       agc 	      int *tags, int assertions, int *params)
   1092  1.1       agc {
   1093  1.1       agc   int s1, s2, i, j;
   1094  1.1       agc   tre_pos_and_tags_t *new_set;
   1095  1.1       agc   int *new_tags;
   1096  1.1       agc   int num_tags;
   1097  1.1       agc 
   1098  1.1       agc   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
   1099  1.1       agc   for (s1 = 0; set1[s1].position >= 0; s1++);
   1100  1.1       agc   for (s2 = 0; set2[s2].position >= 0; s2++);
   1101  1.1       agc   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
   1102  1.1       agc   if (!new_set )
   1103  1.1       agc     return NULL;
   1104  1.1       agc 
   1105  1.1       agc   for (s1 = 0; set1[s1].position >= 0; s1++)
   1106  1.1       agc     {
   1107  1.1       agc       new_set[s1].position = set1[s1].position;
   1108  1.1       agc       new_set[s1].code_min = set1[s1].code_min;
   1109  1.1       agc       new_set[s1].code_max = set1[s1].code_max;
   1110  1.1       agc       new_set[s1].assertions = set1[s1].assertions | assertions;
   1111  1.1       agc       new_set[s1].class = set1[s1].class;
   1112  1.1       agc       new_set[s1].neg_classes = set1[s1].neg_classes;
   1113  1.1       agc       new_set[s1].backref = set1[s1].backref;
   1114  1.1       agc       if (set1[s1].tags == NULL && tags == NULL)
   1115  1.1       agc 	new_set[s1].tags = NULL;
   1116  1.1       agc       else
   1117  1.1       agc 	{
   1118  1.1       agc 	  for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
   1119  1.1       agc 	  new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
   1120  1.1       agc 					 * (i + num_tags + 1)));
   1121  1.1       agc 	  if (new_tags == NULL)
   1122  1.1       agc 	    return NULL;
   1123  1.1       agc 	  for (j = 0; j < i; j++)
   1124  1.1       agc 	    new_tags[j] = set1[s1].tags[j];
   1125  1.1       agc 	  for (i = 0; i < num_tags; i++)
   1126  1.1       agc 	    new_tags[j + i] = tags[i];
   1127  1.1       agc 	  new_tags[j + i] = -1;
   1128  1.1       agc 	  new_set[s1].tags = new_tags;
   1129  1.1       agc 	}
   1130  1.1       agc       if (set1[s1].params)
   1131  1.1       agc 	new_set[s1].params = set1[s1].params;
   1132  1.1       agc       if (params)
   1133  1.1       agc 	{
   1134  1.1       agc 	  if (!new_set[s1].params)
   1135  1.1       agc 	    new_set[s1].params = params;
   1136  1.1       agc 	  else
   1137  1.1       agc 	    {
   1138  1.1       agc 	      new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
   1139  1.1       agc 						 TRE_PARAM_LAST);
   1140  1.1       agc 	      if (!new_set[s1].params)
   1141  1.1       agc 		return NULL;
   1142  1.1       agc 	      for (i = 0; i < TRE_PARAM_LAST; i++)
   1143  1.1       agc 		if (params[i] != TRE_PARAM_UNSET)
   1144  1.1       agc 		  new_set[s1].params[i] = params[i];
   1145  1.1       agc 	    }
   1146  1.1       agc 	}
   1147  1.1       agc     }
   1148  1.1       agc 
   1149  1.1       agc   for (s2 = 0; set2[s2].position >= 0; s2++)
   1150  1.1       agc     {
   1151  1.1       agc       new_set[s1 + s2].position = set2[s2].position;
   1152  1.1       agc       new_set[s1 + s2].code_min = set2[s2].code_min;
   1153  1.1       agc       new_set[s1 + s2].code_max = set2[s2].code_max;
   1154  1.1       agc       /* XXX - why not | assertions here as well? */
   1155  1.1       agc       new_set[s1 + s2].assertions = set2[s2].assertions;
   1156  1.1       agc       new_set[s1 + s2].class = set2[s2].class;
   1157  1.1       agc       new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
   1158  1.1       agc       new_set[s1 + s2].backref = set2[s2].backref;
   1159  1.1       agc       if (set2[s2].tags == NULL)
   1160  1.1       agc 	new_set[s1 + s2].tags = NULL;
   1161  1.1       agc       else
   1162  1.1       agc 	{
   1163  1.1       agc 	  for (i = 0; set2[s2].tags[i] >= 0; i++);
   1164  1.1       agc 	  new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
   1165  1.1       agc 	  if (new_tags == NULL)
   1166  1.1       agc 	    return NULL;
   1167  1.1       agc 	  for (j = 0; j < i; j++)
   1168  1.1       agc 	    new_tags[j] = set2[s2].tags[j];
   1169  1.1       agc 	  new_tags[j] = -1;
   1170  1.1       agc 	  new_set[s1 + s2].tags = new_tags;
   1171  1.1       agc 	}
   1172  1.1       agc       if (set2[s2].params)
   1173  1.1       agc 	new_set[s1 + s2].params = set2[s2].params;
   1174  1.1       agc       if (params)
   1175  1.1       agc 	{
   1176  1.1       agc 	  if (!new_set[s1 + s2].params)
   1177  1.1       agc 	    new_set[s1 + s2].params = params;
   1178  1.1       agc 	  else
   1179  1.1       agc 	    {
   1180  1.1       agc 	      new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
   1181  1.1       agc 						      TRE_PARAM_LAST);
   1182  1.1       agc 	      if (!new_set[s1 + s2].params)
   1183  1.1       agc 		return NULL;
   1184  1.1       agc 	      for (i = 0; i < TRE_PARAM_LAST; i++)
   1185  1.1       agc 		if (params[i] != TRE_PARAM_UNSET)
   1186  1.1       agc 		  new_set[s1 + s2].params[i] = params[i];
   1187  1.1       agc 	    }
   1188  1.1       agc 	}
   1189  1.1       agc     }
   1190  1.1       agc   new_set[s1 + s2].position = -1;
   1191  1.1       agc   return new_set;
   1192  1.1       agc }
   1193  1.1       agc 
   1194  1.1       agc /* Finds the empty path through `node' which is the one that should be
   1195  1.1       agc    taken according to POSIX.2 rules, and adds the tags on that path to
   1196  1.1       agc    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
   1197  1.1       agc    set to the number of tags seen on the path. */
   1198  1.1       agc static reg_errcode_t
   1199  1.1       agc tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
   1200  1.1       agc 		int *assertions, int *params, int *num_tags_seen,
   1201  1.1       agc 		int *params_seen)
   1202  1.1       agc {
   1203  1.1       agc   tre_literal_t *lit;
   1204  1.1       agc   tre_union_t *uni;
   1205  1.1       agc   tre_catenation_t *cat;
   1206  1.1       agc   tre_iteration_t *iter;
   1207  1.1       agc   int i;
   1208  1.1       agc   int bottom = tre_stack_num_objects(stack);
   1209  1.1       agc   reg_errcode_t status = REG_OK;
   1210  1.1       agc   if (num_tags_seen)
   1211  1.1       agc     *num_tags_seen = 0;
   1212  1.1       agc   if (params_seen)
   1213  1.1       agc     *params_seen = 0;
   1214  1.1       agc 
   1215  1.1       agc   status = tre_stack_push_voidptr(stack, node);
   1216  1.1       agc 
   1217  1.1       agc   /* Walk through the tree recursively. */
   1218  1.1       agc   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
   1219  1.1       agc     {
   1220  1.1       agc       node = tre_stack_pop_voidptr(stack);
   1221  1.1       agc 
   1222  1.1       agc       switch (node->type)
   1223  1.1       agc 	{
   1224  1.1       agc 	case LITERAL:
   1225  1.1       agc 	  lit = (tre_literal_t *)node->obj;
   1226  1.1       agc 	  switch (lit->code_min)
   1227  1.1       agc 	    {
   1228  1.1       agc 	    case TAG:
   1229  1.1       agc 	      if (lit->code_max >= 0)
   1230  1.1       agc 		{
   1231  1.1       agc 		  if (tags != NULL)
   1232  1.1       agc 		    {
   1233  1.1       agc 		      /* Add the tag to `tags'. */
   1234  1.1       agc 		      for (i = 0; tags[i] >= 0; i++)
   1235  1.1       agc 			if (tags[i] == lit->code_max)
   1236  1.1       agc 			  break;
   1237  1.1       agc 		      if (tags[i] < 0)
   1238  1.1       agc 			{
   1239  1.1       agc 			  tags[i] = lit->code_max;
   1240  1.1       agc 			  tags[i + 1] = -1;
   1241  1.1       agc 			}
   1242  1.1       agc 		    }
   1243  1.1       agc 		  if (num_tags_seen)
   1244  1.1       agc 		    (*num_tags_seen)++;
   1245  1.1       agc 		}
   1246  1.1       agc 	      break;
   1247  1.1       agc 	    case ASSERTION:
   1248  1.1       agc 	      assert(lit->code_max >= 1
   1249  1.1       agc 		     || lit->code_max <= ASSERT_LAST);
   1250  1.1       agc 	      if (assertions != NULL)
   1251  1.1       agc 		*assertions |= lit->code_max;
   1252  1.1       agc 	      break;
   1253  1.1       agc 	    case PARAMETER:
   1254  1.1       agc 	      if (params != NULL)
   1255  1.1       agc 		for (i = 0; i < TRE_PARAM_LAST; i++)
   1256  1.1       agc 		  params[i] = lit->u.params[i];
   1257  1.1       agc 	      if (params_seen != NULL)
   1258  1.1       agc 		*params_seen = 1;
   1259  1.1       agc 	      break;
   1260  1.1       agc 	    case EMPTY:
   1261  1.1       agc 	      break;
   1262  1.1       agc 	    default:
   1263  1.1       agc 	      assert(0);
   1264  1.1       agc 	      break;
   1265  1.1       agc 	    }
   1266  1.1       agc 	  break;
   1267  1.1       agc 
   1268  1.1       agc 	case UNION:
   1269  1.1       agc 	  /* Subexpressions starting earlier take priority over ones
   1270  1.1       agc 	     starting later, so we prefer the left subexpression over the
   1271  1.1       agc 	     right subexpression. */
   1272  1.1       agc 	  uni = (tre_union_t *)node->obj;
   1273  1.1       agc 	  if (uni->left->nullable)
   1274  1.1       agc 	    STACK_PUSHX(stack, voidptr, uni->left)
   1275  1.1       agc 	  else if (uni->right->nullable)
   1276  1.1       agc 	    STACK_PUSHX(stack, voidptr, uni->right)
   1277  1.1       agc 	  else
   1278  1.1       agc 	    assert(0);
   1279  1.1       agc 	  break;
   1280  1.1       agc 
   1281  1.1       agc 	case CATENATION:
   1282  1.1       agc 	  /* The path must go through both children. */
   1283  1.1       agc 	  cat = (tre_catenation_t *)node->obj;
   1284  1.1       agc 	  assert(cat->left->nullable);
   1285  1.1       agc 	  assert(cat->right->nullable);
   1286  1.1       agc 	  STACK_PUSHX(stack, voidptr, cat->left);
   1287  1.1       agc 	  STACK_PUSHX(stack, voidptr, cat->right);
   1288  1.1       agc 	  break;
   1289  1.1       agc 
   1290  1.1       agc 	case ITERATION:
   1291  1.1       agc 	  /* A match with an empty string is preferred over no match at
   1292  1.1       agc 	     all, so we go through the argument if possible. */
   1293  1.1       agc 	  iter = (tre_iteration_t *)node->obj;
   1294  1.1       agc 	  if (iter->arg->nullable)
   1295  1.1       agc 	    STACK_PUSHX(stack, voidptr, iter->arg);
   1296  1.1       agc 	  break;
   1297  1.1       agc 
   1298  1.1       agc 	default:
   1299  1.1       agc 	  assert(0);
   1300  1.1       agc 	  break;
   1301  1.1       agc 	}
   1302  1.1       agc     }
   1303  1.1       agc 
   1304  1.1       agc   return status;
   1305  1.1       agc }
   1306  1.1       agc 
   1307  1.1       agc 
   1308  1.1       agc typedef enum {
   1309  1.1       agc   NFL_RECURSE,
   1310  1.1       agc   NFL_POST_UNION,
   1311  1.1       agc   NFL_POST_CATENATION,
   1312  1.1       agc   NFL_POST_ITERATION
   1313  1.1       agc } tre_nfl_stack_symbol_t;
   1314  1.1       agc 
   1315  1.1       agc 
   1316  1.1       agc /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
   1317  1.1       agc    the nodes of the AST `tree'. */
   1318  1.1       agc static reg_errcode_t
   1319  1.1       agc tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
   1320  1.1       agc {
   1321  1.1       agc   int bottom = tre_stack_num_objects(stack);
   1322  1.1       agc 
   1323  1.1       agc   STACK_PUSHR(stack, voidptr, tree);
   1324  1.2  christos   STACK_PUSHR(stack, long, NFL_RECURSE);
   1325  1.1       agc 
   1326  1.1       agc   while (tre_stack_num_objects(stack) > bottom)
   1327  1.1       agc     {
   1328  1.1       agc       tre_nfl_stack_symbol_t symbol;
   1329  1.1       agc       tre_ast_node_t *node;
   1330  1.1       agc 
   1331  1.2  christos       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_long(stack);
   1332  1.1       agc       node = tre_stack_pop_voidptr(stack);
   1333  1.1       agc       switch (symbol)
   1334  1.1       agc 	{
   1335  1.1       agc 	case NFL_RECURSE:
   1336  1.1       agc 	  switch (node->type)
   1337  1.1       agc 	    {
   1338  1.1       agc 	    case LITERAL:
   1339  1.1       agc 	      {
   1340  1.1       agc 		tre_literal_t *lit = (tre_literal_t *)node->obj;
   1341  1.1       agc 		if (IS_BACKREF(lit))
   1342  1.1       agc 		  {
   1343  1.1       agc 		    /* Back references: nullable = false, firstpos = {i},
   1344  1.1       agc 		       lastpos = {i}. */
   1345  1.1       agc 		    node->nullable = 0;
   1346  1.1       agc 		    node->firstpos = tre_set_one(mem, lit->position, 0,
   1347  1.1       agc 					     TRE_CHAR_MAX, 0, NULL, -1);
   1348  1.1       agc 		    if (!node->firstpos)
   1349  1.1       agc 		      return REG_ESPACE;
   1350  1.1       agc 		    node->lastpos = tre_set_one(mem, lit->position, 0,
   1351  1.1       agc 						TRE_CHAR_MAX, 0, NULL,
   1352  1.1       agc 						(int)lit->code_max);
   1353  1.1       agc 		    if (!node->lastpos)
   1354  1.1       agc 		      return REG_ESPACE;
   1355  1.1       agc 		  }
   1356  1.1       agc 		else if (lit->code_min < 0)
   1357  1.1       agc 		  {
   1358  1.1       agc 		    /* Tags, empty strings, params, and zero width assertions:
   1359  1.1       agc 		       nullable = true, firstpos = {}, and lastpos = {}. */
   1360  1.1       agc 		    node->nullable = 1;
   1361  1.1       agc 		    node->firstpos = tre_set_empty(mem);
   1362  1.1       agc 		    if (!node->firstpos)
   1363  1.1       agc 		      return REG_ESPACE;
   1364  1.1       agc 		    node->lastpos = tre_set_empty(mem);
   1365  1.1       agc 		    if (!node->lastpos)
   1366  1.1       agc 		      return REG_ESPACE;
   1367  1.1       agc 		  }
   1368  1.1       agc 		else
   1369  1.1       agc 		  {
   1370  1.1       agc 		    /* Literal at position i: nullable = false, firstpos = {i},
   1371  1.1       agc 		       lastpos = {i}. */
   1372  1.1       agc 		    node->nullable = 0;
   1373  1.1       agc 		    node->firstpos =
   1374  1.1       agc 		      tre_set_one(mem, lit->position, (int)lit->code_min,
   1375  1.1       agc 				  (int)lit->code_max, 0, NULL, -1);
   1376  1.1       agc 		    if (!node->firstpos)
   1377  1.1       agc 		      return REG_ESPACE;
   1378  1.1       agc 		    node->lastpos = tre_set_one(mem, lit->position,
   1379  1.1       agc 						(int)lit->code_min,
   1380  1.1       agc 						(int)lit->code_max,
   1381  1.1       agc 						lit->u.class, lit->neg_classes,
   1382  1.1       agc 						-1);
   1383  1.1       agc 		    if (!node->lastpos)
   1384  1.1       agc 		      return REG_ESPACE;
   1385  1.1       agc 		  }
   1386  1.1       agc 		break;
   1387  1.1       agc 	      }
   1388  1.1       agc 
   1389  1.1       agc 	    case UNION:
   1390  1.1       agc 	      /* Compute the attributes for the two subtrees, and after that
   1391  1.1       agc 		 for this node. */
   1392  1.1       agc 	      STACK_PUSHR(stack, voidptr, node);
   1393  1.2  christos 	      STACK_PUSHR(stack, long, NFL_POST_UNION);
   1394  1.1       agc 	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
   1395  1.2  christos 	      STACK_PUSHR(stack, long, NFL_RECURSE);
   1396  1.1       agc 	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
   1397  1.2  christos 	      STACK_PUSHR(stack, long, NFL_RECURSE);
   1398  1.1       agc 	      break;
   1399  1.1       agc 
   1400  1.1       agc 	    case CATENATION:
   1401  1.1       agc 	      /* Compute the attributes for the two subtrees, and after that
   1402  1.1       agc 		 for this node. */
   1403  1.1       agc 	      STACK_PUSHR(stack, voidptr, node);
   1404  1.2  christos 	      STACK_PUSHR(stack, long, NFL_POST_CATENATION);
   1405  1.1       agc 	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
   1406  1.2  christos 	      STACK_PUSHR(stack, long, NFL_RECURSE);
   1407  1.1       agc 	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
   1408  1.2  christos 	      STACK_PUSHR(stack, long, NFL_RECURSE);
   1409  1.1       agc 	      break;
   1410  1.1       agc 
   1411  1.1       agc 	    case ITERATION:
   1412  1.1       agc 	      /* Compute the attributes for the subtree, and after that for
   1413  1.1       agc 		 this node. */
   1414  1.1       agc 	      STACK_PUSHR(stack, voidptr, node);
   1415  1.2  christos 	      STACK_PUSHR(stack, long, NFL_POST_ITERATION);
   1416  1.1       agc 	      STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
   1417  1.2  christos 	      STACK_PUSHR(stack, long, NFL_RECURSE);
   1418  1.1       agc 	      break;
   1419  1.1       agc 	    }
   1420  1.1       agc 	  break; /* end case: NFL_RECURSE */
   1421  1.1       agc 
   1422  1.1       agc 	case NFL_POST_UNION:
   1423  1.1       agc 	  {
   1424  1.1       agc 	    tre_union_t *uni = (tre_union_t *)node->obj;
   1425  1.1       agc 	    node->nullable = uni->left->nullable || uni->right->nullable;
   1426  1.1       agc 	    node->firstpos = tre_set_union(mem, uni->left->firstpos,
   1427  1.1       agc 					   uni->right->firstpos, NULL, 0, NULL);
   1428  1.1       agc 	    if (!node->firstpos)
   1429  1.1       agc 	      return REG_ESPACE;
   1430  1.1       agc 	    node->lastpos = tre_set_union(mem, uni->left->lastpos,
   1431  1.1       agc 					  uni->right->lastpos, NULL, 0, NULL);
   1432  1.1       agc 	    if (!node->lastpos)
   1433  1.1       agc 	      return REG_ESPACE;
   1434  1.1       agc 	    break;
   1435  1.1       agc 	  }
   1436  1.1       agc 
   1437  1.1       agc 	case NFL_POST_ITERATION:
   1438  1.1       agc 	  {
   1439  1.1       agc 	    tre_iteration_t *iter = (tre_iteration_t *)node->obj;
   1440  1.1       agc 
   1441  1.1       agc 	    if (iter->min == 0 || iter->arg->nullable)
   1442  1.1       agc 	      node->nullable = 1;
   1443  1.1       agc 	    else
   1444  1.1       agc 	      node->nullable = 0;
   1445  1.1       agc 	    node->firstpos = iter->arg->firstpos;
   1446  1.1       agc 	    node->lastpos = iter->arg->lastpos;
   1447  1.1       agc 	    break;
   1448  1.1       agc 	  }
   1449  1.1       agc 
   1450  1.1       agc 	case NFL_POST_CATENATION:
   1451  1.1       agc 	  {
   1452  1.1       agc 	    int num_tags, *tags, assertions, params_seen;
   1453  1.1       agc 	    int *params;
   1454  1.1       agc 	    reg_errcode_t status;
   1455  1.1       agc 	    tre_catenation_t *cat = node->obj;
   1456  1.1       agc 	    node->nullable = cat->left->nullable && cat->right->nullable;
   1457  1.1       agc 
   1458  1.1       agc 	    /* Compute firstpos. */
   1459  1.1       agc 	    if (cat->left->nullable)
   1460  1.1       agc 	      {
   1461  1.1       agc 		/* The left side matches the empty string.  Make a first pass
   1462  1.1       agc 		   with tre_match_empty() to get the number of tags and
   1463  1.1       agc 		   parameters. */
   1464  1.1       agc 		status = tre_match_empty(stack, cat->left,
   1465  1.1       agc 					 NULL, NULL, NULL, &num_tags,
   1466  1.1       agc 					 &params_seen);
   1467  1.1       agc 		if (status != REG_OK)
   1468  1.1       agc 		  return status;
   1469  1.1       agc 		/* Allocate arrays for the tags and parameters. */
   1470  1.1       agc 		tags = xmalloc(sizeof(*tags) * (num_tags + 1));
   1471  1.1       agc 		if (!tags)
   1472  1.1       agc 		  return REG_ESPACE;
   1473  1.1       agc 		tags[0] = -1;
   1474  1.1       agc 		assertions = 0;
   1475  1.1       agc 		params = NULL;
   1476  1.1       agc 		if (params_seen)
   1477  1.1       agc 		  {
   1478  1.1       agc 		    params = tre_mem_alloc(mem, sizeof(*params)
   1479  1.1       agc 					   * TRE_PARAM_LAST);
   1480  1.1       agc 		    if (!params)
   1481  1.1       agc 		      {
   1482  1.1       agc 			xfree(tags);
   1483  1.1       agc 			return REG_ESPACE;
   1484  1.1       agc 		      }
   1485  1.1       agc 		  }
   1486  1.1       agc 		/* Second pass with tre_mach_empty() to get the list of
   1487  1.1       agc 		   tags and parameters. */
   1488  1.1       agc 		status = tre_match_empty(stack, cat->left, tags,
   1489  1.1       agc 					 &assertions, params, NULL, NULL);
   1490  1.1       agc 		if (status != REG_OK)
   1491  1.1       agc 		  {
   1492  1.1       agc 		    xfree(tags);
   1493  1.1       agc 		    return status;
   1494  1.1       agc 		  }
   1495  1.1       agc 		node->firstpos =
   1496  1.1       agc 		  tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
   1497  1.1       agc 				tags, assertions, params);
   1498  1.1       agc 		xfree(tags);
   1499  1.1       agc 		if (!node->firstpos)
   1500  1.1       agc 		  return REG_ESPACE;
   1501  1.1       agc 	      }
   1502  1.1       agc 	    else
   1503  1.1       agc 	      {
   1504  1.1       agc 		node->firstpos = cat->left->firstpos;
   1505  1.1       agc 	      }
   1506  1.1       agc 
   1507  1.1       agc 	    /* Compute lastpos. */
   1508  1.1       agc 	    if (cat->right->nullable)
   1509  1.1       agc 	      {
   1510  1.1       agc 		/* The right side matches the empty string.  Make a first pass
   1511  1.1       agc 		   with tre_match_empty() to get the number of tags and
   1512  1.1       agc 		   parameters. */
   1513  1.1       agc 		status = tre_match_empty(stack, cat->right,
   1514  1.1       agc 					 NULL, NULL, NULL, &num_tags,
   1515  1.1       agc 					 &params_seen);
   1516  1.1       agc 		if (status != REG_OK)
   1517  1.1       agc 		  return status;
   1518  1.1       agc 		/* Allocate arrays for the tags and parameters. */
   1519  1.1       agc 		tags = xmalloc(sizeof(int) * (num_tags + 1));
   1520  1.1       agc 		if (!tags)
   1521  1.1       agc 		  return REG_ESPACE;
   1522  1.1       agc 		tags[0] = -1;
   1523  1.1       agc 		assertions = 0;
   1524  1.1       agc 		params = NULL;
   1525  1.1       agc 		if (params_seen)
   1526  1.1       agc 		  {
   1527  1.1       agc 		    params = tre_mem_alloc(mem, sizeof(*params)
   1528  1.1       agc 					   * TRE_PARAM_LAST);
   1529  1.1       agc 		    if (!params)
   1530  1.1       agc 		      {
   1531  1.1       agc 			xfree(tags);
   1532  1.1       agc 			return REG_ESPACE;
   1533  1.1       agc 		      }
   1534  1.1       agc 		  }
   1535  1.1       agc 		/* Second pass with tre_mach_empty() to get the list of
   1536  1.1       agc 		   tags and parameters. */
   1537  1.1       agc 		status = tre_match_empty(stack, cat->right, tags,
   1538  1.1       agc 					 &assertions, params, NULL, NULL);
   1539  1.1       agc 		if (status != REG_OK)
   1540  1.1       agc 		  {
   1541  1.1       agc 		    xfree(tags);
   1542  1.1       agc 		    return status;
   1543  1.1       agc 		  }
   1544  1.1       agc 		node->lastpos =
   1545  1.1       agc 		  tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
   1546  1.1       agc 				tags, assertions, params);
   1547  1.1       agc 		xfree(tags);
   1548  1.1       agc 		if (!node->lastpos)
   1549  1.1       agc 		  return REG_ESPACE;
   1550  1.1       agc 	      }
   1551  1.1       agc 	    else
   1552  1.1       agc 	      {
   1553  1.1       agc 		node->lastpos = cat->right->lastpos;
   1554  1.1       agc 	      }
   1555  1.1       agc 	    break;
   1556  1.1       agc 	  }
   1557  1.1       agc 
   1558  1.1       agc 	default:
   1559  1.1       agc 	  assert(0);
   1560  1.1       agc 	  break;
   1561  1.1       agc 	}
   1562  1.1       agc     }
   1563  1.1       agc 
   1564  1.1       agc   return REG_OK;
   1565  1.1       agc }
   1566  1.1       agc 
   1567  1.1       agc 
   1568  1.1       agc /* Adds a transition from each position in `p1' to each position in `p2'. */
   1569  1.1       agc static reg_errcode_t
   1570  1.1       agc tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
   1571  1.1       agc 	       tre_tnfa_transition_t *transitions,
   1572  1.1       agc 	       int *counts, int *offs)
   1573  1.1       agc {
   1574  1.1       agc   tre_pos_and_tags_t *orig_p2 = p2;
   1575  1.1       agc   tre_tnfa_transition_t *trans;
   1576  1.1       agc   int i, j, k, l, dup, prev_p2_pos;
   1577  1.1       agc 
   1578  1.1       agc   if (transitions != NULL)
   1579  1.1       agc     while (p1->position >= 0)
   1580  1.1       agc       {
   1581  1.1       agc 	p2 = orig_p2;
   1582  1.1       agc 	prev_p2_pos = -1;
   1583  1.1       agc 	while (p2->position >= 0)
   1584  1.1       agc 	  {
   1585  1.1       agc 	    /* Optimization: if this position was already handled, skip it. */
   1586  1.1       agc 	    if (p2->position == prev_p2_pos)
   1587  1.1       agc 	      {
   1588  1.1       agc 		p2++;
   1589  1.1       agc 		continue;
   1590  1.1       agc 	      }
   1591  1.1       agc 	    prev_p2_pos = p2->position;
   1592  1.1       agc 	    /* Set `trans' to point to the next unused transition from
   1593  1.1       agc 	       position `p1->position'. */
   1594  1.1       agc 	    trans = transitions + offs[p1->position];
   1595  1.1       agc 	    while (trans->state != NULL)
   1596  1.1       agc 	      {
   1597  1.1       agc #if 0
   1598  1.1       agc 		/* If we find a previous transition from `p1->position' to
   1599  1.1       agc 		   `p2->position', it is overwritten.  This can happen only
   1600  1.1       agc 		   if there are nested loops in the regexp, like in "((a)*)*".
   1601  1.1       agc 		   In POSIX.2 repetition using the outer loop is always
   1602  1.1       agc 		   preferred over using the inner loop.	 Therefore the
   1603  1.1       agc 		   transition for the inner loop is useless and can be thrown
   1604  1.1       agc 		   away. */
   1605  1.1       agc 		/* XXX - The same position is used for all nodes in a bracket
   1606  1.1       agc 		   expression, so this optimization cannot be used (it will
   1607  1.1       agc 		   break bracket expressions) unless I figure out a way to
   1608  1.1       agc 		   detect it here. */
   1609  1.1       agc 		if (trans->state_id == p2->position)
   1610  1.1       agc 		  {
   1611  1.1       agc 		    DPRINT(("*"));
   1612  1.1       agc 		    break;
   1613  1.1       agc 		  }
   1614  1.1       agc #endif
   1615  1.1       agc 		trans++;
   1616  1.1       agc 	      }
   1617  1.1       agc 
   1618  1.1       agc 	    if (trans->state == NULL)
   1619  1.1       agc 	      (trans + 1)->state = NULL;
   1620  1.1       agc 	    /* Use the character ranges, assertions, etc. from `p1' for
   1621  1.1       agc 	       the transition from `p1' to `p2'. */
   1622  1.1       agc 	    trans->code_min = p1->code_min;
   1623  1.1       agc 	    trans->code_max = p1->code_max;
   1624  1.1       agc 	    trans->state = transitions + offs[p2->position];
   1625  1.1       agc 	    trans->state_id = p2->position;
   1626  1.1       agc 	    trans->assertions = p1->assertions | p2->assertions
   1627  1.1       agc 	      | (p1->class ? ASSERT_CHAR_CLASS : 0)
   1628  1.1       agc 	      | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
   1629  1.1       agc 	    if (p1->backref >= 0)
   1630  1.1       agc 	      {
   1631  1.1       agc 		assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
   1632  1.1       agc 		assert(p2->backref < 0);
   1633  1.1       agc 		trans->u.backref = p1->backref;
   1634  1.1       agc 		trans->assertions |= ASSERT_BACKREF;
   1635  1.1       agc 	      }
   1636  1.1       agc 	    else
   1637  1.1       agc 	      trans->u.class = p1->class;
   1638  1.1       agc 	    if (p1->neg_classes != NULL)
   1639  1.1       agc 	      {
   1640  1.1       agc 		for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
   1641  1.1       agc 		trans->neg_classes =
   1642  1.1       agc 		  xmalloc(sizeof(*trans->neg_classes) * (i + 1));
   1643  1.1       agc 		if (trans->neg_classes == NULL)
   1644  1.1       agc 		  return REG_ESPACE;
   1645  1.1       agc 		for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
   1646  1.1       agc 		  trans->neg_classes[i] = p1->neg_classes[i];
   1647  1.1       agc 		trans->neg_classes[i] = (tre_ctype_t)0;
   1648  1.1       agc 	      }
   1649  1.1       agc 	    else
   1650  1.1       agc 	      trans->neg_classes = NULL;
   1651  1.1       agc 
   1652  1.1       agc 	    /* Find out how many tags this transition has. */
   1653  1.1       agc 	    i = 0;
   1654  1.1       agc 	    if (p1->tags != NULL)
   1655  1.1       agc 	      while(p1->tags[i] >= 0)
   1656  1.1       agc 		i++;
   1657  1.1       agc 	    j = 0;
   1658  1.1       agc 	    if (p2->tags != NULL)
   1659  1.1       agc 	      while(p2->tags[j] >= 0)
   1660  1.1       agc 		j++;
   1661  1.1       agc 
   1662  1.1       agc 	    /* If we are overwriting a transition, free the old tag array. */
   1663  1.1       agc 	    if (trans->tags != NULL)
   1664  1.1       agc 	      xfree(trans->tags);
   1665  1.1       agc 	    trans->tags = NULL;
   1666  1.1       agc 
   1667  1.1       agc 	    /* If there were any tags, allocate an array and fill it. */
   1668  1.1       agc 	    if (i + j > 0)
   1669  1.1       agc 	      {
   1670  1.1       agc 		trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
   1671  1.1       agc 		if (!trans->tags)
   1672  1.1       agc 		  return REG_ESPACE;
   1673  1.1       agc 		i = 0;
   1674  1.1       agc 		if (p1->tags != NULL)
   1675  1.1       agc 		  while(p1->tags[i] >= 0)
   1676  1.1       agc 		    {
   1677  1.1       agc 		      trans->tags[i] = p1->tags[i];
   1678  1.1       agc 		      i++;
   1679  1.1       agc 		    }
   1680  1.1       agc 		l = i;
   1681  1.1       agc 		j = 0;
   1682  1.1       agc 		if (p2->tags != NULL)
   1683  1.1       agc 		  while (p2->tags[j] >= 0)
   1684  1.1       agc 		    {
   1685  1.1       agc 		      /* Don't add duplicates. */
   1686  1.1       agc 		      dup = 0;
   1687  1.1       agc 		      for (k = 0; k < i; k++)
   1688  1.1       agc 			if (trans->tags[k] == p2->tags[j])
   1689  1.1       agc 			  {
   1690  1.1       agc 			    dup = 1;
   1691  1.1       agc 			    break;
   1692  1.1       agc 			  }
   1693  1.1       agc 		      if (!dup)
   1694  1.1       agc 			trans->tags[l++] = p2->tags[j];
   1695  1.1       agc 		      j++;
   1696  1.1       agc 		    }
   1697  1.1       agc 		trans->tags[l] = -1;
   1698  1.1       agc 	      }
   1699  1.1       agc 
   1700  1.1       agc 	    /* Set the parameter array.	 If both `p2' and `p1' have same
   1701  1.1       agc 	       parameters, the values in `p2' override those in `p1'. */
   1702  1.1       agc 	    if (p1->params || p2->params)
   1703  1.1       agc 	      {
   1704  1.1       agc 		if (!trans->params)
   1705  1.1       agc 		  trans->params = xmalloc(sizeof(*trans->params)
   1706  1.1       agc 					  * TRE_PARAM_LAST);
   1707  1.1       agc 		if (!trans->params)
   1708  1.1       agc 		  return REG_ESPACE;
   1709  1.1       agc 		for (i = 0; i < TRE_PARAM_LAST; i++)
   1710  1.1       agc 		  {
   1711  1.1       agc 		    trans->params[i] = TRE_PARAM_UNSET;
   1712  1.1       agc 		    if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
   1713  1.1       agc 		      trans->params[i] = p1->params[i];
   1714  1.1       agc 		    if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
   1715  1.1       agc 		      trans->params[i] = p2->params[i];
   1716  1.1       agc 		  }
   1717  1.1       agc 	      }
   1718  1.1       agc 	    else
   1719  1.1       agc 	      {
   1720  1.1       agc 		if (trans->params)
   1721  1.1       agc 		  xfree(trans->params);
   1722  1.1       agc 		trans->params = NULL;
   1723  1.1       agc 	      }
   1724  1.1       agc 
   1725  1.1       agc 
   1726  1.1       agc #ifdef TRE_DEBUG
   1727  1.1       agc 	    {
   1728  1.1       agc 	      int *tags;
   1729  1.1       agc 
   1730  1.1       agc 	      DPRINT(("	 %2d -> %2d on %3d", p1->position, p2->position,
   1731  1.1       agc 		      p1->code_min));
   1732  1.1       agc 	      if (p1->code_max != p1->code_min)
   1733  1.1       agc 		DPRINT(("-%3d", p1->code_max));
   1734  1.1       agc 	      tags = trans->tags;
   1735  1.1       agc 	      if (tags)
   1736  1.1       agc 		{
   1737  1.1       agc 		  DPRINT((", tags ["));
   1738  1.1       agc 		  while (*tags >= 0)
   1739  1.1       agc 		    {
   1740  1.1       agc 		      DPRINT(("%d", *tags));
   1741  1.1       agc 		      tags++;
   1742  1.1       agc 		      if (*tags >= 0)
   1743  1.1       agc 			DPRINT((","));
   1744  1.1       agc 		    }
   1745  1.1       agc 		  DPRINT(("]"));
   1746  1.1       agc 		}
   1747  1.1       agc 	      if (trans->assertions)
   1748  1.1       agc 		DPRINT((", assert %d", trans->assertions));
   1749  1.1       agc 	      if (trans->assertions & ASSERT_BACKREF)
   1750  1.1       agc 		DPRINT((", backref %d", trans->u.backref));
   1751  1.1       agc 	      else if (trans->u.class)
   1752  1.1       agc 		DPRINT((", class %ld", (long)trans->u.class));
   1753  1.1       agc 	      if (trans->neg_classes)
   1754  1.1       agc 		DPRINT((", neg_classes %p", trans->neg_classes));
   1755  1.1       agc 	      if (trans->params)
   1756  1.1       agc 		{
   1757  1.1       agc 		  DPRINT((", "));
   1758  1.1       agc 		  tre_print_params(trans->params);
   1759  1.1       agc 		}
   1760  1.1       agc 	      DPRINT(("\n"));
   1761  1.1       agc 	    }
   1762  1.1       agc #endif /* TRE_DEBUG */
   1763  1.1       agc 	    p2++;
   1764  1.1       agc 	  }
   1765  1.1       agc 	p1++;
   1766  1.1       agc       }
   1767  1.1       agc   else
   1768  1.1       agc     /* Compute a maximum limit for the number of transitions leaving
   1769  1.1       agc        from each state. */
   1770  1.1       agc     while (p1->position >= 0)
   1771  1.1       agc       {
   1772  1.1       agc 	p2 = orig_p2;
   1773  1.1       agc 	while (p2->position >= 0)
   1774  1.1       agc 	  {
   1775  1.1       agc 	    counts[p1->position]++;
   1776  1.1       agc 	    p2++;
   1777  1.1       agc 	  }
   1778  1.1       agc 	p1++;
   1779  1.1       agc       }
   1780  1.1       agc   return REG_OK;
   1781  1.1       agc }
   1782  1.1       agc 
   1783  1.1       agc /* Converts the syntax tree to a TNFA.	All the transitions in the TNFA are
   1784  1.1       agc    labelled with one character range (there are no transitions on empty
   1785  1.1       agc    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
   1786  1.1       agc    the regexp. */
   1787  1.1       agc static reg_errcode_t
   1788  1.1       agc tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
   1789  1.1       agc 		int *counts, int *offs)
   1790  1.1       agc {
   1791  1.1       agc   tre_union_t *uni;
   1792  1.1       agc   tre_catenation_t *cat;
   1793  1.1       agc   tre_iteration_t *iter;
   1794  1.1       agc   reg_errcode_t errcode = REG_OK;
   1795  1.1       agc 
   1796  1.1       agc   /* XXX - recurse using a stack!. */
   1797  1.1       agc   switch (node->type)
   1798  1.1       agc     {
   1799  1.1       agc     case LITERAL:
   1800  1.1       agc       break;
   1801  1.1       agc     case UNION:
   1802  1.1       agc       uni = (tre_union_t *)node->obj;
   1803  1.1       agc       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
   1804  1.1       agc       if (errcode != REG_OK)
   1805  1.1       agc 	return errcode;
   1806  1.1       agc       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
   1807  1.1       agc       break;
   1808  1.1       agc 
   1809  1.1       agc     case CATENATION:
   1810  1.1       agc       cat = (tre_catenation_t *)node->obj;
   1811  1.1       agc       /* Add a transition from each position in cat->left->lastpos
   1812  1.1       agc 	 to each position in cat->right->firstpos. */
   1813  1.1       agc       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
   1814  1.1       agc 			       transitions, counts, offs);
   1815  1.1       agc       if (errcode != REG_OK)
   1816  1.1       agc 	return errcode;
   1817  1.1       agc       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
   1818  1.1       agc       if (errcode != REG_OK)
   1819  1.1       agc 	return errcode;
   1820  1.1       agc       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
   1821  1.1       agc       break;
   1822  1.1       agc 
   1823  1.1       agc     case ITERATION:
   1824  1.1       agc       iter = (tre_iteration_t *)node->obj;
   1825  1.1       agc       assert(iter->max == -1 || iter->max == 1);
   1826  1.1       agc 
   1827  1.1       agc       if (iter->max == -1)
   1828  1.1       agc 	{
   1829  1.1       agc 	  assert(iter->min == 0 || iter->min == 1);
   1830  1.1       agc 	  /* Add a transition from each last position in the iterated
   1831  1.1       agc 	     expression to each first position. */
   1832  1.1       agc 	  errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
   1833  1.1       agc 				   transitions, counts, offs);
   1834  1.1       agc 	  if (errcode != REG_OK)
   1835  1.1       agc 	    return errcode;
   1836  1.1       agc 	}
   1837  1.1       agc       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
   1838  1.1       agc       break;
   1839  1.1       agc     }
   1840  1.1       agc   return errcode;
   1841  1.1       agc }
   1842  1.1       agc 
   1843  1.1       agc 
   1844  1.1       agc #define ERROR_EXIT(err)		  \
   1845  1.1       agc   do				  \
   1846  1.1       agc     {				  \
   1847  1.1       agc       errcode = err;		  \
   1848  1.1       agc       if (/*CONSTCOND*/1)	  \
   1849  1.1       agc       	goto error_exit;	  \
   1850  1.1       agc     }				  \
   1851  1.1       agc  while (/*CONSTCOND*/0)
   1852  1.1       agc 
   1853  1.1       agc 
   1854  1.1       agc int
   1855  1.1       agc tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
   1856  1.1       agc {
   1857  1.1       agc   tre_stack_t *stack;
   1858  1.1       agc   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
   1859  1.1       agc   tre_pos_and_tags_t *p;
   1860  1.1       agc   int *counts = NULL, *offs = NULL;
   1861  1.1       agc   int i, add = 0;
   1862  1.1       agc   tre_tnfa_transition_t *transitions, *initial;
   1863  1.1       agc   tre_tnfa_t *tnfa = NULL;
   1864  1.1       agc   tre_submatch_data_t *submatch_data;
   1865  1.1       agc   tre_tag_direction_t *tag_directions = NULL;
   1866  1.1       agc   reg_errcode_t errcode;
   1867  1.1       agc   tre_mem_t mem;
   1868  1.1       agc 
   1869  1.1       agc   /* Parse context. */
   1870  1.1       agc   tre_parse_ctx_t parse_ctx;
   1871  1.1       agc 
   1872  1.1       agc   /* Allocate a stack used throughout the compilation process for various
   1873  1.1       agc      purposes. */
   1874  1.1       agc   stack = tre_stack_new(512, 10240, 128);
   1875  1.1       agc   if (!stack)
   1876  1.1       agc     return REG_ESPACE;
   1877  1.1       agc   /* Allocate a fast memory allocator. */
   1878  1.1       agc   mem = tre_mem_new();
   1879  1.1       agc   if (!mem)
   1880  1.1       agc     {
   1881  1.1       agc       tre_stack_destroy(stack);
   1882  1.1       agc       return REG_ESPACE;
   1883  1.1       agc     }
   1884  1.1       agc 
   1885  1.1       agc   /* Parse the regexp. */
   1886  1.1       agc   memset(&parse_ctx, 0, sizeof(parse_ctx));
   1887  1.1       agc   parse_ctx.mem = mem;
   1888  1.1       agc   parse_ctx.stack = stack;
   1889  1.1       agc   parse_ctx.re = regex;
   1890  1.1       agc   parse_ctx.len = n;
   1891  1.1       agc   parse_ctx.cflags = cflags;
   1892  1.1       agc   parse_ctx.max_backref = -1;
   1893  1.1       agc   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
   1894  1.1       agc   errcode = tre_parse(&parse_ctx);
   1895  1.1       agc   if (errcode != REG_OK)
   1896  1.1       agc     ERROR_EXIT(errcode);
   1897  1.1       agc   preg->re_nsub = parse_ctx.submatch_id - 1;
   1898  1.1       agc   tree = parse_ctx.result;
   1899  1.1       agc 
   1900  1.1       agc   /* Back references and approximate matching cannot currently be used
   1901  1.1       agc      in the same regexp. */
   1902  1.1       agc   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
   1903  1.1       agc     ERROR_EXIT(REG_BADPAT);
   1904  1.1       agc 
   1905  1.1       agc #ifdef TRE_DEBUG
   1906  1.1       agc   tre_ast_print(tree);
   1907  1.1       agc #endif /* TRE_DEBUG */
   1908  1.1       agc 
   1909  1.1       agc   /* Referring to nonexistent subexpressions is illegal. */
   1910  1.1       agc   if (parse_ctx.max_backref > (int)preg->re_nsub)
   1911  1.1       agc     ERROR_EXIT(REG_ESUBREG);
   1912  1.1       agc 
   1913  1.1       agc   /* Allocate the TNFA struct. */
   1914  1.1       agc   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
   1915  1.1       agc   if (tnfa == NULL)
   1916  1.1       agc     ERROR_EXIT(REG_ESPACE);
   1917  1.1       agc   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
   1918  1.1       agc   tnfa->have_approx = parse_ctx.have_approx;
   1919  1.1       agc   tnfa->num_submatches = parse_ctx.submatch_id;
   1920  1.1       agc 
   1921  1.1       agc   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
   1922  1.1       agc      regexp does not have back references, this can be skipped. */
   1923  1.1       agc   if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
   1924  1.1       agc     {
   1925  1.1       agc       DPRINT(("tre_compile: setting up tags\n"));
   1926  1.1       agc 
   1927  1.1       agc       /* Figure out how many tags we will need. */
   1928  1.1       agc       errcode = tre_add_tags(NULL, stack, tree, tnfa);
   1929  1.1       agc       if (errcode != REG_OK)
   1930  1.1       agc 	ERROR_EXIT(errcode);
   1931  1.1       agc #ifdef TRE_DEBUG
   1932  1.1       agc       tre_ast_print(tree);
   1933  1.1       agc #endif /* TRE_DEBUG */
   1934  1.1       agc 
   1935  1.1       agc       if (tnfa->num_tags > 0)
   1936  1.1       agc 	{
   1937  1.1       agc 	  tag_directions = xmalloc(sizeof(*tag_directions)
   1938  1.1       agc 				   * (tnfa->num_tags + 1));
   1939  1.1       agc 	  if (tag_directions == NULL)
   1940  1.1       agc 	    ERROR_EXIT(REG_ESPACE);
   1941  1.1       agc 	  tnfa->tag_directions = tag_directions;
   1942  1.1       agc 	  memset(tag_directions, -1,
   1943  1.1       agc 		 sizeof(*tag_directions) * (tnfa->num_tags + 1));
   1944  1.1       agc 	}
   1945  1.1       agc       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
   1946  1.1       agc 				   sizeof(tnfa->minimal_tags));
   1947  1.1       agc       if (tnfa->minimal_tags == NULL)
   1948  1.1       agc 	ERROR_EXIT(REG_ESPACE);
   1949  1.1       agc 
   1950  1.1       agc       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
   1951  1.1       agc 			      sizeof(*submatch_data));
   1952  1.1       agc       if (submatch_data == NULL)
   1953  1.1       agc 	ERROR_EXIT(REG_ESPACE);
   1954  1.1       agc       tnfa->submatch_data = submatch_data;
   1955  1.1       agc 
   1956  1.1       agc       errcode = tre_add_tags(mem, stack, tree, tnfa);
   1957  1.1       agc       if (errcode != REG_OK)
   1958  1.1       agc 	ERROR_EXIT(errcode);
   1959  1.1       agc 
   1960  1.1       agc #ifdef TRE_DEBUG
   1961  1.1       agc       for (i = 0; i < parse_ctx.submatch_id; i++)
   1962  1.1       agc 	DPRINT(("pmatch[%d] = {t%d, t%d}\n",
   1963  1.1       agc 		i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
   1964  1.1       agc       for (i = 0; i < tnfa->num_tags; i++)
   1965  1.1       agc 	DPRINT(("t%d is %s\n", i,
   1966  1.1       agc 		tag_directions[i] == TRE_TAG_MINIMIZE ?
   1967  1.1       agc 		"minimized" : "maximized"));
   1968  1.1       agc #endif /* TRE_DEBUG */
   1969  1.1       agc     }
   1970  1.1       agc 
   1971  1.1       agc   /* Expand iteration nodes. */
   1972  1.1       agc   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
   1973  1.1       agc 			   tag_directions, &tnfa->params_depth);
   1974  1.1       agc   if (errcode != REG_OK)
   1975  1.1       agc     ERROR_EXIT(errcode);
   1976  1.1       agc 
   1977  1.1       agc   /* Add a dummy node for the final state.
   1978  1.1       agc      XXX - For certain patterns this dummy node can be optimized away,
   1979  1.1       agc 	   for example "a*" or "ab*".	Figure out a simple way to detect
   1980  1.1       agc 	   this possibility. */
   1981  1.1       agc   tmp_ast_l = tree;
   1982  1.1       agc   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
   1983  1.1       agc   if (tmp_ast_r == NULL)
   1984  1.1       agc     ERROR_EXIT(REG_ESPACE);
   1985  1.1       agc 
   1986  1.1       agc   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
   1987  1.1       agc   if (tree == NULL)
   1988  1.1       agc     ERROR_EXIT(REG_ESPACE);
   1989  1.1       agc 
   1990  1.1       agc #ifdef TRE_DEBUG
   1991  1.1       agc   tre_ast_print(tree);
   1992  1.1       agc   DPRINT(("Number of states: %d\n", parse_ctx.position));
   1993  1.1       agc #endif /* TRE_DEBUG */
   1994  1.1       agc 
   1995  1.1       agc   errcode = tre_compute_nfl(mem, stack, tree);
   1996  1.1       agc   if (errcode != REG_OK)
   1997  1.1       agc     ERROR_EXIT(errcode);
   1998  1.1       agc 
   1999  1.1       agc   counts = xmalloc(sizeof(int) * parse_ctx.position);
   2000  1.1       agc   if (counts == NULL)
   2001  1.1       agc     ERROR_EXIT(REG_ESPACE);
   2002  1.1       agc 
   2003  1.1       agc   offs = xmalloc(sizeof(int) * parse_ctx.position);
   2004  1.1       agc   if (offs == NULL)
   2005  1.1       agc     ERROR_EXIT(REG_ESPACE);
   2006  1.1       agc 
   2007  1.1       agc   for (i = 0; i < parse_ctx.position; i++)
   2008  1.1       agc     counts[i] = 0;
   2009  1.1       agc   tre_ast_to_tnfa(tree, NULL, counts, NULL);
   2010  1.1       agc 
   2011  1.1       agc   add = 0;
   2012  1.1       agc   for (i = 0; i < parse_ctx.position; i++)
   2013  1.1       agc     {
   2014  1.1       agc       offs[i] = add;
   2015  1.1       agc       add += counts[i] + 1;
   2016  1.1       agc       counts[i] = 0;
   2017  1.1       agc     }
   2018  1.1       agc   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
   2019  1.1       agc   if (transitions == NULL)
   2020  1.1       agc     ERROR_EXIT(REG_ESPACE);
   2021  1.1       agc   tnfa->transitions = transitions;
   2022  1.1       agc   tnfa->num_transitions = add;
   2023  1.1       agc 
   2024  1.1       agc   DPRINT(("Converting to TNFA:\n"));
   2025  1.1       agc   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
   2026  1.1       agc   if (errcode != REG_OK)
   2027  1.1       agc     ERROR_EXIT(errcode);
   2028  1.1       agc 
   2029  1.1       agc   /* If in eight bit mode, compute a table of characters that can be the
   2030  1.1       agc      first character of a match. */
   2031  1.1       agc   tnfa->first_char = -1;
   2032  1.1       agc   if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
   2033  1.1       agc     {
   2034  1.1       agc       int count = 0;
   2035  1.1       agc       tre_cint_t k;
   2036  1.1       agc       DPRINT(("Characters that can start a match:"));
   2037  1.1       agc       tnfa->firstpos_chars = xcalloc(256, sizeof(char));
   2038  1.1       agc       if (tnfa->firstpos_chars == NULL)
   2039  1.1       agc 	ERROR_EXIT(REG_ESPACE);
   2040  1.1       agc       for (p = tree->firstpos; p->position >= 0; p++)
   2041  1.1       agc 	{
   2042  1.1       agc 	  tre_tnfa_transition_t *j = transitions + offs[p->position];
   2043  1.1       agc 	  while (j->state != NULL)
   2044  1.1       agc 	    {
   2045  1.1       agc 	      for (k = j->code_min; k <= j->code_max && k < 256; k++)
   2046  1.1       agc 		{
   2047  1.1       agc 		  DPRINT((" %d", k));
   2048  1.1       agc 		  tnfa->firstpos_chars[k] = 1;
   2049  1.1       agc 		  count++;
   2050  1.1       agc 		}
   2051  1.1       agc 	      j++;
   2052  1.1       agc 	    }
   2053  1.1       agc 	}
   2054  1.1       agc       DPRINT(("\n"));
   2055  1.1       agc #define TRE_OPTIMIZE_FIRST_CHAR 1
   2056  1.1       agc #if TRE_OPTIMIZE_FIRST_CHAR
   2057  1.1       agc       if (count == 1)
   2058  1.1       agc 	{
   2059  1.1       agc 	  for (k = 0; k < 256; k++)
   2060  1.1       agc 	    if (tnfa->firstpos_chars[k])
   2061  1.1       agc 	      {
   2062  1.1       agc 		DPRINT(("first char must be %d\n", k));
   2063  1.1       agc 		tnfa->first_char = k;
   2064  1.1       agc 		xfree(tnfa->firstpos_chars);
   2065  1.1       agc 		tnfa->firstpos_chars = NULL;
   2066  1.1       agc 		break;
   2067  1.1       agc 	      }
   2068  1.1       agc 	}
   2069  1.1       agc #endif
   2070  1.1       agc 
   2071  1.1       agc     }
   2072  1.1       agc   else
   2073  1.1       agc     tnfa->firstpos_chars = NULL;
   2074  1.1       agc 
   2075  1.1       agc 
   2076  1.1       agc   p = tree->firstpos;
   2077  1.1       agc   i = 0;
   2078  1.1       agc   while (p->position >= 0)
   2079  1.1       agc     {
   2080  1.1       agc       i++;
   2081  1.1       agc 
   2082  1.1       agc #ifdef TRE_DEBUG
   2083  1.1       agc       {
   2084  1.1       agc 	int *tags;
   2085  1.1       agc 	DPRINT(("initial: %d", p->position));
   2086  1.1       agc 	tags = p->tags;
   2087  1.1       agc 	if (tags != NULL)
   2088  1.1       agc 	  {
   2089  1.1       agc 	    if (*tags >= 0)
   2090  1.1       agc 	      DPRINT(("/"));
   2091  1.1       agc 	    while (*tags >= 0)
   2092  1.1       agc 	      {
   2093  1.1       agc 		DPRINT(("%d", *tags));
   2094  1.1       agc 		tags++;
   2095  1.1       agc 		if (*tags >= 0)
   2096  1.1       agc 		  DPRINT((","));
   2097  1.1       agc 	      }
   2098  1.1       agc 	  }
   2099  1.1       agc 	DPRINT((", assert %d", p->assertions));
   2100  1.1       agc 	if (p->params)
   2101  1.1       agc 	  {
   2102  1.1       agc 	    DPRINT((", "));
   2103  1.1       agc 	    tre_print_params(p->params);
   2104  1.1       agc 	  }
   2105  1.1       agc 	DPRINT(("\n"));
   2106  1.1       agc       }
   2107  1.1       agc #endif /* TRE_DEBUG */
   2108  1.1       agc 
   2109  1.1       agc       p++;
   2110  1.1       agc     }
   2111  1.1       agc 
   2112  1.1       agc   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
   2113  1.1       agc   if (initial == NULL)
   2114  1.1       agc     ERROR_EXIT(REG_ESPACE);
   2115  1.1       agc   tnfa->initial = initial;
   2116  1.1       agc 
   2117  1.1       agc   i = 0;
   2118  1.1       agc   for (p = tree->firstpos; p->position >= 0; p++)
   2119  1.1       agc     {
   2120  1.1       agc       initial[i].state = transitions + offs[p->position];
   2121  1.1       agc       initial[i].state_id = p->position;
   2122  1.1       agc       initial[i].tags = NULL;
   2123  1.1       agc       /* Copy the arrays p->tags, and p->params, they are allocated
   2124  1.1       agc 	 from a tre_mem object. */
   2125  1.1       agc       if (p->tags)
   2126  1.1       agc 	{
   2127  1.1       agc 	  int j;
   2128  1.1       agc 	  for (j = 0; p->tags[j] >= 0; j++);
   2129  1.1       agc 	  initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
   2130  1.1       agc 	  if (!initial[i].tags)
   2131  1.1       agc 	    ERROR_EXIT(REG_ESPACE);
   2132  1.1       agc 	  memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
   2133  1.1       agc 	}
   2134  1.1       agc       initial[i].params = NULL;
   2135  1.1       agc       if (p->params)
   2136  1.1       agc 	{
   2137  1.1       agc 	  initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
   2138  1.1       agc 	  if (!initial[i].params)
   2139  1.1       agc 	    ERROR_EXIT(REG_ESPACE);
   2140  1.1       agc 	  memcpy(initial[i].params, p->params,
   2141  1.1       agc 		 sizeof(*p->params) * TRE_PARAM_LAST);
   2142  1.1       agc 	}
   2143  1.1       agc       initial[i].assertions = p->assertions;
   2144  1.1       agc       i++;
   2145  1.1       agc     }
   2146  1.1       agc   initial[i].state = NULL;
   2147  1.1       agc 
   2148  1.1       agc   tnfa->num_transitions = add;
   2149  1.1       agc   tnfa->final = transitions + offs[tree->lastpos[0].position];
   2150  1.1       agc   tnfa->num_states = parse_ctx.position;
   2151  1.1       agc   tnfa->cflags = cflags;
   2152  1.1       agc 
   2153  1.1       agc   DPRINT(("final state %p\n", (void *)tnfa->final));
   2154  1.1       agc 
   2155  1.1       agc   tre_mem_destroy(mem);
   2156  1.1       agc   tre_stack_destroy(stack);
   2157  1.1       agc   xfree(counts);
   2158  1.1       agc   xfree(offs);
   2159  1.1       agc 
   2160  1.1       agc   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
   2161  1.1       agc   return REG_OK;
   2162  1.1       agc 
   2163  1.1       agc  error_exit:
   2164  1.1       agc   /* Free everything that was allocated and return the error code. */
   2165  1.1       agc   tre_mem_destroy(mem);
   2166  1.1       agc   if (stack != NULL)
   2167  1.1       agc     tre_stack_destroy(stack);
   2168  1.1       agc   if (counts != NULL)
   2169  1.1       agc     xfree(counts);
   2170  1.1       agc   if (offs != NULL)
   2171  1.1       agc     xfree(offs);
   2172  1.1       agc   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
   2173  1.1       agc   tre_free(preg);
   2174  1.1       agc   return errcode;
   2175  1.1       agc }
   2176  1.1       agc 
   2177  1.1       agc 
   2178  1.1       agc 
   2179  1.1       agc 
   2180  1.1       agc void
   2181  1.1       agc tre_free(regex_t *preg)
   2182  1.1       agc {
   2183  1.1       agc   tre_tnfa_t *tnfa;
   2184  1.1       agc   unsigned int i;
   2185  1.1       agc   tre_tnfa_transition_t *trans;
   2186  1.1       agc 
   2187  1.1       agc   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
   2188  1.1       agc   if (!tnfa)
   2189  1.1       agc     return;
   2190  1.1       agc 
   2191  1.1       agc   for (i = 0; i < tnfa->num_transitions; i++)
   2192  1.1       agc     if (tnfa->transitions[i].state)
   2193  1.1       agc       {
   2194  1.1       agc 	if (tnfa->transitions[i].tags)
   2195  1.1       agc 	  xfree(tnfa->transitions[i].tags);
   2196  1.1       agc 	if (tnfa->transitions[i].neg_classes)
   2197  1.1       agc 	  xfree(tnfa->transitions[i].neg_classes);
   2198  1.1       agc 	if (tnfa->transitions[i].params)
   2199  1.1       agc 	  xfree(tnfa->transitions[i].params);
   2200  1.1       agc       }
   2201  1.1       agc   if (tnfa->transitions)
   2202  1.1       agc     xfree(tnfa->transitions);
   2203  1.1       agc 
   2204  1.1       agc   if (tnfa->initial)
   2205  1.1       agc     {
   2206  1.1       agc       for (trans = tnfa->initial; trans->state; trans++)
   2207  1.1       agc 	{
   2208  1.1       agc 	  if (trans->tags)
   2209  1.1       agc 	    xfree(trans->tags);
   2210  1.1       agc 	  if (trans->params)
   2211  1.1       agc 	    xfree(trans->params);
   2212  1.1       agc 	}
   2213  1.1       agc       xfree(tnfa->initial);
   2214  1.1       agc     }
   2215  1.1       agc 
   2216  1.1       agc   if (tnfa->submatch_data)
   2217  1.1       agc     {
   2218  1.1       agc       for (i = 0; i < tnfa->num_submatches; i++)
   2219  1.1       agc 	if (tnfa->submatch_data[i].parents)
   2220  1.1       agc 	  xfree(tnfa->submatch_data[i].parents);
   2221  1.1       agc       xfree(tnfa->submatch_data);
   2222  1.1       agc     }
   2223  1.1       agc 
   2224  1.1       agc   if (tnfa->tag_directions)
   2225  1.1       agc     xfree(tnfa->tag_directions);
   2226  1.1       agc   if (tnfa->firstpos_chars)
   2227  1.1       agc     xfree(tnfa->firstpos_chars);
   2228  1.1       agc   if (tnfa->minimal_tags)
   2229  1.1       agc     xfree(tnfa->minimal_tags);
   2230  1.1       agc   xfree(tnfa);
   2231  1.1       agc }
   2232  1.1       agc 
   2233  1.1       agc char *
   2234  1.1       agc tre_version(void)
   2235  1.1       agc {
   2236  1.1       agc   static char str[256];
   2237  1.1       agc   char *version;
   2238  1.1       agc 
   2239  1.1       agc   if (str[0] == 0)
   2240  1.1       agc     {
   2241  1.1       agc       (void) tre_config(TRE_CONFIG_VERSION, &version);
   2242  1.1       agc       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
   2243  1.1       agc     }
   2244  1.1       agc   return str;
   2245  1.1       agc }
   2246  1.1       agc 
   2247  1.1       agc int
   2248  1.1       agc tre_config(int query, void *result)
   2249  1.1       agc {
   2250  1.1       agc   int *int_result = result;
   2251  1.1       agc   const char **string_result = result;
   2252  1.1       agc 
   2253  1.1       agc   switch (query)
   2254  1.1       agc     {
   2255  1.1       agc     case TRE_CONFIG_APPROX:
   2256  1.1       agc #ifdef TRE_APPROX
   2257  1.1       agc       *int_result = 1;
   2258  1.1       agc #else /* !TRE_APPROX */
   2259  1.1       agc       *int_result = 0;
   2260  1.1       agc #endif /* !TRE_APPROX */
   2261  1.1       agc       return REG_OK;
   2262  1.1       agc 
   2263  1.1       agc     case TRE_CONFIG_WCHAR:
   2264  1.1       agc #ifdef TRE_WCHAR
   2265  1.1       agc       *int_result = 1;
   2266  1.1       agc #else /* !TRE_WCHAR */
   2267  1.1       agc       *int_result = 0;
   2268  1.1       agc #endif /* !TRE_WCHAR */
   2269  1.1       agc       return REG_OK;
   2270  1.1       agc 
   2271  1.1       agc     case TRE_CONFIG_MULTIBYTE:
   2272  1.1       agc #ifdef TRE_MULTIBYTE
   2273  1.1       agc       *int_result = 1;
   2274  1.1       agc #else /* !TRE_MULTIBYTE */
   2275  1.1       agc       *int_result = 0;
   2276  1.1       agc #endif /* !TRE_MULTIBYTE */
   2277  1.1       agc       return REG_OK;
   2278  1.1       agc 
   2279  1.1       agc     case TRE_CONFIG_SYSTEM_ABI:
   2280  1.1       agc #ifdef TRE_CONFIG_SYSTEM_ABI
   2281  1.1       agc       *int_result = 1;
   2282  1.1       agc #else /* !TRE_CONFIG_SYSTEM_ABI */
   2283  1.1       agc       *int_result = 0;
   2284  1.1       agc #endif /* !TRE_CONFIG_SYSTEM_ABI */
   2285  1.1       agc       return REG_OK;
   2286  1.1       agc 
   2287  1.1       agc     case TRE_CONFIG_VERSION:
   2288  1.1       agc       *string_result = TRE_VERSION;
   2289  1.1       agc       return REG_OK;
   2290  1.1       agc     }
   2291  1.1       agc 
   2292  1.1       agc   return REG_NOMATCH;
   2293  1.1       agc }
   2294  1.1       agc 
   2295  1.1       agc 
   2296  1.1       agc /* EOF */
   2297