Home | History | Annotate | Line # | Download | only in lib
      1 /*
      2   tre-ast.h - Abstract syntax tree (AST) definitions
      3 
      4   This software is released under a BSD-style license.
      5   See the file LICENSE for details and copyright.
      6 
      7 */
      8 
      9 
     10 #ifndef TRE_AST_H
     11 #define TRE_AST_H 1
     12 
     13 #include "tre-mem.h"
     14 #include "tre-internal.h"
     15 #include "tre-compile.h"
     16 
     17 /* The different AST node types. */
     18 typedef enum {
     19   LITERAL,
     20   CATENATION,
     21   ITERATION,
     22   UNION
     23 } tre_ast_type_t;
     24 
     25 /* Special subtypes of TRE_LITERAL. */
     26 #define EMPTY	  -1   /* Empty leaf (denotes empty string). */
     27 #define ASSERTION -2   /* Assertion leaf. */
     28 #define TAG	  -3   /* Tag leaf. */
     29 #define BACKREF	  -4   /* Back reference leaf. */
     30 #define PARAMETER -5   /* Parameter. */
     31 
     32 #define IS_SPECIAL(x)	((x)->code_min < 0)
     33 #define IS_EMPTY(x)	((x)->code_min == EMPTY)
     34 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
     35 #define IS_TAG(x)	((x)->code_min == TAG)
     36 #define IS_BACKREF(x)	((x)->code_min == BACKREF)
     37 #define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
     38 
     39 
     40 /* A generic AST node.  All AST nodes consist of this node on the top
     41    level with `obj' pointing to the actual content. */
     42 typedef struct {
     43   tre_ast_type_t type;   /* Type of the node. */
     44   void *obj;             /* Pointer to actual node. */
     45   int nullable;
     46   int submatch_id;
     47   size_t num_submatches;
     48   size_t num_tags;
     49   tre_pos_and_tags_t *firstpos;
     50   tre_pos_and_tags_t *lastpos;
     51 } tre_ast_node_t;
     52 
     53 
     54 /* A "literal" node.  These are created for assertions, back references,
     55    tags, matching parameter settings, and all expressions that match one
     56    character. */
     57 typedef struct {
     58   int code_min;
     59   int code_max;
     60   int position;
     61   union {
     62     tre_ctype_t class;
     63     int *params;
     64   } u;
     65   tre_ctype_t *neg_classes;
     66 } tre_literal_t;
     67 
     68 /* A "catenation" node.	 These are created when two regexps are concatenated.
     69    If there are more than one subexpressions in sequence, the `left' part
     70    holds all but the last, and `right' part holds the last subexpression
     71    (catenation is left associative). */
     72 typedef struct {
     73   tre_ast_node_t *left;
     74   tre_ast_node_t *right;
     75 } tre_catenation_t;
     76 
     77 /* An "iteration" node.	 These are created for the "*", "+", "?", and "{m,n}"
     78    operators. */
     79 typedef struct {
     80   /* Subexpression to match. */
     81   tre_ast_node_t *arg;
     82   /* Minimum number of consecutive matches. */
     83   int min;
     84   /* Maximum number of consecutive matches. */
     85   int max;
     86   /* If 0, match as many characters as possible, if 1 match as few as
     87      possible.	Note that this does not always mean the same thing as
     88      matching as many/few repetitions as possible. */
     89   unsigned int minimal:1;
     90   /* Approximate matching parameters (or NULL). */
     91   int *params;
     92 } tre_iteration_t;
     93 
     94 /* An "union" node.  These are created for the "|" operator. */
     95 typedef struct {
     96   tre_ast_node_t *left;
     97   tre_ast_node_t *right;
     98 } tre_union_t;
     99 
    100 tre_ast_node_t *
    101 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
    102 
    103 tre_ast_node_t *
    104 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
    105 
    106 tre_ast_node_t *
    107 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
    108 		 int minimal);
    109 
    110 tre_ast_node_t *
    111 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
    112 
    113 tre_ast_node_t *
    114 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
    115 		       tre_ast_node_t *right);
    116 
    117 #ifdef TRE_DEBUG
    118 void
    119 tre_ast_print(tre_ast_node_t *tree);
    120 
    121 /* XXX - rethink AST printing API */
    122 void
    123 tre_print_params(int *params);
    124 #endif /* TRE_DEBUG */
    125 
    126 #endif /* TRE_AST_H */
    127 
    128 /* EOF */
    129