Home | History | Annotate | Line # | Download | only in lib
      1 /*
      2   tre-internal.h - TRE internal definitions
      3 
      4   This software is released under a BSD-style license.
      5   See the file LICENSE for details and copyright.
      6 
      7 */
      8 
      9 #ifndef TRE_INTERNAL_H
     10 #define TRE_INTERNAL_H 1
     11 
     12 #ifdef HAVE_WCHAR_H
     13 #include <wchar.h>
     14 #endif /* HAVE_WCHAR_H */
     15 
     16 #ifdef HAVE_WCTYPE_H
     17 #include <wctype.h>
     18 #endif /* HAVE_WCTYPE_H */
     19 
     20 #include <ctype.h>
     21 #include "tre.h"
     22 
/* DPRINT((fmt, ...)) -- debug trace output.

   The argument must be a complete, parenthesised printf() argument list,
   e.g. DPRINT(("pos = %d\n", pos)).  With TRE_DEBUG defined the message is
   printed and stdout is flushed; otherwise the macro expands to an empty
   statement and its argument is never evaluated.  The `(void)0,0' loop
   condition (with the CONSTCOND marker) is kept exactly as is. */
#ifndef TRE_DEBUG
#define DPRINT(msg) do { } while(/*CONSTCOND*/(void)0,0)
#else /* TRE_DEBUG */
#include <stdio.h>
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/(void)0,0)
#endif /* TRE_DEBUG */
     29 
/* Number of elements in the array `x'.  `x' must be a true array (not a
   pointer and not an array-typed function parameter, which is a pointer);
   the result has type size_t.  The argument is parenthesised before being
   subscripted so that expression arguments are indexed as a whole. */
#define elementsof(x)	( sizeof(x) / sizeof((x)[0]) )
     31 
/* tre_mbrtowc(pwc, s, n, ps) -- multibyte to wide character conversion.

   Expands to mbrtowc() when HAVE_MBRTOWC is defined.  Failing that, and if
   HAVE_MBTOWC is defined, it expands to mbtowc(), in which case the `ps'
   argument is dropped.  When neither feature macro is defined the macro is
   left undefined. */
#if defined(HAVE_MBRTOWC)
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
#elif defined(HAVE_MBTOWC)
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
#endif /* HAVE_MBRTOWC / HAVE_MBTOWC */
     39 
/* TRE_MBSTATE is defined only when multibyte support is enabled
   (TRE_MULTIBYTE) and the system provides mbstate_t (HAVE_MBSTATE_T).
   Written as a single condition so the closing #endif cannot be
   mislabelled. */
#if defined(TRE_MULTIBYTE) && defined(HAVE_MBSTATE_T)
#define TRE_MBSTATE
#endif /* TRE_MULTIBYTE && HAVE_MBSTATE_T */
     45 
/* Define the character types and functions.

   tre_cint_t      integer type used for character codes
   TRE_CHAR_MAX    largest character code
   TRE_MB_CUR_MAX  MB_CUR_MAX for wide + multibyte builds, otherwise 1
   tre_is*()       classification, mapped to <wctype.h> (wide builds) or
                   <ctype.h> (8 bit builds)
   tre_tolower(), tre_toupper(), tre_strlen()
                   case mapping and string length

   tre_isblank (and, for 8 bit characters, tre_isascii) are defined only
   when the matching HAVE_* feature macro is defined. */
#ifdef TRE_WCHAR

/* Wide characters. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX WCHAR_MAX

#ifdef TRE_MULTIBYTE
#define TRE_MB_CUR_MAX MB_CUR_MAX
#else /* !TRE_MULTIBYTE */
#define TRE_MB_CUR_MAX 1
#endif /* !TRE_MULTIBYTE */

#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
#ifdef HAVE_ISWBLANK
#define tre_isblank iswblank
#endif /* HAVE_ISWBLANK */
#define tre_iscntrl iswcntrl
#define tre_isdigit iswdigit
#define tre_isgraph iswgraph
#define tre_islower iswlower
#define tre_isprint iswprint
#define tre_ispunct iswpunct
#define tre_isspace iswspace
#define tre_isupper iswupper
#define tre_isxdigit iswxdigit

#define tre_tolower towlower
#define tre_toupper towupper
#define tre_strlen  wcslen

#else /* !TRE_WCHAR */

/* 8 bit characters. */
typedef short tre_cint_t;
#define TRE_CHAR_MAX 255
#define TRE_MB_CUR_MAX 1

#define tre_isalnum isalnum
#define tre_isalpha isalpha
#ifdef HAVE_ISASCII
#define tre_isascii isascii
#endif /* HAVE_ISASCII */
#ifdef HAVE_ISBLANK
#define tre_isblank isblank
#endif /* HAVE_ISBLANK */
#define tre_iscntrl iscntrl
#define tre_isdigit isdigit
#define tre_isgraph isgraph
#define tre_islower islower
#define tre_isprint isprint
#define tre_ispunct ispunct
#define tre_isspace isspace
#define tre_isupper isupper
#define tre_isxdigit isxdigit

/* The expansions are fully parenthesised so the macros behave like
   ordinary function calls in every expression context (for example as the
   operand of sizeof), and the tre_strlen() argument is parenthesised so an
   expression argument is cast as a whole.  tre_strlen() expands to
   strlen(), so <string.h> must be in scope wherever it is used. */
#define tre_tolower(c) ((tre_cint_t)(tolower(c)))
#define tre_toupper(c) ((tre_cint_t)(toupper(c)))
#define tre_strlen(s)  (strlen((const char *)(s)))

#endif /* !TRE_WCHAR */
    108 
/* Character class handles.

   tre_ctype_t names a character class, tre_ctype(name) obtains one from
   its name and tre_isctype(c, type) tests a character against it.  Wide
   character builds on systems that provide both iswctype() and wctype()
   use those directly (TRE_USE_SYSTEM_WCTYPE is then defined).  In every
   other configuration a class is a pointer to a predicate function taking
   a tre_cint_t, tre_isctype() simply calls it, and tre_ctype() is TRE's
   own function. */
#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
#define TRE_USE_SYSTEM_WCTYPE 1
#endif

#ifndef TRE_USE_SYSTEM_WCTYPE
/* Our own versions of iswctype() and wctype(). */
typedef int (*tre_ctype_t)(tre_cint_t);
#define tre_isctype(c, type) ( (type)(c) )
tre_ctype_t tre_ctype(const char *name);
#else /* TRE_USE_SYSTEM_WCTYPE */
/* System provided iswctype() and wctype(). */
typedef wctype_t tre_ctype_t;
#define tre_isctype iswctype
#define tre_ctype   wctype
#endif /* TRE_USE_SYSTEM_WCTYPE */
    124 
/* Tells the matchers what kind of string their `string' argument points
   to. */
typedef enum {
  STR_WIDE = 0,
  STR_BYTE = 1,
  STR_MBS  = 2,
  STR_USER = 3
} tre_str_type_t;
    126 
/* Returns number of bytes to add to (char *)ptr to make it
   properly aligned for the type.  The result has type size_t and lies in
   the range 0 .. sizeof(type) - 1.

   `ptr' is parenthesised before the cast so that an expression argument
   such as `p + 1' is converted as a whole (pointer arithmetic first, then
   the cast).  The address is converted to size_t rather than long, because
   long is narrower than a pointer on some platforms (e.g. 64-bit Windows).
   `ptr' is evaluated more than once, so it must not have side effects. */
#define ALIGN(ptr, type) \
  ((((size_t)(ptr)) % sizeof(type)) \
   ? (sizeof(type) - (((size_t)(ptr)) % sizeof(type))) \
   : 0)
    133 
/* Smaller / larger of two values.  Any MIN or MAX inherited from a system
   header is dropped first so that these definitions always apply.  When
   the operands compare equal the first one is the result.  Each macro
   evaluates one of its arguments twice, so avoid side effects. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
    138 
/* STRF is the printf conversion for strings, written without the leading
   `%': "ls" when TRE_WCHAR is defined, "s" otherwise.  Use it as
   "%" STRF inside a format string. */
#ifndef TRE_WCHAR
#define STRF "s"
#else /* TRE_WCHAR */
#define STRF "ls"
#endif /* TRE_WCHAR */
    145 
    146 /* TNFA transition type. A TNFA state is an array of transitions,
    147    the terminator is a transition with NULL `state'. */
    148 typedef struct tnfa_transition tre_tnfa_transition_t;
    149 
    150 struct tnfa_transition {
    151   /* Range of accepted characters. */
    152   tre_cint_t code_min;
    153   tre_cint_t code_max;
    154   /* Pointer to the destination state. */
    155   tre_tnfa_transition_t *state;
    156   /* ID number of the destination state. */
    157   int state_id;
    158   /* -1 terminated array of tags (or NULL). */
    159   int *tags;
    160   /* Matching parameters settings (or NULL). */
    161   int *params;
    162   /* Assertion bitmap. */
    163   int assertions;
    164   /* Assertion parameters. */
    165   union {
    166     /* Character class assertion. */
    167     tre_ctype_t class;
    168     /* Back reference assertion. */
    169     int backref;
    170   } u;
    171   /* Negative character class assertions. */
    172   tre_ctype_t *neg_classes;
    173 };
    174 
    175 
/* Assertion flags.  Each flag is a distinct bit; they are combined in the
   `assertions' bitmap of a transition.  ASSERT_LAST has the same value as
   the last flag in the list. */
#define ASSERT_AT_BOL		0x001	/* Beginning of line. */
#define ASSERT_AT_EOL		0x002	/* End of line. */
#define ASSERT_CHAR_CLASS	0x004	/* Character class in `class'. */
#define ASSERT_CHAR_CLASS_NEG	0x008	/* Character classes in `neg_classes'. */
#define ASSERT_AT_BOW		0x010	/* Beginning of word. */
#define ASSERT_AT_EOW		0x020	/* End of word. */
#define ASSERT_AT_WB		0x040	/* Word boundary. */
#define ASSERT_AT_WB_NEG	0x080	/* Not a word boundary. */
#define ASSERT_BACKREF		0x100	/* A back reference in `backref'. */
#define ASSERT_LAST		0x100
    187 
/* Tag directions: TRE_TAG_MINIMIZE is 0, TRE_TAG_MAXIMIZE is 1. */
typedef enum {
  TRE_TAG_MINIMIZE,
  TRE_TAG_MAXIMIZE
} tre_tag_direction_t;
    193 
/* Parameters that can be changed dynamically while matching.  The values
   run consecutively from 0; TRE_PARAM_LAST equals the number of
   parameters listed before it.
   NOTE(review): presumably these index the `params' arrays attached to
   transitions -- confirm against the users of tre_param_t. */
typedef enum {
  TRE_PARAM_COST_INS,		/* 0 */
  TRE_PARAM_COST_DEL,		/* 1 */
  TRE_PARAM_COST_SUBST,		/* 2 */
  TRE_PARAM_COST_MAX,		/* 3 */
  TRE_PARAM_MAX_INS,		/* 4 */
  TRE_PARAM_MAX_DEL,		/* 5 */
  TRE_PARAM_MAX_SUBST,		/* 6 */
  TRE_PARAM_MAX_ERR,		/* 7 */
  TRE_PARAM_DEPTH,		/* 8 */
  TRE_PARAM_LAST		/* 9 */
} tre_param_t;
    207 
/* Special matching parameter values.  Both are negative and are
   parenthesised so that they expand as a single operand wherever they are
   used. */

/* Unset matching parameter */
#define TRE_PARAM_UNSET (-1)

/* Signifies the default matching parameter value. */
#define TRE_PARAM_DEFAULT (-2)
    213 
/* Describes how the register values of one submatch are computed from
   tag values after a successful match. */
typedef struct tre_submatch_data {
  int so_tag;    /* Tag that gives rm_so, the submatch start offset. */
  int eo_tag;    /* Tag that gives rm_eo, the submatch end offset. */
  int *parents;  /* List of submatches that contain this submatch. */
} tre_submatch_data_t;
    226 
    227 
    228 /* TNFA definition. */
    229 typedef struct tnfa tre_tnfa_t;
    230 
    231 struct tnfa {
    232   tre_tnfa_transition_t *transitions;
    233   size_t num_transitions;
    234   tre_tnfa_transition_t *initial;
    235   tre_tnfa_transition_t *final;
    236   tre_submatch_data_t *submatch_data;
    237   char *firstpos_chars;
    238   int first_char;
    239   size_t num_submatches;
    240   tre_tag_direction_t *tag_directions;
    241   int *minimal_tags;
    242   size_t num_tags;
    243   size_t num_minimals;
    244   int end_tag;
    245   size_t num_states;
    246   int cflags;
    247   int have_backrefs;
    248   int have_approx;
    249   int params_depth;
    250 };
    251 
    252 int
    253 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
    254 
    255 void
    256 tre_free(regex_t *preg);
    257 
    258 void
    259 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
    260 		const tre_tnfa_t *tnfa, int *tags, int match_eo);
    261 
    262 reg_errcode_t
    263 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
    264 		      tre_str_type_t type, int *match_tags, int eflags,
    265 		      int *match_end_ofs);
    266 
    267 reg_errcode_t
    268 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
    269 		      tre_str_type_t type, int *match_tags, int eflags,
    270 		      int *match_end_ofs);
    271 
    272 reg_errcode_t
    273 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
    274 		       int len, tre_str_type_t type, int *match_tags,
    275 		       int eflags, int *match_end_ofs);
    276 
    277 #ifdef TRE_APPROX
    278 reg_errcode_t
    279 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
    280 		    tre_str_type_t type, int *match_tags,
    281 		    regamatch_t *match, regaparams_t params,
    282 		    int eflags, int *match_end_ofs);
    283 #endif /* TRE_APPROX */
    284 
    285 #endif /* TRE_INTERNAL_H */
    286 
    287 /* EOF */
    288