1 1.1 agc /* 2 1.1 agc tre-match-backtrack.c - TRE backtracking regex matching engine 3 1.1 agc 4 1.1 agc This software is released under a BSD-style license. 5 1.1 agc See the file LICENSE for details and copyright. 6 1.1 agc 7 1.1 agc */ 8 1.1 agc 9 1.1 agc /* 10 1.1 agc This matcher is for regexps that use back referencing. Regexp matching 11 1.1 agc with back referencing is an NP-complete problem on the number of back 12 1.1 agc references. The easiest way to match them is to use a backtracking 13 1.1 agc routine which basically goes through all possible paths in the TNFA 14 1.1 agc and chooses the one which results in the best (leftmost and longest) 15 1.1 agc match. This can be spectacularly expensive and may run out of stack 16 1.1 agc space, but there really is no better known generic algorithm. Quoting 17 1.1 agc Henry Spencer from comp.compilers: 18 1.1 agc <URL: http://compilers.iecc.com/comparch/article/93-03-102> 19 1.1 agc 20 1.1 agc POSIX.2 REs require longest match, which is really exciting to 21 1.1 agc implement since the obsolete ("basic") variant also includes 22 1.1 agc \<digit>. I haven't found a better way of tackling this than doing 23 1.1 agc a preliminary match using a DFA (or simulation) on a modified RE 24 1.1 agc that just replicates subREs for \<digit>, and then doing a 25 1.1 agc backtracking match to determine whether the subRE matches were 26 1.1 agc right. This can be rather slow, but I console myself with the 27 1.1 agc thought that people who use \<digit> deserve very slow execution. 28 1.1 agc (Pun unintentional but very appropriate.) 29 1.1 agc 30 1.1 agc */ 31 1.1 agc 32 1.1 agc 33 1.1 agc #ifdef HAVE_CONFIG_H 34 1.1 agc #include <config.h> 35 1.1 agc #endif /* HAVE_CONFIG_H */ 36 1.1 agc 37 1.3 christos #include "tre-alloca.h" 38 1.1 agc 39 1.1 agc #include <assert.h> 40 1.1 agc #include <stdlib.h> 41 1.1 agc #include <string.h> 42 1.1 agc #ifdef HAVE_WCHAR_H 43 1.1 agc #include <wchar.h> 44 1.1 agc #endif /* HAVE_WCHAR_H */ 45 1.1 agc #ifdef HAVE_WCTYPE_H 46 1.1 agc #include <wctype.h> 47 1.1 agc #endif /* HAVE_WCTYPE_H */ 48 1.1 agc #ifndef TRE_WCHAR 49 1.1 agc #include <ctype.h> 50 1.1 agc #endif /* !TRE_WCHAR */ 51 1.1 agc #ifdef HAVE_MALLOC_H 52 1.1 agc #include <malloc.h> 53 1.1 agc #endif /* HAVE_MALLOC_H */ 54 1.1 agc 55 1.1 agc #include "tre-internal.h" 56 1.1 agc #include "tre-mem.h" 57 1.1 agc #include "tre-match-utils.h" 58 1.1 agc #include "tre.h" 59 1.1 agc #include "xmalloc.h" 60 1.1 agc 61 1.1 agc typedef struct { 62 1.1 agc int pos; 63 1.1 agc const char *str_byte; 64 1.1 agc #ifdef TRE_WCHAR 65 1.1 agc const wchar_t *str_wide; 66 1.1 agc #endif /* TRE_WCHAR */ 67 1.1 agc tre_tnfa_transition_t *state; 68 1.1 agc int state_id; 69 1.1 agc int next_c; 70 1.1 agc int *tags; 71 1.1 agc #ifdef TRE_MBSTATE 72 1.1 agc mbstate_t mbstate; 73 1.1 agc #endif /* TRE_MBSTATE */ 74 1.1 agc } tre_backtrack_item_t; 75 1.1 agc 76 1.1 agc typedef struct tre_backtrack_struct { 77 1.1 agc tre_backtrack_item_t item; 78 1.1 agc struct tre_backtrack_struct *prev; 79 1.1 agc struct tre_backtrack_struct *next; 80 1.1 agc } *tre_backtrack_t; 81 1.1 agc 82 1.1 agc #ifdef TRE_WCHAR 83 1.1 agc #define BT_STACK_WIDE_IN(_str_wide) stack->item.str_wide = (_str_wide) 84 1.1 agc #define BT_STACK_WIDE_OUT (str_wide) = stack->item.str_wide 85 1.1 agc #else /* !TRE_WCHAR */ 86 1.1 agc #define BT_STACK_WIDE_IN(_str_wide) 87 1.1 agc #define BT_STACK_WIDE_OUT 88 1.1 agc #endif /* !TRE_WCHAR */ 89 1.1 agc 90 1.1 agc #ifdef TRE_MBSTATE 91 1.1 agc #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) 92 1.1 agc #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate 93 1.1 agc #else /* !TRE_MBSTATE */ 94 1.1 agc #define BT_STACK_MBSTATE_IN 95 1.1 agc #define BT_STACK_MBSTATE_OUT 96 1.1 agc #endif /* !TRE_MBSTATE */ 97 1.1 agc 98 1.1 agc 99 1.1 agc #ifdef TRE_USE_ALLOCA 100 1.1 agc #define tre_bt_mem_new tre_mem_newa 101 1.1 agc #define tre_bt_mem_alloc tre_mem_alloca 102 1.5 rin #define tre_bt_mem_destroy(obj) do { } while (/*CONSTCOND*/(void)0,0) 103 1.1 agc #else /* !TRE_USE_ALLOCA */ 104 1.1 agc #define tre_bt_mem_new tre_mem_new 105 1.1 agc #define tre_bt_mem_alloc tre_mem_alloc 106 1.1 agc #define tre_bt_mem_destroy tre_mem_destroy 107 1.1 agc #endif /* !TRE_USE_ALLOCA */ 108 1.1 agc 109 1.1 agc 110 1.1 agc #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \ 111 1.1 agc do \ 112 1.1 agc { \ 113 1.3 christos size_t i; \ 114 1.1 agc if (!stack->next) \ 115 1.1 agc { \ 116 1.1 agc tre_backtrack_t s; \ 117 1.1 agc s = tre_bt_mem_alloc(mem, sizeof(*s)); \ 118 1.1 agc if (!s) \ 119 1.1 agc { \ 120 1.1 agc tre_bt_mem_destroy(mem); \ 121 1.1 agc if (tags) \ 122 1.1 agc xfree(tags); \ 123 1.1 agc if (pmatch) \ 124 1.1 agc xfree(pmatch); \ 125 1.1 agc if (states_seen) \ 126 1.1 agc xfree(states_seen); \ 127 1.1 agc return REG_ESPACE; \ 128 1.1 agc } \ 129 1.1 agc s->prev = stack; \ 130 1.1 agc s->next = NULL; \ 131 1.1 agc s->item.tags = tre_bt_mem_alloc(mem, \ 132 1.1 agc sizeof(*tags) * tnfa->num_tags); \ 133 1.1 agc if (!s->item.tags) \ 134 1.1 agc { \ 135 1.1 agc tre_bt_mem_destroy(mem); \ 136 1.1 agc if (tags) \ 137 1.1 agc xfree(tags); \ 138 1.1 agc if (pmatch) \ 139 1.1 agc xfree(pmatch); \ 140 1.1 agc if (states_seen) \ 141 1.1 agc xfree(states_seen); \ 142 1.1 agc return REG_ESPACE; \ 143 1.1 agc } \ 144 1.1 agc stack->next = s; \ 145 1.1 agc stack = s; \ 146 1.1 agc } \ 147 1.1 agc else \ 148 1.1 agc stack = stack->next; \ 149 1.1 agc stack->item.pos = (_pos); \ 150 1.1 agc stack->item.str_byte = (_str_byte); \ 151 1.1 agc BT_STACK_WIDE_IN(_str_wide); \ 152 1.1 agc stack->item.state = (_state); \ 153 1.1 agc stack->item.state_id = (_state_id); \ 154 1.1 agc stack->item.next_c = (_next_c); \ 155 1.1 agc for (i = 0; i < tnfa->num_tags; i++) \ 156 1.1 agc stack->item.tags[i] = (_tags)[i]; \ 157 1.1 agc BT_STACK_MBSTATE_IN; \ 158 1.1 agc } \ 159 1.5 rin while (/*CONSTCOND*/(void)0,0) 160 1.1 agc 161 1.1 agc #define BT_STACK_POP() \ 162 1.1 agc do \ 163 1.1 agc { \ 164 1.3 christos size_t i; \ 165 1.1 agc assert(stack->prev); \ 166 1.1 agc pos = stack->item.pos; \ 167 1.1 agc if (type == STR_USER) \ 168 1.1 agc str_source->rewind(pos + pos_add_next, str_source->context); \ 169 1.1 agc str_byte = stack->item.str_byte; \ 170 1.1 agc BT_STACK_WIDE_OUT; \ 171 1.1 agc state = stack->item.state; \ 172 1.5 rin next_c = (tre_char_t) stack->item.next_c; \ 173 1.1 agc for (i = 0; i < tnfa->num_tags; i++) \ 174 1.1 agc tags[i] = stack->item.tags[i]; \ 175 1.1 agc BT_STACK_MBSTATE_OUT; \ 176 1.1 agc stack = stack->prev; \ 177 1.1 agc } \ 178 1.5 rin while (/*CONSTCOND*/(void)0,0) 179 1.1 agc 180 1.1 agc #undef MIN 181 1.1 agc #define MIN(a, b) ((a) <= (b) ? (a) : (b)) 182 1.1 agc 183 1.1 agc reg_errcode_t 184 1.1 agc tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 185 1.1 agc int len, tre_str_type_t type, int *match_tags, 186 1.1 agc int eflags, int *match_end_ofs) 187 1.1 agc { 188 1.1 agc /* State variables required by GET_NEXT_WCHAR. */ 189 1.1 agc tre_char_t prev_c = 0, next_c = 0; 190 1.1 agc const char *str_byte = string; 191 1.1 agc int pos = 0; 192 1.1 agc unsigned int pos_add_next = 1; 193 1.1 agc #ifdef TRE_WCHAR 194 1.1 agc const wchar_t *str_wide = string; 195 1.1 agc #ifdef TRE_MBSTATE 196 1.1 agc mbstate_t mbstate; 197 1.1 agc #endif /* TRE_MBSTATE */ 198 1.1 agc #endif /* TRE_WCHAR */ 199 1.1 agc int reg_notbol = eflags & REG_NOTBOL; 200 1.1 agc int reg_noteol = eflags & REG_NOTEOL; 201 1.1 agc int reg_newline = tnfa->cflags & REG_NEWLINE; 202 1.3 christos size_t str_user_end = 0; 203 1.1 agc 204 1.1 agc /* These are used to remember the necessary values of the above 205 1.1 agc variables to return to the position where the current search 206 1.1 agc started from. */ 207 1.1 agc int next_c_start; 208 1.1 agc const char *str_byte_start; 209 1.1 agc int pos_start = -1; 210 1.1 agc #ifdef TRE_WCHAR 211 1.1 agc const wchar_t *str_wide_start; 212 1.1 agc #endif /* TRE_WCHAR */ 213 1.1 agc #ifdef TRE_MBSTATE 214 1.1 agc mbstate_t mbstate_start; 215 1.1 agc #endif /* TRE_MBSTATE */ 216 1.1 agc 217 1.1 agc /* End offset of best match so far, or -1 if no match found yet. */ 218 1.1 agc int match_eo = -1; 219 1.1 agc /* Tag arrays. */ 220 1.1 agc int *next_tags, *tags = NULL; 221 1.1 agc /* Current TNFA state. */ 222 1.1 agc tre_tnfa_transition_t *state; 223 1.1 agc int *states_seen = NULL; 224 1.1 agc 225 1.1 agc /* Memory allocator to for allocating the backtracking stack. */ 226 1.1 agc tre_mem_t mem = tre_bt_mem_new(); 227 1.1 agc 228 1.1 agc /* The backtracking stack. */ 229 1.1 agc tre_backtrack_t stack; 230 1.1 agc 231 1.1 agc tre_tnfa_transition_t *trans_i; 232 1.1 agc regmatch_t *pmatch = NULL; 233 1.6 rin reg_errcode_t ret; 234 1.1 agc 235 1.1 agc #ifdef TRE_MBSTATE 236 1.1 agc memset(&mbstate, '\0', sizeof(mbstate)); 237 1.1 agc #endif /* TRE_MBSTATE */ 238 1.1 agc 239 1.1 agc if (!mem) 240 1.1 agc return REG_ESPACE; 241 1.1 agc stack = tre_bt_mem_alloc(mem, sizeof(*stack)); 242 1.1 agc if (!stack) 243 1.1 agc { 244 1.1 agc ret = REG_ESPACE; 245 1.1 agc goto error_exit; 246 1.1 agc } 247 1.1 agc stack->prev = NULL; 248 1.1 agc stack->next = NULL; 249 1.1 agc 250 1.1 agc DPRINT(("tnfa_execute_backtrack, input type %d\n", type)); 251 1.1 agc DPRINT(("len = %d\n", len)); 252 1.1 agc 253 1.1 agc #ifdef TRE_USE_ALLOCA 254 1.1 agc tags = alloca(sizeof(*tags) * tnfa->num_tags); 255 1.1 agc pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches); 256 1.1 agc states_seen = alloca(sizeof(*states_seen) * tnfa->num_states); 257 1.1 agc #else /* !TRE_USE_ALLOCA */ 258 1.1 agc if (tnfa->num_tags) 259 1.1 agc { 260 1.1 agc tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 261 1.1 agc if (!tags) 262 1.1 agc { 263 1.1 agc ret = REG_ESPACE; 264 1.1 agc goto error_exit; 265 1.1 agc } 266 1.1 agc } 267 1.1 agc if (tnfa->num_submatches) 268 1.1 agc { 269 1.1 agc pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); 270 1.1 agc if (!pmatch) 271 1.1 agc { 272 1.1 agc ret = REG_ESPACE; 273 1.1 agc goto error_exit; 274 1.1 agc } 275 1.1 agc } 276 1.1 agc if (tnfa->num_states) 277 1.1 agc { 278 1.1 agc states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); 279 1.1 agc if (!states_seen) 280 1.1 agc { 281 1.1 agc ret = REG_ESPACE; 282 1.1 agc goto error_exit; 283 1.1 agc } 284 1.1 agc } 285 1.1 agc #endif /* !TRE_USE_ALLOCA */ 286 1.1 agc 287 1.1 agc retry: 288 1.1 agc { 289 1.3 christos size_t i; 290 1.1 agc for (i = 0; i < tnfa->num_tags; i++) 291 1.1 agc { 292 1.1 agc tags[i] = -1; 293 1.1 agc if (match_tags) 294 1.1 agc match_tags[i] = -1; 295 1.1 agc } 296 1.1 agc for (i = 0; i < tnfa->num_states; i++) 297 1.1 agc states_seen[i] = 0; 298 1.1 agc } 299 1.1 agc 300 1.1 agc state = NULL; 301 1.1 agc pos = pos_start; 302 1.1 agc if (type == STR_USER) 303 1.1 agc str_source->rewind(pos + pos_add_next, str_source->context); 304 1.1 agc GET_NEXT_WCHAR(); 305 1.1 agc pos_start = pos; 306 1.1 agc next_c_start = next_c; 307 1.1 agc str_byte_start = str_byte; 308 1.1 agc #ifdef TRE_WCHAR 309 1.1 agc str_wide_start = str_wide; 310 1.1 agc #endif /* TRE_WCHAR */ 311 1.1 agc #ifdef TRE_MBSTATE 312 1.1 agc mbstate_start = mbstate; 313 1.1 agc #endif /* TRE_MBSTATE */ 314 1.1 agc 315 1.1 agc /* Handle initial states. */ 316 1.1 agc next_tags = NULL; 317 1.1 agc for (trans_i = tnfa->initial; trans_i->state; trans_i++) 318 1.1 agc { 319 1.1 agc DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c)); 320 1.1 agc if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) 321 1.1 agc { 322 1.1 agc DPRINT(("assert failed\n")); 323 1.1 agc continue; 324 1.1 agc } 325 1.1 agc if (state == NULL) 326 1.1 agc { 327 1.1 agc /* Start from this state. */ 328 1.1 agc state = trans_i->state; 329 1.1 agc next_tags = trans_i->tags; 330 1.1 agc } 331 1.1 agc else 332 1.1 agc { 333 1.1 agc /* Backtrack to this state. */ 334 1.1 agc DPRINT(("saving state %d for backtracking\n", trans_i->state_id)); 335 1.1 agc BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state, 336 1.1 agc trans_i->state_id, next_c, tags, mbstate); 337 1.1 agc { 338 1.1 agc int *tmp = trans_i->tags; 339 1.1 agc if (tmp) 340 1.1 agc while (*tmp >= 0) 341 1.1 agc stack->item.tags[*tmp++] = pos; 342 1.1 agc } 343 1.1 agc } 344 1.1 agc } 345 1.1 agc 346 1.1 agc if (next_tags) 347 1.1 agc for (; *next_tags >= 0; next_tags++) 348 1.1 agc tags[*next_tags] = pos; 349 1.1 agc 350 1.1 agc 351 1.1 agc DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte)); 352 1.1 agc DPRINT(("pos:chr/code | state and tags\n")); 353 1.1 agc DPRINT(("-------------+------------------------------------------------\n")); 354 1.1 agc 355 1.1 agc if (state == NULL) 356 1.1 agc goto backtrack; 357 1.1 agc 358 1.5 rin while (/*CONSTCOND*/(void)1,1) 359 1.1 agc { 360 1.1 agc tre_tnfa_transition_t *next_state; 361 1.1 agc int empty_br_match; 362 1.1 agc 363 1.1 agc DPRINT(("start loop\n")); 364 1.1 agc if (state == tnfa->final) 365 1.1 agc { 366 1.1 agc DPRINT((" match found, %d %d\n", match_eo, pos)); 367 1.1 agc if (match_eo < pos 368 1.1 agc || (match_eo == pos 369 1.1 agc && match_tags 370 1.1 agc && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, 371 1.1 agc tags, match_tags))) 372 1.1 agc { 373 1.3 christos size_t i; 374 1.1 agc /* This match wins the previous match. */ 375 1.1 agc DPRINT((" win previous\n")); 376 1.1 agc match_eo = pos; 377 1.1 agc if (match_tags) 378 1.1 agc for (i = 0; i < tnfa->num_tags; i++) 379 1.1 agc match_tags[i] = tags[i]; 380 1.1 agc } 381 1.1 agc /* Our TNFAs never have transitions leaving from the final state, 382 1.1 agc so we jump right to backtracking. */ 383 1.1 agc goto backtrack; 384 1.1 agc } 385 1.1 agc 386 1.1 agc #ifdef TRE_DEBUG 387 1.1 agc DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c, 388 1.1 agc state)); 389 1.1 agc { 390 1.1 agc int i; 391 1.1 agc for (i = 0; i < tnfa->num_tags; i++) 392 1.1 agc DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? ", " : "")); 393 1.1 agc DPRINT(("\n")); 394 1.1 agc } 395 1.1 agc #endif /* TRE_DEBUG */ 396 1.1 agc 397 1.1 agc /* Go to the next character in the input string. */ 398 1.1 agc empty_br_match = 0; 399 1.1 agc trans_i = state; 400 1.1 agc if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) 401 1.1 agc { 402 1.1 agc /* This is a back reference state. All transitions leaving from 403 1.1 agc this state have the same back reference "assertion". Instead 404 1.1 agc of reading the next character, we match the back reference. */ 405 1.2 ahoka regoff_t so, eo, bt = trans_i->u.backref; 406 1.2 ahoka regoff_t bt_len; 407 1.2 ahoka regoff_t result; 408 1.1 agc 409 1.1 agc DPRINT((" should match back reference %d\n", bt)); 410 1.1 agc /* Get the substring we need to match against. Remember to 411 1.1 agc turn off REG_NOSUB temporarily. */ 412 1.7 rin tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, 413 1.1 agc tnfa, tags, pos); 414 1.2 ahoka /* LINTED */so = pmatch[bt].rm_so; 415 1.2 ahoka /* LINTED */eo = pmatch[bt].rm_eo; 416 1.1 agc bt_len = eo - so; 417 1.1 agc 418 1.1 agc #ifdef TRE_DEBUG 419 1.1 agc { 420 1.1 agc int slen; 421 1.1 agc if (len < 0) 422 1.1 agc slen = bt_len; 423 1.1 agc else 424 1.1 agc slen = MIN(bt_len, len - pos); 425 1.1 agc 426 1.1 agc if (type == STR_BYTE) 427 1.1 agc { 428 1.1 agc DPRINT((" substring (len %d) is [%d, %d[: '%.*s'\n", 429 1.1 agc bt_len, so, eo, bt_len, (char*)string + so)); 430 1.1 agc DPRINT((" current string is '%.*s'\n", slen, str_byte - 1)); 431 1.1 agc } 432 1.1 agc #ifdef TRE_WCHAR 433 1.1 agc else if (type == STR_WIDE) 434 1.1 agc { 435 1.1 agc DPRINT((" substring (len %d) is [%d, %d[: '%.*" STRF "'\n", 436 1.1 agc bt_len, so, eo, bt_len, (wchar_t*)string + so)); 437 1.1 agc DPRINT((" current string is '%.*" STRF "'\n", 438 1.1 agc slen, str_wide - 1)); 439 1.1 agc } 440 1.1 agc #endif /* TRE_WCHAR */ 441 1.1 agc } 442 1.1 agc #endif 443 1.1 agc 444 1.1 agc if (len < 0) 445 1.1 agc { 446 1.1 agc if (type == STR_USER) 447 1.1 agc result = str_source->compare((unsigned)so, (unsigned)pos, 448 1.1 agc (unsigned)bt_len, 449 1.1 agc str_source->context); 450 1.1 agc #ifdef TRE_WCHAR 451 1.1 agc else if (type == STR_WIDE) 452 1.1 agc result = wcsncmp((const wchar_t*)string + so, str_wide - 1, 453 1.1 agc (size_t)bt_len); 454 1.1 agc #endif /* TRE_WCHAR */ 455 1.1 agc else 456 1.2 ahoka /* LINTED */result = strncmp((const char*)string + so, str_byte - 1, 457 1.1 agc (size_t)bt_len); 458 1.1 agc } 459 1.1 agc else if (len - pos < bt_len) 460 1.1 agc result = 1; 461 1.1 agc #ifdef TRE_WCHAR 462 1.1 agc else if (type == STR_WIDE) 463 1.1 agc result = wmemcmp((const wchar_t*)string + so, str_wide - 1, 464 1.1 agc (size_t)bt_len); 465 1.1 agc #endif /* TRE_WCHAR */ 466 1.1 agc else 467 1.2 ahoka /* LINTED */result = memcmp((const char*)string + so, str_byte - 1, 468 1.1 agc (size_t)bt_len); 469 1.1 agc 470 1.1 agc if (result == 0) 471 1.1 agc { 472 1.1 agc /* Back reference matched. Check for infinite loop. */ 473 1.1 agc if (bt_len == 0) 474 1.1 agc empty_br_match = 1; 475 1.1 agc if (empty_br_match && states_seen[trans_i->state_id]) 476 1.1 agc { 477 1.1 agc DPRINT((" avoid loop\n")); 478 1.1 agc goto backtrack; 479 1.1 agc } 480 1.1 agc 481 1.1 agc states_seen[trans_i->state_id] = empty_br_match; 482 1.1 agc 483 1.1 agc /* Advance in input string and resync `prev_c', `next_c' 484 1.1 agc and pos. */ 485 1.1 agc DPRINT((" back reference matched\n")); 486 1.2 ahoka /* LINTED */str_byte += bt_len - 1; 487 1.1 agc #ifdef TRE_WCHAR 488 1.2 ahoka /* LINTED */str_wide += bt_len - 1; 489 1.1 agc #endif /* TRE_WCHAR */ 490 1.2 ahoka /* LINTED */pos += bt_len - 1; 491 1.1 agc GET_NEXT_WCHAR(); 492 1.1 agc DPRINT((" pos now %d\n", pos)); 493 1.1 agc } 494 1.1 agc else 495 1.1 agc { 496 1.1 agc DPRINT((" back reference did not match\n")); 497 1.1 agc goto backtrack; 498 1.1 agc } 499 1.1 agc } 500 1.1 agc else 501 1.1 agc { 502 1.1 agc /* Check for end of string. */ 503 1.1 agc if (len < 0) 504 1.1 agc { 505 1.1 agc if (type == STR_USER) 506 1.1 agc { 507 1.1 agc if (str_user_end) 508 1.1 agc goto backtrack; 509 1.1 agc } 510 1.1 agc else if (next_c == L'\0') 511 1.1 agc goto backtrack; 512 1.1 agc } 513 1.1 agc else 514 1.1 agc { 515 1.1 agc if (pos >= len) 516 1.1 agc goto backtrack; 517 1.1 agc } 518 1.1 agc 519 1.1 agc /* Read the next character. */ 520 1.1 agc GET_NEXT_WCHAR(); 521 1.1 agc } 522 1.1 agc 523 1.1 agc next_state = NULL; 524 1.1 agc for (trans_i = state; trans_i->state; trans_i++) 525 1.1 agc { 526 1.1 agc DPRINT((" transition %d-%d (%c-%c) %d to %d\n", 527 1.1 agc trans_i->code_min, trans_i->code_max, 528 1.1 agc trans_i->code_min, trans_i->code_max, 529 1.1 agc trans_i->assertions, trans_i->state_id)); 530 1.1 agc if (trans_i->code_min <= (tre_cint_t)prev_c 531 1.1 agc && trans_i->code_max >= (tre_cint_t)prev_c) 532 1.1 agc { 533 1.1 agc if (trans_i->assertions 534 1.1 agc && (CHECK_ASSERTIONS(trans_i->assertions) 535 1.1 agc || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) 536 1.1 agc { 537 1.1 agc DPRINT((" assertion failed\n")); 538 1.1 agc continue; 539 1.1 agc } 540 1.1 agc 541 1.1 agc if (next_state == NULL) 542 1.1 agc { 543 1.1 agc /* First matching transition. */ 544 1.1 agc DPRINT((" Next state is %d\n", trans_i->state_id)); 545 1.1 agc next_state = trans_i->state; 546 1.1 agc next_tags = trans_i->tags; 547 1.1 agc } 548 1.1 agc else 549 1.1 agc { 550 1.1 agc /* Second matching transition. We may need to backtrack here 551 1.1 agc to take this transition instead of the first one, so we 552 1.1 agc push this transition in the backtracking stack so we can 553 1.1 agc jump back here if needed. */ 554 1.1 agc DPRINT((" saving state %d for backtracking\n", 555 1.1 agc trans_i->state_id)); 556 1.1 agc BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state, 557 1.1 agc trans_i->state_id, next_c, tags, mbstate); 558 1.1 agc { 559 1.1 agc int *tmp; 560 1.1 agc for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) 561 1.1 agc stack->item.tags[*tmp] = pos; 562 1.1 agc } 563 1.1 agc #if 0 /* XXX - it's important not to look at all transitions here to keep 564 1.1 agc the stack small! */ 565 1.1 agc break; 566 1.1 agc #endif 567 1.1 agc } 568 1.1 agc } 569 1.1 agc } 570 1.1 agc 571 1.1 agc if (next_state != NULL) 572 1.1 agc { 573 1.1 agc /* Matching transitions were found. Take the first one. */ 574 1.1 agc state = next_state; 575 1.1 agc 576 1.1 agc /* Update the tag values. */ 577 1.1 agc if (next_tags) 578 1.1 agc while (*next_tags >= 0) 579 1.1 agc tags[*next_tags++] = pos; 580 1.1 agc } 581 1.1 agc else 582 1.1 agc { 583 1.1 agc backtrack: 584 1.1 agc /* A matching transition was not found. Try to backtrack. */ 585 1.1 agc if (stack->prev) 586 1.1 agc { 587 1.1 agc DPRINT((" backtracking\n")); 588 1.4 joerg #if ASSERT_BACKREF 589 1.4 joerg if (stack->item.state->assertions) 590 1.1 agc { 591 1.1 agc DPRINT((" states_seen[%d] = 0\n", 592 1.1 agc stack->item.state_id)); 593 1.1 agc states_seen[stack->item.state_id] = 0; 594 1.1 agc } 595 1.4 joerg #endif 596 1.1 agc 597 1.1 agc BT_STACK_POP(); 598 1.1 agc } 599 1.1 agc else if (match_eo < 0) 600 1.1 agc { 601 1.1 agc /* Try starting from a later position in the input string. */ 602 1.1 agc /* Check for end of string. */ 603 1.1 agc if (len < 0) 604 1.1 agc { 605 1.1 agc if (next_c == L'\0') 606 1.1 agc { 607 1.1 agc DPRINT(("end of string.\n")); 608 1.1 agc break; 609 1.1 agc } 610 1.1 agc } 611 1.1 agc else 612 1.1 agc { 613 1.1 agc if (pos >= len) 614 1.1 agc { 615 1.1 agc DPRINT(("end of string.\n")); 616 1.1 agc break; 617 1.1 agc } 618 1.1 agc } 619 1.1 agc DPRINT(("restarting from next start position\n")); 620 1.5 rin next_c = (tre_char_t) next_c_start; 621 1.1 agc #ifdef TRE_MBSTATE 622 1.1 agc mbstate = mbstate_start; 623 1.1 agc #endif /* TRE_MBSTATE */ 624 1.1 agc str_byte = str_byte_start; 625 1.1 agc #ifdef TRE_WCHAR 626 1.1 agc str_wide = str_wide_start; 627 1.1 agc #endif /* TRE_WCHAR */ 628 1.1 agc goto retry; 629 1.1 agc } 630 1.1 agc else 631 1.1 agc { 632 1.1 agc DPRINT(("finished\n")); 633 1.1 agc break; 634 1.1 agc } 635 1.1 agc } 636 1.1 agc } 637 1.1 agc 638 1.1 agc ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 639 1.1 agc *match_end_ofs = match_eo; 640 1.1 agc 641 1.1 agc error_exit: 642 1.1 agc tre_bt_mem_destroy(mem); 643 1.1 agc #ifndef TRE_USE_ALLOCA 644 1.1 agc if (tags) 645 1.1 agc xfree(tags); 646 1.1 agc if (pmatch) 647 1.1 agc xfree(pmatch); 648 1.1 agc if (states_seen) 649 1.1 agc xfree(states_seen); 650 1.1 agc #endif /* !TRE_USE_ALLOCA */ 651 1.1 agc 652 1.1 agc return ret; 653 1.1 agc } 654